mirror of
https://github.com/Abdulazizzn/n8n-enterprise-unlocked.git
synced 2025-12-17 01:56:46 +00:00
feat(core): Improvements/overhaul for nodes working with binary data (#7651)
Github issue / Community forum post (link here to close automatically): --------- Co-authored-by: Giulio Andreini <andreini@netseven.it> Co-authored-by: Marcus <marcus@n8n.io>
This commit is contained in:
194
packages/nodes-base/utils/binary.ts
Normal file
194
packages/nodes-base/utils/binary.ts
Normal file
@@ -0,0 +1,194 @@
|
||||
import { pathToFileURL } from 'url';

import type { IBinaryData, IDataObject, IExecuteFunctions, INodeExecutionData } from 'n8n-workflow';
import { NodeOperationError, BINARY_ENCODING } from 'n8n-workflow';

import type { WorkBook, WritingOptions } from 'xlsx';
import { utils as xlsxUtils, write as xlsxWrite } from 'xlsx';

import { flattenObject } from '@utils/utilities';

import get from 'lodash/get';
import iconv from 'iconv-lite';

import { getDocument as readPDF, version as pdfJsVersion } from 'pdfjs-dist';
||||
// Spreadsheet formats supported for JSON-to-binary conversion (xlsx bookType values).
export type JsonToSpreadsheetBinaryFormat = 'csv' | 'html' | 'rtf' | 'ods' | 'xls' | 'xlsx';

// Options for converting JSON items into a spreadsheet file.
export type JsonToSpreadsheetBinaryOptions = {
	// When false, rows are written without a header row (xlsx skipHeader).
	headerRow?: boolean;
	// Enable output compression; only honored for 'xlsx' and 'ods'.
	compression?: boolean;
	fileName?: string;
	sheetName?: string;
};

// Options for converting a JSON value into a binary file.
export type JsonToBinaryOptions = {
	fileName?: string;
	// Dot-path into the item data to read the value from (resolved via lodash get).
	sourceKey?: string;
	// Text encoding passed to iconv when the value is not base64 (defaults to utf8).
	encoding?: string;
	// Prepend a byte-order mark when encoding text.
	addBOM?: boolean;
	mimeType?: string;
	// When true, the value already holds BINARY_ENCODING (base64) file content.
	dataIsBase64?: boolean;
	// Index of the item being processed, used for error reporting.
	itemIndex?: number;
};

// Helper types derived from the pdfjs-dist getDocument() API surface.
type PdfDocument = Awaited<ReturnType<Awaited<typeof readPDF>>['promise']>;
type PdfPage = Awaited<ReturnType<Awaited<PdfDocument['getPage']>>>;
type PdfTextContent = Awaited<ReturnType<PdfPage['getTextContent']>>;
export async function convertJsonToSpreadsheetBinary(
|
||||
this: IExecuteFunctions,
|
||||
items: INodeExecutionData[],
|
||||
fileFormat: JsonToSpreadsheetBinaryFormat,
|
||||
options: JsonToSpreadsheetBinaryOptions,
|
||||
defaultFileName = 'spreadsheet',
|
||||
): Promise<IBinaryData> {
|
||||
const itemData: IDataObject[] = [];
|
||||
for (let itemIndex = 0; itemIndex < items.length; itemIndex++) {
|
||||
itemData.push(flattenObject(items[itemIndex].json));
|
||||
}
|
||||
|
||||
let sheetToJsonOptions;
|
||||
if (options.headerRow === false) {
|
||||
sheetToJsonOptions = { skipHeader: true };
|
||||
}
|
||||
|
||||
const sheet = xlsxUtils.json_to_sheet(itemData, sheetToJsonOptions);
|
||||
|
||||
const writingOptions: WritingOptions = {
|
||||
bookType: fileFormat,
|
||||
bookSST: false,
|
||||
type: 'buffer',
|
||||
};
|
||||
|
||||
if (['xlsx', 'ods'].includes(fileFormat) && options.compression) {
|
||||
writingOptions.compression = true;
|
||||
}
|
||||
|
||||
// Convert the data in the correct format
|
||||
const sheetName = (options.sheetName as string) || 'Sheet';
|
||||
const workbook: WorkBook = {
|
||||
SheetNames: [sheetName],
|
||||
Sheets: {
|
||||
[sheetName]: sheet,
|
||||
},
|
||||
};
|
||||
|
||||
const buffer: Buffer = xlsxWrite(workbook, writingOptions);
|
||||
const fileName =
|
||||
options.fileName !== undefined ? options.fileName : `${defaultFileName}.${fileFormat}`;
|
||||
const binaryData = await this.helpers.prepareBinaryData(buffer, fileName);
|
||||
|
||||
return binaryData;
|
||||
}
|
||||
|
||||
export async function createBinaryFromJson(
|
||||
this: IExecuteFunctions,
|
||||
data: IDataObject | IDataObject[],
|
||||
options: JsonToBinaryOptions,
|
||||
): Promise<IBinaryData> {
|
||||
let value;
|
||||
if (options.sourceKey) {
|
||||
value = get(data, options.sourceKey) as IDataObject;
|
||||
} else {
|
||||
value = data;
|
||||
}
|
||||
|
||||
if (value === undefined) {
|
||||
throw new NodeOperationError(this.getNode(), `The value in "${options.sourceKey}" is not set`, {
|
||||
itemIndex: options.itemIndex || 0,
|
||||
});
|
||||
}
|
||||
|
||||
let buffer: Buffer;
|
||||
if (!options.dataIsBase64) {
|
||||
let valueAsString = value as unknown as string;
|
||||
|
||||
if (typeof value === 'object') {
|
||||
options.mimeType = 'application/json';
|
||||
valueAsString = JSON.stringify(value);
|
||||
}
|
||||
|
||||
buffer = iconv.encode(valueAsString, options.encoding || 'utf8', {
|
||||
addBOM: options.addBOM,
|
||||
});
|
||||
} else {
|
||||
buffer = Buffer.from(value as unknown as string, BINARY_ENCODING);
|
||||
}
|
||||
|
||||
const binaryData = await this.helpers.prepareBinaryData(
|
||||
buffer,
|
||||
options.fileName,
|
||||
options.mimeType,
|
||||
);
|
||||
|
||||
if (!binaryData.fileName) {
|
||||
const fileExtension = binaryData.fileExtension ? `.${binaryData.fileExtension}` : '';
|
||||
binaryData.fileName = `file${fileExtension}`;
|
||||
}
|
||||
|
||||
return binaryData;
|
||||
}
|
||||
|
||||
const parseText = (textContent: PdfTextContent) => {
|
||||
let lastY = undefined;
|
||||
const text = [];
|
||||
for (const item of textContent.items) {
|
||||
if ('str' in item) {
|
||||
if (lastY == item.transform[5] || !lastY) {
|
||||
text.push(item.str);
|
||||
} else {
|
||||
text.push(`\n${item.str}`);
|
||||
}
|
||||
lastY = item.transform[5];
|
||||
}
|
||||
}
|
||||
return text.join('');
|
||||
};
|
||||
|
||||
export async function extractDataFromPDF(
|
||||
this: IExecuteFunctions,
|
||||
binaryPropertyName: string,
|
||||
password?: string,
|
||||
maxPages?: number,
|
||||
joinPages = true,
|
||||
itemIndex = 0,
|
||||
) {
|
||||
const binaryData = this.helpers.assertBinaryData(itemIndex, binaryPropertyName);
|
||||
|
||||
const params: { password?: string; url?: URL; data?: ArrayBuffer } = { password };
|
||||
|
||||
if (binaryData.id) {
|
||||
const binaryPath = this.helpers.getBinaryPath(binaryData.id);
|
||||
params.url = new URL(`file://${binaryPath}`);
|
||||
} else {
|
||||
params.data = Buffer.from(binaryData.data, BINARY_ENCODING).buffer;
|
||||
}
|
||||
|
||||
const document = await readPDF(params).promise;
|
||||
const { info, metadata } = await document
|
||||
.getMetadata()
|
||||
.catch(() => ({ info: null, metadata: null }));
|
||||
|
||||
const pages = [];
|
||||
if (maxPages !== 0) {
|
||||
let pagesToRead = document.numPages;
|
||||
if (maxPages && maxPages < document.numPages) {
|
||||
pagesToRead = maxPages;
|
||||
}
|
||||
for (let i = 1; i <= pagesToRead; i++) {
|
||||
const page = await document.getPage(i);
|
||||
const text = await page.getTextContent().then(parseText);
|
||||
pages.push(text);
|
||||
}
|
||||
}
|
||||
|
||||
const text = joinPages ? pages.join('\n\n') : pages;
|
||||
|
||||
const returnData = {
|
||||
numpages: document.numPages,
|
||||
numrender: document.numPages,
|
||||
info,
|
||||
metadata: metadata?.getAll(),
|
||||
text,
|
||||
version: pdfJsVersion,
|
||||
};
|
||||
|
||||
return returnData;
|
||||
}
|
||||
Reference in New Issue
Block a user