feat(core): Improvements/overhaul for nodes working with binary data (#7651)

GitHub issue / Community forum post (link here to close automatically):

---------

Co-authored-by: Giulio Andreini <andreini@netseven.it>
Co-authored-by: Marcus <marcus@n8n.io>
This commit is contained in:
Michael Kret
2024-01-03 13:08:16 +02:00
committed by GitHub
parent 259323b97e
commit 5e16dd4ab4
119 changed files with 4477 additions and 1201 deletions

View File

@@ -0,0 +1,194 @@
import type { IBinaryData, IDataObject, IExecuteFunctions, INodeExecutionData } from 'n8n-workflow';
import { NodeOperationError, BINARY_ENCODING } from 'n8n-workflow';
import type { WorkBook, WritingOptions } from 'xlsx';
import { utils as xlsxUtils, write as xlsxWrite } from 'xlsx';
import { flattenObject } from '@utils/utilities';
import get from 'lodash/get';
import iconv from 'iconv-lite';
import { getDocument as readPDF, version as pdfJsVersion } from 'pdfjs-dist';
/**
 * File formats accepted as `fileFormat` by `convertJsonToSpreadsheetBinary`.
 * The value is passed to the `xlsx` writer as `bookType` and is also used as the
 * extension of the default file name.
 */
export type JsonToSpreadsheetBinaryFormat = 'csv' | 'html' | 'rtf' | 'ods' | 'xls' | 'xlsx';
/** Optional settings read by `convertJsonToSpreadsheetBinary`. */
export type JsonToSpreadsheetBinaryOptions = {
// When exactly `false`, the sheet is generated with `skipHeader: true`
headerRow?: boolean;
// Only honoured for the 'xlsx' and 'ods' formats; enables the writer's `compression` flag
compression?: boolean;
// Used as the file name when not undefined; otherwise `<defaultFileName>.<fileFormat>` is used
fileName?: string;
// Name of the single sheet in the workbook; falls back to 'Sheet' when falsy
sheetName?: string;
};
/** Optional settings read by `createBinaryFromJson`. */
export type JsonToBinaryOptions = {
// File name handed to `prepareBinaryData`
fileName?: string;
// Path (resolved with lodash `get`) of the value to convert; when falsy the whole input is used
sourceKey?: string;
// Encoding passed to `iconv.encode`; falls back to 'utf8' when falsy
encoding?: string;
// Forwarded to `iconv.encode` as its `addBOM` option
addBOM?: boolean;
// MIME type handed to `prepareBinaryData`; 'application/json' is used instead for object values
mimeType?: string;
// When truthy the value is decoded with BINARY_ENCODING instead of being encoded as text
dataIsBase64?: boolean;
// Item index attached to the error thrown when the value is undefined (defaults to 0)
itemIndex?: number;
};
// Types derived from the `pdfjs-dist` `getDocument` import (aliased as `readPDF`)

// Resolved value of the `promise` property on what `readPDF` returns
type PdfDocument = Awaited<ReturnType<Awaited<typeof readPDF>>['promise']>;
// Resolved value of `PdfDocument.getPage`
type PdfPage = Awaited<ReturnType<Awaited<PdfDocument['getPage']>>>;
// Resolved value of `PdfPage.getTextContent`; consumed by `parseText`
type PdfTextContent = Awaited<ReturnType<PdfPage['getTextContent']>>;
/**
 * Writes the JSON of the given items into a single-sheet workbook and returns it
 * as binary data.
 *
 * Every item's `json` is flattened with `flattenObject` and becomes one row of the
 * sheet produced by `xlsx`'s `json_to_sheet`.
 *
 * @param items - Items whose `json` payloads are turned into rows.
 * @param fileFormat - Book type for the `xlsx` writer; also the default file extension.
 * @param options - Header row, compression, file name and sheet name settings.
 * @param defaultFileName - Base name used when `options.fileName` is undefined.
 * @returns The value produced by `this.helpers.prepareBinaryData` for the written workbook.
 */
export async function convertJsonToSpreadsheetBinary(
	this: IExecuteFunctions,
	items: INodeExecutionData[],
	fileFormat: JsonToSpreadsheetBinaryFormat,
	options: JsonToSpreadsheetBinaryOptions,
	defaultFileName = 'spreadsheet',
): Promise<IBinaryData> {
	// One flattened row per incoming item
	const rows: IDataObject[] = [];
	for (const item of items) {
		rows.push(flattenObject(item.json));
	}

	// The header is only suppressed when headerRow is explicitly false
	const sheet = xlsxUtils.json_to_sheet(
		rows,
		options.headerRow === false ? { skipHeader: true } : undefined,
	);

	const writingOptions: WritingOptions = {
		bookType: fileFormat,
		bookSST: false,
		type: 'buffer',
	};

	// Compression is only requested for the xlsx and ods formats
	const isCompressibleFormat = fileFormat === 'xlsx' || fileFormat === 'ods';
	if (isCompressibleFormat && options.compression) {
		writingOptions.compression = true;
	}

	// Wrap the sheet in a workbook that contains nothing else
	const sheetName = (options.sheetName as string) || 'Sheet';
	const workbook: WorkBook = {
		SheetNames: [sheetName],
		Sheets: { [sheetName]: sheet },
	};

	const workbookBuffer: Buffer = xlsxWrite(workbook, writingOptions);

	const fileName =
		options.fileName === undefined ? `${defaultFileName}.${fileFormat}` : options.fileName;

	return await this.helpers.prepareBinaryData(workbookBuffer, fileName);
}
/**
 * Converts a JSON value (or a base64 string) into binary data.
 *
 * The value is taken from `data` at `options.sourceKey` when that key is set,
 * otherwise `data` itself is used. Unless `options.dataIsBase64` is truthy, objects
 * are serialised with `JSON.stringify` and the resulting text is encoded with
 * `iconv` using `options.encoding` (default 'utf8').
 *
 * The `options` object is only read, never modified, so callers can safely reuse
 * one options object across several calls.
 *
 * @param data - Source object or array to read the value from.
 * @param options - Conversion settings, see `JsonToBinaryOptions`.
 * @returns The prepared binary data; `fileName` falls back to `file.<extension>`
 *   (or `file` when no extension is known) if `prepareBinaryData` did not set one.
 * @throws NodeOperationError when the selected value is `undefined`.
 */
export async function createBinaryFromJson(
	this: IExecuteFunctions,
	data: IDataObject | IDataObject[],
	options: JsonToBinaryOptions,
): Promise<IBinaryData> {
	const value: unknown = options.sourceKey ? get(data, options.sourceKey) : data;

	if (value === undefined) {
		throw new NodeOperationError(this.getNode(), `The value in "${options.sourceKey}" is not set`, {
			itemIndex: options.itemIndex || 0,
		});
	}

	// Track the effective MIME type locally instead of writing it back into the
	// caller's options, which may be shared between items
	let mimeType = options.mimeType;

	let buffer: Buffer;
	if (options.dataIsBase64) {
		buffer = Buffer.from(value as string, BINARY_ENCODING);
	} else {
		let valueAsString = value as string;

		if (typeof value === 'object') {
			// Objects (including arrays) are stored as JSON text
			mimeType = 'application/json';
			valueAsString = JSON.stringify(value);
		}

		buffer = iconv.encode(valueAsString, options.encoding || 'utf8', {
			addBOM: options.addBOM,
		});
	}

	const binaryData = await this.helpers.prepareBinaryData(buffer, options.fileName, mimeType);

	if (!binaryData.fileName) {
		const fileExtension = binaryData.fileExtension ? `.${binaryData.fileExtension}` : '';
		binaryData.fileName = `file${fileExtension}`;
	}

	return binaryData;
}
/**
 * Joins the text items of a PDF page into one string.
 *
 * Items without a `str` property are skipped. A line break is inserted before an
 * item whenever its vertical position (`transform[5]`) differs from that of the
 * previous text item; items on the same vertical position are concatenated as is.
 *
 * @param textContent - Result of `getTextContent()` for a page.
 * @returns The page text, with "\n" between runs at different vertical positions.
 */
const parseText = (textContent: PdfTextContent) => {
	// Vertical position of the previous text item; undefined until the first one is seen.
	// Compared against undefined explicitly because 0 is a valid coordinate.
	let lastY: number | undefined;
	const text: string[] = [];

	for (const item of textContent.items) {
		if (!('str' in item)) continue;

		const currentY = item.transform[5] as number;
		const startsNewLine = lastY !== undefined && lastY !== currentY;

		text.push(startsNewLine ? `\n${item.str}` : item.str);
		lastY = currentY;
	}

	return text.join('');
};
/**
 * Reads a PDF from an item's binary property and extracts its text and metadata.
 *
 * When the binary data has an `id`, the PDF is loaded from the path returned by
 * `getBinaryPath` via a `file://` URL; otherwise the inline `data` string is
 * decoded with BINARY_ENCODING and passed to pdf.js as bytes.
 *
 * @param binaryPropertyName - Name of the binary property that holds the PDF.
 * @param password - Optional password forwarded to pdf.js.
 * @param maxPages - Maximum number of pages to read. `0` reads no pages; when
 *   omitted (or not smaller than the page count) all pages are read.
 * @param joinPages - When true, page texts are joined with a blank line into one
 *   string; otherwise an array with one string per page is returned as `text`.
 * @param itemIndex - Index of the item whose binary data is read.
 * @returns Page count (`numpages` and `numrender`), `info`, `metadata`, `text` and
 *   the pdf.js `version`. `info` is null and `metadata` undefined if reading the
 *   metadata fails.
 */
export async function extractDataFromPDF(
	this: IExecuteFunctions,
	binaryPropertyName: string,
	password?: string,
	maxPages?: number,
	joinPages = true,
	itemIndex = 0,
) {
	const binaryData = this.helpers.assertBinaryData(itemIndex, binaryPropertyName);

	const params: { password?: string; url?: URL; data?: ArrayBuffer } = { password };

	if (binaryData.id) {
		const binaryPath = this.helpers.getBinaryPath(binaryData.id);
		params.url = new URL(`file://${binaryPath}`);
	} else {
		// A Buffer may be a view into Node's shared allocation pool, in which case
		// `.buffer` would expose unrelated bytes. Copy into an exactly sized ArrayBuffer.
		const pdfBytes = Buffer.from(binaryData.data, BINARY_ENCODING);
		params.data = new Uint8Array(pdfBytes).buffer;
	}

	const document = await readPDF(params).promise;

	// Metadata is best effort: a failure here must not prevent text extraction
	const { info, metadata } = await document
		.getMetadata()
		.catch(() => ({ info: null, metadata: null }));

	const pages: string[] = [];
	if (maxPages !== 0) {
		const pagesToRead =
			maxPages && maxPages < document.numPages ? maxPages : document.numPages;

		// pdf.js page numbers start at 1
		for (let pageNumber = 1; pageNumber <= pagesToRead; pageNumber++) {
			const page = await document.getPage(pageNumber);
			pages.push(parseText(await page.getTextContent()));
		}
	}

	const text = joinPages ? pages.join('\n\n') : pages;

	const returnData = {
		numpages: document.numPages,
		numrender: document.numPages,
		info,
		metadata: metadata?.getAll(),
		text,
		version: pdfJsVersion,
	};

	return returnData;
}

View File

@@ -1,4 +1,4 @@
import type { INodeProperties } from 'n8n-workflow';
import type { INodeProperties, INodePropertyOptions } from 'n8n-workflow';
export const oldVersionNotice: INodeProperties = {
displayName:
@@ -32,3 +32,414 @@ export const returnAllOrLimit: INodeProperties[] = [
description: 'Max number of results to return',
},
];
/**
 * Option list of encoding identifiers; every entry uses the identifier as both
 * its `name` and its `value`.
 * NOTE(review): the identifiers look like iconv-lite encoding names; confirm against the nodes that use this list.
 */
export const encodeDecodeOptions: INodePropertyOptions[] = [
	'armscii8',
	'ascii',
	'base64',
	'big5hkscs',
	'binary',
	'cesu8',
	'cp1046',
	'cp1124',
	'cp1125',
	'cp1129',
	'cp1133',
	'cp1161',
	'cp1162',
	'cp1163',
	'cp437',
	'cp720',
	'cp737',
	'cp775',
	'cp808',
	'cp850',
	'cp852',
	'cp855',
	'cp856',
	'cp857',
	'cp858',
	'cp860',
	'cp861',
	'cp862',
	'cp863',
	'cp864',
	'cp865',
	'cp866',
	'cp869',
	'cp922',
	'cp936',
	'cp949',
	'cp950',
	'eucjp',
	'gb18030',
	'gbk',
	'georgianacademy',
	'georgianps',
	'hex',
	'hproman8',
	'iso646cn',
	'iso646jp',
	'iso88591',
	'iso885910',
	'iso885911',
	'iso885913',
	'iso885914',
	'iso885915',
	'iso885916',
	'iso88592',
	'iso88593',
	'iso88594',
	'iso88595',
	'iso88596',
	'iso88597',
	'iso88598',
	'iso88599',
	'koi8r',
	'koi8ru',
	'koi8t',
	'koi8u',
	'maccenteuro',
	'maccroatian',
	'maccyrillic',
	'macgreek',
	'maciceland',
	'macintosh',
	'macroman',
	'macromania',
	'macthai',
	'macturkish',
	'macukraine',
	'mik',
	'pt154',
	'rk1048',
	'shiftjis',
	'tcvn',
	'tis620',
	'ucs2',
	'utf16',
	'utf16be',
	'utf32',
	'utf32be',
	'utf32le',
	'utf7',
	'utf7imap',
	'utf8',
	'viscii',
	'windows1250',
	'windows1251',
	'windows1252',
	'windows1253',
	'windows1254',
	'windows1255',
	'windows1256',
	'windows1257',
	'windows1258',
	'windows874',
].map((encoding) => ({ name: encoding, value: encoding }));