feat(core): Improvements/overhaul for nodes working with binary data (#7651)

Github issue / Community forum post (link here to close automatically):

---------

Co-authored-by: Giulio Andreini <andreini@netseven.it>
Co-authored-by: Marcus <marcus@n8n.io>
This commit is contained in:
Michael Kret
2024-01-03 13:08:16 +02:00
committed by GitHub
parent 259323b97e
commit 5e16dd4ab4
119 changed files with 4477 additions and 1201 deletions

View File

@@ -0,0 +1,39 @@
{
"node": "n8n-nodes-base.extractFromFile",
"nodeVersion": "1.0",
"codexVersion": "1.0",
"categories": ["Core Nodes"],
"resources": {
"primaryDocumentation": [
{
"url": "https://docs.n8n.io/integrations/builtin/core-nodes/n8n-nodes-base.extractfromfile/"
}
]
},
"alias": [
"CSV",
"Spreadsheet",
"Excel",
"xls",
"xlsx",
"ods",
"tabular",
"decode",
"decoding",
"Move Binary Data",
"Binary",
"File",
"PDF",
"JSON",
"HTML",
"ICS",
"txt",
"Text",
"RTF",
"XML",
"64"
],
"subcategories": {
"Core Nodes": ["Files", "Data Transformation"]
}
}

View File

@@ -0,0 +1,134 @@
import type {
IExecuteFunctions,
INodeExecutionData,
INodeType,
INodeTypeDescription,
} from 'n8n-workflow';
import * as spreadsheet from './actions/spreadsheet.operation';
import * as moveTo from './actions/moveTo.operation';
import * as pdf from './actions/pdf.operation';
/**
 * "Extract From File" node: converts the binary file attached to each input
 * item into JSON output.
 *
 * The `operation` parameter selects the handler module that processes the items:
 * - `spreadsheet` for every operation listed in `spreadsheet.operations`,
 * - `moveTo` for `binaryToPropery`, `fromJson`, `text`, `fromIcs` and `xml`,
 * - `pdf` for `pdf`.
 */
export class ExtractFromFile implements INodeType {
  // eslint-disable-next-line n8n-nodes-base/node-class-description-missing-subtitle
  description: INodeTypeDescription = {
    displayName: 'Extract From File',
    name: 'extractFromFile',
    icon: 'file:extractFromFile.svg',
    group: ['input'],
    version: 1,
    description: 'Convert binary data to JSON',
    defaults: {
      name: 'Extract From File',
    },
    inputs: ['main'],
    outputs: ['main'],
    properties: [
      {
        displayName: 'Operation',
        name: 'operation',
        type: 'options',
        noDataExpression: true,
        // eslint-disable-next-line n8n-nodes-base/node-param-options-type-unsorted-items
        options: [
          {
            name: 'Extract From CSV',
            value: 'csv',
            action: 'Extract from CSV',
            description: 'Transform a CSV file into output items',
          },
          {
            name: 'Extract From HTML',
            value: 'html',
            action: 'Extract from HTML',
            description: 'Transform a table in an HTML file into output items',
          },
          {
            name: 'Extract From JSON',
            value: 'fromJson',
            action: 'Extract from JSON',
            description: 'Transform a JSON file into output items',
          },
          {
            name: 'Extract From ICS',
            value: 'fromIcs',
            action: 'Extract from ICS',
            description: 'Transform an ICS file into output items',
          },
          {
            name: 'Extract From ODS',
            value: 'ods',
            action: 'Extract from ODS',
            description: 'Transform an ODS file into output items',
          },
          {
            name: 'Extract From PDF',
            value: 'pdf',
            action: 'Extract from PDF',
            description: 'Extracts the content and metadata from a PDF file',
          },
          {
            name: 'Extract From RTF',
            value: 'rtf',
            action: 'Extract from RTF',
            description: 'Transform a table in an RTF file into output items',
          },
          {
            name: 'Extract From Text File',
            value: 'text',
            action: 'Extract from text file',
            description: 'Extracts the content of a text file',
          },
          {
            name: 'Extract From XML',
            value: 'xml',
            action: 'Extract from XML',
            description: 'Extracts the content of an XML file',
          },
          {
            name: 'Extract From XLS',
            value: 'xls',
            action: 'Extract from XLS',
            description: 'Transform an Excel file into output items',
          },
          {
            name: 'Extract From XLSX',
            value: 'xlsx',
            action: 'Extract from XLSX',
            description: 'Transform an Excel file into output items',
          },
          {
            name: 'Move File to Base64 String',
            // NOTE: the misspelling ("Propery") is the stored operation value and is
            // matched verbatim in execute(); it must not be "corrected" here.
            value: 'binaryToPropery',
            action: 'Move file to base64 string',
            description: 'Convert a file into a base64-encoded string',
          },
        ],
        default: 'csv',
      },
      ...spreadsheet.description,
      ...moveTo.description,
      ...pdf.description,
    ],
  };

  /**
   * Hands all input items to the handler that matches the `operation`
   * parameter (read once, for item 0) and returns the handler's items on the
   * node's single output. When no handler matches, the output is empty.
   */
  async execute(this: IExecuteFunctions) {
    const items = this.getInputData();
    const operation = this.getNodeParameter('operation', 0);

    let returnData: INodeExecutionData[] = [];

    if (spreadsheet.operations.includes(operation)) {
      // The spreadsheet handler is given the NAME of the parameter that holds
      // the file format ('operation'), not the selected value itself.
      returnData = await spreadsheet.execute.call(this, items, 'operation');
    }

    if (['binaryToPropery', 'fromJson', 'text', 'fromIcs', 'xml'].includes(operation)) {
      returnData = await moveTo.execute.call(this, items, operation);
    }

    if (operation === 'pdf') {
      returnData = await pdf.execute.call(this, items);
    }

    return [returnData];
  }
}

View File

@@ -0,0 +1,192 @@
import type {
IDataObject,
IExecuteFunctions,
INodeExecutionData,
INodeProperties,
} from 'n8n-workflow';
import { BINARY_ENCODING, NodeOperationError, deepCopy, jsonParse } from 'n8n-workflow';
import { encodeDecodeOptions } from '@utils/descriptions';
import { updateDisplayOptions } from '@utils/utilities';
import get from 'lodash/get';
import set from 'lodash/set';
import unset from 'lodash/unset';
import iconv from 'iconv-lite';
import { icsCalendarToObject } from 'ts-ics';
/**
 * Parameters shown for the text-like conversions: the binary field to read,
 * the JSON field to write the result to, and optional decoding settings.
 */
export const properties: INodeProperties[] = [
  // Which binary field of the input item holds the file
  {
    displayName: 'Input Binary Field',
    name: 'binaryPropertyName',
    type: 'string',
    default: 'data',
    required: true,
    placeholder: 'e.g data',
    hint: 'The name of the input field containing the file data to be processed',
  },
  // Where in the output JSON the converted value is stored
  {
    displayName: 'Destination Output Field',
    name: 'destinationKey',
    type: 'string',
    default: 'data',
    required: true,
    placeholder: 'e.g data',
    description: 'The name of the output field that will contain the extracted data',
  },
  {
    displayName: 'Options',
    name: 'options',
    type: 'collection',
    placeholder: 'Add Option',
    default: {},
    options: [
      {
        displayName: 'File Encoding',
        name: 'encoding',
        type: 'options',
        options: encodeDecodeOptions,
        default: 'utf8',
        description: 'Specify the encoding of the file, defaults to UTF-8',
      },
      // Only displayed for the encodings listed under `show.encoding`
      {
        displayName: 'Strip BOM',
        name: 'stripBOM',
        displayOptions: { show: { encoding: ['utf8', 'cesu8', 'ucs2'] } },
        type: 'boolean',
        default: true,
        description:
          'Whether to strip the BOM (Byte Order Mark) from the file, this could help in an environment where the presence of the BOM is causing issues or inconsistencies',
      },
      // Which parts of the input item are carried over to the output item
      {
        displayName: 'Keep Source',
        name: 'keepSource',
        type: 'options',
        default: 'json',
        options: [
          { name: 'JSON', value: 'json', description: 'Include JSON data of the input item' },
          { name: 'Binary', value: 'binary', description: 'Include binary data of the input item' },
          {
            name: 'Both',
            value: 'both',
            description: 'Include both JSON and binary data of the input item',
          },
        ],
      },
    ],
  },
];
// Operations for which the parameters of this module are displayed.
const displayOptions = {
  show: { operation: ['binaryToPropery', 'fromJson', 'text', 'fromIcs', 'xml'] },
};

/** `properties` with the display conditions above applied via `updateDisplayOptions`. */
export const description = updateDisplayOptions(displayOptions, properties);
/**
 * Converts the binary file of each input item into a JSON value and stores it
 * under the configured destination field.
 *
 * Per item:
 * 1. Items that do not carry the requested binary field are skipped (they
 *    produce no output item).
 * 2. For `binaryToPropery` the raw bytes are stringified with `BINARY_ENCODING`;
 *    for every other operation they are decoded to text with iconv using the
 *    `encoding` option (falling back to 'utf8').
 * 3. `fromJson` parses the text (an empty file yields `{}`); `fromIcs` converts
 *    the text with `icsCalendarToObject`.
 * 4. The `keepSource` option decides whether the input JSON and/or the
 *    consumed binary field are carried over to the output item.
 *
 * @param items - Input items to process.
 * @param operation - Selected operation value (`binaryToPropery`, `fromJson`,
 *   `text`, `fromIcs` or `xml`).
 * @returns One output item per processed input item; with "continue on fail"
 *   enabled, failed items are represented by `{ error: message }`.
 * @throws NodeOperationError when an item fails and "continue on fail" is off.
 */
export async function execute(
  this: IExecuteFunctions,
  items: INodeExecutionData[],
  operation: string,
) {
  const returnData: INodeExecutionData[] = [];

  for (let itemIndex = 0; itemIndex < items.length; itemIndex++) {
    try {
      const item = items[itemIndex];
      const options = this.getNodeParameter('options', itemIndex);
      const binaryPropertyName = this.getNodeParameter('binaryPropertyName', itemIndex);

      const newItem: INodeExecutionData = {
        json: {},
        pairedItem: { item: itemIndex },
      };

      const value = get(item.binary, binaryPropertyName);

      // Nothing to convert for this item
      if (!value) continue;

      const encoding = (options.encoding as string) || 'utf8';
      const buffer = await this.helpers.getBinaryDataBuffer(itemIndex, binaryPropertyName);

      // The input JSON is kept only when keepSource is explicitly 'json' or 'both'
      if (options.keepSource && options.keepSource !== 'binary') {
        newItem.json = deepCopy(item.json);
      }

      let convertedValue: string | IDataObject;
      if (operation !== 'binaryToPropery') {
        convertedValue = iconv.decode(buffer, encoding, {
          stripBOM: options.stripBOM as boolean,
        });
      } else {
        // Stringify the buffer directly; copying it first would only duplicate
        // the whole file in memory.
        convertedValue = buffer.toString(BINARY_ENCODING);
      }

      if (operation === 'fromJson') {
        if (convertedValue === '') {
          convertedValue = {};
        } else {
          convertedValue = jsonParse(convertedValue);
        }
      }

      if (operation === 'fromIcs') {
        convertedValue = icsCalendarToObject(convertedValue as string);
      }

      const destinationKey = this.getNodeParameter('destinationKey', itemIndex, '') as string;
      set(newItem.json, destinationKey, convertedValue);

      if (options.keepSource === 'binary' || options.keepSource === 'both') {
        newItem.binary = item.binary;
      } else {
        // this binary data would not be included, but there also might be other binary data
        // which should be included, copy it over and unset current binary data
        newItem.binary = deepCopy(item.binary);
        unset(newItem.binary, binaryPropertyName);
      }

      returnData.push(newItem);
    } catch (error) {
      let errorDescription;

      // Relabel the failure as "not JSON" only when JSON was actually being
      // parsed; other operations must keep their own error message. The type
      // guard keeps a non-string `message` from throwing inside this handler,
      // which would bypass the continue-on-fail handling below.
      const isJsonParseFailure =
        operation === 'fromJson' &&
        Boolean(error) &&
        typeof error.message === 'string' &&
        error.message.includes('Unexpected token');

      if (isJsonParseFailure) {
        error.message = "The file selected in 'Input Binary Field' is not in JSON format";
        errorDescription =
          "Try to change the operation or select a JSON file in 'Input Binary Field'";
      }

      if (this.continueOnFail()) {
        returnData.push({
          json: {
            error: error.message,
          },
          pairedItem: {
            item: itemIndex,
          },
        });
        continue;
      }

      throw new NodeOperationError(this.getNode(), error, {
        itemIndex,
        description: errorDescription,
      });
    }
  }

  return returnData;
}

View File

@@ -0,0 +1,141 @@
import type { IExecuteFunctions, INodeExecutionData, INodeProperties } from 'n8n-workflow';
import { NodeOperationError, deepCopy } from 'n8n-workflow';
import unset from 'lodash/unset';
import { extractDataFromPDF } from '@utils/binary';
import { updateDisplayOptions } from '@utils/utilities';
/**
 * Parameters shown for the PDF operation: the binary field holding the PDF
 * and optional extraction settings.
 */
export const properties: INodeProperties[] = [
  {
    displayName: 'Input Binary Field',
    name: 'binaryPropertyName',
    type: 'string',
    default: 'data',
    required: true,
    placeholder: 'e.g data',
    hint: 'The name of the input binary field containing the file to be extracted',
  },
  {
    displayName: 'Options',
    name: 'options',
    type: 'collection',
    placeholder: 'Add Option',
    default: {},
    options: [
      {
        displayName: 'Join Pages',
        name: 'joinPages',
        type: 'boolean',
        default: true,
        description:
          'Whether to join the text from all pages or return an array of text from each page',
      },
      // Which parts of the input item are carried over to the output item
      {
        displayName: 'Keep Source',
        name: 'keepSource',
        type: 'options',
        default: 'json',
        options: [
          {
            name: 'JSON',
            value: 'json',
            description: 'Include JSON data of the input item',
          },
          {
            name: 'Binary',
            value: 'binary',
            description: 'Include binary data of the input item',
          },
          {
            name: 'Both',
            value: 'both',
            description: 'Include both JSON and binary data of the input item',
          },
        ],
      },
      {
        displayName: 'Max Pages',
        name: 'maxPages',
        type: 'number',
        default: 0,
        description: 'Maximum number of pages to include',
      },
      {
        displayName: 'Password',
        name: 'password',
        type: 'string',
        typeOptions: { password: true },
        default: '',
        description: 'Provide password, if the PDF is encrypted',
      },
    ],
  },
];
// The parameters of this module are displayed only for the 'pdf' operation.
const displayOptions = { show: { operation: ['pdf'] } };

/** `properties` with the display condition above applied via `updateDisplayOptions`. */
export const description = updateDisplayOptions(displayOptions, properties);
/**
 * Runs `extractDataFromPDF` for every input item and builds one output item
 * from the result.
 *
 * The `keepSource` option controls what is carried over from the input item:
 * 'json' or 'both' merges the input JSON underneath the extracted data,
 * 'binary' or 'both' keeps the input binary data untouched; otherwise the
 * consumed binary field is removed from a copy of the binary data.
 *
 * @param items - Input items to process.
 * @returns One output item per input item; with "continue on fail" enabled,
 *   failed items are represented by `{ error: message }`.
 * @throws NodeOperationError when an item fails and "continue on fail" is off.
 */
export async function execute(this: IExecuteFunctions, items: INodeExecutionData[]) {
  const results: INodeExecutionData[] = [];

  for (let index = 0; index < items.length; index += 1) {
    try {
      const source = items[index];
      const options = this.getNodeParameter('options', index);
      const binaryPropertyName = this.getNodeParameter('binaryPropertyName', index);

      const extracted = await extractDataFromPDF.call(
        this,
        binaryPropertyName,
        options.password as string,
        options.maxPages as number,
        options.joinPages as boolean,
        index,
      );

      const { keepSource } = options;
      const includeSourceJson = Boolean(keepSource) && keepSource !== 'binary';
      const includeSourceBinary = keepSource === 'binary' || keepSource === 'both';

      const output: INodeExecutionData = {
        json: {},
        pairedItem: { item: index },
      };

      // Extracted fields win over same-named fields of the input JSON
      output.json = includeSourceJson ? { ...deepCopy(source.json), ...extracted } : extracted;

      if (includeSourceBinary) {
        output.binary = source.binary;
      } else {
        // Drop only the consumed PDF field; other binary fields of the item
        // are copied over so they stay available downstream.
        output.binary = deepCopy(source.binary);
        unset(output.binary, binaryPropertyName);
      }

      results.push(output);
    } catch (error) {
      if (!this.continueOnFail()) {
        throw new NodeOperationError(this.getNode(), error, { itemIndex: index });
      }

      results.push({
        json: {
          error: error.message,
        },
        pairedItem: {
          item: index,
        },
      });
    }
  }

  return results;
}

View File

@@ -0,0 +1,59 @@
import type { IExecuteFunctions, INodeExecutionData, INodeProperties } from 'n8n-workflow';
import * as fromFile from '../../../SpreadsheetFile/v2/fromFile.operation';
/** Operation values that are handled by the spreadsheet reader. */
export const operations = ['csv', 'html', 'rtf', 'ods', 'xls', 'xlsx'];

/**
 * Parameter definitions reused from the Spreadsheet File node's "from file"
 * operation, adapted for this node:
 * - the `fileFormat` parameter is dropped,
 * - every remaining parameter is displayed for all `operations`,
 * - selected entries of the `options` collection are limited to the
 *   operations they apply to (matched against the root-level `/operation`).
 */
export const description: INodeProperties[] = fromFile.description
  .filter(({ name }) => name !== 'fileFormat')
  .map((property) => {
    const adapted: INodeProperties = { ...property };
    adapted.displayOptions = { show: { operation: operations } };

    if (adapted.name !== 'options') {
      return adapted;
    }

    adapted.options = (adapted.options as INodeProperties[]).map((option): INodeProperties => {
      switch (option.name) {
        case 'delimiter':
        case 'fromLine':
        case 'maxRowCount':
        case 'enableBOM':
          return { ...option, displayOptions: { show: { '/operation': ['csv'] } } };

        case 'sheetName':
          return {
            ...option,
            displayOptions: { show: { '/operation': ['ods', 'xls', 'xlsx'] } },
            description: 'Name of the sheet to read from in the spreadsheet',
          };

        case 'range':
          return {
            ...option,
            displayOptions: { show: { '/operation': ['ods', 'xls', 'xlsx'] } },
          };

        case 'includeEmptyCells':
        case 'headerRow':
          return {
            ...option,
            displayOptions: { show: { '/operation': ['ods', 'xls', 'xlsx', 'csv', 'html'] } },
          };

        default:
          // Options without a restriction are passed through as-is
          return option;
      }
    });

    return adapted;
  });
/**
 * Delegates to the Spreadsheet File node's "from file" implementation.
 *
 * @param items - Input items to process.
 * @param fileFormatProperty - Forwarded unchanged to `fromFile.execute`;
 *   presumably the name of the node parameter holding the file format
 *   (the caller in this node passes 'operation') — confirm against `fromFile`.
 * @returns The items produced by `fromFile.execute`.
 */
export async function execute(
  this: IExecuteFunctions,
  items: INodeExecutionData[],
  fileFormatProperty: string,
): Promise<INodeExecutionData[]> {
  return await fromFile.execute.call(this, items, fileFormatProperty);
}

View File

@@ -0,0 +1,5 @@
<svg width="512" height="512" viewBox="0 0 512 512" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M0 39.8158C0 33.2901 5.28667 28 11.8081 28H168.41V189.704C168.41 196.331 173.782 201.704 180.41 201.704H342L342 287H268C243.699 287 224 306.699 224 331V355C224 379.301 243.699 399 268 399L342 399L342 473.184C342 479.71 336.713 485 330.192 485H11.8081C5.28667 485 0 479.71 0 473.184V39.8158Z" fill="#003355"/>
<path d="M199.898 34C199.898 30.6863 202.584 28 205.898 28H208.564C211.7 28 214.708 29.2487 216.923 31.4707L338.551 153.468C340.76 155.683 342 158.684 342 161.813L342 164.195C342 167.509 339.314 170.195 336 170.195H205.898C202.584 170.195 199.898 167.509 199.898 164.195V34Z" fill="#003355"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M381.077 415.982C371.678 425.329 371.636 440.525 380.982 449.923C390.329 459.322 405.525 459.364 414.923 450.018L504.923 360.518C509.453 356.013 512 349.888 512 343.5C512 337.112 509.453 330.987 504.923 326.482L414.923 236.982C405.525 227.636 390.329 227.678 380.982 237.077C371.636 246.475 371.678 261.671 381.077 271.018L429.327 319L268 319C261.373 319 256 324.373 256 331L256 355C256 361.627 261.373 367 268 367L430.333 367L381.077 415.982Z" fill="#003355"/>
</svg>

After

Width:  |  Height:  |  Size: 1.2 KiB