mirror of
https://github.com/Abdulazizzn/n8n-enterprise-unlocked.git
synced 2025-12-17 10:02:05 +00:00
feat(core): Improvements/overhaul for nodes working with binary data (#7651)
Github issue / Community forum post (link here to close automatically): --------- Co-authored-by: Giulio Andreini <andreini@netseven.it> Co-authored-by: Marcus <marcus@n8n.io>
This commit is contained in:
@@ -1,35 +1,15 @@
|
||||
import {
|
||||
BINARY_ENCODING,
|
||||
NodeOperationError,
|
||||
type IExecuteFunctions,
|
||||
type INodeExecutionData,
|
||||
type INodeType,
|
||||
type INodeTypeDescription,
|
||||
} from 'n8n-workflow';
|
||||
|
||||
import { getDocument as readPDF, version as pdfJsVersion } from 'pdfjs-dist';
|
||||
|
||||
type Document = Awaited<ReturnType<Awaited<typeof readPDF>>['promise']>;
|
||||
type Page = Awaited<ReturnType<Awaited<Document['getPage']>>>;
|
||||
type TextContent = Awaited<ReturnType<Page['getTextContent']>>;
|
||||
|
||||
const parseText = (textContent: TextContent) => {
|
||||
let lastY = undefined;
|
||||
const text = [];
|
||||
for (const item of textContent.items) {
|
||||
if ('str' in item) {
|
||||
if (lastY == item.transform[5] || !lastY) {
|
||||
text.push(item.str);
|
||||
} else {
|
||||
text.push(`\n${item.str}`);
|
||||
}
|
||||
lastY = item.transform[5];
|
||||
}
|
||||
}
|
||||
return text.join('');
|
||||
};
|
||||
import { extractDataFromPDF } from '@utils/binary';
|
||||
|
||||
export class ReadPDF implements INodeType {
|
||||
description: INodeTypeDescription = {
|
||||
hidden: true,
|
||||
displayName: 'Read PDF',
|
||||
// eslint-disable-next-line n8n-nodes-base/node-class-description-name-miscased
|
||||
name: 'readPDF',
|
||||
@@ -45,7 +25,7 @@ export class ReadPDF implements INodeType {
|
||||
outputs: ['main'],
|
||||
properties: [
|
||||
{
|
||||
displayName: 'Binary Property',
|
||||
displayName: 'Input Binary Field',
|
||||
name: 'binaryPropertyName',
|
||||
type: 'string',
|
||||
default: 'data',
|
||||
@@ -84,46 +64,26 @@ export class ReadPDF implements INodeType {
|
||||
for (let itemIndex = 0; itemIndex < length; itemIndex++) {
|
||||
try {
|
||||
const binaryPropertyName = this.getNodeParameter('binaryPropertyName', itemIndex);
|
||||
const binaryData = this.helpers.assertBinaryData(itemIndex, binaryPropertyName);
|
||||
|
||||
const params: { password?: string; url?: URL; data?: ArrayBuffer } = {};
|
||||
|
||||
let password;
|
||||
if (this.getNodeParameter('encrypted', itemIndex) === true) {
|
||||
params.password = this.getNodeParameter('password', itemIndex) as string;
|
||||
password = this.getNodeParameter('password', itemIndex) as string;
|
||||
}
|
||||
|
||||
if (binaryData.id) {
|
||||
const binaryPath = this.helpers.getBinaryPath(binaryData.id);
|
||||
params.url = new URL(`file://${binaryPath}`);
|
||||
} else {
|
||||
params.data = Buffer.from(binaryData.data, BINARY_ENCODING).buffer;
|
||||
}
|
||||
|
||||
const document = await readPDF(params).promise;
|
||||
const { info, metadata } = await document
|
||||
.getMetadata()
|
||||
.catch(() => ({ info: null, metadata: null }));
|
||||
|
||||
const pages = [];
|
||||
for (let i = 1; i <= document.numPages; i++) {
|
||||
const page = await document.getPage(i);
|
||||
const text = await page.getTextContent().then(parseText);
|
||||
pages.push(text);
|
||||
}
|
||||
const json = await extractDataFromPDF.call(
|
||||
this,
|
||||
binaryPropertyName,
|
||||
password,
|
||||
undefined,
|
||||
undefined,
|
||||
itemIndex,
|
||||
);
|
||||
|
||||
returnData.push({
|
||||
binary: items[itemIndex].binary,
|
||||
json: {
|
||||
numpages: document.numPages,
|
||||
numrender: document.numPages,
|
||||
info,
|
||||
metadata: metadata?.getAll(),
|
||||
text: pages.join('\n\n'),
|
||||
version: pdfJsVersion,
|
||||
},
|
||||
json,
|
||||
});
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
if (this.continueOnFail()) {
|
||||
returnData.push({
|
||||
json: {
|
||||
@@ -135,7 +95,7 @@ export class ReadPDF implements INodeType {
|
||||
});
|
||||
continue;
|
||||
}
|
||||
throw error;
|
||||
throw new NodeOperationError(this.getNode(), error, { itemIndex });
|
||||
}
|
||||
}
|
||||
return [returnData];
|
||||
|
||||
Reference in New Issue
Block a user