chore: Upgrade pdfjs-dist (no-changelog) (#15729)

This commit is contained in:
कारतोफ्फेलस्क्रिप्ट™
2025-06-04 18:31:22 +02:00
committed by GitHub
parent d64f2e57f7
commit b772462cea
8 changed files with 187 additions and 36 deletions

View File

@@ -57,7 +57,7 @@
"Title": "sample"
},
"text": "N8N\nSample PDF\nLorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor\ninvidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et\njusto duo dolores et ea rebum.",
"version": "2.16.105"
"version": "5.3.31"
}
}
]

View File

@@ -54,7 +54,7 @@
"ModDate": "D:20230210122750Z"
},
"text": "N8N\nSample PDF\nLorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor\ninvidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et\njusto duo dolores et ea rebum.",
"version": "2.16.105"
"version": "5.3.31"
}
}
]

View File

@@ -924,7 +924,7 @@
"node-ssh": "13.2.0",
"nodemailer": "6.9.9",
"otpauth": "9.1.1",
"pdfjs-dist": "2.16.105",
"pdfjs-dist": "5.3.31",
"pg": "8.12.0",
"pg-promise": "11.9.1",
"promise-ftp": "1.3.5",

View File

@@ -2,8 +2,7 @@ import iconv from 'iconv-lite';
import get from 'lodash/get';
import type { IBinaryData, IDataObject, IExecuteFunctions, INodeExecutionData } from 'n8n-workflow';
import { NodeOperationError, BINARY_ENCODING } from 'n8n-workflow';
import { getDocument as readPDF, version as pdfJsVersion } from 'pdfjs-dist';
import type { DocumentInitParameters } from 'pdfjs-dist/types/src/display/api';
import type { TextContent as PdfTextContent } from 'pdfjs-dist/types/src/display/api';
import type { WorkBook, WritingOptions } from 'xlsx';
import { utils as xlsxUtils, write as xlsxWrite } from 'xlsx';
@@ -30,10 +29,6 @@ export type JsonToBinaryOptions = {
format?: boolean;
};
type PdfDocument = Awaited<ReturnType<Awaited<typeof readPDF>>['promise']>;
type PdfPage = Awaited<ReturnType<Awaited<PdfDocument['getPage']>>>;
type PdfTextContent = Awaited<ReturnType<PdfPage['getTextContent']>>;
export async function convertJsonToSpreadsheetBinary(
this: IExecuteFunctions,
items: INodeExecutionData[],
@@ -162,17 +157,22 @@ export async function extractDataFromPDF(
) {
const binaryData = this.helpers.assertBinaryData(itemIndex, binaryPropertyName);
const params: DocumentInitParameters = { password, isEvalSupported: false };
let buffer: Buffer;
if (binaryData.id) {
params.data = await this.helpers.binaryToBuffer(
await this.helpers.getBinaryStream(binaryData.id),
);
const stream = await this.helpers.getBinaryStream(binaryData.id);
buffer = await this.helpers.binaryToBuffer(stream);
} else {
params.data = Buffer.from(binaryData.data, BINARY_ENCODING).buffer;
buffer = Buffer.from(binaryData.data, BINARY_ENCODING);
}
const document = await readPDF(params).promise;
const { getDocument: readPDF, version: pdfJsVersion } = await import(
'pdfjs-dist/legacy/build/pdf.mjs'
);
const document = await readPDF({
password,
isEvalSupported: false,
data: new Uint8Array(buffer),
}).promise;
const { info, metadata } = await document
.getMetadata()
.catch(() => ({ info: null, metadata: null }));
@@ -196,7 +196,7 @@ export async function extractDataFromPDF(
numpages: document.numPages,
numrender: document.numPages,
info,
metadata: metadata?.getAll(),
metadata: (metadata && Object.fromEntries([...metadata])) ?? undefined,
text,
version: pdfJsVersion,
};