chore: Upgrade pdfjs-dist (no-changelog) (#15729)

This commit is contained in:
कारतोफ्फेलस्क्रिप्ट™
2025-06-04 18:31:22 +02:00
committed by GitHub
parent d64f2e57f7
commit b772462cea
8 changed files with 187 additions and 36 deletions

View File

@@ -2,8 +2,7 @@ import iconv from 'iconv-lite';
import get from 'lodash/get';
import type { IBinaryData, IDataObject, IExecuteFunctions, INodeExecutionData } from 'n8n-workflow';
import { NodeOperationError, BINARY_ENCODING } from 'n8n-workflow';
import { getDocument as readPDF, version as pdfJsVersion } from 'pdfjs-dist';
import type { DocumentInitParameters } from 'pdfjs-dist/types/src/display/api';
import type { TextContent as PdfTextContent } from 'pdfjs-dist/types/src/display/api';
import type { WorkBook, WritingOptions } from 'xlsx';
import { utils as xlsxUtils, write as xlsxWrite } from 'xlsx';
@@ -30,10 +29,6 @@ export type JsonToBinaryOptions = {
format?: boolean;
};
type PdfDocument = Awaited<ReturnType<Awaited<typeof readPDF>>['promise']>;
type PdfPage = Awaited<ReturnType<Awaited<PdfDocument['getPage']>>>;
type PdfTextContent = Awaited<ReturnType<PdfPage['getTextContent']>>;
export async function convertJsonToSpreadsheetBinary(
this: IExecuteFunctions,
items: INodeExecutionData[],
@@ -162,17 +157,22 @@ export async function extractDataFromPDF(
) {
const binaryData = this.helpers.assertBinaryData(itemIndex, binaryPropertyName);
const params: DocumentInitParameters = { password, isEvalSupported: false };
let buffer: Buffer;
if (binaryData.id) {
params.data = await this.helpers.binaryToBuffer(
await this.helpers.getBinaryStream(binaryData.id),
);
const stream = await this.helpers.getBinaryStream(binaryData.id);
buffer = await this.helpers.binaryToBuffer(stream);
} else {
params.data = Buffer.from(binaryData.data, BINARY_ENCODING).buffer;
buffer = Buffer.from(binaryData.data, BINARY_ENCODING);
}
const document = await readPDF(params).promise;
const { getDocument: readPDF, version: pdfJsVersion } = await import(
'pdfjs-dist/legacy/build/pdf.mjs'
);
const document = await readPDF({
password,
isEvalSupported: false,
data: new Uint8Array(buffer),
}).promise;
const { info, metadata } = await document
.getMetadata()
.catch(() => ({ info: null, metadata: null }));
@@ -196,7 +196,7 @@ export async function extractDataFromPDF(
numpages: document.numPages,
numrender: document.numPages,
info,
metadata: metadata?.getAll(),
metadata: (metadata && Object.fromEntries([...metadata])) ?? undefined,
text,
version: pdfJsVersion,
};