From 64fa0ceea62288bbe76fe378e9c77197d5defab4 Mon Sep 17 00:00:00 2001
From: Shireen Missi <94372015+ShireenMissi@users.noreply.github.com>
Date: Thu, 18 Sep 2025 17:48:30 +0200
Subject: [PATCH] fix(AWS Textract Node): Handle all binary data modes correctly (#19258)

Co-authored-by: jeanpaul
Co-authored-by: Roman Davydchuk
---
Reviewer notes (free-form text placed between the "---" marker and the
diffstat; it is commentary only and is not part of any hunk):

 * AwsTextract.node.ts: in the analyzeExpense branch the request body no
   longer takes `Bytes` from `assertBinaryData(...).data`. It now awaits
   `helpers.getBinaryDataBuffer(i, binaryPropertyName)` and encodes the
   returned buffer with BINARY_ENCODING before assigning it to
   `Document.Bytes`.
 * The new node tests compare the request body against
   `buffer.toString('base64')`, so BINARY_ENCODING is presumably 'base64';
   its definition is not part of this patch -- TODO confirm.
 * test/AwsTextract.node.test.ts: `mockDeep` is given its type argument
   (`mockDeep<IExecuteFunctions>()`). Without it the imported
   IExecuteFunctions type is unused and the mock is not typed as the
   execution context the test drives.
 * test/GenericFunctions.test.ts: exercises `simplify` only; per the
   expectations below, a summary field is keyed by `Type.Text`, or by
   `LabelDetection.Text` when `Type` is undefined.
 * The Co-authored-by trailers above carry no e-mail addresses -- TODO
   restore them from the original commit if this patch is to be re-applied
   with authorship intact.

 .../nodes/Aws/Textract/AwsTextract.node.ts    |   7 +-
 .../Textract/test/AwsTextract.node.test.ts    | 254 ++++++++++++++++++
 .../Textract/test/GenericFunctions.test.ts    | 141 ++++++++++
 3 files changed, 400 insertions(+), 2 deletions(-)
 create mode 100644 packages/nodes-base/nodes/Aws/Textract/test/AwsTextract.node.test.ts
 create mode 100644 packages/nodes-base/nodes/Aws/Textract/test/GenericFunctions.test.ts

diff --git a/packages/nodes-base/nodes/Aws/Textract/AwsTextract.node.ts b/packages/nodes-base/nodes/Aws/Textract/AwsTextract.node.ts
index 45201ec773..5d4d8c5562 100644
--- a/packages/nodes-base/nodes/Aws/Textract/AwsTextract.node.ts
+++ b/packages/nodes-base/nodes/Aws/Textract/AwsTextract.node.ts
@@ -1,4 +1,5 @@
 import {
+	BINARY_ENCODING,
 	NodeConnectionTypes,
 	type ICredentialDataDecryptedObject,
 	type ICredentialsDecrypted,
@@ -117,11 +118,13 @@ export class AwsTextract implements INodeType {
 				if (operation === 'analyzeExpense') {
 					const simple = this.getNodeParameter('simple', i) as boolean;
 					const binaryPropertyName = this.getNodeParameter('binaryPropertyName', i);
-					const binaryData = this.helpers.assertBinaryData(i, binaryPropertyName);
+					const binaryBuffer = await this.helpers.getBinaryDataBuffer(i, binaryPropertyName);
+					// Convert the binary buffer to a base64 string
+					const binaryData = Buffer.from(binaryBuffer).toString(BINARY_ENCODING);
 
 					const body: IDataObject = {
 						Document: {
-							Bytes: binaryData.data,
+							Bytes: binaryData,
 						},
 					};
 
diff --git a/packages/nodes-base/nodes/Aws/Textract/test/AwsTextract.node.test.ts b/packages/nodes-base/nodes/Aws/Textract/test/AwsTextract.node.test.ts
new file mode 100644
index 0000000000..6cffea9c4a
--- /dev/null
+++ b/packages/nodes-base/nodes/Aws/Textract/test/AwsTextract.node.test.ts
@@ -0,0 +1,254 @@
+import { mockDeep } from 'jest-mock-extended';
+import type { IExecuteFunctions, INode } from 'n8n-workflow';
+import nock from 'nock';
+
+import { AwsTextract } from '../AwsTextract.node';
+import * as GenericFunctions from '../GenericFunctions';
+
+const mockTextractResponse = {
+	ExpenseDocuments: [
+		{
+			SummaryFields: [
+				{
+					Type: {
+						Text: 'VENDOR_NAME',
+					},
+					ValueDetection: {
+						Text: 'Test Company',
+					},
+				},
+			],
+		},
+	],
+};
+
+const mockSimplifiedResponse = {
+	VENDOR_NAME: 'Test Company',
+};
+
+describe('AWS Textract Node', () => {
+	const executeFunctionsMock = mockDeep<IExecuteFunctions>();
+	const awsApiRequestSpy = jest.spyOn(GenericFunctions, 'awsApiRequestREST');
+	const simplifySpy = jest.spyOn(GenericFunctions, 'simplify');
+	const node = new AwsTextract();
+
+	beforeEach(() => {
+		jest.resetAllMocks();
+		executeFunctionsMock.getCredentials.mockResolvedValue({
+			accessKeyId: 'test-key',
+			secretAccessKey: 'test-secret',
+			region: 'us-east-1',
+		});
+		executeFunctionsMock.getNode.mockReturnValue({
+			typeVersion: 1,
+		} as INode);
+		executeFunctionsMock.getInputData.mockReturnValue([{ json: {} }]);
+		executeFunctionsMock.continueOnFail.mockReturnValue(false);
+		executeFunctionsMock.helpers.returnJsonArray.mockImplementation((data) =>
+			Array.isArray(data) ? data.map((item: any) => ({ json: item })) : ([{ json: data }] as any),
+		);
+	});
+
+	afterEach(() => {
+		nock.cleanAll();
+	});
+
+	describe('analyzeExpense operation', () => {
+		beforeEach(() => {
+			executeFunctionsMock.getNodeParameter.mockImplementation((paramName) => {
+				switch (paramName) {
+					case 'operation':
+						return 'analyzeExpense';
+					case 'binaryPropertyName':
+						return 'data';
+					case 'simple':
+						return true;
+					default:
+						return undefined;
+				}
+			});
+		});
+
+		it('should process binary image data and return simplified response', async () => {
+			const testImageBuffer = Buffer.from('test-image-data');
+
+			executeFunctionsMock.helpers.getBinaryDataBuffer.mockResolvedValue(testImageBuffer);
+			awsApiRequestSpy.mockResolvedValue(mockTextractResponse);
+			simplifySpy.mockReturnValue(mockSimplifiedResponse);
+
+			const result = await node.execute.call(executeFunctionsMock);
+
+			expect(executeFunctionsMock.helpers.getBinaryDataBuffer).toHaveBeenCalledWith(0, 'data');
+			expect(awsApiRequestSpy).toHaveBeenCalledWith(
+				'textract',
+				'POST',
+				'',
+				JSON.stringify({
+					Document: {
+						Bytes: testImageBuffer.toString('base64'),
+					},
+				}),
+				{
+					'x-amz-target': 'Textract.AnalyzeExpense',
+					'Content-Type': 'application/x-amz-json-1.1',
+				},
+			);
+			expect(simplifySpy).toHaveBeenCalledWith(mockTextractResponse);
+			expect(result).toEqual([[{ json: mockSimplifiedResponse }]]);
+		});
+
+		it('should return raw response when simple is false', async () => {
+			executeFunctionsMock.getNodeParameter.mockImplementation((paramName) => {
+				switch (paramName) {
+					case 'operation':
+						return 'analyzeExpense';
+					case 'binaryPropertyName':
+						return 'data';
+					case 'simple':
+						return false;
+					default:
+						return undefined;
+				}
+			});
+
+			const testImageBuffer = Buffer.from('test-image-data');
+
+			executeFunctionsMock.helpers.getBinaryDataBuffer.mockResolvedValue(testImageBuffer);
+			awsApiRequestSpy.mockResolvedValue(mockTextractResponse);
+
+			const result = await node.execute.call(executeFunctionsMock);
+
+			expect(simplifySpy).not.toHaveBeenCalled();
+			expect(result).toEqual([[{ json: mockTextractResponse }]]);
+		});
+
+		it('should handle different binary property names', async () => {
+			executeFunctionsMock.getNodeParameter.mockImplementation((paramName) => {
+				switch (paramName) {
+					case 'operation':
+						return 'analyzeExpense';
+					case 'binaryPropertyName':
+						return 'document';
+					case 'simple':
+						return true;
+					default:
+						return undefined;
+				}
+			});
+
+			const testImageBuffer = Buffer.from('test-document-data');
+
+			executeFunctionsMock.helpers.getBinaryDataBuffer.mockResolvedValue(testImageBuffer);
+			awsApiRequestSpy.mockResolvedValue(mockTextractResponse);
+			simplifySpy.mockReturnValue(mockSimplifiedResponse);
+
+			const result = await node.execute.call(executeFunctionsMock);
+
+			expect(executeFunctionsMock.helpers.getBinaryDataBuffer).toHaveBeenCalledWith(0, 'document');
+			expect(result).toEqual([[{ json: mockSimplifiedResponse }]]);
+		});
+
+		it('should handle JPEG images', async () => {
+			const testJpegBuffer = Buffer.from([0xff, 0xd8, 0xff, 0xe0]); // JPEG header bytes
+
+			executeFunctionsMock.helpers.getBinaryDataBuffer.mockResolvedValue(testJpegBuffer);
+			awsApiRequestSpy.mockResolvedValue(mockTextractResponse);
+			simplifySpy.mockReturnValue(mockSimplifiedResponse);
+
+			const result = await node.execute.call(executeFunctionsMock);
+
+			expect(awsApiRequestSpy).toHaveBeenCalledWith(
+				'textract',
+				'POST',
+				'',
+				JSON.stringify({
+					Document: {
+						Bytes: testJpegBuffer.toString('base64'),
+					},
+				}),
+				{
+					'x-amz-target': 'Textract.AnalyzeExpense',
+					'Content-Type': 'application/x-amz-json-1.1',
+				},
+			);
+			expect(result).toEqual([[{ json: mockSimplifiedResponse }]]);
+		});
+
+		it('should handle PNG images', async () => {
+			const testPngBuffer = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]); // PNG header bytes
+
+			executeFunctionsMock.helpers.getBinaryDataBuffer.mockResolvedValue(testPngBuffer);
+			awsApiRequestSpy.mockResolvedValue(mockTextractResponse);
+			simplifySpy.mockReturnValue(mockSimplifiedResponse);
+
+			const result = await node.execute.call(executeFunctionsMock);
+
+			expect(result).toEqual([[{ json: mockSimplifiedResponse }]]);
+		});
+
+		it('should handle multiple input items', async () => {
+			executeFunctionsMock.getInputData.mockReturnValue([{ json: {} }, { json: {} }]);
+
+			const testImageBuffer = Buffer.from('test-image-data');
+
+			executeFunctionsMock.helpers.getBinaryDataBuffer.mockResolvedValue(testImageBuffer);
+			awsApiRequestSpy.mockResolvedValue(mockTextractResponse);
+			simplifySpy.mockReturnValue(mockSimplifiedResponse);
+
+			const result = await node.execute.call(executeFunctionsMock);
+
+			expect(executeFunctionsMock.helpers.getBinaryDataBuffer).toHaveBeenCalledTimes(2);
+			expect(awsApiRequestSpy).toHaveBeenCalledTimes(2);
+			expect(result).toEqual([
+				[{ json: mockSimplifiedResponse }, { json: mockSimplifiedResponse }],
+			]);
+		});
+
+		it('should handle errors and continue on fail', async () => {
+			executeFunctionsMock.continueOnFail.mockReturnValue(true);
+			executeFunctionsMock.helpers.getBinaryDataBuffer.mockRejectedValue(
+				new Error('Binary data not found'),
+			);
+
+			const result = await node.execute.call(executeFunctionsMock);
+
+			expect(result).toEqual([[{ json: { error: 'Binary data not found' } }]]);
+		});
+
+		it('should throw error when continueOnFail is false', async () => {
+			executeFunctionsMock.continueOnFail.mockReturnValue(false);
+			executeFunctionsMock.helpers.getBinaryDataBuffer.mockRejectedValue(
+				new Error('Binary data not found'),
+			);
+
+			await expect(node.execute.call(executeFunctionsMock)).rejects.toThrow(
+				'Binary data not found',
+			);
+		});
+
+		it('should handle empty binary data', async () => {
+			const emptyBuffer = Buffer.from('');
+
+			executeFunctionsMock.helpers.getBinaryDataBuffer.mockResolvedValue(emptyBuffer);
+			awsApiRequestSpy.mockResolvedValue(mockTextractResponse);
+			simplifySpy.mockReturnValue(mockSimplifiedResponse);
+
+			await node.execute.call(executeFunctionsMock);
+
+			expect(awsApiRequestSpy).toHaveBeenCalledWith(
+				'textract',
+				'POST',
+				'',
+				JSON.stringify({
+					Document: {
+						Bytes: '',
+					},
+				}),
+				{
+					'x-amz-target': 'Textract.AnalyzeExpense',
+					'Content-Type': 'application/x-amz-json-1.1',
+				},
+			);
+		});
+	});
+});
diff --git a/packages/nodes-base/nodes/Aws/Textract/test/GenericFunctions.test.ts b/packages/nodes-base/nodes/Aws/Textract/test/GenericFunctions.test.ts
new file mode 100644
index 0000000000..54b18632c9
--- /dev/null
+++ b/packages/nodes-base/nodes/Aws/Textract/test/GenericFunctions.test.ts
@@ -0,0 +1,141 @@
+import { simplify, type IExpenseDocument } from '../GenericFunctions';
+
+describe('AWS Textract Generic Functions', () => {
+	describe('simplify function', () => {
+		it('should simplify expense document response correctly', () => {
+			const input = {
+				ExpenseDocuments: [
+					{
+						SummaryFields: [
+							{
+								Type: {
+									Text: 'VENDOR_NAME',
+								},
+								LabelDetection: {
+									Text: 'Vendor',
+								},
+								ValueDetection: {
+									Text: 'Acme Corporation',
+								},
+							},
+							{
+								Type: {
+									Text: 'INVOICE_RECEIPT_DATE',
+								},
+								LabelDetection: {
+									Text: 'Date',
+								},
+								ValueDetection: {
+									Text: '2023-12-01',
+								},
+							},
+							{
+								Type: {
+									Text: 'TOTAL',
+								},
+								LabelDetection: {
+									Text: 'Total',
+								},
+								ValueDetection: {
+									Text: '$125.50',
+								},
+							},
+						],
+					},
+				],
+			} as unknown as IExpenseDocument;
+
+			const result = simplify(input);
+
+			expect(result).toEqual({
+				VENDOR_NAME: 'Acme Corporation',
+				INVOICE_RECEIPT_DATE: '2023-12-01',
+				TOTAL: '$125.50',
+			});
+		});
+
+		it('should handle fields without Type but with LabelDetection', () => {
+			const input = {
+				ExpenseDocuments: [
+					{
+						SummaryFields: [
+							{
+								Type: undefined as any,
+								LabelDetection: {
+									Text: 'Custom Field',
+								},
+								ValueDetection: {
+									Text: 'Custom Value',
+								},
+							},
+						],
+					},
+				],
+			} as unknown as IExpenseDocument;
+
+			const result = simplify(input);
+
+			expect(result).toEqual({
+				'Custom Field': 'Custom Value',
+			});
+		});
+
+		it('should handle empty expense documents', () => {
+			const input = {
+				ExpenseDocuments: [
+					{
+						SummaryFields: [],
+					},
+				],
+			} as any;
+
+			const result = simplify(input);
+
+			expect(result).toEqual({});
+		});
+
+		it('should handle multiple expense documents', () => {
+			const input = {
+				ExpenseDocuments: [
+					{
+						SummaryFields: [
+							{
+								Type: {
+									Text: 'VENDOR_NAME',
+								},
+								LabelDetection: {
+									Text: 'Vendor',
+								},
+								ValueDetection: {
+									Text: 'First Company',
+								},
+							},
+						],
+					},
+					{
+						SummaryFields: [
+							{
+								Type: {
+									Text: 'TOTAL',
+								},
+								LabelDetection: {
+									Text: 'Total',
+								},
+								ValueDetection: {
+									Text: '$50.00',
+								},
+							},
+						],
+					},
+				],
+			} as any;
+
+			const result = simplify(input);
+
+			expect(result).toEqual({
+				VENDOR_NAME: 'First Company',
+				TOTAL: '$50.00',
+			});
+		});
+	});
+});