From bb2f675817ccfc2e11ce9b758c4f9dd80a992cd5 Mon Sep 17 00:00:00 2001 From: Benjamin Schroth <68321970+schrothbn@users.noreply.github.com> Date: Thu, 22 May 2025 14:52:23 +0200 Subject: [PATCH] fix(Information Extractor Node): Improve error handling for empty inputs (#15590) --- .../InformationExtractor/processItem.ts | 7 +- .../test/InformationExtraction.node.test.ts | 139 +---------------- .../test/processItem.test.ts | 146 ++++++++++++++++++ 3 files changed, 154 insertions(+), 138 deletions(-) create mode 100644 packages/@n8n/nodes-langchain/nodes/chains/InformationExtractor/test/processItem.test.ts diff --git a/packages/@n8n/nodes-langchain/nodes/chains/InformationExtractor/processItem.ts b/packages/@n8n/nodes-langchain/nodes/chains/InformationExtractor/processItem.ts index a7156e7518..988b369591 100644 --- a/packages/@n8n/nodes-langchain/nodes/chains/InformationExtractor/processItem.ts +++ b/packages/@n8n/nodes-langchain/nodes/chains/InformationExtractor/processItem.ts @@ -2,7 +2,7 @@ import type { BaseLanguageModel } from '@langchain/core/language_models/base'; import { HumanMessage } from '@langchain/core/messages'; import { ChatPromptTemplate, SystemMessagePromptTemplate } from '@langchain/core/prompts'; import type { OutputFixingParser } from 'langchain/output_parsers'; -import type { IExecuteFunctions } from 'n8n-workflow'; +import { NodeOperationError, type IExecuteFunctions } from 'n8n-workflow'; import { getTracingConfig } from '@utils/tracing'; @@ -15,6 +15,11 @@ export async function processItem( parser: OutputFixingParser, ) { const input = ctx.getNodeParameter('text', itemIndex) as string; + if (!input?.trim()) { + throw new NodeOperationError(ctx.getNode(), `Text for item ${itemIndex} is not defined`, { + itemIndex, + }); + } const inputPrompt = new HumanMessage(input); const options = ctx.getNodeParameter('options', itemIndex, {}) as { diff --git 
a/packages/@n8n/nodes-langchain/nodes/chains/InformationExtractor/test/InformationExtraction.node.test.ts b/packages/@n8n/nodes-langchain/nodes/chains/InformationExtractor/test/InformationExtraction.node.test.ts index ef216c05cd..76b1b22cf6 100644 --- a/packages/@n8n/nodes-langchain/nodes/chains/InformationExtractor/test/InformationExtraction.node.test.ts +++ b/packages/@n8n/nodes-langchain/nodes/chains/InformationExtractor/test/InformationExtraction.node.test.ts @@ -1,5 +1,5 @@ import type { BaseLanguageModel } from '@langchain/core/language_models/base'; -import { FakeLLM, FakeListChatModel } from '@langchain/core/utils/testing'; +import { FakeListChatModel } from '@langchain/core/utils/testing'; import get from 'lodash/get'; import type { IDataObject, IExecuteFunctions } from 'n8n-workflow'; @@ -78,7 +78,7 @@ const createExecuteFunctionsMock = ( }; describe('InformationExtractor', () => { - describe('From Attribute Descriptions', () => { + describe('Schema Generation', () => { it('should generate a schema from attribute descriptions with optional fields', async () => { const schema = makeZodSchemaFromAttributes(mockPersonAttributes); @@ -86,140 +86,6 @@ describe('InformationExtractor', () => { expect(schema.parse({ name: 'John' })).toEqual({ name: 'John' }); expect(schema.parse({ age: 30 })).toEqual({ age: 30 }); }); - - it('should make a request to LLM and return the extracted attributes', async () => { - const node = new InformationExtractor(); - - const response = await node.execute.call( - createExecuteFunctionsMock( - { - text: 'John is 30 years old', - attributes: { - attributes: mockPersonAttributes, - }, - options: {}, - schemaType: 'fromAttributes', - }, - new FakeLLM({ response: formatFakeLlmResponse({ name: 'John', age: 30 }) }), - ), - ); - - expect(response).toEqual([[{ json: { output: { name: 'John', age: 30 } } }]]); - }); - - it('should not fail if LLM could not extract some attribute', async () => { - const node = new InformationExtractor(); - - 
const response = await node.execute.call( - createExecuteFunctionsMock( - { - text: 'John is 30 years old', - attributes: { - attributes: mockPersonAttributes, - }, - options: {}, - schemaType: 'fromAttributes', - }, - new FakeLLM({ response: formatFakeLlmResponse({ name: 'John' }) }), - ), - ); - - expect(response).toEqual([[{ json: { output: { name: 'John' } } }]]); - }); - - it('should fail if LLM could not extract some required attribute', async () => { - const node = new InformationExtractor(); - - try { - await node.execute.call( - createExecuteFunctionsMock( - { - text: 'John is 30 years old', - attributes: { - attributes: mockPersonAttributesRequired, - }, - options: {}, - schemaType: 'fromAttributes', - }, - new FakeLLM({ response: formatFakeLlmResponse({ name: 'John' }) }), - ), - ); - } catch (error) { - expect(error.message).toContain('Failed to parse'); - } - }); - - it('should fail if LLM extracted an attribute with the wrong type', async () => { - const node = new InformationExtractor(); - - try { - await node.execute.call( - createExecuteFunctionsMock( - { - text: 'John is 30 years old', - attributes: { - attributes: mockPersonAttributes, - }, - options: {}, - schemaType: 'fromAttributes', - }, - new FakeLLM({ response: formatFakeLlmResponse({ name: 'John', age: '30' }) }), - ), - ); - } catch (error) { - expect(error.message).toContain('Failed to parse'); - } - }); - - it('retries if LLM fails to extract some required attribute', async () => { - const node = new InformationExtractor(); - - const response = await node.execute.call( - createExecuteFunctionsMock( - { - text: 'John is 30 years old', - attributes: { - attributes: mockPersonAttributesRequired, - }, - options: {}, - schemaType: 'fromAttributes', - }, - new FakeListChatModel({ - responses: [ - formatFakeLlmResponse({ name: 'John' }), - formatFakeLlmResponse({ name: 'John', age: 30 }), - ], - }), - ), - ); - - expect(response).toEqual([[{ json: { output: { name: 'John', age: 30 } } }]]); - 
}); - - it('retries if LLM extracted an attribute with a wrong type', async () => { - const node = new InformationExtractor(); - - const response = await node.execute.call( - createExecuteFunctionsMock( - { - text: 'John is 30 years old', - attributes: { - attributes: mockPersonAttributesRequired, - }, - options: {}, - schemaType: 'fromAttributes', - }, - new FakeListChatModel({ - responses: [ - formatFakeLlmResponse({ name: 'John', age: '30' }), - formatFakeLlmResponse({ name: 'John', age: 30 }), - ], - }), - ), - ); - - expect(response).toEqual([[{ json: { output: { name: 'John', age: 30 } } }]]); - }); }); describe('Batch Processing', () => { @@ -303,7 +169,6 @@ describe('InformationExtractor', () => { const response = await node.execute.call(mockExecuteFunctions); - //expect(response).toBe({}); expect(response[0]).toHaveLength(3); expect(response[0][0]).toEqual({ json: { output: { name: 'John', age: 30 } } }); expect(response[0][1]).toEqual({ diff --git a/packages/@n8n/nodes-langchain/nodes/chains/InformationExtractor/test/processItem.test.ts b/packages/@n8n/nodes-langchain/nodes/chains/InformationExtractor/test/processItem.test.ts new file mode 100644 index 0000000000..76504d6a98 --- /dev/null +++ b/packages/@n8n/nodes-langchain/nodes/chains/InformationExtractor/test/processItem.test.ts @@ -0,0 +1,146 @@ +import { FakeLLM, FakeListChatModel } from '@langchain/core/utils/testing'; +import { OutputFixingParser, StructuredOutputParser } from 'langchain/output_parsers'; +import { NodeOperationError } from 'n8n-workflow'; + +import { makeZodSchemaFromAttributes } from '../helpers'; +import { processItem } from '../processItem'; +import type { AttributeDefinition } from '../types'; + +jest.mock('@utils/tracing', () => ({ + getTracingConfig: () => ({}), +})); + +const mockPersonAttributes: AttributeDefinition[] = [ + { + name: 'name', + type: 'string', + description: 'The name of the person', + required: false, + }, + { + name: 'age', + type: 'number', + description: 
'The age of the person', + required: false, + }, +]; + +const mockPersonAttributesRequired: AttributeDefinition[] = [ + { + name: 'name', + type: 'string', + description: 'The name of the person', + required: true, + }, + { + name: 'age', + type: 'number', + description: 'The age of the person', + required: true, + }, +]; + +function formatFakeLlmResponse(object: Record<string, unknown>) { + return `\`\`\`json\n${JSON.stringify(object, null, 2)}\n\`\`\``; +} + +describe('processItem', () => { + it('should process a single item and return extracted attributes', async () => { + const mockExecuteFunctions = { + getNodeParameter: (param: string) => { + if (param === 'text') return 'John is 30 years old'; + if (param === 'options') return {}; + return undefined; + }, + getNode: () => ({ typeVersion: 1.1 }), + }; + + const llm = new FakeLLM({ response: formatFakeLlmResponse({ name: 'John', age: 30 }) }); + const parser = OutputFixingParser.fromLLM( + llm, + StructuredOutputParser.fromZodSchema(makeZodSchemaFromAttributes(mockPersonAttributes)), + ); + + const result = await processItem(mockExecuteFunctions as any, 0, llm, parser); + + expect(result).toEqual({ name: 'John', age: 30 }); + }); + + it('should throw error if input is undefined or empty', async () => { + const mockExecuteFunctions = { + getNodeParameter: (param: string, itemIndex: number) => { + if (param === 'text') { + if (itemIndex === 0) return undefined; + if (itemIndex === 1) return ''; + if (itemIndex === 2) return ' '; + return null; + } + if (param === 'options') return {}; + return undefined; + }, + getNode: () => ({ typeVersion: 1.1 }), + }; + + const llm = new FakeLLM({ response: formatFakeLlmResponse({ name: 'John', age: 30 }) }); + const parser = OutputFixingParser.fromLLM( + llm, + StructuredOutputParser.fromZodSchema(makeZodSchemaFromAttributes(mockPersonAttributes)), + ); + + for (let itemIndex = 0; itemIndex < 4; itemIndex++) { + await expect( + processItem(mockExecuteFunctions as any, itemIndex, llm, 
parser), + ).rejects.toThrow(NodeOperationError); + } + }); + + it('should use custom system prompt template if provided', async () => { + const customTemplate = 'Custom template {format_instructions}'; + const mockExecuteFunctions = { + getNodeParameter: (param: string) => { + if (param === 'text') return 'John is 30 years old'; + if (param === 'options') return { systemPromptTemplate: customTemplate }; + return undefined; + }, + getNode: () => ({ typeVersion: 1.1 }), + }; + + const llm = new FakeLLM({ response: formatFakeLlmResponse({ name: 'John', age: 30 }) }); + const parser = OutputFixingParser.fromLLM( + llm, + StructuredOutputParser.fromZodSchema(makeZodSchemaFromAttributes(mockPersonAttributes)), + ); + + const result = await processItem(mockExecuteFunctions as any, 0, llm, parser); + + expect(result).toEqual({ name: 'John', age: 30 }); + }); + + it('should handle retries when LLM returns invalid data', async () => { + const mockExecuteFunctions = { + getNodeParameter: (param: string) => { + if (param === 'text') return 'John is 30 years old'; + if (param === 'options') return {}; + return undefined; + }, + getNode: () => ({ typeVersion: 1.1 }), + }; + + const llm = new FakeListChatModel({ + responses: [ + formatFakeLlmResponse({ name: 'John', age: '30' }), // Wrong type + formatFakeLlmResponse({ name: 'John', age: 30 }), // Correct type + ], + }); + const parser = OutputFixingParser.fromLLM( + llm, + StructuredOutputParser.fromZodSchema( + makeZodSchemaFromAttributes(mockPersonAttributesRequired), + ), + ); + + const result = await processItem(mockExecuteFunctions as any, 0, llm, parser); + + expect(result).toEqual({ name: 'John', age: 30 }); + }); +});