fix(Information Extractor Node): Improve error handling for empty inputs (#15590)

This commit is contained in:
Benjamin Schroth
2025-05-22 14:52:23 +02:00
committed by GitHub
parent 4661e39427
commit bb2f675817
3 changed files with 154 additions and 138 deletions

View File

@@ -2,7 +2,7 @@ import type { BaseLanguageModel } from '@langchain/core/language_models/base';
import { HumanMessage } from '@langchain/core/messages';
import { ChatPromptTemplate, SystemMessagePromptTemplate } from '@langchain/core/prompts';
import type { OutputFixingParser } from 'langchain/output_parsers';
import type { IExecuteFunctions } from 'n8n-workflow';
import { NodeOperationError, type IExecuteFunctions } from 'n8n-workflow';
import { getTracingConfig } from '@utils/tracing';
@@ -15,6 +15,11 @@ export async function processItem(
parser: OutputFixingParser<object>,
) {
const input = ctx.getNodeParameter('text', itemIndex) as string;
if (!input?.trim()) {
throw new NodeOperationError(ctx.getNode(), `Text for item ${itemIndex} is not defined`, {
itemIndex,
});
}
const inputPrompt = new HumanMessage(input);
const options = ctx.getNodeParameter('options', itemIndex, {}) as {

View File

@@ -1,5 +1,5 @@
import type { BaseLanguageModel } from '@langchain/core/language_models/base';
import { FakeLLM, FakeListChatModel } from '@langchain/core/utils/testing';
import { FakeListChatModel } from '@langchain/core/utils/testing';
import get from 'lodash/get';
import type { IDataObject, IExecuteFunctions } from 'n8n-workflow';
@@ -78,7 +78,7 @@ const createExecuteFunctionsMock = (
};
describe('InformationExtractor', () => {
describe('From Attribute Descriptions', () => {
describe('Schema Generation', () => {
it('should generate a schema from attribute descriptions with optional fields', async () => {
const schema = makeZodSchemaFromAttributes(mockPersonAttributes);
@@ -86,140 +86,6 @@ describe('InformationExtractor', () => {
expect(schema.parse({ name: 'John' })).toEqual({ name: 'John' });
expect(schema.parse({ age: 30 })).toEqual({ age: 30 });
});
it('should make a request to LLM and return the extracted attributes', async () => {
const node = new InformationExtractor();
const response = await node.execute.call(
createExecuteFunctionsMock(
{
text: 'John is 30 years old',
attributes: {
attributes: mockPersonAttributes,
},
options: {},
schemaType: 'fromAttributes',
},
new FakeLLM({ response: formatFakeLlmResponse({ name: 'John', age: 30 }) }),
),
);
expect(response).toEqual([[{ json: { output: { name: 'John', age: 30 } } }]]);
});
it('should not fail if LLM could not extract some attribute', async () => {
const node = new InformationExtractor();
const response = await node.execute.call(
createExecuteFunctionsMock(
{
text: 'John is 30 years old',
attributes: {
attributes: mockPersonAttributes,
},
options: {},
schemaType: 'fromAttributes',
},
new FakeLLM({ response: formatFakeLlmResponse({ name: 'John' }) }),
),
);
expect(response).toEqual([[{ json: { output: { name: 'John' } } }]]);
});
it('should fail if LLM could not extract some required attribute', async () => {
const node = new InformationExtractor();
try {
await node.execute.call(
createExecuteFunctionsMock(
{
text: 'John is 30 years old',
attributes: {
attributes: mockPersonAttributesRequired,
},
options: {},
schemaType: 'fromAttributes',
},
new FakeLLM({ response: formatFakeLlmResponse({ name: 'John' }) }),
),
);
} catch (error) {
expect(error.message).toContain('Failed to parse');
}
});
it('should fail if LLM extracted an attribute with the wrong type', async () => {
const node = new InformationExtractor();
try {
await node.execute.call(
createExecuteFunctionsMock(
{
text: 'John is 30 years old',
attributes: {
attributes: mockPersonAttributes,
},
options: {},
schemaType: 'fromAttributes',
},
new FakeLLM({ response: formatFakeLlmResponse({ name: 'John', age: '30' }) }),
),
);
} catch (error) {
expect(error.message).toContain('Failed to parse');
}
});
it('retries if LLM fails to extract some required attribute', async () => {
const node = new InformationExtractor();
const response = await node.execute.call(
createExecuteFunctionsMock(
{
text: 'John is 30 years old',
attributes: {
attributes: mockPersonAttributesRequired,
},
options: {},
schemaType: 'fromAttributes',
},
new FakeListChatModel({
responses: [
formatFakeLlmResponse({ name: 'John' }),
formatFakeLlmResponse({ name: 'John', age: 30 }),
],
}),
),
);
expect(response).toEqual([[{ json: { output: { name: 'John', age: 30 } } }]]);
});
it('retries if LLM extracted an attribute with a wrong type', async () => {
const node = new InformationExtractor();
const response = await node.execute.call(
createExecuteFunctionsMock(
{
text: 'John is 30 years old',
attributes: {
attributes: mockPersonAttributesRequired,
},
options: {},
schemaType: 'fromAttributes',
},
new FakeListChatModel({
responses: [
formatFakeLlmResponse({ name: 'John', age: '30' }),
formatFakeLlmResponse({ name: 'John', age: 30 }),
],
}),
),
);
expect(response).toEqual([[{ json: { output: { name: 'John', age: 30 } } }]]);
});
});
describe('Batch Processing', () => {
@@ -303,7 +169,6 @@ describe('InformationExtractor', () => {
const response = await node.execute.call(mockExecuteFunctions);
//expect(response).toBe({});
expect(response[0]).toHaveLength(3);
expect(response[0][0]).toEqual({ json: { output: { name: 'John', age: 30 } } });
expect(response[0][1]).toEqual({

View File

@@ -0,0 +1,146 @@
import { FakeLLM, FakeListChatModel } from '@langchain/core/utils/testing';
import { OutputFixingParser, StructuredOutputParser } from 'langchain/output_parsers';
import { NodeOperationError } from 'n8n-workflow';
import { makeZodSchemaFromAttributes } from '../helpers';
import { processItem } from '../processItem';
import type { AttributeDefinition } from '../types';
// Stub out the project's tracing helper with a no-op config so processItem
// can run in these unit tests without a real tracing setup.
jest.mock('@utils/tracing', () => ({
getTracingConfig: () => ({}),
}));
// Attribute definitions with both fields optional — extraction is allowed to
// omit either 'name' or 'age' without failing schema validation.
const mockPersonAttributes: AttributeDefinition[] = [
{
name: 'name',
type: 'string',
description: 'The name of the person',
required: false,
},
{
name: 'age',
type: 'number',
description: 'The age of the person',
required: false,
},
];
// Same attributes as mockPersonAttributes but marked required — used to
// exercise the retry/fixing path when the LLM omits a mandatory field or
// returns it with the wrong type.
const mockPersonAttributesRequired: AttributeDefinition[] = [
{
name: 'name',
type: 'string',
description: 'The name of the person',
required: true,
},
{
name: 'age',
type: 'number',
description: 'The age of the person',
required: true,
},
];
/**
 * Wraps an object in a fenced ```json code block, mimicking how an LLM
 * typically emits structured output, so the parsers under test can extract it.
 *
 * @param object - Plain object to serialize (pretty-printed with 2-space indent).
 * @returns The JSON serialization wrapped in ```json ... ``` fences.
 */
function formatFakeLlmResponse(object: Record<string, unknown>) {
	return `\`\`\`json\n${JSON.stringify(object, null, 2)}\n\`\`\``;
}
describe('processItem', () => {
it('should process a single item and return extracted attributes', async () => {
const mockExecuteFunctions = {
getNodeParameter: (param: string) => {
if (param === 'text') return 'John is 30 years old';
if (param === 'options') return {};
return undefined;
},
getNode: () => ({ typeVersion: 1.1 }),
};
const llm = new FakeLLM({ response: formatFakeLlmResponse({ name: 'John', age: 30 }) });
const parser = OutputFixingParser.fromLLM(
llm,
StructuredOutputParser.fromZodSchema(makeZodSchemaFromAttributes(mockPersonAttributes)),
);
const result = await processItem(mockExecuteFunctions as any, 0, llm, parser);
expect(result).toEqual({ name: 'John', age: 30 });
});
it('should throw error if input is undefined or empty', async () => {
const mockExecuteFunctions = {
getNodeParameter: (param: string, itemIndex: number) => {
if (param === 'text') {
if (itemIndex === 0) return undefined;
if (itemIndex === 1) return '';
if (itemIndex === 2) return ' ';
return null;
}
if (param === 'options') return {};
return undefined;
},
getNode: () => ({ typeVersion: 1.1 }),
};
const llm = new FakeLLM({ response: formatFakeLlmResponse({ name: 'John', age: 30 }) });
const parser = OutputFixingParser.fromLLM(
llm,
StructuredOutputParser.fromZodSchema(makeZodSchemaFromAttributes(mockPersonAttributes)),
);
for (let itemIndex = 0; itemIndex < 4; itemIndex++) {
await expect(
processItem(mockExecuteFunctions as any, itemIndex, llm, parser),
).rejects.toThrow(NodeOperationError);
}
});
it('should use custom system prompt template if provided', async () => {
const customTemplate = 'Custom template {format_instructions}';
const mockExecuteFunctions = {
getNodeParameter: (param: string) => {
if (param === 'text') return 'John is 30 years old';
if (param === 'options') return { systemPromptTemplate: customTemplate };
return undefined;
},
getNode: () => ({ typeVersion: 1.1 }),
};
const llm = new FakeLLM({ response: formatFakeLlmResponse({ name: 'John', age: 30 }) });
const parser = OutputFixingParser.fromLLM(
llm,
StructuredOutputParser.fromZodSchema(makeZodSchemaFromAttributes(mockPersonAttributes)),
);
const result = await processItem(mockExecuteFunctions as any, 0, llm, parser);
expect(result).toEqual({ name: 'John', age: 30 });
});
it('should handle retries when LLM returns invalid data', async () => {
const mockExecuteFunctions = {
getNodeParameter: (param: string) => {
if (param === 'text') return 'John is 30 years old';
if (param === 'options') return {};
return undefined;
},
getNode: () => ({ typeVersion: 1.1 }),
};
const llm = new FakeListChatModel({
responses: [
formatFakeLlmResponse({ name: 'John', age: '30' }), // Wrong type
formatFakeLlmResponse({ name: 'John', age: 30 }), // Correct type
],
});
const parser = OutputFixingParser.fromLLM(
llm,
StructuredOutputParser.fromZodSchema(
makeZodSchemaFromAttributes(mockPersonAttributesRequired),
),
);
const result = await processItem(mockExecuteFunctions as any, 0, llm, parser);
expect(result).toEqual({ name: 'John', age: 30 });
});
});