feat: Optimise langchain calls in batching mode (#15243)

Benjamin Schroth
2025-05-13 13:58:38 +02:00
committed by GitHub
parent 8591c2e0d1
commit ff156930c5
35 changed files with 2946 additions and 1171 deletions

View File: InformationExtractor.node.ts

@@ -1,9 +1,7 @@
 import type { BaseLanguageModel } from '@langchain/core/language_models/base';
-import { HumanMessage } from '@langchain/core/messages';
-import { ChatPromptTemplate, SystemMessagePromptTemplate } from '@langchain/core/prompts';
 import type { JSONSchema7 } from 'json-schema';
 import { OutputFixingParser, StructuredOutputParser } from 'langchain/output_parsers';
-import { jsonParse, NodeConnectionTypes, NodeOperationError } from 'n8n-workflow';
+import { jsonParse, NodeConnectionTypes, NodeOperationError, sleep } from 'n8n-workflow';
 import type {
 	INodeType,
 	INodeTypeDescription,
@@ -15,15 +13,13 @@ import type { z } from 'zod';
 import { inputSchemaField, jsonSchemaExampleField, schemaTypeField } from '@utils/descriptions';
 import { convertJsonSchemaToZod, generateSchema } from '@utils/schemaParsing';
 import { getTracingConfig } from '@utils/tracing';
+import { getBatchingOptionFields } from '@utils/sharedFields';
+import { SYSTEM_PROMPT_TEMPLATE } from './constants';
 import { makeZodSchemaFromAttributes } from './helpers';
+import { processItem } from './processItem';
 import type { AttributeDefinition } from './types';
-
-const SYSTEM_PROMPT_TEMPLATE = `You are an expert extraction algorithm.
-Only extract relevant information from the text.
-If you do not know the value of an attribute asked to extract, you may omit the attribute's value.`;
 
 export class InformationExtractor implements INodeType {
 	description: INodeTypeDescription = {
 		displayName: 'Information Extractor',
@@ -31,7 +27,7 @@ export class InformationExtractor implements INodeType {
 		icon: 'fa:project-diagram',
 		iconColor: 'black',
 		group: ['transform'],
-		version: 1,
+		version: [1, 1.1],
 		description: 'Extract information from text in a structured format',
 		codex: {
 			alias: ['NER', 'parse', 'parsing', 'JSON', 'data extraction', 'structured'],
@@ -213,6 +209,11 @@ export class InformationExtractor implements INodeType {
 						rows: 6,
 					},
 				},
+				getBatchingOptionFields({
+					show: {
+						'@version': [{ _cnd: { gte: 1.1 } }],
+					},
+				}),
 			],
 		},
 	],
@@ -265,38 +266,59 @@
 		}
 		const resultData: INodeExecutionData[] = [];
-		for (let itemIndex = 0; itemIndex < items.length; itemIndex++) {
-			const input = this.getNodeParameter('text', itemIndex) as string;
-			const inputPrompt = new HumanMessage(input);
-			const options = this.getNodeParameter('options', itemIndex, {}) as {
-				systemPromptTemplate?: string;
-			};
-			const systemPromptTemplate = SystemMessagePromptTemplate.fromTemplate(
-				`${options.systemPromptTemplate ?? SYSTEM_PROMPT_TEMPLATE}
-{format_instructions}`,
-			);
-			const messages = [
-				await systemPromptTemplate.format({
-					format_instructions: parser.getFormatInstructions(),
-				}),
-				inputPrompt,
-			];
-			const prompt = ChatPromptTemplate.fromMessages(messages);
-			const chain = prompt.pipe(llm).pipe(parser).withConfig(getTracingConfig(this));
-			try {
-				const output = await chain.invoke(messages);
-				resultData.push({ json: { output } });
-			} catch (error) {
-				if (this.continueOnFail()) {
-					resultData.push({ json: { error: error.message }, pairedItem: { item: itemIndex } });
-					continue;
-				}
-				throw error;
-			}
-		}
+		const batchSize = this.getNodeParameter('options.batching.batchSize', 0, 5) as number;
+		const delayBetweenBatches = this.getNodeParameter(
+			'options.batching.delayBetweenBatches',
+			0,
+			0,
+		) as number;
+		if (this.getNode().typeVersion >= 1.1 && batchSize >= 1) {
+			// Batch processing
+			for (let i = 0; i < items.length; i += batchSize) {
+				const batch = items.slice(i, i + batchSize);
+				const batchPromises = batch.map(async (_item, batchItemIndex) => {
+					const itemIndex = i + batchItemIndex;
+					return await processItem(this, itemIndex, llm, parser);
+				});
+				const batchResults = await Promise.allSettled(batchPromises);
+				batchResults.forEach((response, index) => {
+					if (response.status === 'rejected') {
+						const error = response.reason as Error;
+						if (this.continueOnFail()) {
+							resultData.push({
+								json: { error: error.message },
+								pairedItem: { item: i + index },
+							});
+							return;
+						} else {
+							throw new NodeOperationError(this.getNode(), error.message);
+						}
+					}
+					const output = response.value;
+					resultData.push({ json: { output } });
+				});
+				// Add delay between batches if not the last batch
+				if (i + batchSize < items.length && delayBetweenBatches > 0) {
+					await sleep(delayBetweenBatches);
+				}
+			}
+		} else {
+			// Sequential processing
+			for (let itemIndex = 0; itemIndex < items.length; itemIndex++) {
+				try {
+					const output = await processItem(this, itemIndex, llm, parser);
+					resultData.push({ json: { output } });
+				} catch (error) {
+					if (this.continueOnFail()) {
+						resultData.push({ json: { error: error.message }, pairedItem: { item: itemIndex } });
+						continue;
+					}
+					throw error;
+				}
+			}
+		}
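
Note on the pattern above: items are fanned out in chunks of batchSize via Promise.allSettled, so one failed item cannot reject its whole batch, with an optional pause between chunks. A standalone sketch of the same idea, assuming nothing but the standard library; processInBatches is a hypothetical name, not part of this commit:

async function processInBatches<T, R>(
	items: T[],
	batchSize: number,
	delayMs: number,
	processOne: (item: T, index: number) => Promise<R>,
): Promise<Array<PromiseSettledResult<R>>> {
	const results: Array<PromiseSettledResult<R>> = [];
	for (let i = 0; i < items.length; i += batchSize) {
		// Every item in the current chunk runs concurrently; allSettled records
		// per-item rejections instead of failing the whole chunk.
		const batch = items.slice(i, i + batchSize);
		results.push(
			...(await Promise.allSettled(batch.map(async (item, j) => await processOne(item, i + j)))),
		);
		// Throttle between chunks, but never after the last one.
		if (i + batchSize < items.length && delayMs > 0) {
			await new Promise((resolve) => setTimeout(resolve, delayMs));
		}
	}
	return results;
}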

View File: constants.ts

@@ -0,0 +1,3 @@
+export const SYSTEM_PROMPT_TEMPLATE = `You are an expert extraction algorithm.
+Only extract relevant information from the text.
+If you do not know the value of an attribute asked to extract, you may omit the attribute's value.`;

View File: processItem.ts

@@ -0,0 +1,39 @@
+import type { BaseLanguageModel } from '@langchain/core/language_models/base';
+import { HumanMessage } from '@langchain/core/messages';
+import { ChatPromptTemplate, SystemMessagePromptTemplate } from '@langchain/core/prompts';
+import type { OutputFixingParser } from 'langchain/output_parsers';
+import type { IExecuteFunctions } from 'n8n-workflow';
+
+import { getTracingConfig } from '@utils/tracing';
+
+import { SYSTEM_PROMPT_TEMPLATE } from './constants';
+
+export async function processItem(
+	ctx: IExecuteFunctions,
+	itemIndex: number,
+	llm: BaseLanguageModel,
+	parser: OutputFixingParser<object>,
+) {
+	const input = ctx.getNodeParameter('text', itemIndex) as string;
+	const inputPrompt = new HumanMessage(input);
+
+	const options = ctx.getNodeParameter('options', itemIndex, {}) as {
+		systemPromptTemplate?: string;
+	};
+
+	const systemPromptTemplate = SystemMessagePromptTemplate.fromTemplate(
+		`${options.systemPromptTemplate ?? SYSTEM_PROMPT_TEMPLATE}
+{format_instructions}`,
+	);
+
+	const messages = [
+		await systemPromptTemplate.format({
+			format_instructions: parser.getFormatInstructions(),
+		}),
+		inputPrompt,
+	];
+
+	const prompt = ChatPromptTemplate.fromMessages(messages);
+	const chain = prompt.pipe(llm).pipe(parser).withConfig(getTracingConfig(ctx));
+	return await chain.invoke(messages);
+}
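
For context, processItem composes a plain LangChain pipeline: system prompt (carrying the parser's format instructions), then the model, then the parser, where OutputFixingParser wraps a StructuredOutputParser and re-asks the model once if parsing fails. A rough usage sketch of that parser setup outside n8n; ChatOpenAI and the Zod schema are stand-in assumptions, any BaseLanguageModel works:

import { ChatOpenAI } from '@langchain/openai';
import { OutputFixingParser, StructuredOutputParser } from 'langchain/output_parsers';
import { z } from 'zod';

// Stand-in model; the node receives whatever model is connected upstream.
const llm = new ChatOpenAI({ model: 'gpt-4o-mini' });

// StructuredOutputParser validates the LLM text against the schema;
// OutputFixingParser feeds parse errors back to the model for one repair pass.
const parser = OutputFixingParser.fromLLM(
	llm,
	StructuredOutputParser.fromZodSchema(z.object({ name: z.string(), age: z.number() })),
);

// parser.getFormatInstructions() is the string processItem injects as {format_instructions}.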

View File: InformationExtractor.node.test.ts

@@ -41,7 +41,11 @@ function formatFakeLlmResponse(object: Record<string, any>) {
 	return `\`\`\`json\n${JSON.stringify(object, null, 2)}\n\`\`\``;
 }
 
-const createExecuteFunctionsMock = (parameters: IDataObject, fakeLlm: BaseLanguageModel) => {
+const createExecuteFunctionsMock = (
+	parameters: IDataObject,
+	fakeLlm: BaseLanguageModel,
+	inputData = [{ json: {} }],
+) => {
 	const nodeParameters = parameters;
 	return {
@@ -49,13 +53,15 @@ const createExecuteFunctionsMock = (parameters: IDataObject, fakeLlm: BaseLangua
 			return get(nodeParameters, parameter);
 		},
 		getNode() {
-			return {};
+			return {
+				typeVersion: 1.1,
+			};
 		},
 		getInputConnectionData() {
 			return fakeLlm;
 		},
 		getInputData() {
-			return [{ json: {} }];
+			return inputData;
 		},
 		getWorkflow() {
 			return {
@@ -215,4 +221,132 @@ describe('InformationExtractor', () => {
 			expect(response).toEqual([[{ json: { output: { name: 'John', age: 30 } } }]]);
 		});
 	});
+
+	describe('Batch Processing', () => {
+		it('should process multiple items in batches', async () => {
+			const node = new InformationExtractor();
+			const inputData = [
+				{ json: { text: 'John is 30 years old' } },
+				{ json: { text: 'Alice is 25 years old' } },
+				{ json: { text: 'Bob is 40 years old' } },
+			];
+			const response = await node.execute.call(
+				createExecuteFunctionsMock(
+					{
+						text: 'John is 30 years old',
+						attributes: {
+							attributes: mockPersonAttributes,
+						},
+						options: {
+							batching: {
+								batchSize: 2,
+								delayBetweenBatches: 0,
+							},
+						},
+						schemaType: 'fromAttributes',
+					},
+					new FakeListChatModel({
+						responses: [
+							formatFakeLlmResponse({ name: 'John', age: 30 }),
+							formatFakeLlmResponse({ name: 'Alice', age: 25 }),
+							formatFakeLlmResponse({ name: 'Bob', age: 40 }),
+						],
+					}),
+					inputData,
+				),
+			);
+			expect(response).toEqual([
+				[
+					{ json: { output: { name: 'John', age: 30 } } },
+					{ json: { output: { name: 'Alice', age: 25 } } },
+					{ json: { output: { name: 'Bob', age: 40 } } },
+				],
+			]);
+		});
+
+		it('should handle errors in batch processing', async () => {
+			const node = new InformationExtractor();
+			const inputData = [
+				{ json: { text: 'John is 30 years old' } },
+				{ json: { text: 'Invalid text' } },
+				{ json: { text: 'Bob is 40 years old' } },
+			];
+			const mockExecuteFunctions = createExecuteFunctionsMock(
+				{
+					text: 'John is 30 years old',
+					attributes: {
+						attributes: mockPersonAttributesRequired,
+					},
+					options: {
+						batching: {
+							batchSize: 2,
+							delayBetweenBatches: 0,
+						},
+					},
+					schemaType: 'fromAttributes',
+				},
+				new FakeListChatModel({
+					responses: [
+						formatFakeLlmResponse({ name: 'John', age: 30 }),
+						formatFakeLlmResponse({ name: 'Invalid' }), // Missing required age
+						formatFakeLlmResponse({ name: 'Invalid' }), // Missing required age on retry
+						formatFakeLlmResponse({ name: 'Bob', age: 40 }),
+					],
+				}),
+				inputData,
+			);
+			mockExecuteFunctions.continueOnFail = () => true;
+			const response = await node.execute.call(mockExecuteFunctions);
+			expect(response[0]).toHaveLength(3);
+			expect(response[0][0]).toEqual({ json: { output: { name: 'John', age: 30 } } });
+			expect(response[0][1]).toEqual({
+				json: { error: expect.stringContaining('Failed to parse') },
+				pairedItem: { item: 1 },
+			});
+			expect(response[0][2]).toEqual({ json: { output: { name: 'Bob', age: 40 } } });
+		});
+
+		it('should throw error if batch processing fails and continueOnFail is false', async () => {
+			const node = new InformationExtractor();
+			const inputData = [
+				{ json: { text: 'John is 30 years old' } },
+				{ json: { text: 'Invalid text' } },
+				{ json: { text: 'Bob is 40 years old' } },
+			];
+			const mockExecuteFunctions = createExecuteFunctionsMock(
+				{
+					text: 'John is 30 years old',
+					attributes: {
+						attributes: mockPersonAttributesRequired,
+					},
+					options: {
+						batching: {
+							batchSize: 2,
+							delayBetweenBatches: 0,
+						},
+					},
+					schemaType: 'fromAttributes',
+				},
+				new FakeListChatModel({
+					responses: [
+						formatFakeLlmResponse({ name: 'John', age: 30 }),
+						formatFakeLlmResponse({ name: 'Invalid' }), // Missing required age
+						formatFakeLlmResponse({ name: 'Invalid' }), // Missing required age on retry
+						formatFakeLlmResponse({ name: 'Bob', age: 40 }),
+					],
+				}),
+				inputData,
+			);
+			await expect(node.execute.call(mockExecuteFunctions)).rejects.toThrow('Failed to parse');
+		});
+	});
 });
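
One detail worth calling out in the fixtures above: a failing item consumes two queued responses, because OutputFixingParser makes exactly one repair call after the first parse failure (hence the "on retry" comments). FakeListChatModel, LangChain's real test double from @langchain/core/utils/testing, returns its responses strictly in order, one per model call. A minimal version of that fixture:

import { FakeListChatModel } from '@langchain/core/utils/testing';

// A single item that fails schema validation burns two entries:
// the initial attempt plus OutputFixingParser's one repair retry.
const fakeLlm = new FakeListChatModel({
	responses: [
		JSON.stringify({ name: 'Invalid' }), // first attempt (missing required field)
		JSON.stringify({ name: 'Invalid' }), // repair retry (still missing, so the item errors)
	],
});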