feat: Optimise langchain calls in batching mode (#15243)

Benjamin Schroth
2025-05-13 13:58:38 +02:00
committed by GitHub
parent 8591c2e0d1
commit ff156930c5
35 changed files with 2946 additions and 1171 deletions

View File: InformationExtractor.node.ts

@@ -1,9 +1,7 @@
 import type { BaseLanguageModel } from '@langchain/core/language_models/base';
-import { HumanMessage } from '@langchain/core/messages';
-import { ChatPromptTemplate, SystemMessagePromptTemplate } from '@langchain/core/prompts';
 import type { JSONSchema7 } from 'json-schema';
 import { OutputFixingParser, StructuredOutputParser } from 'langchain/output_parsers';
-import { jsonParse, NodeConnectionTypes, NodeOperationError } from 'n8n-workflow';
+import { jsonParse, NodeConnectionTypes, NodeOperationError, sleep } from 'n8n-workflow';
 import type {
 	INodeType,
 	INodeTypeDescription,
@@ -15,15 +13,13 @@ import type { z } from 'zod';
 import { inputSchemaField, jsonSchemaExampleField, schemaTypeField } from '@utils/descriptions';
 import { convertJsonSchemaToZod, generateSchema } from '@utils/schemaParsing';
 import { getTracingConfig } from '@utils/tracing';
+import { getBatchingOptionFields } from '@utils/sharedFields';
+import { SYSTEM_PROMPT_TEMPLATE } from './constants';
 import { makeZodSchemaFromAttributes } from './helpers';
+import { processItem } from './processItem';
 import type { AttributeDefinition } from './types';
-
-const SYSTEM_PROMPT_TEMPLATE = `You are an expert extraction algorithm.
-Only extract relevant information from the text.
-If you do not know the value of an attribute asked to extract, you may omit the attribute's value.`;
 
 export class InformationExtractor implements INodeType {
 	description: INodeTypeDescription = {
 		displayName: 'Information Extractor',
@@ -31,7 +27,7 @@ export class InformationExtractor implements INodeType {
 		icon: 'fa:project-diagram',
 		iconColor: 'black',
 		group: ['transform'],
-		version: 1,
+		version: [1, 1.1],
 		description: 'Extract information from text in a structured format',
 		codex: {
 			alias: ['NER', 'parse', 'parsing', 'JSON', 'data extraction', 'structured'],
@@ -213,6 +209,11 @@ export class InformationExtractor implements INodeType {
 						rows: 6,
 					},
 				},
+				getBatchingOptionFields({
+					show: {
+						'@version': [{ _cnd: { gte: 1.1 } }],
+					},
+				}),
 			],
 		},
 	],
@@ -265,38 +266,59 @@
 		}
 		const resultData: INodeExecutionData[] = [];
-		for (let itemIndex = 0; itemIndex < items.length; itemIndex++) {
-			const input = this.getNodeParameter('text', itemIndex) as string;
-			const inputPrompt = new HumanMessage(input);
-			const options = this.getNodeParameter('options', itemIndex, {}) as {
-				systemPromptTemplate?: string;
-			};
-			const systemPromptTemplate = SystemMessagePromptTemplate.fromTemplate(
-				`${options.systemPromptTemplate ?? SYSTEM_PROMPT_TEMPLATE}
-{format_instructions}`,
-			);
-			const messages = [
-				await systemPromptTemplate.format({
-					format_instructions: parser.getFormatInstructions(),
-				}),
-				inputPrompt,
-			];
-			const prompt = ChatPromptTemplate.fromMessages(messages);
-			const chain = prompt.pipe(llm).pipe(parser).withConfig(getTracingConfig(this));
-			try {
-				const output = await chain.invoke(messages);
-				resultData.push({ json: { output } });
-			} catch (error) {
-				if (this.continueOnFail()) {
-					resultData.push({ json: { error: error.message }, pairedItem: { item: itemIndex } });
-					continue;
-				}
-				throw error;
-			}
-		}
+		const batchSize = this.getNodeParameter('options.batching.batchSize', 0, 5) as number;
+		const delayBetweenBatches = this.getNodeParameter(
+			'options.batching.delayBetweenBatches',
+			0,
+			0,
+		) as number;
+		if (this.getNode().typeVersion >= 1.1 && batchSize >= 1) {
+			// Batch processing
+			for (let i = 0; i < items.length; i += batchSize) {
+				const batch = items.slice(i, i + batchSize);
+				const batchPromises = batch.map(async (_item, batchItemIndex) => {
+					const itemIndex = i + batchItemIndex;
+					return await processItem(this, itemIndex, llm, parser);
+				});
+				const batchResults = await Promise.allSettled(batchPromises);
+				batchResults.forEach((response, index) => {
+					if (response.status === 'rejected') {
+						const error = response.reason as Error;
+						if (this.continueOnFail()) {
+							resultData.push({
+								json: { error: error.message },
+								pairedItem: { item: i + index },
+							});
+							return;
+						} else {
+							throw new NodeOperationError(this.getNode(), error.message);
+						}
+					}
+					const output = response.value;
+					resultData.push({ json: { output } });
+				});
+				// Add delay between batches if not the last batch
+				if (i + batchSize < items.length && delayBetweenBatches > 0) {
+					await sleep(delayBetweenBatches);
+				}
+			}
+		} else {
+			// Sequential processing
+			for (let itemIndex = 0; itemIndex < items.length; itemIndex++) {
+				try {
+					const output = await processItem(this, itemIndex, llm, parser);
+					resultData.push({ json: { output } });
+				} catch (error) {
+					if (this.continueOnFail()) {
+						resultData.push({ json: { error: error.message }, pairedItem: { item: itemIndex } });
+						continue;
+					}
+					throw error;
+				}
+			}
+		}
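
Note on the pattern above: items are fanned out in chunks of batchSize via Promise.allSettled, so one failed item cannot reject its whole batch, with an optional pause between chunks. A standalone sketch of the same idea, assuming nothing but the standard library; processInBatches is a hypothetical name, not part of this commit:

async function processInBatches<T, R>(
	items: T[],
	batchSize: number,
	delayMs: number,
	processOne: (item: T, index: number) => Promise<R>,
): Promise<Array<PromiseSettledResult<R>>> {
	const results: Array<PromiseSettledResult<R>> = [];
	for (let i = 0; i < items.length; i += batchSize) {
		// Every item in the current chunk runs concurrently; allSettled records
		// per-item rejections instead of failing the whole chunk.
		const batch = items.slice(i, i + batchSize);
		results.push(
			...(await Promise.allSettled(batch.map(async (item, j) => await processOne(item, i + j)))),
		);
		// Throttle between chunks, but never after the last one.
		if (i + batchSize < items.length && delayMs > 0) {
			await new Promise((resolve) => setTimeout(resolve, delayMs));
		}
	}
	return results;
}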

View File: constants.ts

@@ -0,0 +1,3 @@
+export const SYSTEM_PROMPT_TEMPLATE = `You are an expert extraction algorithm.
+Only extract relevant information from the text.
+If you do not know the value of an attribute asked to extract, you may omit the attribute's value.`;

View File: processItem.ts

@@ -0,0 +1,39 @@
+import type { BaseLanguageModel } from '@langchain/core/language_models/base';
+import { HumanMessage } from '@langchain/core/messages';
+import { ChatPromptTemplate, SystemMessagePromptTemplate } from '@langchain/core/prompts';
+import type { OutputFixingParser } from 'langchain/output_parsers';
+import type { IExecuteFunctions } from 'n8n-workflow';
+
+import { getTracingConfig } from '@utils/tracing';
+
+import { SYSTEM_PROMPT_TEMPLATE } from './constants';
+
+export async function processItem(
+	ctx: IExecuteFunctions,
+	itemIndex: number,
+	llm: BaseLanguageModel,
+	parser: OutputFixingParser<object>,
+) {
+	const input = ctx.getNodeParameter('text', itemIndex) as string;
+	const inputPrompt = new HumanMessage(input);
+
+	const options = ctx.getNodeParameter('options', itemIndex, {}) as {
+		systemPromptTemplate?: string;
+	};
+
+	const systemPromptTemplate = SystemMessagePromptTemplate.fromTemplate(
+		`${options.systemPromptTemplate ?? SYSTEM_PROMPT_TEMPLATE}
+{format_instructions}`,
+	);
+
+	const messages = [
+		await systemPromptTemplate.format({
+			format_instructions: parser.getFormatInstructions(),
+		}),
+		inputPrompt,
+	];
+
+	const prompt = ChatPromptTemplate.fromMessages(messages);
+	const chain = prompt.pipe(llm).pipe(parser).withConfig(getTracingConfig(ctx));
+	return await chain.invoke(messages);
+}
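
For context, processItem composes a plain LangChain pipeline: system prompt (carrying the parser's format instructions), then the model, then the parser, where OutputFixingParser wraps a StructuredOutputParser and re-asks the model once if parsing fails. A rough usage sketch of that parser setup outside n8n; ChatOpenAI and the Zod schema are stand-in assumptions, any BaseLanguageModel works:

import { ChatOpenAI } from '@langchain/openai';
import { OutputFixingParser, StructuredOutputParser } from 'langchain/output_parsers';
import { z } from 'zod';

// Stand-in model; the node receives whatever model is connected upstream.
const llm = new ChatOpenAI({ model: 'gpt-4o-mini' });

// StructuredOutputParser validates the LLM text against the schema;
// OutputFixingParser feeds parse errors back to the model for one repair pass.
const parser = OutputFixingParser.fromLLM(
	llm,
	StructuredOutputParser.fromZodSchema(z.object({ name: z.string(), age: z.number() })),
);

// parser.getFormatInstructions() is the string processItem injects as {format_instructions}.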

View File: InformationExtractor.node.test.ts

@@ -41,7 +41,11 @@ function formatFakeLlmResponse(object: Record<string, any>) {
 	return `\`\`\`json\n${JSON.stringify(object, null, 2)}\n\`\`\``;
 }
 
-const createExecuteFunctionsMock = (parameters: IDataObject, fakeLlm: BaseLanguageModel) => {
+const createExecuteFunctionsMock = (
+	parameters: IDataObject,
+	fakeLlm: BaseLanguageModel,
+	inputData = [{ json: {} }],
+) => {
 	const nodeParameters = parameters;
 	return {
@@ -49,13 +53,15 @@ const createExecuteFunctionsMock = (parameters: IDataObject, fakeLlm: BaseLangua
 			return get(nodeParameters, parameter);
 		},
 		getNode() {
-			return {};
+			return {
+				typeVersion: 1.1,
+			};
 		},
 		getInputConnectionData() {
 			return fakeLlm;
 		},
 		getInputData() {
-			return [{ json: {} }];
+			return inputData;
 		},
 		getWorkflow() {
 			return {
@@ -215,4 +221,132 @@ describe('InformationExtractor', () => {
 			expect(response).toEqual([[{ json: { output: { name: 'John', age: 30 } } }]]);
 		});
 	});
+
+	describe('Batch Processing', () => {
+		it('should process multiple items in batches', async () => {
+			const node = new InformationExtractor();
+			const inputData = [
+				{ json: { text: 'John is 30 years old' } },
+				{ json: { text: 'Alice is 25 years old' } },
+				{ json: { text: 'Bob is 40 years old' } },
+			];
+			const response = await node.execute.call(
+				createExecuteFunctionsMock(
+					{
+						text: 'John is 30 years old',
+						attributes: {
+							attributes: mockPersonAttributes,
+						},
+						options: {
+							batching: {
+								batchSize: 2,
+								delayBetweenBatches: 0,
+							},
+						},
+						schemaType: 'fromAttributes',
+					},
+					new FakeListChatModel({
+						responses: [
+							formatFakeLlmResponse({ name: 'John', age: 30 }),
+							formatFakeLlmResponse({ name: 'Alice', age: 25 }),
+							formatFakeLlmResponse({ name: 'Bob', age: 40 }),
+						],
+					}),
+					inputData,
+				),
+			);
+			expect(response).toEqual([
+				[
+					{ json: { output: { name: 'John', age: 30 } } },
+					{ json: { output: { name: 'Alice', age: 25 } } },
+					{ json: { output: { name: 'Bob', age: 40 } } },
+				],
+			]);
+		});
+
+		it('should handle errors in batch processing', async () => {
+			const node = new InformationExtractor();
+			const inputData = [
+				{ json: { text: 'John is 30 years old' } },
+				{ json: { text: 'Invalid text' } },
+				{ json: { text: 'Bob is 40 years old' } },
+			];
+			const mockExecuteFunctions = createExecuteFunctionsMock(
+				{
+					text: 'John is 30 years old',
+					attributes: {
+						attributes: mockPersonAttributesRequired,
+					},
+					options: {
+						batching: {
+							batchSize: 2,
+							delayBetweenBatches: 0,
+						},
+					},
+					schemaType: 'fromAttributes',
+				},
+				new FakeListChatModel({
+					responses: [
+						formatFakeLlmResponse({ name: 'John', age: 30 }),
+						formatFakeLlmResponse({ name: 'Invalid' }), // Missing required age
+						formatFakeLlmResponse({ name: 'Invalid' }), // Missing required age on retry
+						formatFakeLlmResponse({ name: 'Bob', age: 40 }),
+					],
+				}),
+				inputData,
+			);
+			mockExecuteFunctions.continueOnFail = () => true;
+			const response = await node.execute.call(mockExecuteFunctions);
+			expect(response[0]).toHaveLength(3);
+			expect(response[0][0]).toEqual({ json: { output: { name: 'John', age: 30 } } });
+			expect(response[0][1]).toEqual({
+				json: { error: expect.stringContaining('Failed to parse') },
+				pairedItem: { item: 1 },
+			});
+			expect(response[0][2]).toEqual({ json: { output: { name: 'Bob', age: 40 } } });
+		});
+
+		it('should throw error if batch processing fails and continueOnFail is false', async () => {
+			const node = new InformationExtractor();
+			const inputData = [
+				{ json: { text: 'John is 30 years old' } },
+				{ json: { text: 'Invalid text' } },
+				{ json: { text: 'Bob is 40 years old' } },
+			];
+			const mockExecuteFunctions = createExecuteFunctionsMock(
+				{
+					text: 'John is 30 years old',
+					attributes: {
+						attributes: mockPersonAttributesRequired,
+					},
+					options: {
+						batching: {
+							batchSize: 2,
+							delayBetweenBatches: 0,
+						},
+					},
+					schemaType: 'fromAttributes',
+				},
+				new FakeListChatModel({
+					responses: [
+						formatFakeLlmResponse({ name: 'John', age: 30 }),
+						formatFakeLlmResponse({ name: 'Invalid' }), // Missing required age
+						formatFakeLlmResponse({ name: 'Invalid' }), // Missing required age on retry
+						formatFakeLlmResponse({ name: 'Bob', age: 40 }),
+					],
+				}),
+				inputData,
+			);
+			await expect(node.execute.call(mockExecuteFunctions)).rejects.toThrow('Failed to parse');
+		});
+	});
 });
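
One detail worth calling out in the fixtures above: a failing item consumes two queued responses, because OutputFixingParser makes exactly one repair call after the first parse failure (hence the "on retry" comments). FakeListChatModel, LangChain's real test double from @langchain/core/utils/testing, returns its responses strictly in order, one per model call. A minimal version of that fixture:

import { FakeListChatModel } from '@langchain/core/utils/testing';

// A single item that fails schema validation burns two entries:
// the initial attempt plus OutputFixingParser's one repair retry.
const fakeLlm = new FakeListChatModel({
	responses: [
		JSON.stringify({ name: 'Invalid' }), // first attempt (missing required field)
		JSON.stringify({ name: 'Invalid' }), // repair retry (still missing, so the item errors)
	],
});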