Mirror of https://github.com/Abdulazizzn/n8n-enterprise-unlocked.git (synced 2025-12-20 11:22:15 +00:00)
feat: Optimise langchain calls in batching mode (#15243)
@@ -1,9 +1,7 @@
 import type { BaseLanguageModel } from '@langchain/core/language_models/base';
-import { HumanMessage } from '@langchain/core/messages';
-import { ChatPromptTemplate, SystemMessagePromptTemplate } from '@langchain/core/prompts';
 import type { JSONSchema7 } from 'json-schema';
 import { OutputFixingParser, StructuredOutputParser } from 'langchain/output_parsers';
-import { jsonParse, NodeConnectionTypes, NodeOperationError } from 'n8n-workflow';
+import { jsonParse, NodeConnectionTypes, NodeOperationError, sleep } from 'n8n-workflow';
 import type {
 	INodeType,
 	INodeTypeDescription,
@@ -15,15 +13,13 @@ import type { z } from 'zod';
 
 import { inputSchemaField, jsonSchemaExampleField, schemaTypeField } from '@utils/descriptions';
 import { convertJsonSchemaToZod, generateSchema } from '@utils/schemaParsing';
-import { getTracingConfig } from '@utils/tracing';
+import { getBatchingOptionFields } from '@utils/sharedFields';
 
+import { SYSTEM_PROMPT_TEMPLATE } from './constants';
 import { makeZodSchemaFromAttributes } from './helpers';
+import { processItem } from './processItem';
 import type { AttributeDefinition } from './types';
 
-const SYSTEM_PROMPT_TEMPLATE = `You are an expert extraction algorithm.
-Only extract relevant information from the text.
-If you do not know the value of an attribute asked to extract, you may omit the attribute's value.`;
-
 export class InformationExtractor implements INodeType {
 	description: INodeTypeDescription = {
 		displayName: 'Information Extractor',
@@ -31,7 +27,7 @@ export class InformationExtractor implements INodeType {
 		icon: 'fa:project-diagram',
 		iconColor: 'black',
 		group: ['transform'],
-		version: 1,
+		version: [1, 1.1],
 		description: 'Extract information from text in a structured format',
 		codex: {
 			alias: ['NER', 'parse', 'parsing', 'JSON', 'data extraction', 'structured'],
@@ -213,6 +209,11 @@ export class InformationExtractor implements INodeType {
 						rows: 6,
 					},
 				},
+				getBatchingOptionFields({
+					show: {
+						'@version': [{ _cnd: { gte: 1.1 } }],
+					},
+				}),
 			],
 		},
 	],
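A note on the new option: getBatchingOptionFields is the shared helper from @utils/sharedFields, and its display condition pins the option to node version 1.1 and above, so workflows saved on version 1 keep the old sequential behaviour. The sketch below shows roughly the collection the helper contributes; the batching, batchSize, and delayBetweenBatches names come from the parameter paths used in this diff, while the display strings and defaults are assumptions, not copied source.

// Hedged sketch only: the real definition lives in @utils/sharedFields.
// Field names come from the getNodeParameter() paths in this diff;
// display strings and defaults are assumptions.
const batchingOptionSketch = {
	displayName: 'Batch Processing',
	name: 'batching',
	type: 'collection',
	default: {},
	options: [
		// A default of 5 would match getNodeParameter('options.batching.batchSize', 0, 5)
		{ displayName: 'Batch Size', name: 'batchSize', type: 'number', default: 5 },
		// A default of 0 (no delay, in milliseconds) would match the fallback in execute()
		{ displayName: 'Delay Between Batches', name: 'delayBetweenBatches', type: 'number', default: 0 },
	],
};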
@@ -265,38 +266,59 @@ export class InformationExtractor implements INodeType {
 		}
 
 		const resultData: INodeExecutionData[] = [];
-		for (let itemIndex = 0; itemIndex < items.length; itemIndex++) {
-			const input = this.getNodeParameter('text', itemIndex) as string;
-			const inputPrompt = new HumanMessage(input);
+		const batchSize = this.getNodeParameter('options.batching.batchSize', 0, 5) as number;
+		const delayBetweenBatches = this.getNodeParameter(
+			'options.batching.delayBetweenBatches',
+			0,
+			0,
+		) as number;
+		if (this.getNode().typeVersion >= 1.1 && batchSize >= 1) {
+			// Batch processing
+			for (let i = 0; i < items.length; i += batchSize) {
+				const batch = items.slice(i, i + batchSize);
+				const batchPromises = batch.map(async (_item, batchItemIndex) => {
+					const itemIndex = i + batchItemIndex;
+					return await processItem(this, itemIndex, llm, parser);
+				});
 
-			const options = this.getNodeParameter('options', itemIndex, {}) as {
-				systemPromptTemplate?: string;
-			};
+				const batchResults = await Promise.allSettled(batchPromises);
 
-			const systemPromptTemplate = SystemMessagePromptTemplate.fromTemplate(
-				`${options.systemPromptTemplate ?? SYSTEM_PROMPT_TEMPLATE}
-{format_instructions}`,
-			);
+				batchResults.forEach((response, index) => {
+					if (response.status === 'rejected') {
+						const error = response.reason as Error;
+						if (this.continueOnFail()) {
+							resultData.push({
+								json: { error: error.message },
+								pairedItem: { item: i + index },
+							});
+							return;
+						} else {
+							throw new NodeOperationError(this.getNode(), error.message);
+						}
+					}
+					const output = response.value;
+					resultData.push({ json: { output } });
+				});
 
-			const messages = [
-				await systemPromptTemplate.format({
-					format_instructions: parser.getFormatInstructions(),
-				}),
-				inputPrompt,
-			];
-			const prompt = ChatPromptTemplate.fromMessages(messages);
-			const chain = prompt.pipe(llm).pipe(parser).withConfig(getTracingConfig(this));
-
-			try {
-				const output = await chain.invoke(messages);
-				resultData.push({ json: { output } });
-			} catch (error) {
-				if (this.continueOnFail()) {
-					resultData.push({ json: { error: error.message }, pairedItem: { item: itemIndex } });
-					continue;
+				// Add delay between batches if not the last batch
+				if (i + batchSize < items.length && delayBetweenBatches > 0) {
+					await sleep(delayBetweenBatches);
 				}
+			}
+		} else {
+			// Sequential processing
+			for (let itemIndex = 0; itemIndex < items.length; itemIndex++) {
+				try {
+					const output = await processItem(this, itemIndex, llm, parser);
+					resultData.push({ json: { output } });
+				} catch (error) {
+					if (this.continueOnFail()) {
+						resultData.push({ json: { error: error.message }, pairedItem: { item: itemIndex } });
+						continue;
+					}
 
-				throw error;
+					throw error;
+				}
 			}
 		}
 
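Stripped of n8n specifics, the batching path above is a chunk-and-settle loop: slice the items into fixed-size batches, run each batch concurrently, collect with Promise.allSettled so one rejected item cannot discard the results of its batch-mates, and pause between batches to respect rate limits. A minimal standalone sketch of the same pattern, with generic names that are not part of this PR:

// Standalone sketch of the batching pattern used in execute() above.
const sleep = async (ms: number) =>
	await new Promise<void>((resolve) => {
		setTimeout(resolve, ms);
	});

async function processInBatches<T, R>(
	items: T[],
	batchSize: number,
	delayMs: number,
	processOne: (item: T, index: number) => Promise<R>,
): Promise<Array<{ index: number; result?: R; error?: Error }>> {
	const results: Array<{ index: number; result?: R; error?: Error }> = [];
	for (let i = 0; i < items.length; i += batchSize) {
		// Run one batch concurrently. Unlike Promise.all, allSettled never
		// short-circuits, so a single failure still yields its siblings' results.
		const settled = await Promise.allSettled(
			items.slice(i, i + batchSize).map(async (item, j) => await processOne(item, i + j)),
		);
		settled.forEach((res, j) => {
			if (res.status === 'rejected') {
				results.push({ index: i + j, error: res.reason as Error });
			} else {
				results.push({ index: i + j, result: res.value });
			}
		});
		// Throttle between batches, skipping the pointless delay after the last one.
		if (i + batchSize < items.length && delayMs > 0) await sleep(delayMs);
	}
	return results;
}

For example, processInBatches(texts, 5, 1000, extractOne) would run five extractions at a time with a one-second pause between batches, which is exactly the shape of the node's new default behaviour.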
@@ -0,0 +1,3 @@
+export const SYSTEM_PROMPT_TEMPLATE = `You are an expert extraction algorithm.
+Only extract relevant information from the text.
+If you do not know the value of an attribute asked to extract, you may omit the attribute's value.`;
@@ -0,0 +1,39 @@
+import type { BaseLanguageModel } from '@langchain/core/language_models/base';
+import { HumanMessage } from '@langchain/core/messages';
+import { ChatPromptTemplate, SystemMessagePromptTemplate } from '@langchain/core/prompts';
+import type { OutputFixingParser } from 'langchain/output_parsers';
+import type { IExecuteFunctions } from 'n8n-workflow';
+
+import { getTracingConfig } from '@utils/tracing';
+
+import { SYSTEM_PROMPT_TEMPLATE } from './constants';
+
+export async function processItem(
+	ctx: IExecuteFunctions,
+	itemIndex: number,
+	llm: BaseLanguageModel,
+	parser: OutputFixingParser<object>,
+) {
+	const input = ctx.getNodeParameter('text', itemIndex) as string;
+	const inputPrompt = new HumanMessage(input);
+
+	const options = ctx.getNodeParameter('options', itemIndex, {}) as {
+		systemPromptTemplate?: string;
+	};
+
+	const systemPromptTemplate = SystemMessagePromptTemplate.fromTemplate(
+		`${options.systemPromptTemplate ?? SYSTEM_PROMPT_TEMPLATE}
+{format_instructions}`,
+	);
+
+	const messages = [
+		await systemPromptTemplate.format({
+			format_instructions: parser.getFormatInstructions(),
+		}),
+		inputPrompt,
+	];
+	const prompt = ChatPromptTemplate.fromMessages(messages);
+	const chain = prompt.pipe(llm).pipe(parser).withConfig(getTracingConfig(ctx));
+
+	return await chain.invoke(messages);
+}
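For context on the parser parameter: in this node it is an OutputFixingParser wrapping a StructuredOutputParser, which is why the tests below queue two bad responses per failing item, one for the original attempt and one for the fix-up retry. A hedged sketch of how such a pair is typically assembled; the personSchema shape is illustrative, since the node actually derives its zod schema from the user-configured attributes:

import type { BaseLanguageModel } from '@langchain/core/language_models/base';
import { OutputFixingParser, StructuredOutputParser } from 'langchain/output_parsers';
import { z } from 'zod';

// Illustrative stand-in: the node builds this schema from the user's attribute list.
const personSchema = z.object({
	name: z.string().describe('Name of the person'),
	age: z.number().describe('Age of the person'), // required here, as in the failing tests
});

declare const llm: BaseLanguageModel; // supplied via the node's language model input

const structuredParser = StructuredOutputParser.fromZodSchema(personSchema);
// On a parse failure, OutputFixingParser sends the broken output back to the
// model for one repair attempt before rethrowing, hence the two queued
// 'Invalid' fake responses per failed item in the tests below.
const parser = OutputFixingParser.fromLLM(llm, structuredParser);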
@@ -41,7 +41,11 @@ function formatFakeLlmResponse(object: Record<string, any>) {
 	return `\`\`\`json\n${JSON.stringify(object, null, 2)}\n\`\`\``;
 }
 
-const createExecuteFunctionsMock = (parameters: IDataObject, fakeLlm: BaseLanguageModel) => {
+const createExecuteFunctionsMock = (
+	parameters: IDataObject,
+	fakeLlm: BaseLanguageModel,
+	inputData = [{ json: {} }],
+) => {
 	const nodeParameters = parameters;
 
 	return {
@@ -49,13 +53,15 @@ const createExecuteFunctionsMock = (parameters: IDataObject, fakeLlm: BaseLangua
 			return get(nodeParameters, parameter);
 		},
 		getNode() {
-			return {};
+			return {
+				typeVersion: 1.1,
+			};
 		},
 		getInputConnectionData() {
 			return fakeLlm;
 		},
 		getInputData() {
-			return [{ json: {} }];
+			return inputData;
 		},
 		getWorkflow() {
 			return {
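Two of the mock changes above are load-bearing: getNode() must report typeVersion 1.1 or execute() falls back to the sequential path, and getInputData() must return the multi-item fixture so batching has something to slice. FakeListChatModel itself simply replays its responses queue in order, one per model call; a quick standalone sketch, not taken from this PR:

import { FakeListChatModel } from '@langchain/core/utils/testing';

async function demo() {
	// Each invoke() consumes the next queued response, regardless of the prompt,
	// which is how the tests line up one (or, with a fix-up retry, two)
	// responses per input item.
	const fake = new FakeListChatModel({ responses: ['first', 'second'] });
	console.log((await fake.invoke('anything')).content); // 'first'
	console.log((await fake.invoke('anything')).content); // 'second'
}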
@@ -215,4 +221,132 @@ describe('InformationExtractor', () => {
 			expect(response).toEqual([[{ json: { output: { name: 'John', age: 30 } } }]]);
 		});
 	});
+
+	describe('Batch Processing', () => {
+		it('should process multiple items in batches', async () => {
+			const node = new InformationExtractor();
+			const inputData = [
+				{ json: { text: 'John is 30 years old' } },
+				{ json: { text: 'Alice is 25 years old' } },
+				{ json: { text: 'Bob is 40 years old' } },
+			];
+
+			const response = await node.execute.call(
+				createExecuteFunctionsMock(
+					{
+						text: 'John is 30 years old',
+						attributes: {
+							attributes: mockPersonAttributes,
+						},
+						options: {
+							batching: {
+								batchSize: 2,
+								delayBetweenBatches: 0,
+							},
+						},
+						schemaType: 'fromAttributes',
+					},
+					new FakeListChatModel({
+						responses: [
+							formatFakeLlmResponse({ name: 'John', age: 30 }),
+							formatFakeLlmResponse({ name: 'Alice', age: 25 }),
+							formatFakeLlmResponse({ name: 'Bob', age: 40 }),
+						],
+					}),
+					inputData,
+				),
+			);
+
+			expect(response).toEqual([
+				[
+					{ json: { output: { name: 'John', age: 30 } } },
+					{ json: { output: { name: 'Alice', age: 25 } } },
+					{ json: { output: { name: 'Bob', age: 40 } } },
+				],
+			]);
+		});
+
+		it('should handle errors in batch processing', async () => {
+			const node = new InformationExtractor();
+			const inputData = [
+				{ json: { text: 'John is 30 years old' } },
+				{ json: { text: 'Invalid text' } },
+				{ json: { text: 'Bob is 40 years old' } },
+			];
+
+			const mockExecuteFunctions = createExecuteFunctionsMock(
+				{
+					text: 'John is 30 years old',
+					attributes: {
+						attributes: mockPersonAttributesRequired,
+					},
+					options: {
+						batching: {
+							batchSize: 2,
+							delayBetweenBatches: 0,
+						},
+					},
+					schemaType: 'fromAttributes',
+				},
+				new FakeListChatModel({
+					responses: [
+						formatFakeLlmResponse({ name: 'John', age: 30 }),
+						formatFakeLlmResponse({ name: 'Invalid' }), // Missing required age
+						formatFakeLlmResponse({ name: 'Invalid' }), // Missing required age on retry
+						formatFakeLlmResponse({ name: 'Bob', age: 40 }),
+					],
+				}),
+				inputData,
+			);
+
+			mockExecuteFunctions.continueOnFail = () => true;
+
+			const response = await node.execute.call(mockExecuteFunctions);
+
+			expect(response[0]).toHaveLength(3);
+			expect(response[0][0]).toEqual({ json: { output: { name: 'John', age: 30 } } });
+			expect(response[0][1]).toEqual({
+				json: { error: expect.stringContaining('Failed to parse') },
+				pairedItem: { item: 1 },
+			});
+			expect(response[0][2]).toEqual({ json: { output: { name: 'Bob', age: 40 } } });
+		});
+
+		it('should throw error if batch processing fails and continueOnFail is false', async () => {
+			const node = new InformationExtractor();
+			const inputData = [
+				{ json: { text: 'John is 30 years old' } },
+				{ json: { text: 'Invalid text' } },
+				{ json: { text: 'Bob is 40 years old' } },
+			];
+
+			const mockExecuteFunctions = createExecuteFunctionsMock(
+				{
+					text: 'John is 30 years old',
+					attributes: {
+						attributes: mockPersonAttributesRequired,
+					},
+					options: {
+						batching: {
+							batchSize: 2,
+							delayBetweenBatches: 0,
+						},
+					},
+					schemaType: 'fromAttributes',
+				},
+				new FakeListChatModel({
+					responses: [
+						formatFakeLlmResponse({ name: 'John', age: 30 }),
+						formatFakeLlmResponse({ name: 'Invalid' }), // Missing required age
+						formatFakeLlmResponse({ name: 'Invalid' }), // Missing required age on retry
+						formatFakeLlmResponse({ name: 'Bob', age: 40 }),
+					],
+				}),
+				inputData,
+			);
+
+			await expect(node.execute.call(mockExecuteFunctions)).rejects.toThrow('Failed to parse');
+		});
+	});
 });