refactor(Basic LLM Chain Node): Refactor Basic LLM Chain & add tests (#13850)

This commit is contained in:
oleg
2025-03-14 11:38:22 +01:00
committed by GitHub
parent 311553926a
commit 1bfd128717
24 changed files with 1754 additions and 609 deletions

View File

@@ -1,262 +1,32 @@
import type { BaseLanguageModel } from '@langchain/core/language_models/base';
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { HumanMessage } from '@langchain/core/messages';
import {
AIMessagePromptTemplate,
PromptTemplate,
SystemMessagePromptTemplate,
HumanMessagePromptTemplate,
ChatPromptTemplate,
} from '@langchain/core/prompts';
import { ChatGoogleGenerativeAI } from '@langchain/google-genai';
import { ChatOllama } from '@langchain/ollama';
import { LLMChain } from 'langchain/chains';
import { CombiningOutputParser } from 'langchain/output_parsers';
import type {
IBinaryData,
IDataObject,
IExecuteFunctions,
INodeExecutionData,
INodeType,
INodeTypeDescription,
} from 'n8n-workflow';
import { NodeApiError, NodeConnectionType, NodeOperationError } from 'n8n-workflow';
import { getPromptInputByType } from '@utils/helpers';
import { getOptionalOutputParser } from '@utils/output_parsers/N8nOutputParser';
// Import from centralized module
import {
ApplicationError,
NodeApiError,
NodeConnectionType,
NodeOperationError,
} from 'n8n-workflow';
import { promptTypeOptions, textFromPreviousNode } from '@utils/descriptions';
import { getPromptInputByType, isChatInstance } from '@utils/helpers';
import type { N8nOutputParser } from '@utils/output_parsers/N8nOutputParser';
import { getOptionalOutputParsers } from '@utils/output_parsers/N8nOutputParser';
import { getTemplateNoticeField } from '@utils/sharedFields';
import { getTracingConfig } from '@utils/tracing';
import { dataUriFromImageData, UnsupportedMimeTypeError } from './utils';
executeChain,
formatResponse,
getInputs,
nodeProperties,
type MessageTemplate,
} from './methods';
import {
getCustomErrorMessage as getCustomOpenAiErrorMessage,
isOpenAiError,
} from '../../vendors/OpenAi/helpers/error-handling';
interface MessagesTemplate {
type: string;
message: string;
messageType: 'text' | 'imageBinary' | 'imageUrl';
binaryImageDataKey?: string;
imageUrl?: string;
imageDetail?: 'auto' | 'low' | 'high';
}
async function getImageMessage(
context: IExecuteFunctions,
itemIndex: number,
message: MessagesTemplate,
) {
if (message.messageType !== 'imageBinary' && message.messageType !== 'imageUrl') {
// eslint-disable-next-line n8n-nodes-base/node-execute-block-wrong-error-thrown
throw new NodeOperationError(
context.getNode(),
'Invalid message type. Only imageBinary and imageUrl are supported',
);
}
const detail = message.imageDetail === 'auto' ? undefined : message.imageDetail;
if (message.messageType === 'imageUrl' && message.imageUrl) {
return new HumanMessage({
content: [
{
type: 'image_url',
image_url: {
url: message.imageUrl,
detail,
},
},
],
});
}
const binaryDataKey = message.binaryImageDataKey ?? 'data';
const inputData = context.getInputData()[itemIndex];
const binaryData = inputData.binary?.[binaryDataKey] as IBinaryData;
if (!binaryData) {
throw new NodeOperationError(context.getNode(), 'No binary data set.');
}
const bufferData = await context.helpers.getBinaryDataBuffer(itemIndex, binaryDataKey);
const model = (await context.getInputConnectionData(
NodeConnectionType.AiLanguageModel,
0,
)) as BaseLanguageModel;
try {
const dataURI = dataUriFromImageData(binaryData, bufferData);
const directUriModels = [ChatGoogleGenerativeAI, ChatOllama];
const imageUrl = directUriModels.some((i) => model instanceof i)
? dataURI
: { url: dataURI, detail };
return new HumanMessage({
content: [
{
type: 'image_url',
image_url: imageUrl,
},
],
});
} catch (error) {
if (error instanceof UnsupportedMimeTypeError)
throw new NodeOperationError(context.getNode(), error.message);
throw error;
}
}
async function getChainPromptTemplate(
context: IExecuteFunctions,
itemIndex: number,
llm: BaseLanguageModel | BaseChatModel,
messages?: MessagesTemplate[],
formatInstructions?: string,
query?: string,
) {
const queryTemplate = new PromptTemplate({
template: `{query}${formatInstructions ? '\n{formatInstructions}' : ''}`,
inputVariables: ['query'],
partialVariables: formatInstructions ? { formatInstructions } : undefined,
});
if (isChatInstance(llm)) {
const parsedMessages = await Promise.all(
(messages ?? []).map(async (message) => {
const messageClass = [
SystemMessagePromptTemplate,
AIMessagePromptTemplate,
HumanMessagePromptTemplate,
].find((m) => m.lc_name() === message.type);
if (!messageClass) {
// eslint-disable-next-line n8n-nodes-base/node-execute-block-wrong-error-thrown
throw new ApplicationError('Invalid message type', {
extra: { messageType: message.type },
});
}
if (messageClass === HumanMessagePromptTemplate && message.messageType !== 'text') {
const test = await getImageMessage(context, itemIndex, message);
return test;
}
const res = messageClass.fromTemplate(
// Since we're using the message as template, we need to escape any curly braces
// so LangChain doesn't try to parse them as variables
(message.message || '').replace(/[{}]/g, (match) => match + match),
);
return res;
}),
);
const lastMessage = parsedMessages[parsedMessages.length - 1];
// If the last message is a human message and it has an array of content, we need to add the query to the last message
if (lastMessage instanceof HumanMessage && Array.isArray(lastMessage.content)) {
const humanMessage = new HumanMessagePromptTemplate(queryTemplate);
const test = await humanMessage.format({ query });
lastMessage.content.push({ text: test.content.toString(), type: 'text' });
} else {
parsedMessages.push(new HumanMessagePromptTemplate(queryTemplate));
}
return ChatPromptTemplate.fromMessages(parsedMessages);
}
return queryTemplate;
}
async function createSimpleLLMChain(
context: IExecuteFunctions,
llm: BaseLanguageModel,
query: string,
prompt: ChatPromptTemplate | PromptTemplate,
): Promise<string[]> {
const chain = new LLMChain({
llm,
prompt,
}).withConfig(getTracingConfig(context));
const response = (await chain.invoke({
query,
signal: context.getExecutionCancelSignal(),
})) as string[];
return Array.isArray(response) ? response : [response];
}
async function getChain(
context: IExecuteFunctions,
itemIndex: number,
query: string,
llm: BaseLanguageModel,
outputParsers: N8nOutputParser[],
messages?: MessagesTemplate[],
): Promise<unknown[]> {
const chatTemplate: ChatPromptTemplate | PromptTemplate = await getChainPromptTemplate(
context,
itemIndex,
llm,
messages,
undefined,
query,
);
// If there are no output parsers, create a simple LLM chain and execute the query
if (!outputParsers.length) {
return await createSimpleLLMChain(context, llm, query, chatTemplate);
}
// If there's only one output parser, use it; otherwise, create a combined output parser
const combinedOutputParser =
outputParsers.length === 1 ? outputParsers[0] : new CombiningOutputParser(...outputParsers);
const formatInstructions = combinedOutputParser.getFormatInstructions();
// Create a prompt template incorporating the format instructions and query
const prompt = await getChainPromptTemplate(
context,
itemIndex,
llm,
messages,
formatInstructions,
query,
);
const chain = prompt.pipe(llm).pipe(combinedOutputParser);
const response = (await chain.withConfig(getTracingConfig(context)).invoke({ query })) as
| string
| string[];
return Array.isArray(response) ? response : [response];
}
function getInputs(parameters: IDataObject) {
const hasOutputParser = parameters?.hasOutputParser;
const inputs = [
{ displayName: '', type: NodeConnectionType.Main },
{
displayName: 'Model',
maxConnections: 1,
type: NodeConnectionType.AiLanguageModel,
required: true,
},
];
// If `hasOutputParser` is undefined it must be version 1.3 or earlier so we
// always add the output parser input
if (hasOutputParser === undefined || hasOutputParser === true) {
inputs.push({ displayName: 'Output Parser', type: NodeConnectionType.AiOutputParser });
}
return inputs;
}
/**
* Basic LLM Chain Node Implementation
* Allows connecting to language models with optional structured output parsing
*/
export class ChainLlm implements INodeType {
description: INodeTypeDescription = {
displayName: 'Basic LLM Chain',
@@ -287,253 +57,32 @@ export class ChainLlm implements INodeType {
inputs: `={{ ((parameter) => { ${getInputs.toString()}; return getInputs(parameter) })($parameter) }}`,
outputs: [NodeConnectionType.Main],
credentials: [],
properties: [
getTemplateNoticeField(1978),
{
displayName: 'Prompt',
name: 'prompt',
type: 'string',
required: true,
default: '={{ $json.input }}',
displayOptions: {
show: {
'@version': [1],
},
},
},
{
displayName: 'Prompt',
name: 'prompt',
type: 'string',
required: true,
default: '={{ $json.chat_input }}',
displayOptions: {
show: {
'@version': [1.1, 1.2],
},
},
},
{
displayName: 'Prompt',
name: 'prompt',
type: 'string',
required: true,
default: '={{ $json.chatInput }}',
displayOptions: {
show: {
'@version': [1.3],
},
},
},
{
...promptTypeOptions,
displayOptions: {
hide: {
'@version': [1, 1.1, 1.2, 1.3],
},
},
},
{
...textFromPreviousNode,
displayOptions: { show: { promptType: ['auto'], '@version': [{ _cnd: { gte: 1.5 } }] } },
},
{
displayName: 'Prompt (User Message)',
name: 'text',
type: 'string',
required: true,
default: '',
placeholder: 'e.g. Hello, how can you help me?',
typeOptions: {
rows: 2,
},
displayOptions: {
show: {
promptType: ['define'],
},
},
},
{
displayName: 'Require Specific Output Format',
name: 'hasOutputParser',
type: 'boolean',
default: false,
noDataExpression: true,
displayOptions: {
hide: {
'@version': [1, 1.1, 1.3],
},
},
},
{
displayName: 'Chat Messages (if Using a Chat Model)',
name: 'messages',
type: 'fixedCollection',
typeOptions: {
multipleValues: true,
},
default: {},
placeholder: 'Add prompt',
options: [
{
name: 'messageValues',
displayName: 'Prompt',
values: [
{
displayName: 'Type Name or ID',
name: 'type',
type: 'options',
options: [
{
name: 'AI',
value: AIMessagePromptTemplate.lc_name(),
},
{
name: 'System',
value: SystemMessagePromptTemplate.lc_name(),
},
{
name: 'User',
value: HumanMessagePromptTemplate.lc_name(),
},
],
default: SystemMessagePromptTemplate.lc_name(),
},
{
displayName: 'Message Type',
name: 'messageType',
type: 'options',
displayOptions: {
show: {
type: [HumanMessagePromptTemplate.lc_name()],
},
},
options: [
{
name: 'Text',
value: 'text',
description: 'Simple text message',
},
{
name: 'Image (Binary)',
value: 'imageBinary',
description: 'Process the binary input from the previous node',
},
{
name: 'Image (URL)',
value: 'imageUrl',
description: 'Process the image from the specified URL',
},
],
default: 'text',
},
{
displayName: 'Image Data Field Name',
name: 'binaryImageDataKey',
type: 'string',
default: 'data',
required: true,
description:
'The name of the field in the chains input that contains the binary image file to be processed',
displayOptions: {
show: {
messageType: ['imageBinary'],
},
},
},
{
displayName: 'Image URL',
name: 'imageUrl',
type: 'string',
default: '',
required: true,
description: 'URL to the image to be processed',
displayOptions: {
show: {
messageType: ['imageUrl'],
},
},
},
{
displayName: 'Image Details',
description:
'Control how the model processes the image and generates its textual understanding',
name: 'imageDetail',
type: 'options',
displayOptions: {
show: {
type: [HumanMessagePromptTemplate.lc_name()],
messageType: ['imageBinary', 'imageUrl'],
},
},
options: [
{
name: 'Auto',
value: 'auto',
description:
'Model will use the auto setting which will look at the image input size and decide if it should use the low or high setting',
},
{
name: 'Low',
value: 'low',
description:
'The model will receive a low-res 512px x 512px version of the image, and represent the image with a budget of 65 tokens. This allows the API to return faster responses and consume fewer input tokens for use cases that do not require high detail.',
},
{
name: 'High',
value: 'high',
description:
'Allows the model to see the low res image and then creates detailed crops of input images as 512px squares based on the input image size. Each of the detailed crops uses twice the token budget (65 tokens) for a total of 129 tokens.',
},
],
default: 'auto',
},
{
displayName: 'Message',
name: 'message',
type: 'string',
required: true,
displayOptions: {
hide: {
messageType: ['imageBinary', 'imageUrl'],
},
},
default: '',
},
],
},
],
},
{
displayName: `Connect an <a data-action='openSelectiveNodeCreator' data-action-parameter-connectiontype='${NodeConnectionType.AiOutputParser}'>output parser</a> on the canvas to specify the output format you require`,
name: 'notice',
type: 'notice',
default: '',
displayOptions: {
show: {
hasOutputParser: [true],
},
},
},
],
properties: nodeProperties,
};
/**
* Main execution method for the node
*/
async execute(this: IExecuteFunctions): Promise<INodeExecutionData[][]> {
this.logger.debug('Executing LLM Chain');
this.logger.debug('Executing Basic LLM Chain');
const items = this.getInputData();
const returnData: INodeExecutionData[] = [];
// Process each input item
for (let itemIndex = 0; itemIndex < items.length; itemIndex++) {
try {
let prompt: string;
// Get the language model
const llm = (await this.getInputConnectionData(
NodeConnectionType.AiLanguageModel,
0,
)) as BaseLanguageModel;
const outputParsers = await getOptionalOutputParsers(this);
// Get output parser if configured
const outputParser = await getOptionalOutputParser(this);
// Get user prompt based on node version
let prompt: string;
if (this.getNode().typeVersion <= 1.3) {
prompt = this.getNodeParameter('prompt', itemIndex) as string;
} else {
@@ -544,47 +93,37 @@ export class ChainLlm implements INodeType {
promptTypeKey: 'promptType',
});
}
const messages = this.getNodeParameter(
'messages.messageValues',
itemIndex,
[],
) as MessagesTemplate[];
// Validate prompt
if (prompt === undefined) {
throw new NodeOperationError(this.getNode(), "The 'prompt' parameter is empty.");
}
const responses = await getChain(this, itemIndex, prompt, llm, outputParsers, messages);
// Get chat messages if configured
const messages = this.getNodeParameter(
'messages.messageValues',
itemIndex,
[],
) as MessageTemplate[];
// Execute the chain
const responses = await executeChain({
context: this,
itemIndex,
query: prompt,
llm,
outputParser,
messages,
});
// Process each response and add to return data
responses.forEach((response) => {
let data: IDataObject;
if (typeof response === 'string') {
data = {
response: {
text: response.trim(),
},
};
} else if (Array.isArray(response)) {
data = {
data: response,
};
} else if (response instanceof Object) {
data = response as IDataObject;
} else {
data = {
response: {
text: response,
},
};
}
returnData.push({
json: data,
json: formatResponse(response),
});
});
} catch (error) {
// If the error is an OpenAI's rate limit error, we want to handle it differently
// because OpenAI has multiple different rate limit errors
// Handle OpenAI specific rate limit errors
if (error instanceof NodeApiError && isOpenAiError(error.cause)) {
const openAiErrorCode: string | undefined = (error.cause as any).error?.code;
if (openAiErrorCode) {
@@ -595,6 +134,7 @@ export class ChainLlm implements INodeType {
}
}
// Continue on failure if configured
if (this.continueOnFail()) {
returnData.push({ json: { error: error.message }, pairedItem: { item: itemIndex } });
continue;