mirror of https://github.com/Abdulazizzn/n8n-enterprise-unlocked.git (synced 2025-12-17 01:56:46 +00:00)
refactor: Implement LLM tracing callback to improve parsing of tokens usage stats (#9311)
Signed-off-by: Oleg Ivaniv <me@olegivaniv.com>
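This change removes the logWrapper(model, this) wrapping from the LLM nodes and instead attaches an N8nLlmTracing callback handler (new file at the end of this diff) to each LangChain model. The handler logs node input/output data and reports token usage, falling back to a tiktoken-based estimate when the provider does not return usage stats. Providers whose usage format differs from OpenAI's, such as Anthropic with its input_tokens/output_tokens fields, pass a custom tokensUsageParser. Below is a minimal sketch of that wiring, modeled on the Anthropic hunk that follows; the helper function and the example model id are illustrative only and are not part of the diff:

import type { IExecuteFunctions } from 'n8n-workflow';
import { ChatAnthropic } from '@langchain/anthropic';
import type { LLMResult } from '@langchain/core/outputs';
import { N8nLlmTracing } from './N8nLlmTracing';

// Hypothetical helper for illustration; in the diff this logic lives in LmChatAnthropic's supplyData().
function buildTracedAnthropicModel(ctx: IExecuteFunctions, apiKey: string) {
	// Map Anthropic's input_tokens/output_tokens onto the shape N8nLlmTracing expects.
	const tokensUsageParser = (llmOutput: LLMResult['llmOutput']) => {
		const usage = (llmOutput?.usage as { input_tokens: number; output_tokens: number }) ?? {
			input_tokens: 0,
			output_tokens: 0,
		};
		return {
			completionTokens: usage.output_tokens,
			promptTokens: usage.input_tokens,
			totalTokens: usage.input_tokens + usage.output_tokens,
		};
	};

	return new ChatAnthropic({
		anthropicApiKey: apiKey,
		modelName: 'claude-3-sonnet-20240229', // example model id
		// The tracing callback replaces the old logWrapper(model, this) wrapping.
		callbacks: [new N8nLlmTracing(ctx, { tokensUsageParser })],
	});
}
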
@@ -9,8 +9,9 @@ import {
 } from 'n8n-workflow';
 
 import { ChatAnthropic } from '@langchain/anthropic';
-import { logWrapper } from '../../../utils/logWrapper';
+import type { LLMResult } from '@langchain/core/outputs';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 
 const modelField: INodeProperties = {
 	displayName: 'Model',
@@ -166,6 +167,17 @@ export class LmChatAnthropic implements INodeType {
 			topP: number;
 		};
 
+		const tokensUsageParser = (llmOutput: LLMResult['llmOutput']) => {
+			const usage = (llmOutput?.usage as { input_tokens: number; output_tokens: number }) ?? {
+				input_tokens: 0,
+				output_tokens: 0,
+			};
+			return {
+				completionTokens: usage.output_tokens,
+				promptTokens: usage.input_tokens,
+				totalTokens: usage.input_tokens + usage.output_tokens,
+			};
+		};
 		const model = new ChatAnthropic({
 			anthropicApiKey: credentials.apiKey as string,
 			modelName,
@@ -173,10 +185,11 @@ export class LmChatAnthropic implements INodeType {
 			temperature: options.temperature,
 			topK: options.topK,
 			topP: options.topP,
+			callbacks: [new N8nLlmTracing(this, { tokensUsageParser })],
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }

@@ -9,9 +9,9 @@ import {
 
 import type { ChatOllamaInput } from '@langchain/community/chat_models/ollama';
 import { ChatOllama } from '@langchain/community/chat_models/ollama';
-import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
 import { ollamaModel, ollamaOptions, ollamaDescription } from '../LMOllama/description';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 
 export class LmChatOllama implements INodeType {
 	description: INodeTypeDescription = {
@@ -62,10 +62,11 @@ export class LmChatOllama implements INodeType {
 			baseUrl: credentials.baseUrl as string,
 			model: modelName,
 			format: options.format === 'default' ? undefined : options.format,
+			callbacks: [new N8nLlmTracing(this)],
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }

@@ -8,8 +8,8 @@ import {
 } from 'n8n-workflow';
 
 import { ChatOpenAI, type ClientOptions } from '@langchain/openai';
-import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 
 export class LmChatOpenAi implements INodeType {
 	description: INodeTypeDescription = {
@@ -247,6 +247,7 @@ export class LmChatOpenAi implements INodeType {
 			timeout: options.timeout ?? 60000,
 			maxRetries: options.maxRetries ?? 2,
 			configuration,
+			callbacks: [new N8nLlmTracing(this)],
 			modelKwargs: options.responseFormat
 				? {
 						response_format: { type: options.responseFormat },
@@ -255,7 +256,7 @@ export class LmChatOpenAi implements INodeType {
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }

@@ -8,8 +8,8 @@ import {
 } from 'n8n-workflow';
 
 import { Cohere } from '@langchain/cohere';
-import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 
 export class LmCohere implements INodeType {
 	description: INodeTypeDescription = {
@@ -97,10 +97,11 @@ export class LmCohere implements INodeType {
 		const model = new Cohere({
 			apiKey: credentials.apiKey as string,
 			...options,
+			callbacks: [new N8nLlmTracing(this)],
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }

@@ -8,8 +8,8 @@ import {
 } from 'n8n-workflow';
 
 import { Ollama } from '@langchain/community/llms/ollama';
-import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 import { ollamaDescription, ollamaModel, ollamaOptions } from './description';
 
 export class LmOllama implements INodeType {
@@ -60,10 +60,11 @@ export class LmOllama implements INodeType {
 			baseUrl: credentials.baseUrl as string,
 			model: modelName,
 			...options,
+			callbacks: [new N8nLlmTracing(this)],
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }

@@ -9,8 +9,8 @@ import type {
 } from 'n8n-workflow';
 
 import { OpenAI, type ClientOptions } from '@langchain/openai';
-import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 
 type LmOpenAiOptions = {
 	baseURL?: string;
@@ -240,10 +240,11 @@ export class LmOpenAi implements INodeType {
 			configuration,
 			timeout: options.timeout ?? 60000,
 			maxRetries: options.maxRetries ?? 2,
+			callbacks: [new N8nLlmTracing(this)],
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }

@@ -8,8 +8,8 @@ import {
 } from 'n8n-workflow';
 
 import { HuggingFaceInference } from '@langchain/community/llms/hf';
-import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 
 export class LmOpenHuggingFaceInference implements INodeType {
 	description: INodeTypeDescription = {
@@ -141,10 +141,11 @@ export class LmOpenHuggingFaceInference implements INodeType {
 			model: modelName,
 			apiKey: credentials.apiKey as string,
 			...options,
+			callbacks: [new N8nLlmTracing(this)],
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }

@@ -7,12 +7,12 @@ import {
 	type SupplyData,
 } from 'n8n-workflow';
 import { BedrockChat } from '@langchain/community/chat_models/bedrock';
-import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
 // Dependencies needed underneath the hood. We add them
 // here only to track where what dependency is used
 import '@aws-sdk/credential-provider-node';
 import '@aws-sdk/client-bedrock-runtime';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 
 export class LmChatAwsBedrock implements INodeType {
 	description: INodeTypeDescription = {
@@ -152,10 +152,11 @@ export class LmChatAwsBedrock implements INodeType {
 				accessKeyId: credentials.accessKeyId as string,
 				sessionToken: credentials.sessionToken as string,
 			},
+			callbacks: [new N8nLlmTracing(this)],
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }

@@ -9,8 +9,8 @@ import {
 
 import type { ClientOptions } from '@langchain/openai';
 import { ChatOpenAI } from '@langchain/openai';
-import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 
 export class LmChatAzureOpenAi implements INodeType {
 	description: INodeTypeDescription = {
@@ -160,10 +160,11 @@ export class LmChatAzureOpenAi implements INodeType {
 			timeout: options.timeout ?? 60000,
 			maxRetries: options.maxRetries ?? 2,
 			configuration,
+			callbacks: [new N8nLlmTracing(this)],
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }

@@ -8,8 +8,8 @@ import {
 } from 'n8n-workflow';
 import { ChatGoogleGenerativeAI } from '@langchain/google-genai';
 import type { HarmBlockThreshold, HarmCategory, SafetySetting } from '@google/generative-ai';
-import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 import { harmCategories, harmThresholds } from './options';
 
 export class LmChatGoogleGemini implements INodeType {
@@ -224,10 +224,11 @@ export class LmChatGoogleGemini implements INodeType {
 			temperature: options.temperature,
 			maxOutputTokens: options.maxOutputTokens,
 			safetySettings,
+			callbacks: [new N8nLlmTracing(this)],
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }

@@ -7,8 +7,8 @@ import {
 	type SupplyData,
 } from 'n8n-workflow';
 import { ChatGooglePaLM } from '@langchain/community/chat_models/googlepalm';
-import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 
 export class LmChatGooglePalm implements INodeType {
 	description: INodeTypeDescription = {
@@ -156,10 +156,11 @@ export class LmChatGooglePalm implements INodeType {
 			apiKey: credentials.apiKey as string,
 			modelName,
 			...options,
+			callbacks: [new N8nLlmTracing(this)],
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }

@@ -8,8 +8,8 @@ import {
 } from 'n8n-workflow';
 
 import { ChatGroq } from '@langchain/groq';
-import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 
 export class LmChatGroq implements INodeType {
 	description: INodeTypeDescription = {
@@ -142,10 +142,11 @@ export class LmChatGroq implements INodeType {
 			modelName,
 			maxTokens: options.maxTokensToSample,
 			temperature: options.temperature,
+			callbacks: [new N8nLlmTracing(this)],
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }

@@ -9,8 +9,8 @@ import {
 
 import type { ChatMistralAIInput } from '@langchain/mistralai';
 import { ChatMistralAI } from '@langchain/mistralai';
-import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 
 export class LmChatMistralCloud implements INodeType {
 	description: INodeTypeDescription = {
@@ -188,10 +188,11 @@ export class LmChatMistralCloud implements INodeType {
 			apiKey: credentials.apiKey as string,
 			modelName,
 			...options,
+			callbacks: [new N8nLlmTracing(this)],
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }

@@ -7,8 +7,8 @@ import {
 	type SupplyData,
 } from 'n8n-workflow';
 import { GooglePaLM } from '@langchain/community/llms/googlepalm';
-import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 
 export class LmGooglePalm implements INodeType {
 	description: INodeTypeDescription = {
@@ -163,10 +163,11 @@ export class LmGooglePalm implements INodeType {
 			apiKey: credentials.apiKey as string,
 			modelName,
 			...options,
+			callbacks: [new N8nLlmTracing(this)],
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }

packages/@n8n/nodes-langchain/nodes/llms/N8nLlmTracing.ts (new file, 193 lines)
@@ -0,0 +1,193 @@
import { BaseCallbackHandler } from '@langchain/core/callbacks/base';
import { getModelNameForTiktoken } from '@langchain/core/language_models/base';
import { encodingForModel } from '@langchain/core/utils/tiktoken';
import type {
	Serialized,
	SerializedNotImplemented,
	SerializedSecret,
} from '@langchain/core/load/serializable';
import type { LLMResult } from '@langchain/core/outputs';
import type { IDataObject, IExecuteFunctions } from 'n8n-workflow';
import { NodeConnectionType } from 'n8n-workflow';
import { pick } from 'lodash';
import type { BaseMessage } from '@langchain/core/messages';
import type { SerializedFields } from '@langchain/core/dist/load/map_keys';
import { logAiEvent } from '../../utils/helpers';

type TokensUsageParser = (llmOutput: LLMResult['llmOutput']) => {
	completionTokens: number;
	promptTokens: number;
	totalTokens: number;
};

type LastInput = {
	index: number;
	messages: BaseMessage[] | string[] | string;
	options: SerializedSecret | SerializedNotImplemented | SerializedFields;
};

const TIKTOKEN_ESTIMATE_MODEL = 'gpt-3.5-turbo';
export class N8nLlmTracing extends BaseCallbackHandler {
	name = 'N8nLlmTracing';

	executionFunctions: IExecuteFunctions;

	connectionType = NodeConnectionType.AiLanguageModel;

	promptTokensEstimate = 0;

	completionTokensEstimate = 0;

	lastInput: LastInput = {
		index: 0,
		messages: [],
		options: {},
	};

	options = {
		// Default(OpenAI format) parser
		tokensUsageParser: (llmOutput: LLMResult['llmOutput']) => {
			const completionTokens = (llmOutput?.tokenUsage?.completionTokens as number) ?? 0;
			const promptTokens = (llmOutput?.tokenUsage?.promptTokens as number) ?? 0;

			return {
				completionTokens,
				promptTokens,
				totalTokens: completionTokens + promptTokens,
			};
		},
	};

	constructor(
		executionFunctions: IExecuteFunctions,
		options?: { tokensUsageParser: TokensUsageParser },
	) {
		super();
		this.executionFunctions = executionFunctions;
		this.options = { ...this.options, ...options };
	}

	async estimateTokensFromGeneration(generations: LLMResult['generations']) {
		const messages = generations.flatMap((gen) => gen.map((g) => g.text));
		return await this.estimateTokensFromStringList(messages);
	}

	async estimateTokensFromStringList(list: string[]) {
		const embeddingModel = getModelNameForTiktoken(TIKTOKEN_ESTIMATE_MODEL);
		const encoder = await encodingForModel(embeddingModel);

		const encodedListLength = await Promise.all(
			list.map(async (text) => encoder.encode(text).length),
		);

		return encodedListLength.reduce((acc, curr) => acc + curr, 0);
	}

	async handleLLMEnd(output: LLMResult) {
		output.generations = output.generations.map((gen) =>
			gen.map((g) => pick(g, ['text', 'generationInfo'])),
		);

		const tokenUsageEstimate = {
			completionTokens: 0,
			promptTokens: 0,
			totalTokens: 0,
		};
		const tokenUsage = this.options.tokensUsageParser(output.llmOutput);

		if (output.generations.length > 0) {
			tokenUsageEstimate.completionTokens = await this.estimateTokensFromGeneration(
				output.generations,
			);

			tokenUsageEstimate.promptTokens = this.promptTokensEstimate;
			tokenUsageEstimate.totalTokens =
				tokenUsageEstimate.completionTokens + this.promptTokensEstimate;
		}
		const response: {
			response: { generations: LLMResult['generations'] };
			tokenUsageEstimate?: typeof tokenUsageEstimate;
			tokenUsage?: typeof tokenUsage;
		} = {
			response: { generations: output.generations },
		};

		// If the LLM response contains actual tokens usage, otherwise fallback to the estimate
		if (tokenUsage.completionTokens > 0) {
			response.tokenUsage = tokenUsage;
		} else {
			response.tokenUsageEstimate = tokenUsageEstimate;
		}

		const parsedMessages =
			typeof this.lastInput.messages === 'string'
				? this.lastInput.messages
				: this.lastInput.messages.map((message) => {
						if (typeof message === 'string') return message;
						if (typeof message?.toJSON === 'function') return message.toJSON();

						return message;
					});

		this.executionFunctions.addOutputData(this.connectionType, this.lastInput.index, [
			[{ json: { ...response } }],
		]);
		void logAiEvent(this.executionFunctions, 'n8n.ai.llm.generated', {
			messages: parsedMessages,
			options: this.lastInput.options,
			response,
		});
	}

	async handleLLMStart(llm: Serialized, prompts: string[]) {
		const estimatedTokens = await this.estimateTokensFromStringList(prompts);

		const options = llm.type === 'constructor' ? llm.kwargs : llm;
		const { index } = this.executionFunctions.addInputData(
			this.connectionType,
			[
				[
					{
						json: {
							messages: prompts,
							estimatedTokens,
							options,
						},
					},
				],
			],
			this.lastInput.index + 1,
		);

		// Save the last input for later use when processing `handleLLMEnd` event
		this.lastInput = {
			index,
			options,
			messages: prompts,
		};
		this.promptTokensEstimate = estimatedTokens;
	}

	async handleLLMError(
		error: IDataObject | Error,
		runId: string,
		parentRunId?: string | undefined,
	) {
		// Filter out non-x- headers to avoid leaking sensitive information in logs
		if (typeof error === 'object' && error?.hasOwnProperty('headers')) {
			const errorWithHeaders = error as { headers: Record<string, unknown> };

			Object.keys(errorWithHeaders.headers).forEach((key) => {
				if (!key.startsWith('x-')) {
					delete errorWithHeaders.headers[key];
				}
			});
		}

		void logAiEvent(this.executionFunctions, 'n8n.ai.llm.error', {
			error: Object.keys(error).length === 0 ? error.toString() : error,
			runId,
			parentRunId,
		});
	}
}