mirror of https://github.com/Abdulazizzn/n8n-enterprise-unlocked.git (synced 2025-12-17 01:56:46 +00:00)
refactor: Implement LLM tracing callback to improve parsing of tokens usage stats (#9311)
Signed-off-by: Oleg Ivaniv <me@olegivaniv.com>
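This change removes the logWrapper(model, this) wrapping from the LLM nodes and instead attaches an N8nLlmTracing callback handler (new file at the end of this diff) to each LangChain model. The handler logs node input/output data and reports token usage, falling back to a tiktoken-based estimate when the provider does not return usage stats. Providers whose usage format differs from OpenAI's, such as Anthropic with its input_tokens/output_tokens fields, pass a custom tokensUsageParser. Below is a minimal sketch of that wiring, modeled on the Anthropic hunk that follows; the helper function and the example model id are illustrative only and are not part of the diff:

import type { IExecuteFunctions } from 'n8n-workflow';
import { ChatAnthropic } from '@langchain/anthropic';
import type { LLMResult } from '@langchain/core/outputs';
import { N8nLlmTracing } from './N8nLlmTracing';

// Hypothetical helper for illustration; in the diff this logic lives in LmChatAnthropic's supplyData().
function buildTracedAnthropicModel(ctx: IExecuteFunctions, apiKey: string) {
	// Map Anthropic's input_tokens/output_tokens onto the shape N8nLlmTracing expects.
	const tokensUsageParser = (llmOutput: LLMResult['llmOutput']) => {
		const usage = (llmOutput?.usage as { input_tokens: number; output_tokens: number }) ?? {
			input_tokens: 0,
			output_tokens: 0,
		};
		return {
			completionTokens: usage.output_tokens,
			promptTokens: usage.input_tokens,
			totalTokens: usage.input_tokens + usage.output_tokens,
		};
	};

	return new ChatAnthropic({
		anthropicApiKey: apiKey,
		modelName: 'claude-3-sonnet-20240229', // example model id
		// The tracing callback replaces the old logWrapper(model, this) wrapping.
		callbacks: [new N8nLlmTracing(ctx, { tokensUsageParser })],
	});
}
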
@@ -9,8 +9,9 @@ import {
 } from 'n8n-workflow';
 
 import { ChatAnthropic } from '@langchain/anthropic';
-import { logWrapper } from '../../../utils/logWrapper';
+import type { LLMResult } from '@langchain/core/outputs';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 
 const modelField: INodeProperties = {
 	displayName: 'Model',
@@ -166,6 +167,17 @@ export class LmChatAnthropic implements INodeType {
 			topP: number;
 		};
 
+		const tokensUsageParser = (llmOutput: LLMResult['llmOutput']) => {
+			const usage = (llmOutput?.usage as { input_tokens: number; output_tokens: number }) ?? {
+				input_tokens: 0,
+				output_tokens: 0,
+			};
+			return {
+				completionTokens: usage.output_tokens,
+				promptTokens: usage.input_tokens,
+				totalTokens: usage.input_tokens + usage.output_tokens,
+			};
+		};
 		const model = new ChatAnthropic({
 			anthropicApiKey: credentials.apiKey as string,
 			modelName,
@@ -173,10 +185,11 @@ export class LmChatAnthropic implements INodeType {
 			temperature: options.temperature,
 			topK: options.topK,
 			topP: options.topP,
+			callbacks: [new N8nLlmTracing(this, { tokensUsageParser })],
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }

@@ -9,9 +9,9 @@ import {
 
 import type { ChatOllamaInput } from '@langchain/community/chat_models/ollama';
 import { ChatOllama } from '@langchain/community/chat_models/ollama';
-import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
 import { ollamaModel, ollamaOptions, ollamaDescription } from '../LMOllama/description';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 
 export class LmChatOllama implements INodeType {
 	description: INodeTypeDescription = {
@@ -62,10 +62,11 @@ export class LmChatOllama implements INodeType {
 			baseUrl: credentials.baseUrl as string,
 			model: modelName,
 			format: options.format === 'default' ? undefined : options.format,
+			callbacks: [new N8nLlmTracing(this)],
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }

@@ -8,8 +8,8 @@ import {
 } from 'n8n-workflow';
 
 import { ChatOpenAI, type ClientOptions } from '@langchain/openai';
-import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 
 export class LmChatOpenAi implements INodeType {
 	description: INodeTypeDescription = {
@@ -247,6 +247,7 @@ export class LmChatOpenAi implements INodeType {
 			timeout: options.timeout ?? 60000,
 			maxRetries: options.maxRetries ?? 2,
 			configuration,
+			callbacks: [new N8nLlmTracing(this)],
 			modelKwargs: options.responseFormat
 				? {
 						response_format: { type: options.responseFormat },
@@ -255,7 +256,7 @@ export class LmChatOpenAi implements INodeType {
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }

@@ -8,8 +8,8 @@ import {
 } from 'n8n-workflow';
 
 import { Cohere } from '@langchain/cohere';
-import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 
 export class LmCohere implements INodeType {
 	description: INodeTypeDescription = {
@@ -97,10 +97,11 @@ export class LmCohere implements INodeType {
 		const model = new Cohere({
 			apiKey: credentials.apiKey as string,
 			...options,
+			callbacks: [new N8nLlmTracing(this)],
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }

@@ -8,8 +8,8 @@ import {
 } from 'n8n-workflow';
 
 import { Ollama } from '@langchain/community/llms/ollama';
-import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 import { ollamaDescription, ollamaModel, ollamaOptions } from './description';
 
 export class LmOllama implements INodeType {
@@ -60,10 +60,11 @@ export class LmOllama implements INodeType {
 			baseUrl: credentials.baseUrl as string,
 			model: modelName,
 			...options,
+			callbacks: [new N8nLlmTracing(this)],
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }

@@ -9,8 +9,8 @@ import type {
 } from 'n8n-workflow';
 
 import { OpenAI, type ClientOptions } from '@langchain/openai';
-import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 
 type LmOpenAiOptions = {
 	baseURL?: string;
@@ -240,10 +240,11 @@ export class LmOpenAi implements INodeType {
 			configuration,
 			timeout: options.timeout ?? 60000,
 			maxRetries: options.maxRetries ?? 2,
+			callbacks: [new N8nLlmTracing(this)],
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }

@@ -8,8 +8,8 @@ import {
 } from 'n8n-workflow';
 
 import { HuggingFaceInference } from '@langchain/community/llms/hf';
-import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 
 export class LmOpenHuggingFaceInference implements INodeType {
 	description: INodeTypeDescription = {
@@ -141,10 +141,11 @@ export class LmOpenHuggingFaceInference implements INodeType {
 			model: modelName,
 			apiKey: credentials.apiKey as string,
 			...options,
+			callbacks: [new N8nLlmTracing(this)],
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }

@@ -7,12 +7,12 @@ import {
 	type SupplyData,
 } from 'n8n-workflow';
 import { BedrockChat } from '@langchain/community/chat_models/bedrock';
-import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
 // Dependencies needed underneath the hood. We add them
 // here only to track where what dependency is used
 import '@aws-sdk/credential-provider-node';
 import '@aws-sdk/client-bedrock-runtime';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 
 export class LmChatAwsBedrock implements INodeType {
 	description: INodeTypeDescription = {
@@ -152,10 +152,11 @@ export class LmChatAwsBedrock implements INodeType {
 				accessKeyId: credentials.accessKeyId as string,
 				sessionToken: credentials.sessionToken as string,
 			},
+			callbacks: [new N8nLlmTracing(this)],
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }

@@ -9,8 +9,8 @@ import {
 
 import type { ClientOptions } from '@langchain/openai';
 import { ChatOpenAI } from '@langchain/openai';
-import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 
 export class LmChatAzureOpenAi implements INodeType {
 	description: INodeTypeDescription = {
@@ -160,10 +160,11 @@ export class LmChatAzureOpenAi implements INodeType {
 			timeout: options.timeout ?? 60000,
 			maxRetries: options.maxRetries ?? 2,
 			configuration,
+			callbacks: [new N8nLlmTracing(this)],
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }

@@ -8,8 +8,8 @@ import {
 } from 'n8n-workflow';
 import { ChatGoogleGenerativeAI } from '@langchain/google-genai';
 import type { HarmBlockThreshold, HarmCategory, SafetySetting } from '@google/generative-ai';
-import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 import { harmCategories, harmThresholds } from './options';
 
 export class LmChatGoogleGemini implements INodeType {
@@ -224,10 +224,11 @@ export class LmChatGoogleGemini implements INodeType {
 			temperature: options.temperature,
 			maxOutputTokens: options.maxOutputTokens,
 			safetySettings,
+			callbacks: [new N8nLlmTracing(this)],
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }

@@ -7,8 +7,8 @@ import {
 	type SupplyData,
 } from 'n8n-workflow';
 import { ChatGooglePaLM } from '@langchain/community/chat_models/googlepalm';
-import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 
 export class LmChatGooglePalm implements INodeType {
 	description: INodeTypeDescription = {
@@ -156,10 +156,11 @@ export class LmChatGooglePalm implements INodeType {
 			apiKey: credentials.apiKey as string,
 			modelName,
 			...options,
+			callbacks: [new N8nLlmTracing(this)],
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }

@@ -8,8 +8,8 @@ import {
 } from 'n8n-workflow';
 
 import { ChatGroq } from '@langchain/groq';
-import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 
 export class LmChatGroq implements INodeType {
 	description: INodeTypeDescription = {
@@ -142,10 +142,11 @@ export class LmChatGroq implements INodeType {
 			modelName,
 			maxTokens: options.maxTokensToSample,
 			temperature: options.temperature,
+			callbacks: [new N8nLlmTracing(this)],
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }

@@ -9,8 +9,8 @@ import {
 
 import type { ChatMistralAIInput } from '@langchain/mistralai';
 import { ChatMistralAI } from '@langchain/mistralai';
-import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 
 export class LmChatMistralCloud implements INodeType {
 	description: INodeTypeDescription = {
@@ -188,10 +188,11 @@ export class LmChatMistralCloud implements INodeType {
 			apiKey: credentials.apiKey as string,
 			modelName,
 			...options,
+			callbacks: [new N8nLlmTracing(this)],
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }

@@ -7,8 +7,8 @@ import {
 	type SupplyData,
 } from 'n8n-workflow';
 import { GooglePaLM } from '@langchain/community/llms/googlepalm';
-import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 
 export class LmGooglePalm implements INodeType {
 	description: INodeTypeDescription = {
@@ -163,10 +163,11 @@ export class LmGooglePalm implements INodeType {
 			apiKey: credentials.apiKey as string,
 			modelName,
 			...options,
+			callbacks: [new N8nLlmTracing(this)],
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }

packages/@n8n/nodes-langchain/nodes/llms/N8nLlmTracing.ts (new file, 193 lines)
@@ -0,0 +1,193 @@
import { BaseCallbackHandler } from '@langchain/core/callbacks/base';
import { getModelNameForTiktoken } from '@langchain/core/language_models/base';
import { encodingForModel } from '@langchain/core/utils/tiktoken';
import type {
	Serialized,
	SerializedNotImplemented,
	SerializedSecret,
} from '@langchain/core/load/serializable';
import type { LLMResult } from '@langchain/core/outputs';
import type { IDataObject, IExecuteFunctions } from 'n8n-workflow';
import { NodeConnectionType } from 'n8n-workflow';
import { pick } from 'lodash';
import type { BaseMessage } from '@langchain/core/messages';
import type { SerializedFields } from '@langchain/core/dist/load/map_keys';
import { logAiEvent } from '../../utils/helpers';

type TokensUsageParser = (llmOutput: LLMResult['llmOutput']) => {
	completionTokens: number;
	promptTokens: number;
	totalTokens: number;
};

type LastInput = {
	index: number;
	messages: BaseMessage[] | string[] | string;
	options: SerializedSecret | SerializedNotImplemented | SerializedFields;
};

const TIKTOKEN_ESTIMATE_MODEL = 'gpt-3.5-turbo';
export class N8nLlmTracing extends BaseCallbackHandler {
	name = 'N8nLlmTracing';

	executionFunctions: IExecuteFunctions;

	connectionType = NodeConnectionType.AiLanguageModel;

	promptTokensEstimate = 0;

	completionTokensEstimate = 0;

	lastInput: LastInput = {
		index: 0,
		messages: [],
		options: {},
	};

	options = {
		// Default(OpenAI format) parser
		tokensUsageParser: (llmOutput: LLMResult['llmOutput']) => {
			const completionTokens = (llmOutput?.tokenUsage?.completionTokens as number) ?? 0;
			const promptTokens = (llmOutput?.tokenUsage?.promptTokens as number) ?? 0;

			return {
				completionTokens,
				promptTokens,
				totalTokens: completionTokens + promptTokens,
			};
		},
	};

	constructor(
		executionFunctions: IExecuteFunctions,
		options?: { tokensUsageParser: TokensUsageParser },
	) {
		super();
		this.executionFunctions = executionFunctions;
		this.options = { ...this.options, ...options };
	}

	async estimateTokensFromGeneration(generations: LLMResult['generations']) {
		const messages = generations.flatMap((gen) => gen.map((g) => g.text));
		return await this.estimateTokensFromStringList(messages);
	}

	async estimateTokensFromStringList(list: string[]) {
		const embeddingModel = getModelNameForTiktoken(TIKTOKEN_ESTIMATE_MODEL);
		const encoder = await encodingForModel(embeddingModel);

		const encodedListLength = await Promise.all(
			list.map(async (text) => encoder.encode(text).length),
		);

		return encodedListLength.reduce((acc, curr) => acc + curr, 0);
	}

	async handleLLMEnd(output: LLMResult) {
		output.generations = output.generations.map((gen) =>
			gen.map((g) => pick(g, ['text', 'generationInfo'])),
		);

		const tokenUsageEstimate = {
			completionTokens: 0,
			promptTokens: 0,
			totalTokens: 0,
		};
		const tokenUsage = this.options.tokensUsageParser(output.llmOutput);

		if (output.generations.length > 0) {
			tokenUsageEstimate.completionTokens = await this.estimateTokensFromGeneration(
				output.generations,
			);

			tokenUsageEstimate.promptTokens = this.promptTokensEstimate;
			tokenUsageEstimate.totalTokens =
				tokenUsageEstimate.completionTokens + this.promptTokensEstimate;
		}
		const response: {
			response: { generations: LLMResult['generations'] };
			tokenUsageEstimate?: typeof tokenUsageEstimate;
			tokenUsage?: typeof tokenUsage;
		} = {
			response: { generations: output.generations },
		};

		// If the LLM response contains actual tokens usage, otherwise fallback to the estimate
		if (tokenUsage.completionTokens > 0) {
			response.tokenUsage = tokenUsage;
		} else {
			response.tokenUsageEstimate = tokenUsageEstimate;
		}

		const parsedMessages =
			typeof this.lastInput.messages === 'string'
				? this.lastInput.messages
				: this.lastInput.messages.map((message) => {
						if (typeof message === 'string') return message;
						if (typeof message?.toJSON === 'function') return message.toJSON();

						return message;
					});

		this.executionFunctions.addOutputData(this.connectionType, this.lastInput.index, [
			[{ json: { ...response } }],
		]);
		void logAiEvent(this.executionFunctions, 'n8n.ai.llm.generated', {
			messages: parsedMessages,
			options: this.lastInput.options,
			response,
		});
	}

	async handleLLMStart(llm: Serialized, prompts: string[]) {
		const estimatedTokens = await this.estimateTokensFromStringList(prompts);

		const options = llm.type === 'constructor' ? llm.kwargs : llm;
		const { index } = this.executionFunctions.addInputData(
			this.connectionType,
			[
				[
					{
						json: {
							messages: prompts,
							estimatedTokens,
							options,
						},
					},
				],
			],
			this.lastInput.index + 1,
		);

		// Save the last input for later use when processing `handleLLMEnd` event
		this.lastInput = {
			index,
			options,
			messages: prompts,
		};
		this.promptTokensEstimate = estimatedTokens;
	}

	async handleLLMError(
		error: IDataObject | Error,
		runId: string,
		parentRunId?: string | undefined,
	) {
		// Filter out non-x- headers to avoid leaking sensitive information in logs
		if (typeof error === 'object' && error?.hasOwnProperty('headers')) {
			const errorWithHeaders = error as { headers: Record<string, unknown> };

			Object.keys(errorWithHeaders.headers).forEach((key) => {
				if (!key.startsWith('x-')) {
					delete errorWithHeaders.headers[key];
				}
			});
		}

		void logAiEvent(this.executionFunctions, 'n8n.ai.llm.error', {
			error: Object.keys(error).length === 0 ? error.toString() : error,
			runId,
			parentRunId,
		});
	}
}