fix(Token Splitter Node): Prevent tiktoken blocking on repetitive content (#16769)

This commit is contained in:
oleg
2025-06-27 16:08:14 +02:00
committed by GitHub
parent edf0fec444
commit c5ec056eb5
7 changed files with 812 additions and 27 deletions

View File

@@ -13,7 +13,7 @@ import type { IDataObject, ISupplyDataFunctions, JsonObject } from 'n8n-workflow
import { NodeConnectionTypes, NodeError, NodeOperationError } from 'n8n-workflow';
import { logAiEvent } from '@utils/helpers';
-import { encodingForModel } from '@utils/tokenizer/tiktoken';
+import { estimateTokensFromStringList } from '@utils/tokenizer/token-estimator';
type TokensUsageParser = (llmOutput: LLMResult['llmOutput']) => {
completionTokens: number;
@@ -84,13 +84,7 @@ export class N8nLlmTracing extends BaseCallbackHandler {
async estimateTokensFromStringList(list: string[]) {
const embeddingModel = getModelNameForTiktoken(TIKTOKEN_ESTIMATE_MODEL);
-const encoder = await encodingForModel(embeddingModel);
-const encodedListLength = await Promise.all(
-list.map(async (text) => encoder.encode(text).length),
-);
-return encodedListLength.reduce((acc, curr) => acc + curr, 0);
+return await estimateTokensFromStringList(list, embeddingModel);
}
async handleLLMEnd(output: LLMResult, runId: string) {