mirror of
https://github.com/Abdulazizzn/n8n-enterprise-unlocked.git
synced 2025-12-17 01:56:46 +00:00
fix(Token Splitter Node): Prevent tiktoken blocking on repetitive content (#16769)
This commit is contained in:
@@ -13,7 +13,7 @@ import type { IDataObject, ISupplyDataFunctions, JsonObject } from 'n8n-workflow
|
||||
import { NodeConnectionTypes, NodeError, NodeOperationError } from 'n8n-workflow';
|
||||
|
||||
import { logAiEvent } from '@utils/helpers';
|
||||
-import { encodingForModel } from '@utils/tokenizer/tiktoken';
|
||||
+import { estimateTokensFromStringList } from '@utils/tokenizer/token-estimator';
|
||||
|
||||
type TokensUsageParser = (llmOutput: LLMResult['llmOutput']) => {
|
||||
completionTokens: number;
|
||||
@@ -84,13 +84,7 @@ export class N8nLlmTracing extends BaseCallbackHandler {
|
||||
|
||||
async estimateTokensFromStringList(list: string[]) {
|
||||
const embeddingModel = getModelNameForTiktoken(TIKTOKEN_ESTIMATE_MODEL);
|
||||
-const encoder = await encodingForModel(embeddingModel);
|
||||
|
||||
-const encodedListLength = await Promise.all(
|
||||
-list.map(async (text) => encoder.encode(text).length),
|
||||
-);
|
||||
|
||||
-return encodedListLength.reduce((acc, curr) => acc + curr, 0);
|
||||
+return await estimateTokensFromStringList(list, embeddingModel);
|
||||
}
|
||||
|
||||
async handleLLMEnd(output: LLMResult, runId: string) {
|
||||
|
||||
Reference in New Issue
Block a user