Mirror of https://github.com/Abdulazizzn/n8n-enterprise-unlocked.git (synced 2025-12-18 02:21:13 +00:00)
fix(Token Splitter Node): Cache tokenizer JSONs in memory (#17201)
```diff
@@ -1,10 +1,9 @@
 import type { TokenTextSplitterParams } from '@langchain/textsplitters';
 import { TextSplitter } from '@langchain/textsplitters';
-import type * as tiktoken from 'js-tiktoken';
-
 import { hasLongSequentialRepeat } from '@utils/helpers';
 import { getEncoding } from '@utils/tokenizer/tiktoken';
 import { estimateTextSplitsByTokens } from '@utils/tokenizer/token-estimator';
+import type * as tiktoken from 'js-tiktoken';
 
 /**
  * Implementation of splitter which looks at tokens.
```
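The commit message says the tokenizer JSONs are now cached in memory, and the splitter keeps importing `getEncoding` from `@utils/tokenizer/tiktoken`, which is presumably where that cache lives. That helper's implementation is not part of this diff, so the following is only a minimal sketch of one way such a cache can be built on js-tiktoken's lite entry point; the module shape, the cache variable, and the set of bundled encodings are illustrative assumptions, not n8n's actual code.

```ts
// Minimal sketch (not n8n's actual helper) of caching tokenizer JSONs in memory.
// Only js-tiktoken's public lite API (Tiktoken, the bundled rank tables) is real;
// the cache shape and the chosen encodings are assumptions.
import { Tiktoken } from 'js-tiktoken/lite';
import cl100k_base from 'js-tiktoken/ranks/cl100k_base';
import o200k_base from 'js-tiktoken/ranks/o200k_base';

// The rank tables are the large tokenizer JSONs; importing them loads each once per process.
const ranks = { cl100k_base, o200k_base } as const;

export type EncodingName = keyof typeof ranks;

// Constructed Tiktoken instances are memoized so later calls are a cheap Map lookup.
const tokenizerCache = new Map<EncodingName, Tiktoken>();

export function getEncoding(name: EncodingName): Tiktoken {
  let tokenizer = tokenizerCache.get(name);
  if (!tokenizer) {
    tokenizer = new Tiktoken(ranks[name]);
    tokenizerCache.set(name, tokenizer);
  }
  return tokenizer;
}
```

With a cache like this, parsing a rank JSON and constructing a `Tiktoken` instance happens at most once per encoding per process, which is what allows the call site in the next hunk to become synchronous.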
```diff
@@ -52,9 +51,7 @@ export class TokenTextSplitter extends TextSplitter implements TokenTextSplitter
 
 		// Use tiktoken for normal text
 		try {
-			if (!this.tokenizer) {
-				this.tokenizer = await getEncoding(this.encodingName);
-			}
+			this.tokenizer ??= getEncoding(this.encodingName);
 
 			const splits: string[] = [];
 			const input_ids = this.tokenizer.encode(text, this.allowedSpecial, this.disallowedSpecial);
```
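The behavioural change is at the call site: the old async guard (`if (!this.tokenizer) { this.tokenizer = await getEncoding(...); }`) collapses into a single nullish-coalescing assignment, `??=`, which performs the now-synchronous, cached lookup only when `this.tokenizer` is still unset; the tokens are then produced by `encode` with the splitter's allowed/disallowed special-token settings. Below is a self-contained sketch of that lazy-initialization pattern together with the token-window split such a splitter typically performs. Apart from the `tokenizer ??= getEncoding(...)` idea and the encode/decode calls, the class name, fields, chunking defaults, and the omission of the special-token arguments are illustrative assumptions, and js-tiktoken's public `getEncoding` stands in for n8n's cached helper.

```ts
// Standalone sketch: lazy, synchronous tokenizer initialization via `??=`,
// followed by a token-window split. Illustrative only; not n8n's TokenTextSplitter.
import { getEncoding } from 'js-tiktoken';

type EncodingName = Parameters<typeof getEncoding>[0];
type Tokenizer = ReturnType<typeof getEncoding>;

class TokenWindowSplitter {
  private tokenizer?: Tokenizer;

  constructor(
    private readonly encodingName: EncodingName = 'cl100k_base',
    private readonly chunkSize = 100,
    private readonly chunkOverlap = 0,
  ) {}

  splitText(text: string): string[] {
    // `??=` assigns only while `this.tokenizer` is still undefined, so the
    // (synchronous, cached) lookup runs at most once per splitter instance.
    this.tokenizer ??= getEncoding(this.encodingName);

    const splits: string[] = [];
    const inputIds = this.tokenizer.encode(text);

    // Slide a window of `chunkSize` tokens, stepping by chunkSize - chunkOverlap.
    const step = Math.max(1, this.chunkSize - this.chunkOverlap);
    for (let start = 0; start < inputIds.length; start += step) {
      splits.push(this.tokenizer.decode(inputIds.slice(start, start + this.chunkSize)));
    }
    return splits;
  }
}

// Usage: three-token windows with one token of overlap.
const splitter = new TokenWindowSplitter('cl100k_base', 3, 1);
console.log(splitter.splitText('Tokenizer JSONs are now cached in memory.'));
```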