mirror of
https://github.com/Abdulazizzn/n8n-enterprise-unlocked.git
synced 2025-12-17 10:02:05 +00:00
fix(Token Splitter Node): Prevent tiktoken blocking on repetitive content (#16769)
This commit is contained in:
@@ -258,3 +258,50 @@ export function unwrapNestedOutput(output: Record<string, unknown>): Record<stri
|
||||
/**
 * Derives a tool name from a node's display name.
 *
 * Every whitespace character and every punctuation character listed in the
 * pattern below is replaced by an underscore; consecutive underscores are
 * then collapsed into a single one.
 *
 * @param node The node whose `name` is converted
 * @returns The converted name
 */
export function nodeNameToToolName(node: INode): string {
	const disallowedChars = /[\s.?!=+#@&*()[\]{}:;,<>\/\\'"^%$]/g;
	const underscoreRuns = /_+/g;

	const underscored = node.name.replace(disallowedChars, '_');
	return underscored.replace(underscoreRuns, '_');
}
|
||||
|
||||
/**
 * Detects whether a text contains a single character repeated back-to-back at
 * least `threshold` times.
 * This is used to prevent performance issues with tiktoken on highly repetitive content.
 *
 * Characters are compared per Unicode code point (string iteration), so a
 * character outside the BMP, such as an emoji, counts as one character.
 *
 * @param text The text to check
 * @param threshold The minimum number of sequential repeats to detect (default: 1000).
 * A threshold that is not greater than zero never matches.
 * @returns true if a character repeats sequentially for at least the threshold amount;
 * false otherwise, including for invalid input or if anything unexpectedly throws
 */
export function hasLongSequentialRepeat(text: string, threshold = 1000): boolean {
	try {
		// Runtime validation: the value may not honour the declared type at runtime.
		if (typeof text !== 'string' || !(threshold > 0)) {
			return false;
		}

		// Cheap early exit: the UTF-16 length is an upper bound on the number of
		// code points, so a shorter text cannot contain a long enough run.
		if (text.length === 0 || text.length < threshold) {
			return false;
		}

		// Iterate the string directly (no array copy) and track the current run.
		let previous: string | undefined;
		let runLength = 0;
		for (const char of text) {
			runLength = char === previous ? runLength + 1 : 1;
			previous = char;
			// Checked after every character, so a run of length 1 is also
			// reported when the threshold is 1.
			if (runLength >= threshold) {
				return true;
			}
		}

		return false;
	} catch {
		// On any error, return false to allow normal processing
		return false;
	}
}
|
||||
|
||||
Reference in New Issue
Block a user