feat(core): Dedupe (#10101)

Co-authored-by: Jan Oberhauser <jan@n8n.io>
Co-authored-by: Giulio Andreini <g.andreini@gmail.com>
Co-authored-by: Tomi Turtiainen <10324676+tomi@users.noreply.github.com>
Co-authored-by: Elias Meire <elias@meire.dev>
This commit is contained in:
Shireen Missi
2024-10-10 16:12:05 +01:00
committed by GitHub
parent 6823e8f2dd
commit 52dd2c7619
32 changed files with 2298 additions and 233 deletions

View File

@@ -102,6 +102,13 @@ import type {
EnsureTypeOptions,
SSHTunnelFunctions,
SchedulingFunctions,
DeduplicationHelperFunctions,
IDeduplicationOutput,
IDeduplicationOutputItems,
ICheckProcessedOptions,
DeduplicationScope,
DeduplicationItemTypes,
ICheckProcessedContextData,
AiEvent,
} from 'n8n-workflow';
import {
@@ -149,6 +156,7 @@ import {
UM_EMAIL_TEMPLATES_PWRESET,
} from './Constants';
import { createNodeAsTool } from './CreateNodeAsTool';
import { DataDeduplicationService } from './data-deduplication-service';
import {
getAllWorkflowExecutionMetadata,
getWorkflowExecutionMetadata,
@@ -1284,6 +1292,72 @@ async function prepareBinaryData(
return await setBinaryDataBuffer(returnData, binaryData, workflowId, executionId);
}
/**
 * Forwards a check-and-record request to the shared `DataDeduplicationService`.
 *
 * All arguments are handed to the service's `checkProcessedAndRecord` unchanged
 * and its result is returned as-is.
 *
 * @param items Values to check and record.
 * @param scope Deduplication scope passed through to the service.
 * @param contextData Context data passed through to the service.
 * @param options Deduplication options passed through to the service.
 * @returns Whatever the service resolves with. The promise rejects if
 * `DataDeduplicationService.getInstance()` throws.
 */
export async function checkProcessedAndRecord(
	items: DeduplicationItemTypes[],
	scope: DeduplicationScope,
	contextData: ICheckProcessedContextData,
	options: ICheckProcessedOptions,
): Promise<IDeduplicationOutput> {
	const deduplicationService = DataDeduplicationService.getInstance();
	const outcome = await deduplicationService.checkProcessedAndRecord(
		items,
		scope,
		contextData,
		options,
	);
	return outcome;
}
/**
 * Forwards an object-based check-and-record request to the shared
 * `DataDeduplicationService`.
 *
 * All arguments are handed to the service's `checkProcessedItemsAndRecord`
 * unchanged (with `key` as its first argument) and its result is returned as-is.
 *
 * @param key Property name/path forwarded as the service's first argument.
 * @param items Objects to check and record.
 * @param scope Deduplication scope passed through to the service.
 * @param contextData Context data passed through to the service.
 * @param options Deduplication options passed through to the service.
 * @returns Whatever the service resolves with. The promise rejects if
 * `DataDeduplicationService.getInstance()` throws.
 */
export async function checkProcessedItemsAndRecord(
	key: string,
	items: IDataObject[],
	scope: DeduplicationScope,
	contextData: ICheckProcessedContextData,
	options: ICheckProcessedOptions,
): Promise<IDeduplicationOutputItems> {
	const deduplicationService = DataDeduplicationService.getInstance();
	const outcome = await deduplicationService.checkProcessedItemsAndRecord(
		key,
		items,
		scope,
		contextData,
		options,
	);
	return outcome;
}
/**
 * Forwards a removal request to the shared `DataDeduplicationService`.
 *
 * All arguments are handed to the service's `removeProcessed` unchanged.
 *
 * @param items Values to remove.
 * @param scope Deduplication scope passed through to the service.
 * @param contextData Context data passed through to the service.
 * @param options Deduplication options passed through to the service.
 * @returns Resolves once the service call settles. The promise rejects if
 * `DataDeduplicationService.getInstance()` throws.
 */
export async function removeProcessed(
	items: DeduplicationItemTypes[],
	scope: DeduplicationScope,
	contextData: ICheckProcessedContextData,
	options: ICheckProcessedOptions,
): Promise<void> {
	const deduplicationService = DataDeduplicationService.getInstance();
	return await deduplicationService.removeProcessed(items, scope, contextData, options);
}
/**
 * Forwards a clear-all request to the shared `DataDeduplicationService`.
 *
 * All arguments are handed to the service's `clearAllProcessedItems` unchanged.
 *
 * @param scope Deduplication scope passed through to the service.
 * @param contextData Context data passed through to the service.
 * @param options Deduplication options passed through to the service.
 * @returns Resolves once the service call settles. The promise rejects if
 * `DataDeduplicationService.getInstance()` throws.
 */
export async function clearAllProcessedItems(
	scope: DeduplicationScope,
	contextData: ICheckProcessedContextData,
	options: ICheckProcessedOptions,
): Promise<void> {
	const deduplicationService = DataDeduplicationService.getInstance();
	return await deduplicationService.clearAllProcessedItems(scope, contextData, options);
}
/**
 * Forwards a count request to the shared `DataDeduplicationService`.
 *
 * All arguments are handed to the service's `getProcessedDataCount` unchanged
 * and its result is returned as-is.
 *
 * @param scope Deduplication scope passed through to the service.
 * @param contextData Context data passed through to the service.
 * @param options Deduplication options passed through to the service.
 * @returns The number the service resolves with. The promise rejects if
 * `DataDeduplicationService.getInstance()` throws.
 */
export async function getProcessedDataCount(
	scope: DeduplicationScope,
	contextData: ICheckProcessedContextData,
	options: ICheckProcessedOptions,
): Promise<number> {
	const deduplicationService = DataDeduplicationService.getInstance();
	const total = await deduplicationService.getProcessedDataCount(scope, contextData, options);
	return total;
}
function applyPaginationRequestData(
requestData: IRequestOptions,
paginationRequestData: PaginationOptions['request'],
@@ -3453,6 +3527,52 @@ const getBinaryHelperFunctions = (
},
});
/**
 * Builds the deduplication helpers for a given workflow/node pair.
 *
 * Each helper forwards to the module-level function of the same name and
 * supplies `{ node, workflow }` as the context data, so callers only pass the
 * items, scope and options.
 *
 * @param workflow Workflow placed into the context data of every call.
 * @param node Node placed into the context data of every call.
 * @returns An object exposing the five deduplication helpers.
 */
const getCheckProcessedHelperFunctions = (
	workflow: Workflow,
	node: INode,
): DeduplicationHelperFunctions => {
	// A fresh context object is produced for every call, so no state is
	// shared between invocations.
	const buildContext = (): ICheckProcessedContextData => ({ node, workflow });

	const helpers: DeduplicationHelperFunctions = {
		checkProcessedAndRecord: async (
			items: DeduplicationItemTypes[],
			scope: DeduplicationScope,
			options: ICheckProcessedOptions,
		): Promise<IDeduplicationOutput> =>
			await checkProcessedAndRecord(items, scope, buildContext(), options),

		checkProcessedItemsAndRecord: async (
			propertyName: string,
			items: IDataObject[],
			scope: DeduplicationScope,
			options: ICheckProcessedOptions,
		): Promise<IDeduplicationOutputItems> =>
			await checkProcessedItemsAndRecord(propertyName, items, scope, buildContext(), options),

		removeProcessed: async (
			items: DeduplicationItemTypes[],
			scope: DeduplicationScope,
			options: ICheckProcessedOptions,
		): Promise<void> => await removeProcessed(items, scope, buildContext(), options),

		clearAllProcessedItems: async (
			scope: DeduplicationScope,
			options: ICheckProcessedOptions,
		): Promise<void> => await clearAllProcessedItems(scope, buildContext(), options),

		getProcessedDataCount: async (
			scope: DeduplicationScope,
			options: ICheckProcessedOptions,
		): Promise<number> => await getProcessedDataCount(scope, buildContext(), options),
	};

	return helpers;
};
/**
* Returns a copy of the items which only contains the json data and
* of that only the defined properties
@@ -3896,6 +4016,7 @@ export function getExecuteFunctions(
...getSSHTunnelFunctions(),
...getFileSystemHelperFunctions(node),
...getBinaryHelperFunctions(additionalData, workflow.id),
...getCheckProcessedHelperFunctions(workflow, node),
assertBinaryData: (itemIndex, propertyName) =>
assertBinaryData(inputData, node, itemIndex, propertyName, 0),
getBinaryDataBuffer: async (itemIndex, propertyName) =>

View File

@@ -0,0 +1,124 @@
import get from 'lodash/get';
import type {
IDataDeduplicator,
ICheckProcessedOptions,
IDeduplicationOutput,
IDeduplicationOutputItems,
IDataObject,
DeduplicationScope,
DeduplicationItemTypes,
ICheckProcessedContextData,
} from 'n8n-workflow';
import * as assert from 'node:assert/strict';
/**
 * A singleton service responsible for data deduplication.
 * This service wraps around the IDataDeduplicator interface and provides methods to handle
 * deduplication-related operations such as checking, recording, and clearing processed data.
 *
 * Call `DataDeduplicationService.init()` exactly once with the deduplicator to
 * use, then obtain the shared instance through `getInstance()`.
 */
export class DataDeduplicationService {
	private static instance: DataDeduplicationService;

	private deduplicator: IDataDeduplicator;

	private constructor(deduplicator: IDataDeduplicator) {
		this.deduplicator = deduplicator;
	}

	/** Fails with an assertion error when no deduplicator is set on this instance. */
	private assertDeduplicator() {
		assert.ok(
			this.deduplicator,
			'Manager needs to be initialized before use. Make sure to call init()',
		);
	}

	/** Fails with an assertion error when `init()` has not been called yet. */
	private static assertInstance() {
		assert.ok(
			DataDeduplicationService.instance,
			'Instance needs to be initialized before use. Make sure to call init()',
		);
	}

	/** Fails with an assertion error when the singleton already exists. */
	private static assertSingleInstance() {
		assert.ok(
			!DataDeduplicationService.instance,
			'Instance already initialized. Multiple initializations are not allowed.',
		);
	}

	/**
	 * Creates the singleton around the given deduplicator.
	 *
	 * @param deduplicator Implementation every instance method delegates to.
	 * @returns A promise that rejects with an assertion error if the service
	 * has already been initialized.
	 */
	static async init(deduplicator: IDataDeduplicator): Promise<void> {
		this.assertSingleInstance();
		DataDeduplicationService.instance = new DataDeduplicationService(deduplicator);
	}

	/**
	 * Returns the singleton.
	 *
	 * @throws An assertion error if `init()` has not been called.
	 */
	static getInstance(): DataDeduplicationService {
		this.assertInstance();
		return DataDeduplicationService.instance;
	}

	/**
	 * Deduplicates objects by the value found at `propertyName`.
	 *
	 * Each item's value is read with lodash `get` and serialized with
	 * `JSON.stringify` to form its key; items whose value serializes to
	 * `undefined` (e.g. the property is missing) share the empty-string key.
	 * The distinct keys are passed to the deduplicator and its `new` /
	 * `processed` keys are mapped back to the corresponding items.
	 *
	 * When several items produce the same key, only the last of them is
	 * represented in the result.
	 *
	 * @param propertyName Path of the property to deduplicate on.
	 * @param items Objects to check.
	 * @param scope Deduplication scope passed through to the deduplicator.
	 * @param contextData Context data passed through to the deduplicator.
	 * @param options Deduplication options passed through to the deduplicator.
	 * @returns The items split into `new` and `processed`, in the order the
	 * deduplicator reported their keys.
	 */
	async checkProcessedItemsAndRecord(
		propertyName: string,
		items: IDataObject[],
		scope: DeduplicationScope,
		contextData: ICheckProcessedContextData,
		options: ICheckProcessedOptions,
	): Promise<IDeduplicationOutputItems> {
		this.assertDeduplicator();

		// A Map (rather than a plain object) keeps keys in first-seen order.
		// A plain object enumerates integer-like keys such as "3" in ascending
		// numeric order, which would silently reorder items with numeric values.
		const itemLookup = new Map<string, number>();
		items.forEach((item, index) => {
			// JSON.stringify yields undefined for undefined input.
			const serialized: string | undefined = JSON.stringify(get(item, propertyName));
			itemLookup.set(serialized ?? '', index);
		});

		const checkedItems = await this.deduplicator.checkProcessedAndRecord(
			Array.from(itemLookup.keys()),
			scope,
			contextData,
			options,
		);

		// A key the deduplicator returns that was never submitted resolves to
		// `undefined`, exactly as an unknown property lookup would.
		const toItem = (key: DeduplicationItemTypes): IDataObject =>
			items[itemLookup.get(String(key)) as number];

		return {
			new: checkedItems.new.map(toItem),
			processed: checkedItems.processed.map(toItem),
		};
	}

	/**
	 * Delegates to the deduplicator's `checkProcessedAndRecord` with the
	 * arguments unchanged and returns its result.
	 */
	async checkProcessedAndRecord(
		items: DeduplicationItemTypes[],
		scope: DeduplicationScope,
		contextData: ICheckProcessedContextData,
		options: ICheckProcessedOptions,
	): Promise<IDeduplicationOutput> {
		this.assertDeduplicator();
		return await this.deduplicator.checkProcessedAndRecord(items, scope, contextData, options);
	}

	/**
	 * Delegates to the deduplicator's `removeProcessed` with the arguments
	 * unchanged.
	 */
	async removeProcessed(
		items: DeduplicationItemTypes[],
		scope: DeduplicationScope,
		contextData: ICheckProcessedContextData,
		options: ICheckProcessedOptions,
	): Promise<void> {
		this.assertDeduplicator();
		return await this.deduplicator.removeProcessed(items, scope, contextData, options);
	}

	/**
	 * Delegates to the deduplicator's `clearAllProcessedItems` with the
	 * arguments unchanged.
	 */
	async clearAllProcessedItems(
		scope: DeduplicationScope,
		contextData: ICheckProcessedContextData,
		options: ICheckProcessedOptions,
	): Promise<void> {
		this.assertDeduplicator();
		return await this.deduplicator.clearAllProcessedItems(scope, contextData, options);
	}

	/**
	 * Delegates to the deduplicator's `getProcessedDataCount` with the
	 * arguments unchanged and returns its result.
	 */
	async getProcessedDataCount(
		scope: DeduplicationScope,
		contextData: ICheckProcessedContextData,
		options: ICheckProcessedOptions,
	): Promise<number> {
		this.assertDeduplicator();
		return await this.deduplicator.getProcessedDataCount(scope, contextData, options);
	}
}

View File

@@ -14,6 +14,7 @@ export { InstanceSettings, InstanceType } from './InstanceSettings';
export * from './NodeExecuteFunctions';
export * from './WorkflowExecute';
export { NodeExecuteFunctions };
export * from './data-deduplication-service';
export * from './errors';
export { ObjectStoreService } from './ObjectStore/ObjectStore.service.ee';
export { BinaryData } from './BinaryData/types';