feat(core): Dedupe (#10101)

Co-authored-by: Jan Oberhauser <jan@n8n.io>
Co-authored-by: Giulio Andreini <g.andreini@gmail.com>
Co-authored-by: Tomi Turtiainen <10324676+tomi@users.noreply.github.com>
Co-authored-by: Elias Meire <elias@meire.dev>
This commit is contained in:
Shireen Missi
2024-10-10 16:12:05 +01:00
committed by GitHub
parent 6823e8f2dd
commit 52dd2c7619
32 changed files with 2298 additions and 233 deletions

View File

@@ -1,234 +1,25 @@
import get from 'lodash/get';
import isEqual from 'lodash/isEqual';
import lt from 'lodash/lt';
import pick from 'lodash/pick';
import {
NodeOperationError,
NodeConnectionType,
type IExecuteFunctions,
type INodeExecutionData,
type INodeType,
type INodeTypeDescription,
} from 'n8n-workflow';
import { prepareFieldsArray } from '../utils/utils';
import { validateInputData } from './utils';
import { compareItems, flattenKeys } from '@utils/utilities';
import type { INodeTypeBaseDescription, IVersionedNodeType } from 'n8n-workflow';
import { VersionedNodeType } from 'n8n-workflow';
export class RemoveDuplicates implements INodeType {
description: INodeTypeDescription = {
displayName: 'Remove Duplicates',
name: 'removeDuplicates',
icon: 'file:removeDuplicates.svg',
group: ['transform'],
subtitle: '',
version: [1, 1.1],
description: 'Delete items with matching field values',
defaults: {
name: 'Remove Duplicates',
},
inputs: [NodeConnectionType.Main],
outputs: [NodeConnectionType.Main],
properties: [
{
displayName: 'Compare',
name: 'compare',
type: 'options',
options: [
{
name: 'All Fields',
value: 'allFields',
},
{
name: 'All Fields Except',
value: 'allFieldsExcept',
},
{
name: 'Selected Fields',
value: 'selectedFields',
},
],
default: 'allFields',
description: 'The fields of the input items to compare to see if they are the same',
},
{
displayName: 'Fields To Exclude',
name: 'fieldsToExclude',
type: 'string',
placeholder: 'e.g. email, name',
requiresDataPath: 'multiple',
description: 'Fields in the input to exclude from the comparison',
default: '',
displayOptions: {
show: {
compare: ['allFieldsExcept'],
},
},
},
{
displayName: 'Fields To Compare',
name: 'fieldsToCompare',
type: 'string',
placeholder: 'e.g. email, name',
requiresDataPath: 'multiple',
description: 'Fields in the input to add to the comparison',
default: '',
displayOptions: {
show: {
compare: ['selectedFields'],
},
},
},
{
displayName: 'Options',
name: 'options',
type: 'collection',
placeholder: 'Add Field',
default: {},
displayOptions: {
show: {
compare: ['allFieldsExcept', 'selectedFields'],
},
},
options: [
{
displayName: 'Disable Dot Notation',
name: 'disableDotNotation',
type: 'boolean',
default: false,
description:
'Whether to disallow referencing child fields using `parent.child` in the field name',
},
{
displayName: 'Remove Other Fields',
name: 'removeOtherFields',
type: 'boolean',
default: false,
description:
'Whether to remove any fields that are not being compared. If disabled, will keep the values from the first of the duplicates.',
},
],
},
],
};
import { RemoveDuplicatesV1 } from './v1/RemoveDuplicatesV1.node';
import { RemoveDuplicatesV2 } from './v2/RemoveDuplicatesV2.node';
export class RemoveDuplicates extends VersionedNodeType {
constructor() {
const baseDescription: INodeTypeBaseDescription = {
displayName: 'Remove Duplicates',
name: 'removeDuplicates',
icon: 'file:removeDuplicates.svg',
group: ['transform'],
defaultVersion: 2,
description: 'Delete items with matching field values',
};
async execute(this: IExecuteFunctions): Promise<INodeExecutionData[][]> {
const items = this.getInputData();
const compare = this.getNodeParameter('compare', 0) as string;
const disableDotNotation = this.getNodeParameter(
'options.disableDotNotation',
0,
false,
) as boolean;
const removeOtherFields = this.getNodeParameter(
'options.removeOtherFields',
0,
false,
) as boolean;
const nodeVersions: IVersionedNodeType['nodeVersions'] = {
1: new RemoveDuplicatesV1(baseDescription),
1.1: new RemoveDuplicatesV1(baseDescription),
2: new RemoveDuplicatesV2(baseDescription),
};
let keys = disableDotNotation
? Object.keys(items[0].json)
: Object.keys(flattenKeys(items[0].json));
for (const item of items) {
for (const key of disableDotNotation
? Object.keys(item.json)
: Object.keys(flattenKeys(item.json))) {
if (!keys.includes(key)) {
keys.push(key);
}
}
}
if (compare === 'allFieldsExcept') {
const fieldsToExclude = prepareFieldsArray(
this.getNodeParameter('fieldsToExclude', 0, '') as string,
'Fields To Exclude',
);
if (!fieldsToExclude.length) {
throw new NodeOperationError(
this.getNode(),
'No fields specified. Please add a field to exclude from comparison',
);
}
if (!disableDotNotation) {
keys = Object.keys(flattenKeys(items[0].json));
}
keys = keys.filter((key) => !fieldsToExclude.includes(key));
}
if (compare === 'selectedFields') {
const fieldsToCompare = prepareFieldsArray(
this.getNodeParameter('fieldsToCompare', 0, '') as string,
'Fields To Compare',
);
if (!fieldsToCompare.length) {
throw new NodeOperationError(
this.getNode(),
'No fields specified. Please add a field to compare on',
);
}
if (!disableDotNotation) {
keys = Object.keys(flattenKeys(items[0].json));
}
keys = fieldsToCompare.map((key) => key.trim());
}
// This solution is O(nlogn)
// add original index to the items
const newItems = items.map(
(item, index) =>
({
json: { ...item.json, __INDEX: index },
pairedItem: { item: index },
}) as INodeExecutionData,
);
//sort items using the compare keys
newItems.sort((a, b) => {
let result = 0;
for (const key of keys) {
let equal;
if (!disableDotNotation) {
equal = isEqual(get(a.json, key), get(b.json, key));
} else {
equal = isEqual(a.json[key], b.json[key]);
}
if (!equal) {
let lessThan;
if (!disableDotNotation) {
lessThan = lt(get(a.json, key), get(b.json, key));
} else {
lessThan = lt(a.json[key], b.json[key]);
}
result = lessThan ? -1 : 1;
break;
}
}
return result;
});
validateInputData(this.getNode(), newItems, keys, disableDotNotation);
// collect the original indexes of items to be removed
const removedIndexes: number[] = [];
let temp = newItems[0];
for (let index = 1; index < newItems.length; index++) {
if (compareItems(newItems[index], temp, keys, disableDotNotation)) {
removedIndexes.push(newItems[index].json.__INDEX as unknown as number);
} else {
temp = newItems[index];
}
}
let returnData = items.filter((_, index) => !removedIndexes.includes(index));
if (removeOtherFields) {
returnData = returnData.map((item, index) => ({
json: pick(item.json, ...keys),
pairedItem: { item: index },
}));
}
return [returnData];
super(nodeVersions, baseDescription);
}
}

View File

@@ -1,7 +1,9 @@
import type { INode } from 'n8n-workflow';
import { validateInputData } from '../utils';
import { testWorkflows, getWorkflowFilenames } from '@test/nodes/Helpers';
import { validateInputData } from '../utils';
const workflows = getWorkflowFilenames(__dirname);
describe('Test Remove Duplicates Node', () => testWorkflows(workflows));

View File

@@ -1,5 +1,11 @@
import { isEqual, lt, pick } from 'lodash';
import get from 'lodash/get';
import { NodeOperationError, type INode, type INodeExecutionData } from 'n8n-workflow';
import { NodeOperationError } from 'n8n-workflow';
import type { IExecuteFunctions, INode, INodeExecutionData } from 'n8n-workflow';
import { compareItems, flattenKeys } from '@utils/utilities';
import { prepareFieldsArray } from '../utils/utils';
export const validateInputData = (
node: INode,
@@ -39,3 +45,124 @@ export const validateInputData = (
}
}
};
/**
 * Removes items that are duplicates of an earlier item within the same input.
 *
 * Which fields take part in the comparison is controlled by the node's
 * `compare` parameter (`allFields`, `allFieldsExcept`, `selectedFields`), read
 * from item index 0. The first occurrence of every duplicate group is kept and
 * the relative order of the surviving items is the input order.
 *
 * @param context Execution context used to read node parameters and the node.
 * @param items   Input items to deduplicate; not mutated.
 * @returns A single output branch containing the surviving items. When
 *          `options.removeOtherFields` is enabled each surviving item is reduced
 *          to the compared keys and linked (`pairedItem`) to its original input
 *          index.
 * @throws NodeOperationError when `allFieldsExcept` / `selectedFields` is chosen
 *         but no field names were provided.
 */
export function removeDuplicateInputItems(
	context: IExecuteFunctions,
	items: INodeExecutionData[],
): INodeExecutionData[][] {
	// Key discovery below reads `items[0]`; with no input there is nothing to dedupe.
	if (items.length === 0) return [[]];

	const compare = context.getNodeParameter('compare', 0) as string;
	const disableDotNotation = context.getNodeParameter(
		'options.disableDotNotation',
		0,
		false,
	) as boolean;
	const removeOtherFields = context.getNodeParameter(
		'options.removeOtherFields',
		0,
		false,
	) as boolean;

	const keysOf = (item: INodeExecutionData): string[] =>
		Object.keys(disableDotNotation ? item.json : flattenKeys(item.json));

	// Union of the keys of all items, in first-seen order.
	const discoveredKeys = new Set<string>();
	for (const item of items) {
		for (const key of keysOf(item)) {
			discoveredKeys.add(key);
		}
	}
	let keys = Array.from(discoveredKeys);

	if (compare === 'allFieldsExcept') {
		const fieldsToExclude = prepareFieldsArray(
			context.getNodeParameter('fieldsToExclude', 0, '') as string,
			'Fields To Exclude',
		);
		if (!fieldsToExclude.length) {
			throw new NodeOperationError(
				context.getNode(),
				'No fields specified. Please add a field to exclude from comparison',
			);
		}
		// With dot notation enabled the candidate keys are taken from the first
		// item only (existing behaviour, kept as-is).
		if (!disableDotNotation) {
			keys = Object.keys(flattenKeys(items[0].json));
		}
		keys = keys.filter((key) => !fieldsToExclude.includes(key));
	}

	if (compare === 'selectedFields') {
		const fieldsToCompare = prepareFieldsArray(
			context.getNodeParameter('fieldsToCompare', 0, '') as string,
			'Fields To Compare',
		);
		if (!fieldsToCompare.length) {
			throw new NodeOperationError(
				context.getNode(),
				'No fields specified. Please add a field to compare on',
			);
		}
		// The user-provided field list fully replaces the discovered keys.
		keys = fieldsToCompare.map((key) => key.trim());
	}

	// Tag every item with its original position so duplicates can be mapped
	// back to the input after sorting.
	const newItems = items.map(
		(item, index) =>
			({
				json: { ...item.json, __INDEX: index },
				pairedItem: { item: index },
			}) as INodeExecutionData,
	);

	const valueAt = (item: INodeExecutionData, key: string) =>
		disableDotNotation ? item.json[key] : get(item.json, key);

	// Sort by the compare keys so that equal items end up adjacent.
	newItems.sort((a, b) => {
		for (const key of keys) {
			const left = valueAt(a, key);
			const right = valueAt(b, key);
			if (!isEqual(left, right)) {
				return lt(left, right) ? -1 : 1;
			}
		}
		return 0;
	});

	validateInputData(context.getNode(), newItems, keys, disableDotNotation);

	// Walk the sorted list and record the original index of every item that
	// equals the current group representative.
	const removedIndexes = new Set<number>();
	let temp = newItems[0];
	for (let index = 1; index < newItems.length; index++) {
		if (compareItems(newItems[index], temp, keys, disableDotNotation)) {
			removedIndexes.add(newItems[index].json.__INDEX as unknown as number);
		} else {
			temp = newItems[index];
		}
	}

	const updatedItems: INodeExecutionData[] = [];
	items.forEach((item, index) => {
		if (removedIndexes.has(index)) return;
		updatedItems.push(
			removeOtherFields
				? {
						json: pick(item.json, ...keys),
						// Link to the ORIGINAL input position, not the position in
						// the filtered output.
						pairedItem: { item: index },
					}
				: item,
		);
	});

	return [updatedItems];
}

View File

@@ -0,0 +1,122 @@
/* eslint-disable n8n-nodes-base/node-filename-against-convention */
import { NodeConnectionType } from 'n8n-workflow';
import type {
INodeTypeBaseDescription,
IExecuteFunctions,
INodeExecutionData,
INodeType,
INodeTypeDescription,
} from 'n8n-workflow';
import { removeDuplicateInputItems } from '../utils';
/**
 * Node description for versions 1 and 1.1 of the Remove Duplicates node.
 * It is merged over the shared base description in the RemoveDuplicatesV1
 * constructor, so any field defined here wins over the base value.
 */
const versionDescription: INodeTypeDescription = {
displayName: 'Remove Duplicates',
name: 'removeDuplicates',
icon: 'file:removeDuplicates.svg',
group: ['transform'],
subtitle: '',
version: [1, 1.1],
description: 'Delete items with matching field values',
defaults: {
name: 'Remove Duplicates',
},
inputs: [NodeConnectionType.Main],
outputs: [NodeConnectionType.Main],
properties: [
// Selects which fields take part in the duplicate comparison.
{
displayName: 'Compare',
name: 'compare',
type: 'options',
options: [
{
name: 'All Fields',
value: 'allFields',
},
{
name: 'All Fields Except',
value: 'allFieldsExcept',
},
{
name: 'Selected Fields',
value: 'selectedFields',
},
],
default: 'allFields',
description: 'The fields of the input items to compare to see if they are the same',
},
// Only shown when `compare` is 'allFieldsExcept'.
{
displayName: 'Fields To Exclude',
name: 'fieldsToExclude',
type: 'string',
placeholder: 'e.g. email, name',
requiresDataPath: 'multiple',
description: 'Fields in the input to exclude from the comparison',
default: '',
displayOptions: {
show: {
compare: ['allFieldsExcept'],
},
},
},
// Only shown when `compare` is 'selectedFields'.
{
displayName: 'Fields To Compare',
name: 'fieldsToCompare',
type: 'string',
placeholder: 'e.g. email, name',
requiresDataPath: 'multiple',
description: 'Fields in the input to add to the comparison',
default: '',
displayOptions: {
show: {
compare: ['selectedFields'],
},
},
},
// Extra switches; hidden for 'allFields' because both options refer to a field list.
{
displayName: 'Options',
name: 'options',
type: 'collection',
placeholder: 'Add Field',
default: {},
displayOptions: {
show: {
compare: ['allFieldsExcept', 'selectedFields'],
},
},
options: [
{
displayName: 'Disable Dot Notation',
name: 'disableDotNotation',
type: 'boolean',
default: false,
description:
'Whether to disallow referencing child fields using `parent.child` in the field name',
},
{
displayName: 'Remove Other Fields',
name: 'removeOtherFields',
type: 'boolean',
default: false,
description:
'Whether to remove any fields that are not being compared. If disabled, will keep the values from the first of the duplicates.',
},
],
},
],
};
/**
 * Versions 1 and 1.1 of the Remove Duplicates node: deduplicates the items of
 * the current input using the shared `removeDuplicateInputItems` helper.
 */
export class RemoveDuplicatesV1 implements INodeType {
	description: INodeTypeDescription;

	/**
	 * @param baseDescription Description shared by all node versions; fields
	 *        from the version-specific description take precedence over it.
	 */
	constructor(baseDescription: INodeTypeBaseDescription) {
		this.description = Object.assign({}, baseDescription, versionDescription);
	}

	/** Reads the node's input items and returns them with duplicates removed. */
	async execute(this: IExecuteFunctions): Promise<INodeExecutionData[][]> {
		return removeDuplicateInputItems(this, this.getInputData());
	}
}

View File

@@ -0,0 +1,278 @@
import type { INodeProperties } from 'n8n-workflow';
// Choices for the top-level 'operation' field defined further down in this file.
// The `value` strings are the identifiers other fields reference in their
// `displayOptions`.
const operationOptions = [
{
name: 'Remove Items Repeated Within Current Input',
value: 'removeDuplicateInputItems',
description: 'Remove duplicates from incoming items',
action: 'Remove items repeated within current input',
},
{
name: 'Remove Items Processed in Previous Executions',
value: 'removeItemsSeenInPreviousExecutions',
description: 'Deduplicate items already seen in previous executions',
action: 'Remove items processed in previous executions',
},
{
name: 'Clear Deduplication History',
value: 'clearDeduplicationHistory',
description: 'Wipe the store of previous items',
action: 'Clear deduplication history',
},
];
// Choices for the 'compare' field: which item fields are compared when
// removing duplicates within the current input.
const compareOptions = [
{
name: 'All Fields',
value: 'allFields',
},
{
name: 'All Fields Except',
value: 'allFieldsExcept',
},
{
name: 'Selected Fields',
value: 'selectedFields',
},
];
// Choices for the 'logic' ("Keep Items Where") field. Each value selects which
// "Value to Dedupe On" field is displayed (plain, incremental or date).
const logicOptions = [
{
name: 'Value Is New',
value: 'removeItemsWithAlreadySeenKeyValues',
description: 'Remove all input items with values matching those already processed',
},
{
name: 'Value Is Higher than Any Previous Value',
value: 'removeItemsUpToStoredIncrementalKey',
description:
'Works with incremental values, removes all input items with values up to the stored value',
},
{
name: 'Value Is a Date Later than Any Previous Date',
value: 'removeItemsUpToStoredDate',
description:
'Works with date values, removes all input items with values up to the stored date',
},
];
// Choices for the 'mode' field of the "Clear Deduplication History" operation.
// Only a single mode is offered at the moment.
const manageDatabaseModeOptions = [
{
name: 'Clean Database',
value: 'cleanDatabase',
description: 'Clear all values stored for a key in the database',
},
];
/**
 * Parameter definitions for version 2 of the Remove Duplicates node.
 *
 * The fields fall into three groups, selected by the `operation` field:
 *  - in-input deduplication (`compare`, `fieldsToExclude`, `fieldsToCompare`),
 *  - deduplication against previous executions (`logic` plus one of the three
 *    "Value to Dedupe On" fields),
 *  - clearing the stored history (`mode`).
 * The trailing `options` collection holds per-operation extras.
 */
export const removeDuplicatesNodeFields: INodeProperties[] = [
	{
		displayName: 'Operation',
		name: 'operation',
		type: 'options',
		noDataExpression: true,
		options: operationOptions,
		default: 'removeDuplicateInputItems',
	},
	{
		displayName: 'Compare',
		name: 'compare',
		type: 'options',
		options: compareOptions,
		default: 'allFields',
		description: 'The fields of the input items to compare to see if they are the same',
		displayOptions: {
			show: {
				operation: ['removeDuplicateInputItems'],
			},
		},
	},
	{
		displayName: 'Fields To Exclude',
		name: 'fieldsToExclude',
		type: 'string',
		placeholder: 'e.g. email, name',
		requiresDataPath: 'multiple',
		description: 'Fields in the input to exclude from the comparison',
		default: '',
		displayOptions: {
			show: {
				compare: ['allFieldsExcept'],
			},
		},
	},
	{
		displayName: 'Fields To Compare',
		name: 'fieldsToCompare',
		type: 'string',
		placeholder: 'e.g. email, name',
		requiresDataPath: 'multiple',
		description: 'Fields in the input to add to the comparison',
		default: '',
		displayOptions: {
			show: {
				compare: ['selectedFields'],
			},
		},
	},

	// ----------------------------------
	// Deduplication against previous executions
	// ----------------------------------
	{
		displayName: 'Keep Items Where',
		name: 'logic',
		type: 'options',
		noDataExpression: true,
		options: logicOptions,
		default: 'removeItemsWithAlreadySeenKeyValues',
		description:
			'How to select input items to remove by comparing them with key values previously processed',
		displayOptions: {
			show: {
				operation: ['removeItemsSeenInPreviousExecutions'],
			},
		},
	},
	{
		displayName: 'Value to Dedupe On',
		name: 'dedupeValue',
		type: 'string',
		default: '',
		description: 'Use an input field (or a combination of fields) that has a unique ID value',
		hint: 'The input field value to compare between items',
		placeholder: 'e.g. ID',
		required: true,
		displayOptions: {
			show: {
				logic: ['removeItemsWithAlreadySeenKeyValues'],
				'/operation': ['removeItemsSeenInPreviousExecutions'],
			},
		},
	},
	{
		displayName: 'Value to Dedupe On',
		name: 'incrementalDedupeValue',
		type: 'number',
		default: '',
		description: 'Use an input field (or a combination of fields) that has an incremental value',
		hint: 'The input field value to compare between items, an incremental value is expected',
		placeholder: 'e.g. ID',
		displayOptions: {
			show: {
				logic: ['removeItemsUpToStoredIncrementalKey'],
				'/operation': ['removeItemsSeenInPreviousExecutions'],
			},
		},
	},
	{
		displayName: 'Value to Dedupe On',
		name: 'dateDedupeValue',
		type: 'dateTime',
		default: '',
		description: 'Use an input field that has a date value in ISO format',
		hint: 'The input field value to compare between items, a date is expected',
		// No leading space, consistent with the other placeholders in this file.
		placeholder: 'e.g. 2024-08-09T13:44:16Z',
		displayOptions: {
			show: {
				logic: ['removeItemsUpToStoredDate'],
				'/operation': ['removeItemsSeenInPreviousExecutions'],
			},
		},
	},

	// Clearing the stored deduplication history
	{
		displayName: 'Mode',
		name: 'mode',
		type: 'options',
		default: 'cleanDatabase',
		description:
			'How you want to modify the key values stored on the database. None of these modes removes input items.',
		displayOptions: {
			show: {
				operation: ['clearDeduplicationHistory'],
			},
		},
		options: manageDatabaseModeOptions,
	},

	// Per-operation extras; each entry narrows its own visibility further.
	{
		displayName: 'Options',
		name: 'options',
		type: 'collection',
		placeholder: 'Add Field',
		default: {},
		displayOptions: {
			show: {
				operation: [
					'removeDuplicateInputItems',
					'removeItemsSeenInPreviousExecutions',
					'clearDeduplicationHistory',
				],
			},
		},
		options: [
			{
				displayName: 'Disable Dot Notation',
				name: 'disableDotNotation',
				type: 'boolean',
				default: false,
				displayOptions: {
					show: {
						'/operation': ['removeDuplicateInputItems'],
					},
					hide: {
						'/compare': ['allFields'],
					},
				},
				description:
					'Whether to disallow referencing child fields using `parent.child` in the field name',
			},
			{
				displayName: 'Remove Other Fields',
				name: 'removeOtherFields',
				type: 'boolean',
				default: false,
				displayOptions: {
					show: {
						'/operation': ['removeDuplicateInputItems'],
					},
					hide: {
						'/compare': ['allFields'],
					},
				},
				description:
					'Whether to remove any fields that are not being compared. If disabled, will keep the values from the first of the duplicates.',
			},
			{
				displayName: 'Scope',
				name: 'scope',
				type: 'options',
				default: 'node',
				displayOptions: {
					show: {
						'/operation': ['clearDeduplicationHistory', 'removeItemsSeenInPreviousExecutions'],
					},
				},
				description:
					'If set to workflow, key values will be shared across all nodes in the workflow. If set to node, key values will be specific to this node.',
				options: [
					{
						name: 'Workflow',
						value: 'workflow',
						description: 'Deduplication info will be shared by all the nodes in the workflow',
					},
					{
						name: 'Node',
						value: 'node',
						description: 'Deduplication info will be stored only for this node',
					},
				],
			},
			{
				displayName: 'History Size',
				name: 'historySize',
				type: 'number',
				default: 10000,
				hint: 'The max number of past items to store for deduplication',
				displayOptions: {
					show: {
						'/logic': ['removeItemsWithAlreadySeenKeyValues'],
						'/operation': ['removeItemsSeenInPreviousExecutions'],
					},
				},
			},
		],
	},
];

View File

@@ -0,0 +1,277 @@
import {
NodeConnectionType,
NodeExecutionOutput,
NodeOperationError,
tryToParseDateTime,
} from 'n8n-workflow';
import type {
INodeTypeBaseDescription,
IExecuteFunctions,
INodeExecutionData,
INodeType,
INodeTypeDescription,
DeduplicationScope,
} from 'n8n-workflow';
import { removeDuplicatesNodeFields } from './RemoveDuplicatesV2.description';
import { removeDuplicateInputItems } from '../utils';
/**
 * Node description for version 2 of the Remove Duplicates node. It is merged
 * over the shared base description in the RemoveDuplicatesV2 constructor, so
 * any field defined here wins over the base value.
 */
const versionDescription: INodeTypeDescription = {
	displayName: 'Remove Duplicates',
	name: 'removeDuplicates',
	icon: 'file:removeDuplicates.svg',
	group: ['transform'],
	subtitle: '',
	version: [2],
	description: 'Delete items with matching field values',
	defaults: {
		name: 'Remove Duplicates',
	},
	inputs: [NodeConnectionType.Main],
	// NOTE(review): a single Main output is declared while two output names are
	// listed and the "previous executions" operation returns two item arrays —
	// confirm whether a second output should be declared.
	outputs: [NodeConnectionType.Main],
	outputNames: ['Kept', 'Discarded'],
	hints: [
		{
			message: 'The dedupe key set in “Value to Dedupe On” has no value',
			// The three per-logic checks are grouped in parentheses so that the
			// operation check applies to all of them (`&&` binds tighter than `||`).
			displayCondition:
				'={{ $parameter["operation"] === "removeItemsSeenInPreviousExecutions" && (($parameter["logic"] === "removeItemsWithAlreadySeenKeyValues" && $parameter["dedupeValue"] === undefined) || ($parameter["logic"] === "removeItemsUpToStoredIncrementalKey" && $parameter["incrementalDedupeValue"] === undefined) || ($parameter["logic"] === "removeItemsUpToStoredDate" && $parameter["dateDedupeValue"] === undefined)) }}',
			whenToDisplay: 'beforeExecution',
			location: 'outputPane',
		},
	],
	properties: [...removeDuplicatesNodeFields],
};
/** Input items grouped by their dedupe key. */
type ItemsByKey = Record<string, INodeExecutionData[]>;

/** Scopes under which deduplication history can be stored. */
const SUPPORTED_SCOPES: readonly string[] = ['node', 'workflow'];

/** Throws a NodeOperationError when `scope` is not one of the supported scopes. */
function assertSupportedScope(context: IExecuteFunctions, scope: DeduplicationScope): void {
	if (!SUPPORTED_SCOPES.includes(scope)) {
		throw new NodeOperationError(
			context.getNode(),
			`The scope '${scope}' is not supported. Please select either "node" or "workflow".`,
		);
	}
}

/**
 * Creates an empty key → items mapping. The object has no prototype so that
 * user-supplied keys such as `constructor` or `toString` cannot collide with
 * inherited members.
 */
function createItemMapping(): ItemsByKey {
	const mapping: ItemsByKey = Object.create(null);
	return mapping;
}

/** Appends `item` to the group stored under `key`, creating the group if needed. */
function addToMapping(mapping: ItemsByKey, key: string | number, item: INodeExecutionData): void {
	if (mapping[key]) {
		mapping[key].push(item);
	} else {
		mapping[key] = [item];
	}
}

/** Returns, in key order, all items grouped under the given keys. */
function collectItems(
	mapping: ItemsByKey,
	keys: ReadonlyArray<string | number>,
): INodeExecutionData[] {
	return keys.flatMap((key) => mapping[key]);
}

/**
 * Version 2 of the Remove Duplicates node. Besides removing duplicates within
 * the current input it can drop items already seen in previous executions
 * (via the deduplication helpers on the execution context) and clear that
 * stored history.
 */
export class RemoveDuplicatesV2 implements INodeType {
	description: INodeTypeDescription;

	/**
	 * @param baseDescription Description shared by all node versions; fields
	 *        from the version-specific description take precedence over it.
	 */
	constructor(baseDescription: INodeTypeBaseDescription) {
		this.description = {
			...baseDescription,
			...versionDescription,
		};
	}

	/**
	 * Runs the selected operation.
	 *
	 * @returns For `removeItemsSeenInPreviousExecutions` two arrays: the items
	 *          whose key was reported as new, followed by the items whose key was
	 *          reported as already processed. All other paths return a single
	 *          array.
	 * @throws NodeOperationError on an unsupported scope, an empty / invalid
	 *         dedupe value, or when the history size would be exceeded — unless
	 *         "continue on fail" is enabled, in which case a single error item is
	 *         returned instead.
	 */
	async execute(this: IExecuteFunctions): Promise<INodeExecutionData[][]> {
		const items = this.getInputData();
		const operation = this.getNodeParameter('operation', 0);
		const returnData: INodeExecutionData[][] = [];
		const DEFAULT_MAX_ENTRIES = 10000;

		try {
			switch (operation) {
				case 'removeDuplicateInputItems': {
					return removeDuplicateInputItems(this, items);
				}
				case 'removeItemsSeenInPreviousExecutions': {
					const logic = this.getNodeParameter('logic', 0);
					const scope = this.getNodeParameter('options.scope', 0, 'node') as DeduplicationScope;

					if (logic === 'removeItemsWithAlreadySeenKeyValues') {
						assertSupportedScope(this, scope);

						const itemMapping = createItemMapping();
						for (let itemIndex = 0; itemIndex < items.length; itemIndex++) {
							const checkValue =
								this.getNodeParameter('dedupeValue', itemIndex, '')?.toString() ?? '';
							addToMapping(itemMapping, checkValue, items[itemIndex]);
						}

						const maxEntries = this.getNodeParameter(
							'options.historySize',
							0,
							DEFAULT_MAX_ENTRIES,
						) as number;
						const maxEntriesNum = Number(maxEntries);

						// Refuse to record when the stored history cannot hold this batch.
						const currentProcessedDataCount = await this.helpers.getProcessedDataCount(scope, {
							mode: 'entries',
							maxEntries,
						});
						if (currentProcessedDataCount + items.length > maxEntriesNum) {
							throw new NodeOperationError(
								this.getNode(),
								'The number of items to be processed exceeds the maximum history size. Please increase the history size or reduce the number of items to be processed.',
							);
						}

						const itemsProcessed = await this.helpers.checkProcessedAndRecord(
							Object.keys(itemMapping),
							scope,
							{ mode: 'entries', maxEntries },
						);
						const processedDataCount = await this.helpers.getProcessedDataCount(scope, {
							mode: 'entries',
							maxEntries,
						});

						returnData.push(
							collectItems(itemMapping, itemsProcessed.new),
							collectItems(itemMapping, itemsProcessed.processed),
						);

						// Warn once more than half of the history capacity is in use.
						if (maxEntriesNum > 0 && processedDataCount / maxEntriesNum > 0.5) {
							return new NodeExecutionOutput(returnData, [
								{
									message: `Some duplicates may not be removed since you're approaching the maximum history size (${maxEntriesNum} items). You can raise this limit using the history size option.`,
									location: 'outputPane',
								},
							]);
						}
						return returnData;
					}

					if (logic === 'removeItemsUpToStoredIncrementalKey') {
						assertSupportedScope(this, scope);

						const itemMapping = createItemMapping();
						for (let itemIndex = 0; itemIndex < items.length; itemIndex++) {
							const incrementalKey = this.getNodeParameter('incrementalDedupeValue', itemIndex, '');
							if (!incrementalKey?.toString()) {
								throw new NodeOperationError(
									this.getNode(),
									'The `Value to Dedupe On` is empty. Please provide a value.',
								);
							}
							const parsedIncrementalKey = Number(incrementalKey);
							if (isNaN(parsedIncrementalKey)) {
								throw new NodeOperationError(
									this.getNode(),
									`The value '${incrementalKey}' is not a number. Please provide a number.`,
								);
							}
							addToMapping(itemMapping, parsedIncrementalKey, items[itemIndex]);
						}

						const itemsProcessed = await this.helpers.checkProcessedAndRecord(
							Object.keys(itemMapping),
							scope,
							{ mode: 'latestIncrementalKey' },
						);
						returnData.push(
							collectItems(itemMapping, itemsProcessed.new),
							collectItems(itemMapping, itemsProcessed.processed),
						);
						return returnData;
					}

					if (logic === 'removeItemsUpToStoredDate') {
						assertSupportedScope(this, scope);

						const itemMapping = createItemMapping();
						for (let itemIndex = 0; itemIndex < items.length; itemIndex++) {
							const checkValue =
								this.getNodeParameter('dateDedupeValue', itemIndex, '')?.toString() ?? '';
							if (!checkValue) {
								throw new NodeOperationError(
									this.getNode(),
									'The `Value to Dedupe On` is empty. Please provide a value.',
								);
							}
							// Only used as a validity check; the raw string stays the key.
							try {
								tryToParseDateTime(checkValue);
							} catch {
								throw new NodeOperationError(
									this.getNode(),
									`The value '${checkValue}' is not a valid date. Please provide a valid date.`,
								);
							}
							addToMapping(itemMapping, checkValue, items[itemIndex]);
						}

						const itemsProcessed = await this.helpers.checkProcessedAndRecord(
							Object.keys(itemMapping),
							scope,
							{ mode: 'latestDate' },
						);
						returnData.push(
							collectItems(itemMapping, itemsProcessed.new),
							collectItems(itemMapping, itemsProcessed.processed),
						);
						return returnData;
					}

					// Unknown logic: pass the input through untouched.
					return [items];
				}
				case 'clearDeduplicationHistory': {
					const mode = this.getNodeParameter('mode', 0) as string;
					// 'cleanDatabase' is the only mode that performs any work.
					if (mode === 'cleanDatabase') {
						const scope = this.getNodeParameter('options.scope', 0, 'node') as DeduplicationScope;
						await this.helpers.clearAllProcessedItems(scope, {
							mode: 'entries',
						});
					}
					return [items];
				}
				default: {
					return [items];
				}
			}
		} catch (error) {
			if (this.continueOnFail()) {
				// Guard against an empty input: there may be no first item to echo back.
				returnData.push([{ json: items[0]?.json ?? {}, error }]);
			} else {
				throw error;
			}
		}
		return returnData;
	}
}

View File

@@ -0,0 +1,131 @@
/* eslint-disable n8n-nodes-base/node-filename-against-convention */
import { mock } from 'jest-mock-extended';
import type { IExecuteFunctions, INodeExecutionData, INodeTypeBaseDescription } from 'n8n-workflow';
import { RemoveDuplicatesV2 } from '../RemoveDuplicatesV2.node';
// Unit tests for RemoveDuplicatesV2.execute, driven through a mocked execution
// context: input items and node parameters are stubbed per test case.
describe('RemoveDuplicatesV2', () => {
	let node: RemoveDuplicatesV2;
	let executeFunctions: IExecuteFunctions;

	/** Makes the mocked context return `inputItems` from getInputData. */
	const givenInput = (inputItems: INodeExecutionData[]) => {
		(executeFunctions.getInputData as jest.Mock<any, any>).mockReturnValue(inputItems);
	};

	/** Routes getNodeParameter calls through `resolve`. */
	const givenParameters = (resolve: (paramName: string, itemIndex: number) => unknown) => {
		(executeFunctions.getNodeParameter as jest.Mock<any, any>).mockImplementation(resolve);
	};

	/** Resolver answering from a fixed name → value table (undefined otherwise). */
	const fromTable = (table: Array<[string, unknown]>) => {
		const lookup = new Map<string, unknown>(table);
		return (paramName: string) => lookup.get(paramName);
	};

	beforeEach(() => {
		const baseDescription: INodeTypeBaseDescription = {
			displayName: 'Remove Duplicates',
			name: 'removeDuplicates',
			icon: 'file:removeDuplicates.svg',
			group: ['transform'],
			description: 'Delete items with matching field values',
		};
		node = new RemoveDuplicatesV2(baseDescription);

		executeFunctions = mock<IExecuteFunctions>();
		executeFunctions.helpers = {
			checkProcessedAndRecord: jest.fn(),
			clearAllProcessedItems: jest.fn(),
		} as any;
		executeFunctions.getInputData = jest.fn();
		executeFunctions.getNodeParameter = jest.fn();
	});

	it('should Remove items repeated within current input based on all fields', async () => {
		givenInput([
			{ json: { id: 1, name: 'John' } },
			{ json: { id: 2, name: 'Jane' } },
			{ json: { id: 1, name: 'John' } },
		]);
		givenParameters(
			fromTable([
				['operation', 'removeDuplicateInputItems'],
				['compare', 'allFields'],
			]),
		);

		const output = await node.execute.call(executeFunctions);

		expect(output).toHaveLength(1);
		const [kept] = output;
		expect(kept).toHaveLength(2);
		expect(kept[0].json).toEqual({ id: 1, name: 'John' });
		expect(kept[1].json).toEqual({ id: 2, name: 'Jane' });
	});

	it('should Remove items repeated within current input based on selected fields', async () => {
		givenInput([
			{ json: { id: 1, name: 'John' } },
			{ json: { id: 2, name: 'Jane' } },
			{ json: { id: 1, name: 'Doe' } },
		]);
		givenParameters(
			fromTable([
				['operation', 'removeDuplicateInputItems'],
				['compare', 'selectedFields'],
				['fieldsToCompare', 'id'],
			]),
		);

		const output = await node.execute.call(executeFunctions);

		expect(output).toHaveLength(1);
		const [kept] = output;
		expect(kept).toHaveLength(2);
		expect(kept[0].json).toEqual({ id: 1, name: 'John' });
		expect(kept[1].json).toEqual({ id: 2, name: 'Jane' });
	});

	it('should remove items seen in previous executions', async () => {
		givenInput([
			{ json: { id: 1, name: 'John' } },
			{ json: { id: 2, name: 'Jane' } },
			{ json: { id: 3, name: 'Doe' } },
		]);
		// Dedupe value per item index: 0 → 1, 1 → 2, 2 → 3.
		const dedupeValues = [1, 2, 3];
		givenParameters((paramName, itemIndex) => {
			switch (paramName) {
				case 'operation':
					return 'removeItemsSeenInPreviousExecutions';
				case 'logic':
					return 'removeItemsWithAlreadySeenKeyValues';
				case 'dedupeValue':
					return dedupeValues[itemIndex];
				case 'options.scope':
					return 'node';
				case 'options.historySize':
					return 10;
				default:
					return undefined;
			}
		});
		executeFunctions.helpers.getProcessedDataCount = jest.fn().mockReturnValue(3);
		(executeFunctions.helpers.checkProcessedAndRecord as jest.Mock).mockReturnValue({
			new: [1, 3],
			processed: [2],
		});

		const output = await node.execute.call(executeFunctions);

		expect(output).toHaveLength(2);
		const [kept, discarded] = output;
		expect(kept).toHaveLength(2);
		expect(discarded).toHaveLength(1);
		expect(kept[0].json).toEqual({ id: 1, name: 'John' });
		expect(kept[1].json).toEqual({ id: 3, name: 'Doe' });
	});

	it('should clean database when managing key values', async () => {
		givenInput([{ json: { id: 1, name: 'John' } }, { json: { id: 2, name: 'Jane' } }]);
		givenParameters(
			fromTable([
				['operation', 'clearDeduplicationHistory'],
				['mode', 'cleanDatabase'],
				['options.scope', 'node'],
			]),
		);

		const output = await node.execute.call(executeFunctions);

		expect(output).toHaveLength(1);
		const [passedThrough] = output;
		expect(passedThrough).toHaveLength(2);
		expect(passedThrough[0].json).toEqual({ id: 1, name: 'John' });
		expect(passedThrough[1].json).toEqual({ id: 2, name: 'Jane' });
	});
});