refactor(core): Implement soft-deletions for executions (#7092)

Based on #7065 | Story: https://linear.app/n8n/issue/PAY-771

In filesystem mode, n8n marks binary data for deletion on manual
execution deletion, on unsaved execution completion, and on every
execution pruning cycle. We later prune binary data in a separate cycle
via these marker files, based on the configured TTL. In the context of
introducing an S3 client to manage binary data, the filesystem mode's
mark-and-prune setup is too tightly coupled to the general binary data
management client interface.

This PR...
- Ensures the deletion of an execution causes the deletion of any binary
data associated with it. This does away with the need for binary data TTL
and simplifies the filesystem mode's mark-and-prune setup.
- Refactors all execution deletions (including pruning) to cause soft
deletions, hard-deletes soft-deleted executions based on the existing
pruning config, and adjusts execution endpoints to filter out
soft-deleted executions. This reduces DB load, and keeps binary data
around long enough for users to access it when building workflows with
unsaved executions.
- Moves all execution pruning work from an execution lifecycle hook to
`execution.repository.ts`. This keeps related logic in a single place.
- Removes all marking logic from the binary data manager. This
simplifies the interface that the S3 client will meet.
- Adds basic sanity-check tests for pruning logic and execution deletion.

Out of scope:

- Improving existing pruning logic.
- Improving existing execution repository logic.
- Adjusting dir structure for filesystem mode.

---------

Co-authored-by: कारतोफ्फेलस्क्रिप्ट™ <aditya@netroy.in>
This commit is contained in:
Iván Ovejero
2023-09-20 15:21:42 +02:00
committed by GitHub
parent 09a7cf0980
commit cd08c8e4c6
36 changed files with 411 additions and 253 deletions

View File

@@ -9,7 +9,7 @@
/* eslint-disable @typescript-eslint/no-unused-vars */
/* eslint-disable @typescript-eslint/no-unsafe-assignment */
import { BinaryDataManager, UserSettings, WorkflowExecute } from 'n8n-core';
import { UserSettings, WorkflowExecute } from 'n8n-core';
import type {
IDataObject,
@@ -37,21 +37,16 @@ import {
} from 'n8n-workflow';
import { Container } from 'typedi';
import type { FindOptionsWhere } from 'typeorm';
import { LessThanOrEqual, In } from 'typeorm';
import { DateUtils } from 'typeorm/util/DateUtils';
import config from '@/config';
import * as Db from '@/Db';
import { ActiveExecutions } from '@/ActiveExecutions';
import { CredentialsHelper } from '@/CredentialsHelper';
import { ExternalHooks } from '@/ExternalHooks';
import type {
IExecutionDb,
IExecutionFlattedDb,
IPushDataExecutionFinished,
IWorkflowExecuteProcess,
IWorkflowExecutionDataProcess,
IWorkflowErrorData,
ExecutionPayload,
} from '@/Interfaces';
import { NodeTypes } from '@/NodeTypes';
import { Push } from '@/push';
@@ -184,77 +179,6 @@ export function executeErrorWorkflow(
}
}
/**
 * Prunes saved executions which are older than configured, or which exceed
 * the configured maximum count, and marks their binary data for deletion.
 * Throttled to be executed just once in configured timeframe.
 *
 * Failures are reported and logged rather than rethrown, and they release the
 * throttle so that the next call can retry.
 *
 * TODO: Consider moving this whole function to the repository or at least the queries
 */
let throttling = false;
async function pruneExecutionData(this: WorkflowHooks): Promise<void> {
	if (throttling) return;

	Logger.verbose('Pruning execution data from database');
	throttling = true;

	// Everything that can reject runs inside the `try`, so that a failing query
	// can never leave `throttling` stuck at `true` without a reset.
	try {
		const timeout = config.getEnv('executions.pruneDataTimeout'); // in seconds
		const maxAge = config.getEnv('executions.pruneDataMaxAge'); // in h
		const maxCount = config.getEnv('executions.pruneDataMaxCount');

		// Release the throttle once the configured timeframe has elapsed
		setTimeout(() => {
			throttling = false;
		}, timeout * 1000);

		const date = new Date(); // today
		date.setHours(date.getHours() - maxAge);

		// date reformatting needed - see https://github.com/typeorm/typeorm/issues/2286
		const utcDate = DateUtils.mixedDateToUtcDatetimeString(date);

		// An array of conditions: an execution matching any one of them is pruned
		const toPrune: Array<FindOptionsWhere<IExecutionFlattedDb>> = [
			{ stoppedAt: LessThanOrEqual(utcDate) },
		];

		if (maxCount > 0) {
			// Skip the newest `maxCount` executions; the next one (if any) and
			// everything with a lower ID exceeds the allowed count
			const executions = await Db.collections.Execution.find({
				select: ['id'],
				skip: maxCount,
				take: 1,
				order: { id: 'DESC' },
			});

			if (executions[0]) {
				toPrune.push({ id: LessThanOrEqual(executions[0].id) });
			}
		}

		// Fetches the IDs of the next batch (at most 100) of executions to prune
		const findNextBatch = async (): Promise<Array<IExecutionFlattedDb['id']>> =>
			(
				await Db.collections.Execution.find({
					select: ['id'],
					where: toPrune,
					take: 100,
				})
			).map(({ id }) => id);

		// Stop as soon as a batch comes back empty, without issuing a no-op
		// delete / mark call for an empty ID list
		let executionIds = await findNextBatch();
		while (executionIds.length > 0) {
			await Db.collections.Execution.delete({ id: In(executionIds) });
			// Mark binary data for deletion for all executions
			await BinaryDataManager.getInstance().markDataForDeletionByExecutionIds(executionIds);
			executionIds = await findNextBatch();
		}
	} catch (error) {
		ErrorReporter.error(error);
		throttling = false;
		Logger.error(
			`Failed pruning execution data from database for execution ID ${this.executionId} (hookFunctionsSave)`,
			{
				...error,
				executionId: this.executionId,
				sessionId: this.sessionId,
				workflowId: this.workflowData.id,
			},
		);
	}
}
/**
* Returns hook functions to push data to Editor-UI
*
@@ -522,11 +446,6 @@ function hookFunctionsSave(parentProcessMode?: string): IWorkflowExecuteHooks {
workflowId: this.workflowData.id,
});
// Prune old execution data
if (config.getEnv('executions.pruneData')) {
await pruneExecutionData.call(this);
}
const isManualMode = [this.mode, parentProcessMode].includes('manual');
try {
@@ -554,8 +473,7 @@ function hookFunctionsSave(parentProcessMode?: string): IWorkflowExecuteHooks {
}
if (isManualMode && !saveManualExecutions && !fullRunData.waitTill) {
// Data is always saved, so we remove from database
await Container.get(ExecutionRepository).deleteExecution(this.executionId, true);
await Container.get(ExecutionRepository).softDelete(this.executionId);
return;
}
@@ -586,8 +504,7 @@ function hookFunctionsSave(parentProcessMode?: string): IWorkflowExecuteHooks {
this.executionId,
this.retryOf,
);
// Data is always saved, so we remove from database
await Container.get(ExecutionRepository).deleteExecution(this.executionId);
await Container.get(ExecutionRepository).softDelete(this.executionId);
return;
}
@@ -682,11 +599,6 @@ function hookFunctionsSaveWorker(): IWorkflowExecuteHooks {
workflowId: this.workflowData.id,
});
try {
// Prune old execution data
if (config.getEnv('executions.pruneData')) {
await pruneExecutionData.call(this);
}
if (isWorkflowIdValid(this.workflowData.id) && newStaticData) {
// Workflow is saved so update in database
try {
@@ -973,7 +885,7 @@ async function executeWorkflow(
// Therefore, database might not contain finished errors.
// Force an update to db as there should be no harm doing this
const fullExecutionData: IExecutionDb = {
const fullExecutionData: ExecutionPayload = {
data: fullRunData.data,
mode: fullRunData.mode,
finished: fullRunData.finished ? fullRunData.finished : false,