fix(core): Fix crashed execution recovery in scaling mode (#19412)

This commit is contained in:
Tomi Turtiainen
2025-09-15 13:25:37 +03:00
committed by GitHub
parent 267a62d9c1
commit 3576443a01
2 changed files with 36 additions and 9 deletions

View File

@@ -1,5 +1,6 @@
import { mockLogger, mockInstance } from '@n8n/backend-test-utils';
import { GlobalConfig } from '@n8n/config';
import type { ExecutionRepository } from '@n8n/db';
import { Container } from '@n8n/di';
import * as BullModule from 'bull';
import { mock } from 'jest-mock-extended';
@@ -54,6 +55,7 @@ describe('ScalingService', () => {
const instanceSettings = Container.get(InstanceSettings);
const jobProcessor = mock<JobProcessor>();
const executionRepository = mock<ExecutionRepository>();
let scalingService: ScalingService;
@@ -85,7 +87,7 @@ describe('ScalingService', () => {
mock(),
jobProcessor,
globalConfig,
mock(),
executionRepository,
instanceSettings,
mock(),
);
@@ -359,4 +361,36 @@ describe('ScalingService', () => {
});
});
});
/**
 * Queue recovery: an execution ID reported as in-progress by the execution
 * repository must be marked as crashed when no job in the queue carries that
 * ID, and must be left alone when such a job exists.
 */
describe('recoverFromQueue', () => {
	// The single execution ID the repository reports as in-progress in every case.
	const storedExecutionId = '123';

	/**
	 * Arrange and run one recovery cycle.
	 *
	 * Sets up the queue, stubs the repository to report `storedExecutionId` as
	 * in-progress, stubs the queue to return one job per given execution ID,
	 * then invokes `recoverFromQueue()`.
	 *
	 * @param queuedExecutionIds execution IDs of the jobs the mocked queue returns
	 */
	const runRecoveryWithQueuedJobs = async (queuedExecutionIds: string[]) => {
		await scalingService.setupQueue();

		executionRepository.getInProgressExecutionIds.mockResolvedValue([storedExecutionId]);

		const queuedJobs = queuedExecutionIds.map((executionId) =>
			mock<Job>({ data: { executionId } }),
		);
		queue.getJobs.mockResolvedValue(queuedJobs);

		await scalingService.recoverFromQueue();
	};

	it('should mark running executions as crashed if they are missing from the queue and queue is empty', async () => {
		// No jobs at all in the queue.
		await runRecoveryWithQueuedJobs([]);

		expect(executionRepository.markAsCrashed).toHaveBeenCalledWith([storedExecutionId]);
	});

	it('should mark running executions as crashed if they are missing from the queue and queue is not empty', async () => {
		// The queue holds a job, but for a different execution.
		await runRecoveryWithQueuedJobs(['321']);

		expect(executionRepository.markAsCrashed).toHaveBeenCalledWith([storedExecutionId]);
	});

	it('should not mark running executions as crashed if they are present in the queue', async () => {
		// The queue holds a job for the very execution the repository reports.
		await runRecoveryWithQueuedJobs([storedExecutionId]);

		expect(executionRepository.markAsCrashed).not.toHaveBeenCalled();
	});
});
});

View File

@@ -480,7 +480,7 @@ export class ScalingService {
* Mark in-progress executions as `crashed` if stored in DB as `new` or `running`
* but absent from the queue. Return time until next recovery cycle.
*/
private async recoverFromQueue() {
async recoverFromQueue() {
const { waitMs, batchSize } = this.queueRecoveryContext;
const storedIds = await this.executionRepository.getInProgressExecutionIds(batchSize);
@@ -491,16 +491,9 @@ export class ScalingService {
}
const runningJobs = await this.findJobsByStatus(['active', 'waiting']);
const queuedIds = new Set(runningJobs.map((job) => job.data.executionId));
if (queuedIds.size === 0) {
this.logger.debug('Completed queue recovery check, no dangling executions');
return waitMs;
}
const danglingIds = storedIds.filter((id) => !queuedIds.has(id));
if (danglingIds.length === 0) {
this.logger.debug('Completed queue recovery check, no dangling executions');
return waitMs;