fix(core): Handle Redis disconnects gracefully (#11007)

This commit is contained in:
Iván Ovejero
2024-09-30 16:36:27 +02:00
committed by GitHub
parent 805a1140c9
commit cd916480c2
5 changed files with 121 additions and 46 deletions

View File

@@ -24,8 +24,6 @@ export class Publisher {
if (config.getEnv('executions.mode') !== 'queue') return;
this.client = this.redisClientService.createClient({ type: 'publisher(n8n)' });
this.client.on('error', (error) => this.logger.error(error.message));
}
getClient() {

View File

@@ -27,8 +27,6 @@ export class Subscriber {
this.client = this.redisClientService.createClient({ type: 'subscriber(n8n)' });
this.client.on('error', (error) => this.logger.error(error.message));
this.client.on('message', (channel: PubSub.Channel, message) => {
this.handlers.get(channel)?.(message);
});

View File

@@ -173,39 +173,11 @@ export class ScalingService {
// #region Listeners
private registerListeners() {
let latestAttemptTs = 0;
let cumulativeTimeoutMs = 0;
const MAX_TIMEOUT_MS = this.globalConfig.queue.bull.redis.timeoutThreshold;
const RESET_LENGTH_MS = 30_000;
this.queue.on('error', (error: Error) => {
if ('code' in error && error.code === 'ECONNREFUSED') return; // handled by RedisClientService.retryStrategy
this.logger.error('[ScalingService] Queue errored', { error });
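/**
* On Redis connection failure, try to reconnect. On every failed attempt,
* add the time elapsed since the previous attempt to a cumulative timeout -
* if this exceeds `queue.bull.redis.timeoutThreshold`, exit the process with
* code 1. Reset the cumulative timeout if more than 30s have passed since the
* previous attempt.
*/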
if (error.message.includes('ECONNREFUSED')) {
const nowTs = Date.now();
if (nowTs - latestAttemptTs > RESET_LENGTH_MS) {
latestAttemptTs = nowTs;
cumulativeTimeoutMs = 0;
} else {
cumulativeTimeoutMs += nowTs - latestAttemptTs;
latestAttemptTs = nowTs;
if (cumulativeTimeoutMs > MAX_TIMEOUT_MS) {
this.logger.error('[ScalingService] Redis unavailable after max timeout');
this.logger.error('[ScalingService] Exiting process...');
process.exit(1);
}
}
this.logger.warn('[ScalingService] Redis unavailable - retrying to connect...');
return;
}
throw error;
});