fix(core): Handle Redis disconnects gracefully (#11007)

This commit is contained in:
Iván Ovejero
2024-09-30 16:36:27 +02:00
committed by GitHub
parent 805a1140c9
commit cd916480c2
5 changed files with 121 additions and 46 deletions

View File

@@ -173,39 +173,11 @@ export class ScalingService {
// #region Listeners
private registerListeners() {
let latestAttemptTs = 0;
let cumulativeTimeoutMs = 0;
const MAX_TIMEOUT_MS = this.globalConfig.queue.bull.redis.timeoutThreshold;
const RESET_LENGTH_MS = 30_000;
this.queue.on('error', (error: Error) => {
if ('code' in error && error.code === 'ECONNREFUSED') return; // handled by RedisClientService.retryStrategy
this.logger.error('[ScalingService] Queue errored', { error });
/**
* On Redis connection failure, try to reconnect. On every failed attempt,
* increment a cumulative timeout - if this exceeds a limit, exit the
* process. Reset the cumulative timeout if >30s between retries.
*/
if (error.message.includes('ECONNREFUSED')) {
const nowTs = Date.now();
if (nowTs - latestAttemptTs > RESET_LENGTH_MS) {
latestAttemptTs = nowTs;
cumulativeTimeoutMs = 0;
} else {
cumulativeTimeoutMs += nowTs - latestAttemptTs;
latestAttemptTs = nowTs;
if (cumulativeTimeoutMs > MAX_TIMEOUT_MS) {
this.logger.error('[ScalingService] Redis unavailable after max timeout');
this.logger.error('[ScalingService] Exiting process...');
process.exit(1);
}
}
this.logger.warn('[ScalingService] Redis unavailable - retrying to connect...');
return;
}
throw error;
});