Skip to content

Commit ba3f4b9

Browse files
committed
feat(grouper): add memory leak diagnostics logs
1 parent 9e4b6af commit ba3f4b9

File tree

2 files changed

+117
-5
lines changed

2 files changed

+117
-5
lines changed

.env.sample

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,12 @@ PROMETHEUS_PUSHGATEWAY_URL=
3131
# pushgateway push interval in ms
3232
PROMETHEUS_PUSHGATEWAY_INTERVAL=10000
3333

34+
# Grouper memory log controls
35+
GROUPER_MEMORY_LOG_EVERY_TASKS=50
36+
GROUPER_MEMORY_GROWTH_WINDOW_TASKS=200
37+
GROUPER_MEMORY_GROWTH_WARN_MB=64
38+
GROUPER_MEMORY_HANDLE_GROWTH_WARN_MB=16
39+
3440
# project token for error catching
3541
HAWK_CATCHER_TOKEN=
3642

@@ -40,4 +46,4 @@ HAWK_CATCHER_TOKEN=
4046
IS_NOTIFIER_WORKER_ENABLED=false
4147

4248
## Url for telegram notifications about workspace blocks and unblocks
43-
TELEGRAM_LIMITER_CHAT_URL=
49+
TELEGRAM_LIMITER_CHAT_URL=

workers/grouper/src/index.ts

Lines changed: 110 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,26 @@ const DB_DUPLICATE_KEY_ERROR = '11000';
5454
*/
5555
const MAX_CODE_LINE_LENGTH = 140;
5656

57+
const MB_IN_BYTES = 1_048_576;
58+
const HUNDRED = 100;
59+
const DEFAULT_MEMORY_LOG_EVERY_TASKS = 50;
60+
const DEFAULT_MEMORY_GROWTH_WINDOW_TASKS = 200;
61+
const DEFAULT_MEMORY_GROWTH_WARN_MB = 64;
62+
const DEFAULT_MEMORY_HANDLE_GROWTH_WARN_MB = 16;
63+
// eslint-disable-next-line @typescript-eslint/no-magic-numbers
64+
const METRICS_SIZE_BUCKETS = [100, 500, 1000, 5000, 10000, 50000, 100000, 500000];
65+
66+
function asPositiveNumber(value: string | undefined, fallback: number): number {
67+
const parsed = Number(value);
68+
69+
return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
70+
}
71+
72+
const MEMORY_LOG_EVERY_TASKS = asPositiveNumber(process.env.GROUPER_MEMORY_LOG_EVERY_TASKS, DEFAULT_MEMORY_LOG_EVERY_TASKS);
73+
const MEMORY_GROWTH_WINDOW_TASKS = asPositiveNumber(process.env.GROUPER_MEMORY_GROWTH_WINDOW_TASKS, DEFAULT_MEMORY_GROWTH_WINDOW_TASKS);
74+
const MEMORY_GROWTH_WARN_MB = asPositiveNumber(process.env.GROUPER_MEMORY_GROWTH_WARN_MB, DEFAULT_MEMORY_GROWTH_WARN_MB);
75+
const MEMORY_HANDLE_GROWTH_WARN_MB = asPositiveNumber(process.env.GROUPER_MEMORY_HANDLE_GROWTH_WARN_MB, DEFAULT_MEMORY_HANDLE_GROWTH_WARN_MB);
76+
5777
/**
5878
* Worker for handling Javascript events
5979
*/
@@ -115,14 +135,14 @@ export default class GrouperWorker extends Worker {
115135
private metricsDeltaSize = new client.Histogram({
116136
name: 'hawk_grouper_delta_size_bytes',
117137
help: 'Size of computed repetition delta in bytes',
118-
buckets: [100, 500, 1000, 5000, 10000, 50000, 100000, 500000],
138+
buckets: METRICS_SIZE_BUCKETS,
119139
registers: [register],
120140
});
121141

122142
private metricsPayloadSize = new client.Histogram({
123143
name: 'hawk_grouper_payload_size_bytes',
124144
help: 'Size of incoming event payload in bytes',
125-
buckets: [100, 500, 1000, 5000, 10000, 50000, 100000, 500000],
145+
buckets: METRICS_SIZE_BUCKETS,
126146
registers: [register],
127147
});
128148

@@ -137,6 +157,17 @@ export default class GrouperWorker extends Worker {
137157
*/
138158
private cacheCleanupInterval: NodeJS.Timeout | null = null;
139159

160+
/**
161+
* Number of handled tasks in current worker process.
162+
*/
163+
private handledTasksCount = 0;
164+
165+
/**
166+
* Baseline for memory growth checks.
167+
*/
168+
private memoryCheckpointTask = 0;
169+
private memoryCheckpointHeapUsed = 0;
170+
140171
/**
141172
* Start consuming messages
142173
*/
@@ -158,6 +189,11 @@ export default class GrouperWorker extends Worker {
158189
this.cacheCleanupInterval = setInterval(() => {
159190
this.clearCache();
160191
}, CACHE_CLEANUP_INTERVAL_MINUTES * TimeMs.MINUTE);
192+
const startupMemory = process.memoryUsage();
193+
194+
this.memoryCheckpointTask = 0;
195+
this.memoryCheckpointHeapUsed = startupMemory.heapUsed;
196+
this.logMemoryCheckpoint('startup', startupMemory, this.handledTasksCount);
161197

162198
await super.start();
163199
}
@@ -174,6 +210,7 @@ export default class GrouperWorker extends Worker {
174210
this.cacheCleanupInterval = null;
175211
}
176212

213+
this.logMemoryCheckpoint('shutdown', process.memoryUsage(), this.handledTasksCount);
177214
await super.finish();
178215
this.prepareCache();
179216
await this.eventsDb.close();
@@ -195,6 +232,7 @@ export default class GrouperWorker extends Worker {
195232
} catch (error) {
196233
endTimer();
197234
this.metricsErrorsTotal.inc();
235+
this.logMemoryCheckpoint('handle-error', process.memoryUsage(), this.handledTasksCount, `title="${task.payload?.title}"`);
198236
throw error;
199237
}
200238
}
@@ -206,9 +244,15 @@ export default class GrouperWorker extends Worker {
206244
*/
207245
private async handleInternal(task: GroupWorkerTask<ErrorsCatcherType>): Promise<void> {
208246
const taskPayloadSize = Buffer.byteLength(JSON.stringify(task.payload));
247+
const handledTasksCount = ++this.handledTasksCount;
248+
const memoryBeforeHandle = process.memoryUsage();
209249

210250
this.metricsPayloadSize.observe(taskPayloadSize);
211251

252+
if (handledTasksCount === 1 || handledTasksCount % MEMORY_LOG_EVERY_TASKS === 0) {
253+
this.logMemoryCheckpoint('before-handle', memoryBeforeHandle, handledTasksCount, `payloadSize=${taskPayloadSize}b`);
254+
}
255+
212256
this.logger.info(`[handle] project=${task.projectId} catcher=${task.catcherType} title="${task.payload.title}" payloadSize=${taskPayloadSize}b backtraceFrames=${task.payload.backtrace?.length ?? 0}`);
213257

214258
let uniqueEventHash = await this.getUniqueEventHash(task);
@@ -382,9 +426,21 @@ export default class GrouperWorker extends Worker {
382426
incrementDailyAffectedUsers
383427
);
384428

385-
const mem = process.memoryUsage();
429+
const memoryAfterHandle = process.memoryUsage();
430+
const heapDeltaBytes = memoryAfterHandle.heapUsed - memoryBeforeHandle.heapUsed;
431+
const heapDeltaMb = this.bytesToMegabytes(heapDeltaBytes);
432+
433+
this.logger.info(
434+
`[handle] done, ${this.formatMemoryUsage(memoryAfterHandle)} heapDelta=${heapDeltaMb}MB handled=${handledTasksCount}`
435+
);
436+
437+
if (heapDeltaBytes > MEMORY_HANDLE_GROWTH_WARN_MB * MB_IN_BYTES) {
438+
this.logger.warn(
439+
`[memory] high heap growth in single handle: heapDelta=${heapDeltaMb}MB payloadSize=${taskPayloadSize}b title="${task.payload.title}" project=${task.projectId}`
440+
);
441+
}
386442

387-
this.logger.info(`[handle] done, heapUsed=${Math.round(mem.heapUsed / 1024 / 1024)}MB heapTotal=${Math.round(mem.heapTotal / 1024 / 1024)}MB rss=${Math.round(mem.rss / 1024 / 1024)}MB`);
443+
this.checkMemoryGrowthWindow(memoryAfterHandle, handledTasksCount);
388444

389445
/**
390446
* Add task for NotifierWorker only if event is not ignored
@@ -640,6 +696,56 @@ export default class GrouperWorker extends Worker {
640696
return [shouldIncrementRepetitionAffectedUsers, shouldIncrementDailyAffectedUsers];
641697
}
642698

699+
/**
700+
* Logs sustained heap growth over a configurable number of handled tasks.
701+
*/
702+
private checkMemoryGrowthWindow(memoryUsage: NodeJS.MemoryUsage, handledTasksCount: number): void {
703+
const tasksInWindow = handledTasksCount - this.memoryCheckpointTask;
704+
705+
if (tasksInWindow < MEMORY_GROWTH_WINDOW_TASKS) {
706+
return;
707+
}
708+
709+
const heapGrowthBytes = memoryUsage.heapUsed - this.memoryCheckpointHeapUsed;
710+
const heapGrowthMb = this.bytesToMegabytes(heapGrowthBytes);
711+
712+
this.logger.info(
713+
`[memory] growth window tasks=${tasksInWindow} handled=${this.memoryCheckpointTask + 1}-${handledTasksCount} heapGrowth=${heapGrowthMb}MB heapUsedNow=${this.bytesToMegabytes(memoryUsage.heapUsed)}MB`
714+
);
715+
716+
if (heapGrowthBytes > MEMORY_GROWTH_WARN_MB * MB_IN_BYTES) {
717+
this.logger.warn(
718+
`[memory] possible leak detected: heap grew by ${heapGrowthMb}MB in ${tasksInWindow} handled tasks`
719+
);
720+
}
721+
722+
this.memoryCheckpointTask = handledTasksCount;
723+
this.memoryCheckpointHeapUsed = memoryUsage.heapUsed;
724+
}
725+
726+
/**
727+
* Format memory usage for consistent logs.
728+
*/
729+
private formatMemoryUsage(memoryUsage: NodeJS.MemoryUsage): string {
730+
return `heapUsed=${this.bytesToMegabytes(memoryUsage.heapUsed)}MB heapTotal=${this.bytesToMegabytes(memoryUsage.heapTotal)}MB rss=${this.bytesToMegabytes(memoryUsage.rss)}MB external=${this.bytesToMegabytes(memoryUsage.external)}MB arrayBuffers=${this.bytesToMegabytes(memoryUsage.arrayBuffers)}MB`;
731+
}
732+
733+
/**
734+
* Writes one memory checkpoint record.
735+
*/
736+
private logMemoryCheckpoint(stage: string, memoryUsage: NodeJS.MemoryUsage, handledTasksCount: number, suffix = ''): void {
737+
const extra = suffix ? ` ${suffix}` : '';
738+
739+
this.logger.info(`[memory] stage=${stage} handled=${handledTasksCount} ${this.formatMemoryUsage(memoryUsage)}${extra}`);
740+
}
741+
742+
/**
743+
* Convert bytes to megabytes with two fractional digits.
744+
*/
745+
private bytesToMegabytes(bytes: number): number {
746+
return Math.round((bytes / MB_IN_BYTES) * HUNDRED) / HUNDRED;
747+
}
748+
643749
/**
644750
* Returns finds event by query from project with passed ID
645751
*

0 commit comments

Comments
 (0)