|
1 | 1 | import * as client from 'prom-client'; |
2 | 2 | import os from 'os'; |
3 | 3 | import { nanoid } from 'nanoid'; |
| 4 | +import createLogger from './logger'; |
4 | 5 |
|
/** Fallback push period, used when PROMETHEUS_PUSHGATEWAY_INTERVAL is unset or invalid. */
const DEFAULT_PUSH_INTERVAL_MS = 10_000;

/** Length passed to nanoid when generating the per-start `id` grouping value. */
const ID_SIZE = 5;

/** Job name under which metrics are pushed to the pushgateway. */
const METRICS_JOB_NAME = 'workers';

/** Registry that holds the metrics pushed by this module. */
const register = new client.Registry();

const logger = createLogger();

/** Handle of the active push timer; null while pushing is not running. */
let pushInterval: NodeJS.Timeout | null = null;

/** Name of the worker whose metrics are currently being pushed; empty when idle. */
let currentWorkerName = '';

// Collect prom-client default metrics into the registry above.
client.collectDefaultMetrics({ register });

export { register, client };
10 | 19 |
|
/**
 * Resolve the push period from the PROMETHEUS_PUSHGATEWAY_INTERVAL
 * environment variable.
 *
 * The value is converted with `Number()`. It is accepted only when it is a
 * finite number within the delay range that Node.js timers honour; any other
 * value is reported with a warning and replaced by DEFAULT_PUSH_INTERVAL_MS.
 * When the variable is not set, the default is returned silently.
 *
 * @returns push period in milliseconds, safe to pass to `setInterval`.
 */
function getPushIntervalMs(): number {
  // Node.js timers coerce a delay above 2^31 - 1 ms (or below 1 ms) to 1 ms,
  // so an out-of-range value would make the worker push every millisecond.
  const MAX_TIMER_DELAY_MS = 2_147_483_647;
  const MIN_TIMER_DELAY_MS = 1;

  const rawInterval = process.env.PROMETHEUS_PUSHGATEWAY_INTERVAL;

  if (rawInterval === undefined) {
    return DEFAULT_PUSH_INTERVAL_MS;
  }

  const parsedInterval = Number(rawInterval);
  const isUsable = Number.isFinite(parsedInterval)
    && parsedInterval >= MIN_TIMER_DELAY_MS
    && parsedInterval <= MAX_TIMER_DELAY_MS;

  if (!isUsable) {
    logger.warn(`[metrics] invalid PROMETHEUS_PUSHGATEWAY_INTERVAL="${rawInterval}", fallback to ${DEFAULT_PUSH_INTERVAL_MS}ms`);

    return DEFAULT_PUSH_INTERVAL_MS;
  }

  return parsedInterval;
}
| 39 | + |
/**
 * Cancel the periodic push to the pushgateway, if one is running.
 *
 * Clears the active timer, logs which worker was stopped and resets the
 * module state. Does nothing when no timer is active.
 */
export function stopMetricsPushing(): void {
  if (pushInterval !== null) {
    clearInterval(pushInterval);
    pushInterval = null;
    logger.info(`[metrics] stopped pushing metrics for worker=${currentWorkerName}`);
    currentWorkerName = '';
  }
}
| 53 | + |
/**
 * Begin pushing the registry's metrics to the pushgateway on a timer.
 *
 * Nothing is started when PROMETHEUS_PUSHGATEWAY_URL is unset or empty, or
 * when a push timer is already active (the duplicate start is logged as a
 * warning and the running timer is left untouched).
 *
 * @param workerName - name of the worker, sent as the `worker` grouping.
 * @returns the function that stops pushing; it is returned in every case.
 */
export function startMetricsPushing(workerName: string): () => void {
  const gatewayUrl = process.env.PROMETHEUS_PUSHGATEWAY_URL;

  if (!gatewayUrl) {
    return stopMetricsPushing;
  }

  // Only one push timer may run per process.
  if (pushInterval) {
    logger.warn(`[metrics] pushing is already started for worker=${currentWorkerName}, skip duplicate start for worker=${workerName}`);

    return stopMetricsPushing;
  }

  const periodMs = getPushIntervalMs();
  const host = os.hostname();
  const instanceId = nanoid(ID_SIZE);
  const gateway = new client.Pushgateway(gatewayUrl, [], register);

  currentWorkerName = workerName;

  logger.info(`Start pushing metrics to ${gatewayUrl} every ${periodMs}ms (host: ${host}, id: ${instanceId}, worker: ${workerName})`);

  /**
   * Send the current metrics once; failures are logged, never thrown.
   */
  const pushOnce = (): void => {
    const target = {
      jobName: METRICS_JOB_NAME,
      groupings: {
        worker: workerName,
        host,
        id: instanceId,
      },
    };

    gateway.pushAdd(target, (err) => {
      if (err) {
        logger.error(`Metrics push error: ${err.message || err}`);
      }
    });
  };

  pushInterval = setInterval(pushOnce, periodMs);

  return stopMetricsPushing;
}
0 commit comments