Skip to content

Commit de85a49

Browse files
committed
Refactor alert handling and improve SQL query reliability
- Enhanced the `is_alert_duplicate` function in `alertFunctions.sh` to escape single quotes in alert messages, preventing SQL injection issues. - Updated database password handling to check for both `PGPASSWORD` and `DBPASSWORD`, improving flexibility in authentication. - Ensured numeric validation for alert count checks, enhancing robustness against unexpected query results. - Modified multiple monitoring scripts to conditionally check for the availability of the `send_alert` function, ensuring alerts are sent only when the function is defined, improving reliability across the monitoring system.
1 parent b412a51 commit de85a49

13 files changed

+95
-47
lines changed

bin/lib/alertFunctions.sh

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -180,35 +180,43 @@ is_alert_duplicate() {
180180
local dbport="${DBPORT:-5432}"
181181
local dbuser="${DBUSER:-postgres}"
182182

183+
# Escape single quotes in message for SQL
184+
local message_escaped="${message//\'/\'\'}"
185+
183186
local query
184187
query="SELECT COUNT(*) FROM alerts
185188
WHERE component = '${component}'
186189
AND alert_type = '${alert_type}'
187-
AND message = '${message}'
190+
AND message = '${message_escaped}'
188191
AND status = 'active'
189192
AND created_at > CURRENT_TIMESTAMP - INTERVAL '${window_minutes} minutes';"
190193

191194
local count
192-
# Use PGPASSWORD only if set, otherwise let psql use default authentication
193-
if [[ -n "${PGPASSWORD:-}" ]]; then
194-
count=$(PGPASSWORD="${PGPASSWORD}" psql \
195+
# Use PGPASSWORD or DBPASSWORD if set, otherwise let psql use default authentication
196+
local dbpassword="${PGPASSWORD:-${DBPASSWORD:-}}"
197+
if [[ -n "${dbpassword}" ]]; then
198+
count=$(PGPASSWORD="${dbpassword}" psql \
195199
-h "${dbhost}" \
196200
-p "${dbport}" \
197201
-U "${dbuser}" \
198202
-d "${dbname}" \
199203
-t -A \
200-
-c "${query}" 2> /dev/null)
204+
-c "${query}" 2> /dev/null || echo "0")
201205
else
202206
count=$(psql \
203207
-h "${dbhost}" \
204208
-p "${dbport}" \
205209
-U "${dbuser}" \
206210
-d "${dbname}" \
207211
-t -A \
208-
-c "${query}" 2> /dev/null)
212+
-c "${query}" 2> /dev/null || echo "0")
209213
fi
210214

211-
if [[ "${count:-0}" -gt 0 ]]; then
215+
# Ensure count is numeric and handle empty result
216+
count=$(echo "${count}" | tr -d '[:space:]' | grep -E '^[0-9]+$' || echo "0")
217+
count=$((count + 0))
218+
219+
if [[ "${count}" -gt 0 ]]; then
212220
return 0 # Duplicate
213221
else
214222
return 1 # Not duplicate

bin/monitor/checkPlanetNotes.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ run_planet_check() {
109109
if [[ ${duration} -gt ${planet_duration_threshold} ]]; then
110110
log_warning "${COMPONENT}: Planet check duration (${duration}s) exceeds threshold (${planet_duration_threshold}s)"
111111
# Only send alert if send_alert function is available
112-
if command -v send_alert >/dev/null 2>&1; then
112+
if declare -f send_alert >/dev/null 2>&1 || command -v send_alert >/dev/null 2>&1; then
113113
send_alert "${COMPONENT}" "WARNING" "planet_check_duration" "Planet Notes check took too long: ${duration}s (threshold: ${planet_duration_threshold}s)" || true
114114
fi
115115
fi
@@ -118,12 +118,12 @@ run_planet_check() {
118118
else
119119
log_error "${COMPONENT}: Planet Notes check failed (exit_code: ${exit_code}, duration: ${duration}s)"
120120
# Record metrics (don't fail if record_metric is not available or fails)
121-
if command -v record_metric >/dev/null 2>&1; then
121+
if declare -f record_metric >/dev/null 2>&1 || command -v record_metric >/dev/null 2>&1; then
122122
record_metric "${COMPONENT}" "planet_check_status" "0" "component=ingestion,check=processCheckPlanetNotes" || true
123123
record_metric "${COMPONENT}" "planet_check_duration" "${duration}" "component=ingestion,check=processCheckPlanetNotes" || true
124124
fi
125125
# Only send alert if send_alert function is available
126-
if command -v send_alert >/dev/null 2>&1; then
126+
if declare -f send_alert >/dev/null 2>&1 || command -v send_alert >/dev/null 2>&1; then
127127
send_alert "${COMPONENT}" "ERROR" "planet_check_failed" "Planet Notes check failed: exit_code=${exit_code}" || true
128128
fi
129129
return 1

bin/monitor/monitorIngestion.sh

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,7 @@ check_error_rate() {
311311
local error_count_threshold="${INGESTION_ERROR_COUNT_THRESHOLD:-1000}"
312312
if [[ ${error_lines} -gt ${error_count_threshold} ]]; then
313313
log_warning "${COMPONENT}: Error count (${error_lines}) exceeds threshold (${error_count_threshold})"
314-
if command -v send_alert >/dev/null 2>&1; then
314+
if declare -f send_alert >/dev/null 2>&1 || command -v send_alert >/dev/null 2>&1; then
315315
send_alert "${COMPONENT}" "WARNING" "error_rate" "High error count detected: ${error_lines} errors in 24h (threshold: ${error_count_threshold})" || true
316316
fi
317317
fi
@@ -320,7 +320,7 @@ check_error_rate() {
320320
local warning_count_threshold="${INGESTION_WARNING_COUNT_THRESHOLD:-2000}"
321321
if [[ ${warning_lines} -gt ${warning_count_threshold} ]]; then
322322
log_warning "${COMPONENT}: Warning count (${warning_lines}) exceeds threshold (${warning_count_threshold})"
323-
if command -v send_alert >/dev/null 2>&1; then
323+
if declare -f send_alert >/dev/null 2>&1 || command -v send_alert >/dev/null 2>&1; then
324324
send_alert "${COMPONENT}" "INFO" "warning_rate" "High warning count detected: ${warning_lines} warnings in 24h (threshold: ${warning_count_threshold})" || true
325325
fi
326326
fi
@@ -329,7 +329,7 @@ check_error_rate() {
329329
local max_error_rate="${INGESTION_MAX_ERROR_RATE:-5}"
330330
if [[ ${error_rate} -gt ${max_error_rate} ]]; then
331331
log_warning "${COMPONENT}: Error rate (${error_rate}%) exceeds threshold (${max_error_rate}%)"
332-
if command -v send_alert >/dev/null 2>&1; then
332+
if declare -f send_alert >/dev/null 2>&1 || command -v send_alert >/dev/null 2>&1; then
333333
send_alert "${COMPONENT}" "WARNING" "error_rate" "High error rate detected: ${error_rate}% (threshold: ${max_error_rate}%, errors: ${error_lines}/${total_lines})" || true
334334
fi
335335
return 1
@@ -339,7 +339,7 @@ check_error_rate() {
339339
local warning_rate_threshold="${INGESTION_WARNING_RATE_THRESHOLD:-15}"
340340
if [[ ${warning_rate} -gt ${warning_rate_threshold} ]]; then
341341
log_warning "${COMPONENT}: Warning rate (${warning_rate}%) exceeds threshold (${warning_rate_threshold}%)"
342-
if command -v send_alert >/dev/null 2>&1; then
342+
if declare -f send_alert >/dev/null 2>&1 || command -v send_alert >/dev/null 2>&1; then
343343
send_alert "${COMPONENT}" "WARNING" "warning_rate" "High warning rate detected: ${warning_rate}% (threshold: ${warning_rate_threshold}%, warnings: ${warning_lines}/${total_lines})" || true
344344
fi
345345
fi

bin/monitor/monitorWMS.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,7 @@ check_error_rate() {
371371
# Alert if error rate exceeds threshold
372372
if [[ ${error_rate} -gt ${threshold} ]]; then
373373
log_warning "${COMPONENT}: Error rate (${error_rate}%) exceeds threshold (${threshold}%)"
374-
if command -v send_alert >/dev/null 2>&1; then
374+
if declare -f send_alert >/dev/null 2>&1 || command -v send_alert >/dev/null 2>&1; then
375375
send_alert "${COMPONENT}" "WARNING" "error_rate_exceeded" "WMS error rate (${error_rate}%) exceeds threshold (${threshold}%, errors: ${error_count}, requests: ${total_requests})" || true
376376
fi
377377
return 1

bin/security/ipBlocking.sh

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,17 @@
99

1010
set -euo pipefail
1111

12-
SCRIPT_DIR=""
13-
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
14-
readonly SCRIPT_DIR
15-
PROJECT_ROOT=""
16-
PROJECT_ROOT="$(dirname "$(dirname "${SCRIPT_DIR}")")"
17-
readonly PROJECT_ROOT
12+
# Only set SCRIPT_DIR if not already set (allows sourcing multiple times)
13+
if [[ -z "${SCRIPT_DIR:-}" ]]; then
14+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
15+
readonly SCRIPT_DIR
16+
fi
17+
18+
# Only set PROJECT_ROOT if not already set (allows sourcing multiple times)
19+
if [[ -z "${PROJECT_ROOT:-}" ]]; then
20+
PROJECT_ROOT="$(dirname "$(dirname "${SCRIPT_DIR}")")"
21+
readonly PROJECT_ROOT
22+
fi
1823

1924
# Source libraries
2025
# shellcheck disable=SC1091

tests/integration/test_alert_delivery_complete.sh

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,25 @@
99

1010
set -euo pipefail
1111

12-
SCRIPT_DIR=""
13-
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
14-
readonly SCRIPT_DIR
15-
PROJECT_ROOT=""
16-
PROJECT_ROOT="$(dirname "$(dirname "${SCRIPT_DIR}")")"
17-
# Remove trailing slash if present
18-
PROJECT_ROOT="${PROJECT_ROOT%/}"
19-
readonly PROJECT_ROOT
12+
# Only set SCRIPT_DIR if not already set (allows sourcing multiple times)
13+
if [[ -z "${SCRIPT_DIR:-}" ]]; then
14+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
15+
readonly SCRIPT_DIR
16+
fi
17+
18+
# Only set PROJECT_ROOT if not already set (allows sourcing multiple times)
19+
if [[ -z "${PROJECT_ROOT:-}" ]]; then
20+
PROJECT_ROOT="$(dirname "$(dirname "${SCRIPT_DIR}")")"
21+
# Remove trailing slash if present
22+
PROJECT_ROOT="${PROJECT_ROOT%/}"
23+
readonly PROJECT_ROOT
24+
fi
25+
26+
# Validate PROJECT_ROOT exists
27+
if [[ ! -d "${PROJECT_ROOT}/bin/lib" ]]; then
28+
echo "Error: Cannot find PROJECT_ROOT. SCRIPT_DIR=${SCRIPT_DIR}, PROJECT_ROOT=${PROJECT_ROOT}" >&2
29+
exit 1
30+
fi
2031

2132
# Source libraries
2233
# shellcheck disable=SC1091

tests/integration/test_database_integration.sh

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -154,19 +154,19 @@ skip_if_database_not_available() {
154154
# Step 1: Record metric
155155
record_metric "ingestion" "timestamp_test" "100" "test=timestamp"
156156

157-
# Step 2: Get timestamp
158-
local metric_timestamp
159-
metric_timestamp=$(psql -h "${DBHOST}" -p "${DBPORT}" -U "${DBUSER}" -d "${DBNAME}" -t -c \
160-
"SELECT timestamp FROM metrics WHERE component = 'ingestion' AND metric_name = 'timestamp_test' ORDER BY timestamp DESC LIMIT 1;" 2>/dev/null | tr -d ' ' || echo "")
157+
# Step 2: Get timestamp as Unix epoch (more reliable than parsing date strings)
158+
local metric_time
159+
metric_time=$(psql -h "${DBHOST}" -p "${DBPORT}" -U "${DBUSER}" -d "${DBNAME}" -t -c \
160+
"SELECT EXTRACT(EPOCH FROM timestamp)::bigint FROM metrics WHERE component = 'ingestion' AND metric_name = 'timestamp_test' ORDER BY timestamp DESC LIMIT 1;" 2>/dev/null | tr -d '[:space:]' || echo "0")
161161

162162
# Step 3: Verify timestamp is recent (within last minute)
163163
local current_time
164164
current_time=$(date +%s)
165-
local metric_time
166-
metric_time=$(date -d "${metric_timestamp}" +%s 2>/dev/null || echo "0")
167165
local time_diff
168166
time_diff=$((current_time - metric_time))
169167

168+
# Ensure metric_time is valid (greater than 0) and recent
169+
assert [ "${metric_time}" -gt 0 ]
170170
assert [ "${time_diff}" -lt 60 ]
171171
}
172172

tests/integration/test_monitorAnalytics_integration.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
# Test configuration - set before loading test_helper
1515
export TEST_COMPONENT="ANALYTICS"
1616
export TEST_DB_NAME="${TEST_DB_NAME:-osm_notes_monitoring_test}"
17-
export TEST_ANALYTICS_DB_NAME="${TEST_ANALYTICS_DB_NAME:-analytics_test}"
17+
export TEST_ANALYTICS_DB_NAME="${TEST_ANALYTICS_DB_NAME:-osm_notes_monitoring_test}"
1818

1919
load "${BATS_TEST_DIRNAME}/../test_helper.bash"
2020

tests/integration/test_monitorData_integration.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
# shellcheck disable=SC2030,SC2031
77
# SC2030/SC2031: Variables modified in subshells are expected in BATS tests
88

9-
export TEST_DB_NAME="test_monitor_data"
9+
export TEST_DB_NAME="${TEST_DB_NAME:-osm_notes_monitoring_test}"
1010
load "${BATS_TEST_DIRNAME}/../test_helper.bash"
1111

1212
# Test directories

tests/integration/test_monitorInfrastructure_integration.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
# shellcheck disable=SC2030,SC2031
77
# SC2030/SC2031: Variables modified in subshells are expected in BATS tests
88

9-
export TEST_DB_NAME="test_monitor_infrastructure"
9+
export TEST_DB_NAME="${TEST_DB_NAME:-osm_notes_monitoring_test}"
1010
load "${BATS_TEST_DIRNAME}/../test_helper.bash"
1111

1212
# Test directories

0 commit comments

Comments
 (0)