diff options
author | Rasmus Dahlberg <rgdd@glasklarteknik.se> | 2025-01-04 14:22:20 +0100 |
---|---|---|
committer | Rasmus Dahlberg <rgdd@glasklarteknik.se> | 2025-01-04 14:22:20 +0100 |
commit | d8e0b9c937dc974fef7484db3f85fabfe9272d7d (patch) | |
tree | bfe3e8295f1e5d0919fcb331a3ba478be0d99031 /scripts/silentct-check | |
parent | 80667f1f5707b75cbd4aff47b51bab103c429b24 (diff) |
prometheus: Add basic metrics for alerting
- Detect if we're falling behind while downloading
- Detect if there are any found certificates alerting
Diffstat (limited to 'scripts/silentct-check')
-rwxr-xr-x | scripts/silentct-check | 98 |
1 files changed, 98 insertions, 0 deletions
#!/bin/bash

#
# A script that emits warnings based on the silentct-mon prometheus metrics.
# Mainly meant as an example for those that configure alerts using prometheus.
#
# Options are read from the environment, see the "Options" section below.
# Warnings are written to stderr and do not affect the exit status; the script
# only exits non-zero when it cannot do its job (e.g., the download failed).
#
# Requires bash 4+ (associative arrays), curl, awk, and a date(1) that
# understands "-d @SECONDS" (e.g., GNU date).
#

set -eu

# Print a warning on stderr.
function warn() {
  echo "WARNING: $*" >&2
}

# Print an error on stderr and exit with status 1.
function die() {
  echo "FATAL: $*" >&2
  exit 1
}

# Plain decimal numbers, optionally signed and with an exponent.  Anything
# else (NaN, Inf, garbage) is rejected so that it never reaches awk or bash
# arithmetic further down.
readonly NUMBER_RE='^[-+]?([0-9]+\.?[0-9]*|\.[0-9]+)([eE][-+]?[0-9]+)?$'

# parse_sample LINE LABEL
#
# Extract the value of LABEL="..." and the last whitespace-separated field
# from the metric line LINE.  On success the results are stored in the
# globals sample_key and sample_val.  On malformed input a warning is emitted
# and a non-zero status is returned, so that a single odd line is skipped
# instead of silently terminating the script under "set -e".
function parse_sample() {
  local line=$1 label=$2
  # The label name must not be preceded by another label-name character;
  # otherwise "id" would also match inside, e.g., "log_id".
  local label_re="(^|[^[:alnum:]_])${label}=\"([^\"]+)\""
  local -a fields

  sample_key=""
  sample_val=""

  if [[ ! $line =~ $label_re ]]; then
    warn "skipping metric line without $label label: $line"
    return 1
  fi
  sample_key=${BASH_REMATCH[2]}

  # The line matched above, so it has at least one field.
  read -r -a fields <<<"$line"
  sample_val=${fields[${#fields[@]} - 1]}
  if [[ ! $sample_val =~ $NUMBER_RE ]]; then
    warn "skipping metric line with non-numeric value: $line"
    sample_key=""
    sample_val=""
    return 1
  fi
}

#-----------------------------------------------------------------------------------------
# Options
#-----------------------------------------------------------------------------------------
# URL to download the metrics from.
METRICS_AT=${METRICS_AT:-http://localhost:8080/metrics}
# Warn when a log's size minus its index is at least this large.
ALERT_BACKLOG=${ALERT_BACKLOG:-65536}
# Warn when a log's timestamp is at least this many seconds old.
ALERT_FRESHNESS=${ALERT_FRESHNESS:-86400}

#-----------------------------------------------------------------------------------------
# Download the current prometheus metrics
#-----------------------------------------------------------------------------------------
metrics_file=$(mktemp) || die "failed creating temporary file"
# Single quotes: expand (and quote) the file name when the trap runs.
trap 'rm -f -- "$metrics_file"' EXIT
# -f: treat HTTP errors as failures rather than parsing an error page as if
# it contained metrics.  -sS: no progress meter, but still show curl errors.
curl -fsS -o "$metrics_file" "$METRICS_AT" || die "failed retrieving metrics from $METRICS_AT"

#-----------------------------------------------------------------------------------------
# Parse per-log and certificate-alert metrics (single pass over the file)
#-----------------------------------------------------------------------------------------
declare -A log_index=()
declare -A log_size=()
declare -A log_timestamp=()
declare -A certificate_alert=()
sample_key=""
sample_val=""
# The "|| [[ -n ... ]]" part ensures a final line without newline is not lost.
while IFS= read -r line || [[ -n $line ]]; do
  case $line in
    silentct_log_index*)
      if parse_sample "$line" id; then log_index["$sample_key"]=$sample_val; fi
      ;;
    silentct_log_size*)
      if parse_sample "$line" id; then log_size["$sample_key"]=$sample_val; fi
      ;;
    silentct_log_timestamp*)
      if parse_sample "$line" id; then log_timestamp["$sample_key"]=$sample_val; fi
      ;;
    silentct_certificate_alert*)
      # Keyed on the stored_at label; the sample value is the observation time.
      if parse_sample "$line" stored_at; then certificate_alert["$sample_key"]=$sample_val; fi
      ;;
    *)
      continue # comments, blank lines, and metrics we don't care about
      ;;
  esac
done <"$metrics_file"

#-----------------------------------------------------------------------------------------
# Emit warnings
#-----------------------------------------------------------------------------------------
now=$(date +%s)
for id in "${!log_size[@]}"; do
  # "${array[key]-}" instead of "${array[key]}": a missing sibling metric must
  # not abort the whole script with an unbound-variable error under "set -u".
  index=${log_index[$id]-}
  timestamp=${log_timestamp[$id]-}

  if [[ -z $index ]]; then
    warn "log $id -- index metric is missing"
  else
    # Values are handed to awk with -v so that they are treated as data and
    # never as awk program text.  awk is used rather than bash arithmetic
    # because values may be formatted as floats.
    backlog=$(awk -v size="${log_size[$id]}" -v idx="$index" 'BEGIN {printf "%.0f", size - idx}')
    if awk -v backlog="$backlog" -v limit="$ALERT_BACKLOG" 'BEGIN {exit !(backlog - limit >= 0)}'; then
      warn "log $id -- backlog is at $backlog"
    fi
  fi

  if [[ -z $timestamp ]]; then
    warn "log $id -- timestamp metric is missing"
  else
    # The timestamp metric is divided by 1000 to get seconds for date(1).
    unix_timestamp=$(awk -v ms="$timestamp" 'BEGIN {printf "%.0f", ms / 1000}')
    if (( now - unix_timestamp >= ALERT_FRESHNESS )); then
      warn "log $id -- latest timestamp at $(date -d "@$unix_timestamp")"
    fi
  fi
done

for stored_at in "${!certificate_alert[@]}"; do
  # Normalize a possibly float-formatted value to whole seconds for date(1).
  observed_at=$(awk -v t="${certificate_alert[$stored_at]}" 'BEGIN {printf "%.0f", t}')
  warn "(mis)-issued certificate? Observed at $(date -d "@$observed_at") -- see $stored_at"
done