aboutsummaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
authorRasmus Dahlberg <rgdd@glasklarteknik.se>2025-01-04 14:22:20 +0100
committerRasmus Dahlberg <rgdd@glasklarteknik.se>2025-01-04 14:22:20 +0100
commitd8e0b9c937dc974fef7484db3f85fabfe9272d7d (patch)
treebfe3e8295f1e5d0919fcb331a3ba478be0d99031 /scripts
parent80667f1f5707b75cbd4aff47b51bab103c429b24 (diff)
prometheus: Add basic metrics for alerting
- Detect if we're falling behind while downloading - Detect if there are any found certificates alerting
Diffstat (limited to 'scripts')
-rwxr-xr-xscripts/silentct-check98
1 files changed, 98 insertions, 0 deletions
diff --git a/scripts/silentct-check b/scripts/silentct-check
new file mode 100755
index 0000000..35d38c7
--- /dev/null
+++ b/scripts/silentct-check
@@ -0,0 +1,98 @@
+#!/bin/bash
+
+#
+# A script that emits warnings based on the silentct-mon prometheus metrics.
+# Mainly meant as an example for those that configure alerts using prometheus.
+#
+
+set -eu
+
+function warn() { # print all arguments on stderr as one line prefixed with "WARNING: "
+  printf '%s\n' "WARNING: $*" >&2
+}
+
+function die() { # print all arguments on stderr prefixed with "FATAL: ", then exit with status 1
+  printf '%s\n' "FATAL: $*" >&2
+  exit 1
+}
+
+#-----------------------------------------------------------------------------------------
+# Options
+#-----------------------------------------------------------------------------------------
+: "${METRICS_AT:=http://localhost:8080/metrics}" # URL the metrics are downloaded from
+: "${ALERT_BACKLOG:=65536}"                      # warn once a log's size minus its index reaches this
+: "${ALERT_FRESHNESS:=86400}"                    # warn once a log's timestamp is this many seconds old
+
+#-----------------------------------------------------------------------------------------
+# Download the current prometheus metrics
+#-----------------------------------------------------------------------------------------
+metrics_file=$(mktemp)
+trap 'rm -f -- "$metrics_file"' EXIT # single quotes: the path is expanded, and quoted, when the trap runs
+curl -fsSo "$metrics_file" "$METRICS_AT" || die "failed retrieving metrics from $METRICS_AT" # -f: HTTP errors fail too
+
+#-----------------------------------------------------------------------------------------
+# Parse per-log metrics
+#-----------------------------------------------------------------------------------------
+declare -A log_index     # silentct_log_index samples, keyed by the text captured after id="
+declare -A log_size      # silentct_log_size samples, keyed the same way
+declare -A log_timestamp # silentct_log_timestamp samples, keyed the same way
+while IFS= read -r line; do
+  case $line in
+  \#*) continue ;; # skip comments
+
+  silentct_log_index*)
+    id=$(grep -oP '(?<=id=")[^"]+' <<<"$line")
+    value=$(awk '{print $NF}' <<<"$line") # the sample value is the last field on the line
+    log_index["$id"]=$value
+    ;;
+
+  silentct_log_size*)
+    id=$(grep -oP '(?<=id=")[^"]+' <<<"$line")
+    value=$(awk '{print $NF}' <<<"$line")
+    log_size["$id"]=$value
+    ;;
+
+  silentct_log_timestamp*)
+    id=$(grep -oP '(?<=id=")[^"]+' <<<"$line")
+    value=$(awk '{print $NF}' <<<"$line")
+    log_timestamp["$id"]=$value
+    ;;
+  esac
+done <"$metrics_file"
+
+#-----------------------------------------------------------------------------------------
+# Parse certificate-alert metrics
+#-----------------------------------------------------------------------------------------
+declare -A certificate_alert # sample value (observed_at), keyed by the text captured after stored_at="
+while IFS= read -r line; do
+  case $line in
+  \#*) continue ;; # skip comments
+
+  silentct_certificate_alert*)
+    stored_at=$(grep -oP '(?<=stored_at=")[^"]+' <<<"$line")
+    observed_at=$(awk '{print $NF}' <<<"$line") # the sample value is the last field on the line
+    certificate_alert["$stored_at"]=$observed_at
+    ;;
+  esac
+done <"$metrics_file"
+
+#-----------------------------------------------------------------------------------------
+# Emit warnings
+#-----------------------------------------------------------------------------------------
+now=$(date +%s)
+for id in "${!log_size[@]}"; do
+  # Scraped values are passed with -v so awk treats them as data, never as program text.
+  backlog=$(awk -v size="${log_size[$id]}" -v idx="${log_index[$id]}" 'BEGIN {print size - idx}')
+  if awk -v backlog="$backlog" -v limit="$ALERT_BACKLOG" 'BEGIN {exit !(backlog - limit >= 0)}'; then
+    warn "log $id -- backlog is at $backlog"
+  fi
+  unix_timestamp=$(awk -v ms="${log_timestamp[$id]}" 'BEGIN {printf "%.0f", ms / 1000}') # divided by 1000 to compare with seconds
+  if (( now - unix_timestamp >= ALERT_FRESHNESS )); then
+    warn "log $id -- latest timestamp at $(date -d "@$unix_timestamp")"
+  fi
+done
+
+for stored_at in "${!certificate_alert[@]}"; do
+  observed_at=$(awk -v t="${certificate_alert[$stored_at]}" 'BEGIN {printf "%.0f", t}') # -v: value is data, not awk code
+  warn "(mis)-issued certificate? Observed at $(date -d "@$observed_at") -- see $stored_at"
+done