about | summary | refs | log | tree | commit | diff
path: root/contrib/silentct-check
diff options
context:
space:
mode:
author: Rasmus Dahlberg <rgdd@glasklarteknik.se> 2025-01-18 13:39:40 +0100
committer: Rasmus Dahlberg <rgdd@glasklarteknik.se> 2025-01-18 16:34:14 +0100
commit: 8a17817c61f14a727a1017a5bcd4b1ea82964528 (patch)
tree: ec0fa96bfc683e906413106f2db2b99d710dc389 /contrib/silentct-check
parent: 2d3b1f2cb0c05385c1702f1a7d74fa08d52c262f (diff)
prometheus: Refine based on input from anarcat [branch: main]
https://gitlab.torproject.org/tpo/tpa/team/-/issues/40677
Diffstat (limited to 'contrib/silentct-check')
-rwxr-xr-x contrib/silentct-check 104
1 files changed, 104 insertions, 0 deletions
diff --git a/contrib/silentct-check b/contrib/silentct-check
new file mode 100755
index 0000000..97aa2b2
--- /dev/null
+++ b/contrib/silentct-check
@@ -0,0 +1,104 @@
+#!/bin/bash
+
+#
+# A script that outputs warnings from silentct-mon's Prometheus metrics. Mainly
+# meant as an example, but may be useful for simple (periodic) one-shot checks.
+#
+
+set -eu # exit on an unhandled command failure (-e) or use of an unset variable (-u)
+
+#-------------------------------------------------------------------------------
+# Configuration
+#-------------------------------------------------------------------------------
+METRICS_AT=${METRICS_AT:-http://localhost:8080/metrics}
+ALERT_BACKLOG=${ALERT_BACKLOG:-65536}
+ALERT_FRESHNESS=${ALERT_FRESHNESS:-86400}
+
+#-------------------------------------------------------------------------------
+# Helper functions
+#-------------------------------------------------------------------------------
+# Print a warning on stderr; all arguments are joined into a single message.
+notice() {
+  printf 'NOTICE: %s\n' "$*" >&2
+}
+
+# Print a fatal error on stderr and terminate the script with exit status 1.
+die() {
+  printf 'FATAL: %s\n' "$*" >&2
+  exit 1
+}
+
+to_integer() {
+ printf "%.f" "$1"
+}
+
+# Print the value of label $2 found in metric line $1, i.e. the text between
+# the first pair of double quotes that follows "<label>=".
+extract_label() {
+  local metric=$1 name=$2
+  # Drop everything up to and including the first "<label>=".
+  local tail=${metric#*"$name"=}
+  # What remains starts with the quoted value, so field 2 is the value itself.
+  echo "$tail" | cut -d'"' -f2
+}
+
+extract_value() {
+ local line=$1
+ echo "${line##* }"
+}
+
+#-------------------------------------------------------------------------------
+# Fetch metrics
+#-------------------------------------------------------------------------------
+metrics_file=$(mktemp)
+trap 'rm -f $metrics_file' EXIT
+
+curl -so "$metrics_file" "$METRICS_AT" || die "failed fetching from $METRICS_AT"
+
+#-------------------------------------------------------------------------------
+# Parse metrics
+#-------------------------------------------------------------------------------
+# Per-log values collected from the metrics file, keyed by the log_id label.
+declare -A log_index     # log ID -> log index
+declare -A log_size      # log ID -> log size
+declare -A log_timestamp # log ID -> log timestamp
+
+need_restart=0
+num_unexpected_crt=0
+
+# Walk the metrics file line by line and dispatch on the metric name prefix.
+while IFS= read -r line; do
+  case "$line" in
+  '#'*)
+    # Lines starting with '#' carry no sample and are skipped.
+    continue
+    ;;
+  silentct_log_index* | silentct_log_size* | silentct_log_timestamp*)
+    # All three per-log metrics share the same label and value extraction.
+    key=$(extract_label "$line" "log_id")
+    value=$(to_integer "$(extract_value "$line")")
+    case "$line" in
+    silentct_log_index*) log_index["$key"]=$value ;;
+    silentct_log_size*) log_size["$key"]=$value ;;
+    # Stored divided by 1000 (presumably milliseconds -> seconds).
+    *) log_timestamp["$key"]=$((value / 1000)) ;;
+    esac
+    ;;
+  silentct_need_restart*)
+    need_restart=$(extract_value "$line")
+    ;;
+  silentct_unexpected_certificate*)
+    # Every matching line counts as one unexpected certificate.
+    ((++num_unexpected_crt))
+    ;;
+  esac
+done <"$metrics_file"
+
+#-------------------------------------------------------------------------------
+# Output warnings
+#-------------------------------------------------------------------------------
+# Emit one notice on stderr per threshold that is reached.
+now=$(date +%s)
+
+for log_id in "${!log_size[@]}"; do
+  # Read the array elements with ${array[key]} expansions instead of bare
+  # array[key] references inside $(( )): in arithmetic, bash evaluates an
+  # associative subscript a second time, so a log ID taken from the fetched
+  # metrics could break the expression or be run as a command substitution.
+  # A log without an index or timestamp sample is treated as 0.
+  size=${log_size[$log_id]}
+  index=${log_index[$log_id]:-0}
+  timestamp=${log_timestamp[$log_id]:-0}
+
+  backlog=$((size - index))
+  elapsed=$((now - timestamp))
+
+  ((backlog < ALERT_BACKLOG)) || notice "log $log_id -- backlog is at $backlog"
+  ((elapsed < ALERT_FRESHNESS)) || notice "log $log_id -- latest timestamp at $(date -d @"$timestamp")"
+done
+
+[[ "$need_restart" == 0 ]] || notice "silentct-mon needs to be restarted"
+[[ "$num_unexpected_crt" == 0 ]] || notice "$num_unexpected_crt unexpected certificate(s)"