diff options
author | Rasmus Dahlberg <rgdd@glasklarteknik.se> | 2025-01-18 13:39:40 +0100 |
---|---|---|
committer | Rasmus Dahlberg <rgdd@glasklarteknik.se> | 2025-01-18 16:34:14 +0100 |
commit | 8a17817c61f14a727a1017a5bcd4b1ea82964528 (patch) | |
tree | ec0fa96bfc683e906413106f2db2b99d710dc389 /scripts/silentct-check | |
parent | 2d3b1f2cb0c05385c1702f1a7d74fa08d52c262f (diff) |
prometheus: Refine based on input from anarcat [main]
https://gitlab.torproject.org/tpo/tpa/team/-/issues/40677
Diffstat (limited to 'scripts/silentct-check')
-rwxr-xr-x | scripts/silentct-check | 96 |
1 files changed, 0 insertions, 96 deletions
#!/bin/bash

#
# A script that generates alerts based on the silentct-mon prometheus
# metrics.  Mainly meant as an example on how to define relevant alerts.
#
# (Reconstructed from the deletion diff of scripts/silentct-check, blob
# 2c64d67, file mode 100755.)
#
# Alerts are written to stderr as "NOTICE: ..." lines.  Problems that prevent
# the check from running are reported as "FATAL: ..." and exit with status 1.
#
# Environment:
#   METRICS_AT       URL to fetch the metrics from
#                    (default: http://localhost:8080/metrics)
#   ALERT_BACKLOG    alert once silentct_log_size - silentct_log_index of a
#                    log reaches this value (default: 65536)
#   ALERT_FRESHNESS  alert once silentct_log_timestamp of a log is at least
#                    this many seconds behind the current time (default: 86400)
#

set -eu

# notice MESSAGE... -- print an alert line on stderr.
notice() {
  echo "NOTICE: $*" >&2
}

# die MESSAGE... -- print a fatal error on stderr and exit with status 1.
die() {
  echo "FATAL: $*" >&2
  exit 1
}

# label_of NAME LINE -- store the value of label NAME found in metrics line
# LINE in the global variable $label.  Dies with a message when the label is
# absent, rather than letting a failed match abort the script silently.
label_of() {
  local pattern="$1=\"([^\"]+)"
  [[ $2 =~ $pattern ]] || die "no $1 label in metric: $2"
  label=${BASH_REMATCH[1]}
}

#-----------------------------------------------------------------------------------------
# Options
#-----------------------------------------------------------------------------------------
METRICS_AT=${METRICS_AT:-http://localhost:8080/metrics}
ALERT_BACKLOG=${ALERT_BACKLOG:-65536}
ALERT_FRESHNESS=${ALERT_FRESHNESS:-86400}

#-----------------------------------------------------------------------------------------
# Download the current prometheus metrics
#-----------------------------------------------------------------------------------------
metrics_file=$(mktemp) || die "failed creating a temporary file"
# Single quotes: the path is expanded (and stays quoted) when the trap fires.
trap 'rm -f -- "$metrics_file"' EXIT
# -f turns HTTP errors (4xx/5xx) into a failure instead of saving the error
# page as if it were metrics.
curl -fso "$metrics_file" "$METRICS_AT" || die "failed retrieving metrics from $METRICS_AT"

#-----------------------------------------------------------------------------------------
# Parse metrics
#-----------------------------------------------------------------------------------------
declare -A log_index=()
declare -A log_size=()
declare -A log_timestamp=()
declare -A certificate_alert=()
need_restart=
have_need_restart=false

# "|| [[ -n $line ]]" keeps a final line that lacks a trailing newline.
while IFS= read -r line || [[ -n $line ]]; do
  # Skips comments ("# HELP", "# TYPE"), blank lines and unrelated metrics.
  [[ $line == silentct_* ]] || continue

  # The value is the last whitespace-separated field of the line.
  read -ra fields <<<"$line"
  value=${fields[${#fields[@]} - 1]}

  case $line in
    silentct_log_index*)
      label_of id "$line"
      log_index["$label"]=$value
      ;;
    silentct_log_size*)
      label_of id "$line"
      log_size["$label"]=$value
      ;;
    silentct_log_timestamp*)
      label_of id "$line"
      log_timestamp["$label"]=$value
      ;;
    silentct_certificate_alert*)
      label_of stored_at "$line"
      certificate_alert["$label"]=$value
      ;;
    silentct_need_restart*)
      need_restart=$value
      have_need_restart=true
      ;;
  esac
done <"$metrics_file"

[[ $have_need_restart == true ]] ||
  die "metric silentct_need_restart is missing from $METRICS_AT"

#-----------------------------------------------------------------------------------------
# Output alerts
#-----------------------------------------------------------------------------------------
now=$(date +%s)
for id in "${!log_size[@]}"; do
  [[ -n ${log_index[$id]+set} ]] ||
    die "log $id -- metric silentct_log_index is missing"

  # Metric values may be floats or use exponent notation, so the arithmetic is
  # done in awk.  Values are passed with -v so that they are never parsed as
  # awk program text.
  backlog=$(awk -v sz="${log_size[$id]}" -v idx="${log_index[$id]}" \
    'BEGIN { print sz - idx }')
  if awk -v backlog="$backlog" -v limit="$ALERT_BACKLOG" \
    'BEGIN { exit !(backlog - limit >= 0) }'; then
    notice "log $id -- backlog is at $backlog"
  fi

  [[ -n ${log_timestamp[$id]+set} ]] ||
    die "log $id -- metric silentct_log_timestamp is missing"

  # The metric is divided by 1000 to get seconds comparable with `date +%s`.
  unix_timestamp=$(awk -v ms="${log_timestamp[$id]}" \
    'BEGIN { printf "%.0f", ms / 1000 }')
  if ((now - unix_timestamp >= ALERT_FRESHNESS)); then
    notice "log $id -- latest timestamp at $(date -d "@$unix_timestamp")"
  fi
done

for stored_at in "${!certificate_alert[@]}"; do
  observed_at=$(awk -v ts="${certificate_alert[$stored_at]}" \
    'BEGIN { printf "%.0f", ts }')
  notice "(mis)-issued certificate? Observed at $(date -d "@$observed_at") -- see $stored_at"
done

if [[ $need_restart != 0 ]]; then
  notice "silentct-mon needs to be restarted"
fi