#!/bin/bash
#
# A script that outputs warnings from silentct-mon's Prometheus metrics.  Mainly
# meant as an example, but may be useful for simple (periodic) one-shot checks.
#
# Optional environment variables:
#
#   METRICS_AT       URL to fetch the metrics from
#                    (default: http://localhost:8080/metrics)
#   ALERT_BACKLOG    warn when a log's size minus its index reaches this value
#                    (default: 65536)
#   ALERT_FRESHNESS  warn when a log's timestamp is at least this many seconds
#                    behind the current time (default: 86400)
#
# Warnings are written to stderr as "NOTICE: ..." lines.  The script exits
# non-zero only when the metrics cannot be fetched or processed.
#

set -eu

#-------------------------------------------------------------------------------
# Configuration
#-------------------------------------------------------------------------------
METRICS_AT=${METRICS_AT:-http://localhost:8080/metrics}
ALERT_BACKLOG=${ALERT_BACKLOG:-65536}
ALERT_FRESHNESS=${ALERT_FRESHNESS:-86400}

#-------------------------------------------------------------------------------
# Helper functions
#-------------------------------------------------------------------------------

# notice prints its arguments as one warning line on stderr.
notice() {
  printf 'NOTICE: %s\n' "$*" >&2
}

# die prints its arguments as one fatal-error line on stderr and exits 1.
die() {
  printf 'FATAL: %s\n' "$*" >&2
  exit 1
}

# to_integer prints the number in $1 (plain or scientific notation, e.g.
# 1.7e+12) rounded to zero decimals, without a trailing newline.
to_integer() {
  # Force the C locale: metric values use "." as the decimal separator, which
  # printf refuses to parse under locales that expect ",".
  LC_ALL=C printf '%.f' "$1"
}

# extract_label prints the value of label $2 found in metric line $1, i.e. the
# text between the first pair of double quotes following "<label>=".
extract_label() {
  local line=$1
  local label=$2
  local rest=${line#*"$label"=}

  rest=${rest#*\"}  # drop everything up to and including the opening quote
  rest=${rest%%\"*} # drop the closing quote and everything after it
  printf '%s\n' "$rest"
}

# extract_value prints the text after the last space of metric line $1.
extract_value() {
  local line=$1

  printf '%s\n' "${line##* }"
}

#-------------------------------------------------------------------------------
# Fetch metrics
#-------------------------------------------------------------------------------
metrics_file=$(mktemp) || die "failed creating temporary file"
trap 'rm -f -- "$metrics_file"' EXIT

# -f: treat HTTP error responses as failures instead of saving the error page
# and parsing it as if it were metrics (which would yield no warnings at all).
curl -fs -o "$metrics_file" "$METRICS_AT" || die "failed fetching from $METRICS_AT"

#-------------------------------------------------------------------------------
# Parse metrics
#-------------------------------------------------------------------------------
declare -A log_index=()     # log ID -> log index
declare -A log_size=()      # log ID -> log size
declare -A log_timestamp=() # log ID -> log timestamp (metric value / 1000)
need_restart=0
num_unexpected_crt=0

while IFS= read -r line; do
  [[ $line =~ ^# ]] && continue

  case "$line" in
    silentct_log_index*)
      key=$(extract_label "$line" "log_id")
      value=$(to_integer "$(extract_value "$line")")
      log_index["$key"]=$value
      ;;
    silentct_log_size*)
      key=$(extract_label "$line" "log_id")
      value=$(to_integer "$(extract_value "$line")")
      log_size["$key"]=$value
      ;;
    silentct_log_timestamp*)
      key=$(extract_label "$line" "log_id")
      value=$(to_integer "$(extract_value "$line")")
      # Divided by 1000 so it can be compared against `date +%s` below.
      log_timestamp["$key"]=$((value / 1000))
      ;;
    silentct_need_restart*)
      # Normalized like every other value so that e.g. "0.0" still means 0.
      need_restart=$(to_integer "$(extract_value "$line")")
      ;;
    silentct_unexpected_certificate*)
      # Every sample line of this metric is counted, whatever its value.
      num_unexpected_crt=$((num_unexpected_crt + 1))
      ;;
  esac
done <"$metrics_file"

#-------------------------------------------------------------------------------
# Output warnings
#-------------------------------------------------------------------------------
now=$(date +%s)

for log_id in "${!log_size[@]}"; do
  # Read the array elements with ${...} rather than naming them inside $((...)):
  # the arithmetic evaluator expands array subscripts a second time, and the
  # log IDs are text received from the metrics endpoint.  Logs without an index
  # or timestamp sample count as 0.
  size=${log_size[$log_id]}
  index=${log_index[$log_id]:-0}
  timestamp=${log_timestamp[$log_id]:-0}

  backlog=$((size - index))
  elapsed=$((now - timestamp))

  ((backlog < ALERT_BACKLOG)) || notice "log $log_id -- backlog is at $backlog"
  ((elapsed < ALERT_FRESHNESS)) || notice "log $log_id -- latest timestamp at $(date -d @"$timestamp")"
done

((need_restart == 0)) || notice "silentct-mon needs to be restarted"
((num_unexpected_crt == 0)) || notice "$num_unexpected_crt unexpected certificate(s)"