blob: 97aa2b21b46f72e19dcbf2b7a18854406102fde4 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
|
#!/bin/bash
#
# A script that outputs warnings from silentct-mon's Prometheus metrics. Mainly
# meant as an example, but may be useful for simple (periodic) one-shot checks.
#
# Stop at the first failing command and treat unset variables as errors.
set -o errexit -o nounset
#-------------------------------------------------------------------------------
# Configuration
#-------------------------------------------------------------------------------
# Each setting keeps a non-empty value inherited from the environment and
# otherwise falls back to the default given here.
: "${METRICS_AT:=http://localhost:8080/metrics}" # URL the metrics are fetched from
: "${ALERT_BACKLOG:=65536}"                      # warn once (log size - log index) reaches this
: "${ALERT_FRESHNESS:=86400}"                    # warn once a log timestamp is this many seconds old
#-------------------------------------------------------------------------------
# Helper functions
#-------------------------------------------------------------------------------
# Write a warning to stderr: all arguments joined by spaces, prefixed "NOTICE: ".
notice() {
  printf 'NOTICE: %s\n' "$*" >&2
}
# Write a "FATAL: "-prefixed message (all arguments joined by spaces) to stderr
# and terminate the shell with exit status 1.
die() {
  printf 'FATAL: %s\n' "$*" >&2
  exit 1
}
# Print the number given as $1 with no fractional digits (e.g. "1.7e+09"
# becomes "1700000000"). No trailing newline is written.
#
# The printf builtin parses floating-point arguments according to the active
# locale, so the conversion is pinned to the C locale: the input always uses
# "." as the decimal separator, which a comma-decimal locale would reject.
to_integer() {
  LC_ALL=C printf '%.f' "$1"
}
# Print the value of one label of a metrics line.
#
# Arguments: $1 - metrics line, e.g. name{log_id="abc",other="x"} 42
#            $2 - label name (letters, digits and underscores)
# Outputs:   the text between the double quotes of that label, on stdout
# Returns:   0 if the label was found; 1 (with a message on stderr and nothing
#            on stdout) if the line does not carry the label
#
# The label name must directly follow "{" or "," (blanks allowed), so asking
# for "log_id" does not pick up a label such as "prev_log_id". Escaped quotes
# inside a label value are not interpreted: the value ends at the first '"'.
extract_label() {
  local line=$1
  local label=$2
  # Kept in a variable so that the quotes and brackets are regex syntax.
  local pattern="[{,][[:space:]]*${label}[[:space:]]*=[[:space:]]*\"([^\"]*)\""

  if [[ $line =~ $pattern ]]; then
    printf '%s\n' "${BASH_REMATCH[1]}"
    return 0
  fi
  printf 'extract_label: no label "%s" in line: %s\n' "$label" "$line" >&2
  return 1
}
# Print the last whitespace-separated token of a metrics line, which is where
# the sample value sits on lines of the form "name{labels} value".
#
# Arguments: $1 - metrics line
# Outputs:   the token followed by a newline (an empty line for blank input)
#
# Trailing blanks and a stray carriage return are removed first so they cannot
# turn the result into an empty string. Spaces and tabs both count as
# separators. printf is used so a token such as "-n" is printed, not parsed.
extract_value() {
  local line=$1
  # ${line##*[![:space:]]} is the run of trailing whitespace; strip it.
  line=${line%"${line##*[![:space:]]}"}
  printf '%s\n' "${line##*[[:space:]]}"
}
#-------------------------------------------------------------------------------
# Fetch metrics
#-------------------------------------------------------------------------------
# The metrics are downloaded into a private temporary file, which the EXIT
# trap removes again on every exit path.
metrics_file=$(mktemp) || die "failed creating a temporary file"
trap 'rm -f -- "$metrics_file"' EXIT
# -f makes curl fail on HTTP error responses instead of saving the error page
# as if it were metrics (which would then be parsed into "no warnings").
# -sS hides the progress meter but still shows curl's own error message.
curl -fsS -o "$metrics_file" "$METRICS_AT" || die "failed fetching from $METRICS_AT"
#-------------------------------------------------------------------------------
# Parse metrics
#-------------------------------------------------------------------------------
# Per-log tables, all keyed by the value of the "log_id" label.
declare -A log_index     # log ID -> log index
declare -A log_size      # log ID -> log size
declare -A log_timestamp # log ID -> log timestamp, divided by 1000
need_restart=0           # value of the most recent silentct_need_restart line
num_unexpected_crt=0     # number of silentct_unexpected_certificate lines

while IFS= read -r sample; do
  # Lines that begin with '#' are skipped.
  if [[ $sample == '#'* ]]; then
    continue
  fi

  # Dispatch on the metric name the line starts with; other lines are ignored.
  if [[ $sample == silentct_log_index* ]]; then
    id=$(extract_label "$sample" "log_id")
    number=$(to_integer "$(extract_value "$sample")")
    log_index["$id"]=$number
  elif [[ $sample == silentct_log_size* ]]; then
    id=$(extract_label "$sample" "log_id")
    number=$(to_integer "$(extract_value "$sample")")
    log_size["$id"]=$number
  elif [[ $sample == silentct_log_timestamp* ]]; then
    id=$(extract_label "$sample" "log_id")
    number=$(to_integer "$(extract_value "$sample")")
    # Scaled down by 1000 -- presumably milliseconds to seconds, since the
    # result is later compared against `date +%s`.
    log_timestamp["$id"]=$((number / 1000))
  elif [[ $sample == silentct_need_restart* ]]; then
    need_restart=$(extract_value "$sample")
  elif [[ $sample == silentct_unexpected_certificate* ]]; then
    num_unexpected_crt=$((num_unexpected_crt + 1))
  fi
done <"$metrics_file"
#-------------------------------------------------------------------------------
# Output warnings
#-------------------------------------------------------------------------------
# Emit one NOTICE per condition that needs attention.
#
# Globals read: log_size, log_index, log_timestamp (associative arrays keyed
#               by log ID), ALERT_BACKLOG, ALERT_FRESHNESS, need_restart,
#               num_unexpected_crt
# Outputs:      whatever notice() writes; nothing when all is well
#
# For every log with a known size:
#   - backlog = size - index; warn unless it is below ALERT_BACKLOG
#   - elapsed = now - timestamp (seconds); warn unless below ALERT_FRESHNESS
# A log without a recorded index or timestamp is treated as having 0 there.
output_warnings() {
  local log_id size index timestamp backlog elapsed now
  now=$(date +%s)

  for log_id in "${!log_size[@]}"; do
    # Fetch the elements with ${...} before any arithmetic. Writing
    # log_size[$log_id] inside $(( )) would have the arithmetic evaluator
    # expand the subscript a second time, and the log ID comes from the
    # metrics endpoint, so a crafted ID could inject a command substitution.
    size=${log_size[$log_id]}
    index=${log_index[$log_id]:-0}
    timestamp=${log_timestamp[$log_id]:-0}

    backlog=$((size - index))
    elapsed=$((now - timestamp))

    ((backlog < ALERT_BACKLOG)) || notice "log $log_id -- backlog is at $backlog"
    ((elapsed < ALERT_FRESHNESS)) || notice "log $log_id -- latest timestamp at $(date -d @"$timestamp")"
  done

  [[ $need_restart == 0 ]] || notice "silentct-mon needs to be restarted"
  [[ $num_unexpected_crt == 0 ]] || notice "$num_unexpected_crt unexpected certificate(s)"
}
output_warnings
|