blob: a6a79a3602297f22a7c8e3ae1f05c1532c876f91 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
|
#!/bin/bash
#
# A script that emits warnings based on the silentct-mon Prometheus metrics.
# Mainly meant as an example for those who configure alerts using Prometheus.
#
# Stop at the first unhandled command failure and treat unset variables as errors.
set -o errexit -o nounset
# Write a warning to stderr.
# Arguments: the words of the message; they are joined with single spaces.
# Outputs:   "WARNING: <message>" on stderr, nothing on stdout.
warn() {
  printf 'WARNING: %s\n' "$*" 1>&2
}
# Report a fatal error on stderr and terminate the script.
# Arguments: the words of the message; they are joined with single spaces.
# Outputs:   "FATAL: <message>" on stderr.
# Exits:     always, with status 1.
die() {
  printf 'FATAL: %s\n' "$*" 1>&2
  exit 1
}
#-----------------------------------------------------------------------------------------
# Options
#-----------------------------------------------------------------------------------------
# Each option keeps a non-empty value from the environment, else gets a default.
# URL the metrics are downloaded from.
: "${METRICS_AT:=http://localhost:8080/metrics}"
# Backlog (log size minus log index) at or above which a log is reported.
: "${ALERT_BACKLOG:=65536}"
# Age in seconds of a log's latest timestamp at or above which it is reported.
: "${ALERT_FRESHNESS:=86400}"
#-----------------------------------------------------------------------------------------
# Download the current prometheus metrics
#-----------------------------------------------------------------------------------------
# Fetch the metrics into a private temporary file that is removed on exit.
metrics_file=$(mktemp) || die "failed creating a temporary file"
# Single quotes defer expansion until the trap fires, and the inner quotes keep
# the path intact even if it contains whitespace or glob characters.
trap 'rm -f -- "$metrics_file"' EXIT
# -f makes curl fail on HTTP error responses instead of saving the error page
# as if it were metrics; -sS stays quiet but still prints curl's own errors.
curl -fsS -o "$metrics_file" "$METRICS_AT" || die "failed retrieving metrics from $METRICS_AT"
#-----------------------------------------------------------------------------------------
# Parse per-log metrics
#-----------------------------------------------------------------------------------------
# Collect the per-log metrics into associative arrays keyed by log id.
declare -A log_index
declare -A log_size
declare -A log_timestamp
# Regexes live in variables so that bash treats them as patterns, not literals.
log_metric_pattern='^silentct_log_(index|size|timestamp)'
# Captures the quoted value of the first `id="..."` occurrence on a line.
log_id_pattern='id="([^"]+)"'
while IFS= read -r line; do
  # Only the three per-log series are of interest; this also skips comments.
  [[ $line =~ $log_metric_pattern ]] || continue
  metric=${BASH_REMATCH[1]}
  # Fail with a message instead of letting `set -e` end the script silently.
  [[ $line =~ $log_id_pattern ]] || die "metric line without id label: $line"
  id=${BASH_REMATCH[1]}
  # The sample value is the last whitespace-separated field of the line.
  read -r -a fields <<<"$line"
  value=${fields[${#fields[@]}-1]}
  case $metric in
    index) log_index["$id"]=$value ;;
    size) log_size["$id"]=$value ;;
    timestamp) log_timestamp["$id"]=$value ;;
  esac
done <"$metrics_file"
#-----------------------------------------------------------------------------------------
# Parse certificate-alert metrics
#-----------------------------------------------------------------------------------------
# Map each alert's stored_at label to the metric value (used as observed_at).
declare -A certificate_alert
# Captures the quoted value of the first `stored_at="..."` occurrence on a line.
stored_at_pattern='stored_at="([^"]+)"'
while IFS= read -r line; do
  # Comment lines start with '#', so the prefix test skips them as well.
  [[ $line == silentct_certificate_alert* ]] || continue
  # Fail with a message instead of letting `set -e` end the script silently.
  [[ $line =~ $stored_at_pattern ]] || die "metric line without stored_at label: $line"
  stored_at=${BASH_REMATCH[1]}
  # The sample value is the last whitespace-separated field of the line.
  read -r -a fields <<<"$line"
  observed_at=${fields[${#fields[@]}-1]}
  certificate_alert["$stored_at"]=$observed_at
done <"$metrics_file"
#-----------------------------------------------------------------------------------------
# Parse restart metric
#-----------------------------------------------------------------------------------------
# Read the restart metric; its value is the last field of the matching line.
need_restart=$(awk '/^silentct_need_restart/ { value = $NF } END { print value }' "$metrics_file")
# A missing metric is reported explicitly instead of being mistaken for a value.
[[ -n $need_restart ]] || die "metric silentct_need_restart not found at $METRICS_AT"
#-----------------------------------------------------------------------------------------
# Emit warnings
#-----------------------------------------------------------------------------------------
# Check every log against the backlog and freshness thresholds, then report
# certificate alerts and a pending restart. Metric values come from the
# downloaded file, so they are handed to awk with -v as data and never spliced
# into the awk program text.
now=$(date +%s)
for id in "${!log_size[@]}"; do
  # A log lacking one of its sibling metrics cannot be evaluated; say so and
  # move on rather than aborting on an unbound variable under `set -u`.
  if [[ -z ${log_index[$id]+set} || -z ${log_timestamp[$id]+set} ]]; then
    warn "log $id -- missing index or timestamp metric"
    continue
  fi

  # Backlog is the log size minus the log index.
  backlog=$(awk -v size="${log_size[$id]}" -v next_index="${log_index[$id]}" \
    'BEGIN { print size - next_index }')
  if awk -v backlog="$backlog" -v limit="$ALERT_BACKLOG" \
    'BEGIN { exit !(backlog - limit >= 0) }'; then
    warn "log $id -- backlog is at $backlog"
  fi

  # The timestamp metric is divided by 1000 before it is compared with seconds
  # since the epoch -- presumably it is in milliseconds; confirm with the exporter.
  unix_timestamp=$(awk -v ms="${log_timestamp[$id]}" 'BEGIN { printf "%.0f", ms / 1000 }')
  if (( now - unix_timestamp >= ALERT_FRESHNESS )); then
    warn "log $id -- latest timestamp at $(date -d "@$unix_timestamp")"
  fi
done

for stored_at in "${!certificate_alert[@]}"; do
  # Round the metric value to a whole number so that date accepts it.
  observed_at=$(awk -v ts="${certificate_alert[$stored_at]}" 'BEGIN { printf "%.0f", ts }')
  warn "(mis)-issued certificate? Observed at $(date -d "@$observed_at") -- see $stored_at"
done

# Any value other than the literal string 0 counts as "restart needed".
if [[ $need_restart != 0 ]]; then
  warn "silentct-mon needs to be restarted"
fi
|