aboutsummaryrefslogtreecommitdiff
path: root/contrib/silentct-check
blob: 97aa2b21b46f72e19dcbf2b7a18854406102fde4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#!/bin/bash

#
# A script that outputs warnings from silentct-mon's Prometheus metrics.  Mainly
# meant as an example, but may be useful for simple (periodic) one-shot checks.
#

# Abort on any unhandled command failure and on use of an unset variable.
set -o errexit -o nounset

#-------------------------------------------------------------------------------
# Configuration -- every setting can be overridden from the environment
#-------------------------------------------------------------------------------
# URL the metrics are fetched from.
: "${METRICS_AT:=http://localhost:8080/metrics}"
# A log is reported once (log size - log index) reaches this value.
: "${ALERT_BACKLOG:=65536}"
# A log is reported once its latest timestamp is at least this many seconds old.
: "${ALERT_FRESHNESS:=86400}"

#-------------------------------------------------------------------------------
# Helper functions
#-------------------------------------------------------------------------------
# Write a warning to stderr: all arguments joined into one line, prefixed with
# "NOTICE: ".
notice() {
    printf 'NOTICE: %s\n' "$*" >&2
}

# Write the arguments to stderr as one line prefixed with "FATAL: ", then
# terminate the script with exit status 1.
die() {
    printf 'FATAL: %s\n' "$*" >&2
    exit 1
}

# Print $1 -- a number in any floating-point notation printf accepts, such as
# 42, 65536.0 or 1.7e+09 -- rounded to an integer without decimals.
to_integer() {
    # bash's printf parses floating-point arguments according to the current
    # locale.  Metric values are expected to use '.' as the decimal separator,
    # so pin the C locale for this one conversion; otherwise the parse fails
    # in locales where the separator is ','.
    LC_ALL=C printf '%.0f' "$1"
}

# Print the text between the first pair of double quotes that follows
# "<label>=" in a metric line.
#   $1 - metric line
#   $2 - label name
extract_label() {
    local line=$1
    local label=$2
    local tail

    # Drop everything up to and including the first "<label>=".
    tail=${line#*"$label"=}
    # Drop everything up to and including the opening double quote ...
    tail=${tail#*\"}
    # ... and everything from the next double quote onwards.
    tail=${tail%%\"*}

    printf '%s\n' "$tail"
}

# Print the last space-separated field of a metric line, i.e. whatever follows
# the final space (the whole line if it contains no space).
#   $1 - metric line
extract_value() {
    local sample=$1
    local last_field=${sample##* }

    printf '%s\n' "$last_field"
}

#-------------------------------------------------------------------------------
# Fetch metrics
#-------------------------------------------------------------------------------
# Download the metrics into a temporary file that is removed again when the
# script exits, whatever the exit path.
metrics_file=$(mktemp) || die "failed creating temporary file"
# Single quotes defer expansion until the trap runs; the inner double quotes
# keep the path intact even if it contains whitespace or glob characters.
trap 'rm -f -- "$metrics_file"' EXIT

# -f: make curl exit non-zero on an HTTP error status, so a 4xx/5xx error page
#     is not mistaken for (empty) metrics and silently reported as "all good".
# -sS: no progress meter, but still print curl's own error message on failure.
curl -fsS -o "$metrics_file" "$METRICS_AT" || die "failed fetching from $METRICS_AT"

#-------------------------------------------------------------------------------
# Parse metrics
#-------------------------------------------------------------------------------
# Per-log samples; all three maps are keyed by the value of the "log_id" label:
#   log_index      log ID -> log index
#   log_size       log ID -> log size
#   log_timestamp  log ID -> log timestamp (sample value divided by 1000)
declare -A log_index log_size log_timestamp

need_restart=0        # value of the last silentct_need_restart line seen
num_unexpected_crt=0  # number of silentct_unexpected_certificate lines seen

while IFS= read -r sample; do
    if [[ $sample == '#'* ]]; then
        # Lines that start with '#' carry no sample.
        continue
    elif [[ $sample == silentct_log_index* ]]; then
        id=$(extract_label "$sample" "log_id")
        number=$(to_integer "$(extract_value "$sample")")
        log_index["$id"]=$number
    elif [[ $sample == silentct_log_size* ]]; then
        id=$(extract_label "$sample" "log_id")
        number=$(to_integer "$(extract_value "$sample")")
        log_size["$id"]=$number
    elif [[ $sample == silentct_log_timestamp* ]]; then
        id=$(extract_label "$sample" "log_id")
        number=$(to_integer "$(extract_value "$sample")")
        # Presumably milliseconds -> seconds: the result is later compared
        # against the output of `date +%s`.
        log_timestamp["$id"]=$((number / 1000))
    elif [[ $sample == silentct_need_restart* ]]; then
        need_restart=$(extract_value "$sample")
    elif [[ $sample == silentct_unexpected_certificate* ]]; then
        # Only the number of such lines matters, not their values.
        num_unexpected_crt=$((num_unexpected_crt + 1))
    fi
done <"$metrics_file"

#-------------------------------------------------------------------------------
# Output warnings
#-------------------------------------------------------------------------------
now=$(date +%s)

# Report every log whose backlog or staleness has reached its alert threshold.
for log_id in "${!log_size[@]}"; do
    # Fetch the per-log values with ${...} expansions instead of naming the
    # array elements inside $((...)).  Log IDs originate from the fetched
    # metrics, and bash evaluates an associative-array subscript that appears
    # inside an arithmetic expression a second time, so an ID containing ']'
    # or '$(...)' could break or hijack the calculation.  A missing index or
    # timestamp counts as 0 (set -u would otherwise abort on the unset element).
    size=${log_size[$log_id]}
    index=${log_index[$log_id]:-0}
    timestamp=${log_timestamp[$log_id]:-0}

    backlog=$((size - index))
    elapsed=$((now - timestamp))

    ((backlog < ALERT_BACKLOG))   || notice "log $log_id -- backlog is at $backlog"
    ((elapsed < ALERT_FRESHNESS)) || notice "log $log_id -- latest timestamp at $(date -d @"$timestamp")"
done

# Global conditions that are not tied to a particular log.
[[ $need_restart == 0 ]]       || notice "silentct-mon needs to be restarted"
[[ $num_unexpected_crt == 0 ]] || notice "$num_unexpected_crt unexpected certificate(s)"