aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRasmus Dahlberg <rgdd@glasklarteknik.se>2025-01-04 14:22:20 +0100
committerRasmus Dahlberg <rgdd@glasklarteknik.se>2025-01-04 14:22:20 +0100
commitd8e0b9c937dc974fef7484db3f85fabfe9272d7d (patch)
treebfe3e8295f1e5d0919fcb331a3ba478be0d99031
parent80667f1f5707b75cbd4aff47b51bab103c429b24 (diff)
prometheus: Add basic metrics for alerting
- Detect if we're falling behind while downloading - Detect if there are any found certificates alerting
-rw-r--r--cmd/silentct-mon/main.go22
-rwxr-xr-xcmd/silentct-mon/silentct-monbin0 -> 14044167 bytes
-rw-r--r--go.mod8
-rw-r--r--go.sum18
-rwxr-xr-xintegration/smoke-test3
-rw-r--r--internal/manager/manager.go12
-rw-r--r--internal/metrics/metrics.go71
-rw-r--r--pkg/storage/index/index.go7
-rwxr-xr-xscripts/silentct-check98
9 files changed, 238 insertions, 1 deletions
diff --git a/cmd/silentct-mon/main.go b/cmd/silentct-mon/main.go
index e2ecdb7..ce0a548 100644
--- a/cmd/silentct-mon/main.go
+++ b/cmd/silentct-mon/main.go
@@ -6,6 +6,7 @@ import (
"flag"
"fmt"
"log"
+ "net/http"
"os"
"os/signal"
"strings"
@@ -13,11 +14,14 @@ import (
"syscall"
"time"
+ "github.com/prometheus/client_golang/prometheus"
+ "github.com/prometheus/client_golang/prometheus/promhttp"
"rgdd.se/silentct/internal/feedback"
"rgdd.se/silentct/internal/flagopt"
"rgdd.se/silentct/internal/ioutil"
"rgdd.se/silentct/internal/logger"
"rgdd.se/silentct/internal/manager"
+ "rgdd.se/silentct/internal/metrics"
"rgdd.se/silentct/internal/monitor"
"rgdd.se/silentct/pkg/policy"
)
@@ -43,6 +47,7 @@ Options:
-e, --please-exit Toggle to only run until up-to-date (Default: false)
-f, --force Override santity checks that may not be fatal (Default: false)
-o, --output-file File that all output will be written to (Default: stdout)
+ -m, --metrics-at Host address to serve /metrics on (Default: disabled)
-p, --pull-interval How often nodes are pulled for certificates (Default: 15m)
-v, --verbosity Leveled logging output (default: NOTICE)
-w, --num-workers Number of parallel workers to fetch each log with (Default: 1)
@@ -57,6 +62,7 @@ type config struct {
directory string
pleaseExit bool
force bool
+ metricsAt string
outputFile string
pullInterval time.Duration
numWorkers uint
@@ -75,6 +81,7 @@ func configure(cmd string, args []string) (cfg config, err error) {
flagopt.StringOpt(fs, &cfg.directory, "directory", "d", "")
flagopt.BoolOpt(fs, &cfg.pleaseExit, "please-exit", "e", false)
flagopt.BoolOpt(fs, &cfg.force, "force", "f", false)
+ flagopt.StringOpt(fs, &cfg.metricsAt, "metrics-at", "m", "")
flagopt.StringOpt(fs, &cfg.outputFile, "output-file", "o", "")
flagopt.DurationOpt(fs, &cfg.pullInterval, "pull-interval", "p", 15*time.Minute)
flagopt.StringOpt(fs, &cfg.verbosity, "verbosity", "v", logger.LevelNotice.String())
@@ -144,10 +151,12 @@ func main() {
errorCh := make(chan error)
defer close(errorCh)
+ registry := prometheus.NewRegistry()
mgr, err := manager.New(manager.Config{
Policy: cfg.policy,
Bootstrap: cfg.bootstrap,
Directory: cfg.directory,
+ Metrics: metrics.NewMetrics(registry),
Logger: cfg.log,
AlertDelay: cfg.pullInterval * 3 / 2,
}, feventCh, meventCh, mconfigCh, errorCh)
@@ -203,6 +212,19 @@ func main() {
fb.RunForever(ctx)
}()
+	if cfg.metricsAt != "" {
+		// Serve /metrics from a dedicated mux and server instead of the
+		// process-wide http.DefaultServeMux, so that nothing else is exposed
+		// on this address and the listener can be stopped on shutdown.
+		mux := http.NewServeMux()
+		mux.Handle("/metrics", promhttp.HandlerFor(registry, promhttp.HandlerOpts{}))
+		srv := &http.Server{
+			Addr:              cfg.metricsAt,
+			Handler:           mux,
+			ReadHeaderTimeout: 10 * time.Second,
+		}
+
+		wg.Add(2)
+		go func() {
+			defer wg.Done()
+
+			// Without this the serving goroutine below never returns, and
+			// the wg.Wait() that runs on exit would block forever.
+			<-ctx.Done()
+			srv.Close() // best effort: we are shutting down either way
+		}()
+		go func() {
+			defer wg.Done()
+			defer cancel()
+
+			// http.ErrServerClosed is the expected result of srv.Close().
+			if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
+				cfg.log.Fatalf("metrics: %v\n", err)
+			}
+		}()
+	}
+
os.Exit(func() int {
defer wg.Wait()
defer cancel()
diff --git a/cmd/silentct-mon/silentct-mon b/cmd/silentct-mon/silentct-mon
new file mode 100755
index 0000000..1965f48
--- /dev/null
+++ b/cmd/silentct-mon/silentct-mon
Binary files differ
diff --git a/go.mod b/go.mod
index 4f49688..b119535 100644
--- a/go.mod
+++ b/go.mod
@@ -4,14 +4,22 @@ go 1.22.7
require (
github.com/google/certificate-transparency-go v1.3.0
+ github.com/prometheus/client_golang v1.20.5
github.com/transparency-dev/merkle v0.0.2
gitlab.torproject.org/rgdd/ct v0.0.0
golang.org/x/crypto v0.31.0
)
require (
+ github.com/beorn7/perks v1.0.1 // indirect
+ github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/go-logr/logr v1.4.2 // indirect
github.com/google/trillian v1.7.0 // indirect
+ github.com/klauspost/compress v1.17.9 // indirect
+ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
+ github.com/prometheus/client_model v0.6.1 // indirect
+ github.com/prometheus/common v0.55.0 // indirect
+ github.com/prometheus/procfs v0.15.1 // indirect
golang.org/x/net v0.31.0 // indirect
golang.org/x/sys v0.28.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20241113202542-65e8d215514f // indirect
diff --git a/go.sum b/go.sum
index 8babb4d..abcd96e 100644
--- a/go.sum
+++ b/go.sum
@@ -1,3 +1,7 @@
+github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
+github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
+github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
+github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
@@ -8,6 +12,20 @@ github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/trillian v1.7.0 h1:Oib7mKRvZ0Z3GjvNcn2C4clRmFouEOkBcbzw7q8JlFI=
github.com/google/trillian v1.7.0/go.mod h1:JMp1zzzHe7j2m9m8P/eTWOaoon3R/SwgqUnFMhm4vfw=
+github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA=
+github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
+github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
+github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
+github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
+github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
+github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y=
+github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE=
+github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
+github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
+github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc=
+github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8=
+github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
+github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
github.com/transparency-dev/merkle v0.0.2 h1:Q9nBoQcZcgPamMkGn7ghV8XiTZ/kRxn1yCG81+twTK4=
github.com/transparency-dev/merkle v0.0.2/go.mod h1:pqSy+OXefQ1EDUVmAJ8MUhHB9TXGuzVAT58PqBoHz1A=
gitlab.torproject.org/rgdd/ct v0.0.0 h1:YeVjFD14bFMMY+oIT6oGuG+8MzcOkFVcKgACqK1IbD0=
diff --git a/integration/smoke-test b/integration/smoke-test
index 4d92478..a128592 100755
--- a/integration/smoke-test
+++ b/integration/smoke-test
@@ -95,3 +95,6 @@ pass "run the monitor and be warned of an unreported certificate"
#-------------------------------------------------------------------------------
echo "---" >&2
echo "All smoke tests passed" >&2
+echo "For interactive tests:" >&2
+echo "go run ../cmd/silentct-mon -c "$dir/config.json" -d "$dir/state" -C "dev:silentct" -p 15s -m localhost:8080 -v DEBUG" >&2
+echo "ALERT_BACKLOG=0 ALERT_FRESHNESS=0 ../scripts/silentct-check" >&2
diff --git a/internal/manager/manager.go b/internal/manager/manager.go
index f8b05fd..0f206b8 100644
--- a/internal/manager/manager.go
+++ b/internal/manager/manager.go
@@ -10,6 +10,7 @@ import (
"gitlab.torproject.org/rgdd/ct/pkg/metadata"
"rgdd.se/silentct/internal/feedback"
"rgdd.se/silentct/internal/logger"
+ "rgdd.se/silentct/internal/metrics"
"rgdd.se/silentct/internal/monitor"
"rgdd.se/silentct/pkg/policy"
"rgdd.se/silentct/pkg/storage"
@@ -19,6 +20,7 @@ type Config struct {
Policy policy.Policy
Bootstrap bool // Whether a new storage should be initialized from scratch
Directory string // Path to a directory where everything will be stored
+ Metrics *metrics.Metrics
// Optional
Logger *logger.Logger // Where to output messages and with what verbosity
@@ -140,6 +142,7 @@ func (mgr *Manager) startupConfig() error {
return err
}
mgr.mconfigCh <- monitor.MonitoredLog{Config: log, State: state}
+ mgr.Metrics.LogState(state)
}
return nil
}
@@ -164,6 +167,7 @@ func (mgr *Manager) removeLogs(logs []metadata.Log) {
state, _ := mgr.GetMonitorState(log)
mgr.Logger.Infof("removing log %s with %d entries in its backlog\n", log.URL, state.TreeSize-state.NextIndex)
mgr.mconfigCh <- monitor.MonitoredLog{Config: log}
+ mgr.Metrics.RemoveLogState(state)
}
}
@@ -179,6 +183,7 @@ func (mgr *Manager) addLogs(ctx context.Context, logs []metadata.Log) {
mgr.Logger.Infof("bootstrapping log %s at next index 0\n", log.URL)
}
mgr.mconfigCh <- monitor.MonitoredLog{Config: log, State: state}
+ mgr.Metrics.LogState(state)
}
}
@@ -197,7 +202,11 @@ func (mgr *Manager) monitorJob(msg monitor.Event) error {
if err := mgr.AddEntries(msg.State.LogID, msg.Matches); err != nil {
return err
}
- return mgr.SetMonitorState(msg.State.LogID, msg.State)
+ if err := mgr.SetMonitorState(msg.State.LogID, msg.State); err != nil {
+ return err
+ }
+ mgr.Metrics.LogState(msg.State)
+ return nil
}
func (mgr *Manager) alertJob() error {
@@ -208,6 +217,7 @@ func (mgr *Manager) alertJob() error {
for _, alert := range alerts {
mgr.Logger.Noticef("certificate mis-issuance? No allowlisting for %s\n", alert.StoredAt)
}
+ mgr.Metrics.CertificateAlert(mgr.Storage.Index.Alerting())
return nil
}
diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go
new file mode 100644
index 0000000..113d28c
--- /dev/null
+++ b/internal/metrics/metrics.go
@@ -0,0 +1,71 @@
+package metrics
+
+import (
+ "github.com/prometheus/client_golang/prometheus"
+ "rgdd.se/silentct/internal/monitor"
+ "rgdd.se/silentct/pkg/storage/index"
+)
+
+// Metrics groups the prometheus gauge vectors created by NewMetrics.
+type Metrics struct {
+	// Per-log gauges, each with the single label "id".
+	logSize, logIndex, logTimestamp *prometheus.GaugeVec
+
+	// Per-certificate gauge with the single label "stored_at".
+	certificateAlert *prometheus.GaugeVec
+}
+
+// NewMetrics creates the silentct gauge vectors and registers all of them
+// with registry via MustRegister, which panics on registration errors.
+func NewMetrics(registry *prometheus.Registry) *Metrics {
+	// Every gauge is a vector with exactly one label, so share the boilerplate.
+	gauge := func(name, help, label string) *prometheus.GaugeVec {
+		return prometheus.NewGaugeVec(prometheus.GaugeOpts{Name: name, Help: help}, []string{label})
+	}
+
+	m := &Metrics{
+		logSize:          gauge("silentct_log_size", "The number of entries in the log.", "id"),
+		logIndex:         gauge("silentct_log_index", "The next log entry to be downloaded.", "id"),
+		logTimestamp:     gauge("silentct_log_timestamp", "The log's UNIX timestamp in ms.", "id"),
+		certificateAlert: gauge("silentct_certificate_alert", "The time the certificate without allowlisting was found.", "stored_at"),
+	}
+	registry.MustRegister(m.logSize, m.logIndex, m.logTimestamp, m.certificateAlert)
+	return m
+}
+
+// LogState sets the index, size, and timestamp gauges from state's NextIndex,
+// TreeSize, and Timestamp, labelled with state.LogID.Base64String().
+func (m *Metrics) LogState(state monitor.State) {
+	labels := prometheus.Labels{"id": state.LogID.Base64String()}
+
+	m.logIndex.With(labels).Set(float64(state.NextIndex))
+	m.logSize.With(labels).Set(float64(state.TreeSize))
+	m.logTimestamp.With(labels).Set(float64(state.Timestamp))
+}
+
+// RemoveLogState deletes the index, size, and timestamp gauges that carry
+// the label state.LogID.Base64String().
+func (m *Metrics) RemoveLogState(state monitor.State) {
+	id := state.LogID.Base64String()
+	for _, vec := range []*prometheus.GaugeVec{m.logIndex, m.logSize, m.logTimestamp} {
+		vec.DeleteLabelValues(id)
+	}
+}
+
+// CertificateAlert drops all existing certificate-alert gauges and then sets
+// one per entry in alerts: the label is StoredAt, the value ObservedAt.Unix().
+func (m *Metrics) CertificateAlert(alerts []index.CertificateInfo) {
+	m.certificateAlert.Reset()
+	for i := range alerts {
+		observedAt := alerts[i].ObservedAt.Unix()
+		m.certificateAlert.With(prometheus.Labels{"stored_at": alerts[i].StoredAt}).Set(float64(observedAt))
+	}
+}
diff --git a/pkg/storage/index/index.go b/pkg/storage/index/index.go
index 0056565..bf94711 100644
--- a/pkg/storage/index/index.go
+++ b/pkg/storage/index/index.go
@@ -101,6 +101,13 @@ func (ix *Index) TriggerAlerts() ([]CertificateInfo, error) {
return alerts, ioutil.CommitJSON(ix.cfg.IndexFile, ix.mem)
}
+// Alerting returns the first CertificateInfo of every non-empty entry in the
+// index's in-memory alerting collection. The result is nil if there are none.
+func (ix *Index) Alerting() (ret []CertificateInfo) {
+	for _, cis := range ix.mem.Alerting {
+		if len(cis) == 0 {
+			continue // nothing to report, and cis[0] would panic
+		}
+		ret = append(ret, cis[0]) // one is enough for the same crt ID
+	}
+	return ret
+}
+
func (index *Index) Validate() error {
return nil // FIXME: check that the index is populated with valid values
}
diff --git a/scripts/silentct-check b/scripts/silentct-check
new file mode 100755
index 0000000..35d38c7
--- /dev/null
+++ b/scripts/silentct-check
@@ -0,0 +1,98 @@
+#!/bin/bash
+
+#
+# A script that emits warnings based on the silentct-mon prometheus metrics.
+# Mainly meant as an example for those that configure alerts using prometheus.
+#
+
+set -eu
+
+# warn prints its arguments to stderr, prefixed with "WARNING: ".
+warn() {
+	printf 'WARNING: %s\n' "$*" >&2
+}
+
+# die prints its arguments to stderr, prefixed with "FATAL: ", and exits with status 1.
+die() {
+	printf 'FATAL: %s\n' "$*" >&2
+	exit 1
+}
+
+#-----------------------------------------------------------------------------------------
+# Options
+#-----------------------------------------------------------------------------------------
+# Each option keeps its value from the environment if set and non-empty.
+: "${METRICS_AT:=http://localhost:8080/metrics}" # URL the metrics are downloaded from
+: "${ALERT_BACKLOG:=65536}"                      # warn once size minus index reaches this value
+: "${ALERT_FRESHNESS:=86400}"                    # warn once a log timestamp is this many seconds old
+
+#-----------------------------------------------------------------------------------------
+# Download the current prometheus metrics
+#-----------------------------------------------------------------------------------------
+metrics_file=$(mktemp)
+# Single quotes defer the expansion until the trap runs, and the inner double
+# quotes keep the path in one piece even if it contains whitespace.
+trap 'rm -f "$metrics_file"' EXIT
+# -f makes curl fail on HTTP errors instead of saving the error page as metrics.
+curl -sfo "$metrics_file" "$METRICS_AT" || die "failed retrieving metrics from $METRICS_AT"
+
+#-----------------------------------------------------------------------------------------
+# Parse per-log metrics
+#-----------------------------------------------------------------------------------------
+# For each per-log sample, store its value (the last field on the line) in
+# the array of the matching metric, keyed by the sample's id label.
+declare -A log_index log_size log_timestamp
+while IFS= read -r line; do
+	case $line in
+	"#"*)
+		continue # skip comments
+		;;
+	silentct_log_index*)
+		id=$(echo "$line" | grep -oP '(?<=id=")[^"]+')
+		log_index["$id"]=$(echo "$line" | awk '{print $NF}')
+		;;
+	silentct_log_size*)
+		id=$(echo "$line" | grep -oP '(?<=id=")[^"]+')
+		log_size["$id"]=$(echo "$line" | awk '{print $NF}')
+		;;
+	silentct_log_timestamp*)
+		id=$(echo "$line" | grep -oP '(?<=id=")[^"]+')
+		log_timestamp["$id"]=$(echo "$line" | awk '{print $NF}')
+		;;
+	esac
+done <"$metrics_file"
+
+#-----------------------------------------------------------------------------------------
+# Parse certificate-alert metrics
+#-----------------------------------------------------------------------------------------
+# Map each alert's stored_at label to its sample value (the last field).
+declare -A certificate_alert
+while IFS= read -r line; do
+	# Only samples of this metric are of interest; this also skips comments.
+	[[ $line == silentct_certificate_alert* ]] || continue
+
+	stored_at=$(echo "$line" | grep -oP '(?<=stored_at=")[^"]+')
+	observed_at=$(echo "$line" | awk '{print $NF}')
+	certificate_alert["$stored_at"]=$observed_at
+done <"$metrics_file"
+
+#-----------------------------------------------------------------------------------------
+# Emit warnings
+#-----------------------------------------------------------------------------------------
+now=$(date +%s)
+
+# Per log: warn on a too large backlog and on a too old timestamp.
+for id in "${!log_size[@]}"; do
+	size=${log_size[$id]}
+	index=${log_index[$id]}
+	backlog=$(awk "BEGIN {print $size - $index}")
+	if awk "BEGIN {exit !($backlog - $ALERT_BACKLOG >= 0)}"; then
+		warn "log $id -- backlog is at $backlog"
+	fi
+
+	# The metric is in milliseconds, date and $now use seconds.
+	timestamp_ms=${log_timestamp[$id]}
+	unix_timestamp=$(awk "BEGIN {printf \"%.0f\", $timestamp_ms / 1000}")
+	age=$((now - unix_timestamp))
+	if ((age >= ALERT_FRESHNESS)); then
+		warn "log $id -- latest timestamp at $(date -d "@$unix_timestamp")"
+	fi
+done
+
+# Per certificate alert: always warn, pointing at where the certificate is stored.
+for stored_at in "${!certificate_alert[@]}"; do
+	observed_at=$(awk "BEGIN {printf \"%.0f\", ${certificate_alert[$stored_at]}}")
+	warn "(mis)-issued certificate? Observed at $(date -d "@$observed_at") -- see $stored_at"
+done