From d8e0b9c937dc974fef7484db3f85fabfe9272d7d Mon Sep 17 00:00:00 2001
From: Rasmus Dahlberg <rgdd@glasklarteknik.se>
Date: Sat, 4 Jan 2025 14:22:20 +0100
Subject: prometheus: Add basic metrics for alerting

- Detect if we're falling behind while downloading
- Detect if any of the found certificates are alerting
---
 internal/manager/manager.go | 12 +++++++-
 internal/metrics/metrics.go | 71 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 82 insertions(+), 1 deletion(-)
 create mode 100644 internal/metrics/metrics.go

(limited to 'internal')

diff --git a/internal/manager/manager.go b/internal/manager/manager.go
index f8b05fd..0f206b8 100644
--- a/internal/manager/manager.go
+++ b/internal/manager/manager.go
@@ -10,6 +10,7 @@ import (
 	"gitlab.torproject.org/rgdd/ct/pkg/metadata"
 	"rgdd.se/silentct/internal/feedback"
 	"rgdd.se/silentct/internal/logger"
+	"rgdd.se/silentct/internal/metrics"
 	"rgdd.se/silentct/internal/monitor"
 	"rgdd.se/silentct/pkg/policy"
 	"rgdd.se/silentct/pkg/storage"
@@ -19,6 +20,7 @@ type Config struct {
 	Policy    policy.Policy
 	Bootstrap bool   // Whether a new storage should be initialized from scratch
 	Directory string // Path to a directory where everything will be stored
+	Metrics   *metrics.Metrics
 
 	// Optional
 	Logger                  *logger.Logger // Where to output messages and with what verbosity
@@ -140,6 +142,7 @@ func (mgr *Manager) startupConfig() error {
 			return err
 		}
 		mgr.mconfigCh <- monitor.MonitoredLog{Config: log, State: state}
+		mgr.Metrics.LogState(state)
 	}
 	return nil
 }
@@ -164,6 +167,7 @@ func (mgr *Manager) removeLogs(logs []metadata.Log) {
 		state, _ := mgr.GetMonitorState(log)
 		mgr.Logger.Infof("removing log %s with %d entries in its backlog\n", log.URL, state.TreeSize-state.NextIndex)
 		mgr.mconfigCh <- monitor.MonitoredLog{Config: log}
+		mgr.Metrics.RemoveLogState(state)
 	}
 }
 
@@ -179,6 +183,7 @@ func (mgr *Manager) addLogs(ctx context.Context, logs []metadata.Log) {
 			mgr.Logger.Infof("bootstrapping log %s at next index 0\n", log.URL)
 		}
 		mgr.mconfigCh <- monitor.MonitoredLog{Config: log, State: state}
+		mgr.Metrics.LogState(state)
 	}
 }
 
@@ -197,7 +202,11 @@ func (mgr *Manager) monitorJob(msg monitor.Event) error {
 	if err := mgr.AddEntries(msg.State.LogID, msg.Matches); err != nil {
 		return err
 	}
-	return mgr.SetMonitorState(msg.State.LogID, msg.State)
+	if err := mgr.SetMonitorState(msg.State.LogID, msg.State); err != nil {
+		return err
+	}
+	mgr.Metrics.LogState(msg.State)
+	return nil
 }
 
 func (mgr *Manager) alertJob() error {
@@ -208,6 +217,7 @@ func (mgr *Manager) alertJob() error {
 	for _, alert := range alerts {
 		mgr.Logger.Noticef("certificate mis-issuance? No allowlisting for %s\n", alert.StoredAt)
 	}
+	mgr.Metrics.CertificateAlert(mgr.Storage.Index.Alerting())
 	return nil
 }
 
diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go
new file mode 100644
index 0000000..113d28c
--- /dev/null
+++ b/internal/metrics/metrics.go
@@ -0,0 +1,71 @@
+// Package metrics defines the Prometheus metrics that silentct exports.
+package metrics
+
+import (
+	"github.com/prometheus/client_golang/prometheus"
+	"rgdd.se/silentct/internal/monitor"
+	"rgdd.se/silentct/pkg/storage/index"
+)
+
+// Metrics holds the Prometheus gauges that silentct exports for alerting.
+type Metrics struct {
+	// Per-log gauges; the "id" label is set from LogID.Base64String().
+	logSize, logIndex, logTimestamp *prometheus.GaugeVec
+
+	// One series per alerting certificate; the label is "stored_at".
+	certificateAlert *prometheus.GaugeVec
+}
+
+// NewMetrics creates all gauges and registers them with registry. It panics
+// if registration fails, because MustRegister is used.
+func NewMetrics(registry *prometheus.Registry) *Metrics {
+	// gauge builds a gauge vector that has exactly one label.
+	gauge := func(name, label, help string) *prometheus.GaugeVec {
+		opts := prometheus.GaugeOpts{Name: name, Help: help}
+		return prometheus.NewGaugeVec(opts, []string{label})
+	}
+
+	m := &Metrics{
+		logSize: gauge("silentct_log_size", "id",
+			"The number of entries in the log."),
+		logIndex: gauge("silentct_log_index", "id",
+			"The next log entry to be downloaded."),
+		logTimestamp: gauge("silentct_log_timestamp", "id",
+			"The log's UNIX timestamp in ms."),
+		certificateAlert: gauge("silentct_certificate_alert", "stored_at",
+			"The time the certificate without allowlisting was found."),
+	}
+
+	registry.MustRegister(m.logSize, m.logIndex, m.logTimestamp, m.certificateAlert)
+	return m
+}
+
+// LogState publishes the next index, tree size, and timestamp of state as the
+// current gauge values for the log identified by state.LogID.
+func (m *Metrics) LogState(state monitor.State) {
+	labels := prometheus.Labels{"id": state.LogID.Base64String()}
+	m.logIndex.With(labels).Set(float64(state.NextIndex))
+	m.logSize.With(labels).Set(float64(state.TreeSize))
+	m.logTimestamp.With(labels).Set(float64(state.Timestamp))
+}
+
+// RemoveLogState deletes the gauge series of the log identified by
+// state.LogID from all per-log gauges.
+func (m *Metrics) RemoveLogState(state monitor.State) {
+	id := state.LogID.Base64String()
+	for _, vec := range []*prometheus.GaugeVec{m.logIndex, m.logSize, m.logTimestamp} {
+		vec.DeleteLabelValues(id)
+	}
+}
+
+// CertificateAlert replaces all exported certificate alerts with alerts. Each
+// alert becomes one series labeled with its StoredAt value, and the gauge is
+// set to ObservedAt.Unix().
+func (m *Metrics) CertificateAlert(alerts []index.CertificateInfo) {
+	// Reset first so that alerts which are no longer reported disappear.
+	m.certificateAlert.Reset()
+	for i := range alerts {
+		observed := alerts[i].ObservedAt.Unix()
+		m.certificateAlert.WithLabelValues(alerts[i].StoredAt).Set(float64(observed))
+	}
+}
-- 
cgit v1.2.3