From 8a17817c61f14a727a1017a5bcd4b1ea82964528 Mon Sep 17 00:00:00 2001
From: Rasmus Dahlberg
Date: Sat, 18 Jan 2025 13:39:40 +0100
Subject: prometheus: Refine based on input from anarcat

https://gitlab.torproject.org/tpo/tpa/team/-/issues/40677

[review: RemoveLogState now deletes by the renamed "log_id" label. With
the old "id" name the Delete calls could not match any series, so the
metrics of a removed log were never dropped.]
---
Review notes (ignored when the patch is applied):

 * This is the cgit patch with its line breaks restored, plus the
   RemoveLogState fix described in the bracketed note above. The commit
   id and the blob ids on the "index" line are those of the original
   commit and do not cover that fix.
 * Indentation was lost in the scraped copy and is reconstructed here as
   gofmt-style tabs -- verify against the repository before applying.
 * What the patch does, as visible in the hunks below: the per-log label
   is renamed from "id" to "log_id" and its value changes from
   LogID.Base64String() to fmt.Sprintf("%x", LogID); the
   silentct_certificate_alert gauge (label "stored_at", value
   ObservedAt.Unix()) is replaced by silentct_unexpected_certificate_count
   (labels "log_id", "log_index", "crt_sans", value 1) and the method
   CertificateAlert is renamed to UnexpectedCertificateCount; struct
   fields, constructors and registrations are reordered by name.

 internal/metrics/metrics.go | 86 ++++++++++++++++++++++++++-------------------
 1 file changed, 50 insertions(+), 36 deletions(-)

(limited to 'internal/metrics')

diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go
index c5ff0d6..7a67a9c 100644
--- a/internal/metrics/metrics.go
+++ b/internal/metrics/metrics.go
@@ -1,55 +1,51 @@
 package metrics
 
 import (
+	"fmt"
+	"strings"
+
 	"github.com/prometheus/client_golang/prometheus"
 	"rgdd.se/silentct/internal/monitor"
 	"rgdd.se/silentct/pkg/storage/index"
 )
 
 type Metrics struct {
-	logSize          *prometheus.GaugeVec
-	logIndex         *prometheus.GaugeVec
-	logTimestamp     *prometheus.GaugeVec
-	certificateAlert *prometheus.GaugeVec
-	errorCounter     prometheus.Counter
-	needRestart      prometheus.Gauge
+	errorCounter               prometheus.Counter
+	logIndex                   *prometheus.GaugeVec
+	logSize                    *prometheus.GaugeVec
+	logTimestamp               *prometheus.GaugeVec
+	needRestart                prometheus.Gauge
+	unexpectedCertificateCount *prometheus.GaugeVec
 }
 
 func NewMetrics(registry *prometheus.Registry) *Metrics {
 	m := &Metrics{
-		logSize: prometheus.NewGaugeVec(
-			prometheus.GaugeOpts{
-				Name: "silentct_log_size",
-				Help: "The number of entries in the log.",
+		errorCounter: prometheus.NewCounter(
+			prometheus.CounterOpts{
+				Name: "silentct_error_counter",
+				Help: "The number of errors propagated to the main loop.",
 			},
-			[]string{"id"},
 		),
 		logIndex: prometheus.NewGaugeVec(
 			prometheus.GaugeOpts{
 				Name: "silentct_log_index",
 				Help: "The next log entry to be downloaded.",
 			},
-			[]string{"id"},
+			[]string{"log_id"},
 		),
-		logTimestamp: prometheus.NewGaugeVec(
+		logSize: prometheus.NewGaugeVec(
 			prometheus.GaugeOpts{
-				Name: "silentct_log_timestamp",
-				Help: "The log's UNIX timestamp in ms.",
+				Name: "silentct_log_size",
+				Help: "The number of entries in the log.",
 			},
-			[]string{"id"},
+			[]string{"log_id"},
 		),
-		certificateAlert: prometheus.NewGaugeVec(
+		logTimestamp: prometheus.NewGaugeVec(
 			prometheus.GaugeOpts{
-				Name: "silentct_certificate_alert",
-				Help: "The time the certificate without allowlisting was found.",
-			},
-			[]string{"stored_at"},
-		),
-		errorCounter: prometheus.NewCounter(
-			prometheus.CounterOpts{
-				Name: "silentct_error_counter",
-				Help: "The number of errors propagated to the main loop.",
+				Name: "silentct_log_timestamp",
+				Help: "The log's UNIX timestamp in ms.",
 			},
+			[]string{"log_id"},
 		),
 		needRestart: prometheus.NewGauge(
 			prometheus.GaugeOpts{
@@ -57,30 +53,48 @@ func NewMetrics(registry *prometheus.Registry) *Metrics {
 				Help: "A non-zero value if the monitor needs restarting.",
 			},
 		),
+		unexpectedCertificateCount: prometheus.NewGaugeVec(
+			prometheus.GaugeOpts{
+				Name: "silentct_unexpected_certificate_count",
+				Help: "Number of certificates without any allowlisting",
+			},
+			[]string{"log_id", "log_index", "crt_sans"},
+		),
 	}
-
-	registry.MustRegister(m.logSize, m.logIndex, m.logTimestamp, m.certificateAlert, m.errorCounter, m.needRestart)
+	registry.MustRegister(
+		m.errorCounter,
+		m.logIndex,
+		m.logSize,
+		m.logTimestamp,
+		m.needRestart,
+		m.unexpectedCertificateCount,
+	)
 	return m
 }
 
 func (m *Metrics) LogState(state monitor.State) {
-	id := state.LogID.Base64String()
+	id := fmt.Sprintf("%x", state.LogID)
 	m.logIndex.WithLabelValues(id).Set(float64(state.NextIndex))
 	m.logSize.WithLabelValues(id).Set(float64(state.TreeSize))
 	m.logTimestamp.WithLabelValues(id).Set(float64(state.Timestamp))
 }
 
 func (m *Metrics) RemoveLogState(state monitor.State) {
-	id := state.LogID.Base64String()
-	m.logIndex.Delete(prometheus.Labels{"id": id})
-	m.logSize.Delete(prometheus.Labels{"id": id})
-	m.logTimestamp.Delete(prometheus.Labels{"id": id})
+	id := fmt.Sprintf("%x", state.LogID)
+	m.logIndex.Delete(prometheus.Labels{"log_id": id})
+	m.logSize.Delete(prometheus.Labels{"log_id": id})
+	m.logTimestamp.Delete(prometheus.Labels{"log_id": id})
 }
 
-func (m *Metrics) CertificateAlert(alerts []index.CertificateInfo) {
-	m.certificateAlert.Reset()
+func (m *Metrics) UnexpectedCertificateCount(alerts []index.CertificateInfo) {
+	m.unexpectedCertificateCount.Reset()
 	for _, alert := range alerts {
-		m.certificateAlert.WithLabelValues(alert.StoredAt).Set(float64(alert.ObservedAt.Unix()))
+		labels := prometheus.Labels{
+			"crt_sans":  strings.Join(alert.SANs, " "),
+			"log_id":    fmt.Sprintf("%x", alert.LogID),
+			"log_index": fmt.Sprintf("%d", alert.LogIndex),
+		}
+		m.unexpectedCertificateCount.With(labels).Set(1)
 	}
 }
 
-- 
cgit v1.2.3