about | summary | refs | log | tree | commit | diff
path: root/internal
diff options
context:
space:
mode:
author: Rasmus Dahlberg <rgdd@glasklarteknik.se>, 2025-01-18 13:39:40 +0100
committer: Rasmus Dahlberg <rgdd@glasklarteknik.se>, 2025-01-18 16:34:14 +0100
commit: 8a17817c61f14a727a1017a5bcd4b1ea82964528 (patch)
tree: ec0fa96bfc683e906413106f2db2b99d710dc389 /internal
parent: 2d3b1f2cb0c05385c1702f1a7d74fa08d52c262f (diff)
prometheus: Refine based on input from anarcat [main]
https://gitlab.torproject.org/tpo/tpa/team/-/issues/40677
Diffstat (limited to 'internal')
-rw-r--r-- internal/manager/manager.go 5
-rw-r--r-- internal/metrics/metrics.go 80
2 files changed, 50 insertions, 35 deletions
diff --git a/internal/manager/manager.go b/internal/manager/manager.go
index 90f6507..d63d52d 100644
--- a/internal/manager/manager.go
+++ b/internal/manager/manager.go
@@ -5,6 +5,7 @@ import (
"errors"
"fmt"
"os"
+ "strings"
"time"
"gitlab.torproject.org/rgdd/ct/pkg/metadata"
@@ -222,9 +223,9 @@ func (mgr *Manager) alertJob() error {
return err
}
for _, alert := range alerts {
- mgr.Logger.Noticef("certificate mis-issuance? No allowlisting for %s\n", alert.StoredAt)
+ mgr.Logger.Noticef("unexpected certificate: no allowlisting for crt_sans=\"%s\", see log_id=\"%x\" log_index=\"%d\"\n", strings.Join(alert.SANs, " "), alert.LogID, alert.LogIndex)
}
- mgr.Metrics.CertificateAlert(mgr.Storage.Index.Alerting())
+ mgr.Metrics.UnexpectedCertificateCount(mgr.Storage.Index.Alerting())
return nil
}
diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go
index c5ff0d6..7a67a9c 100644
--- a/internal/metrics/metrics.go
+++ b/internal/metrics/metrics.go
@@ -1,55 +1,51 @@
package metrics
import (
+ "fmt"
+ "strings"
+
"github.com/prometheus/client_golang/prometheus"
"rgdd.se/silentct/internal/monitor"
"rgdd.se/silentct/pkg/storage/index"
)
type Metrics struct {
- logSize *prometheus.GaugeVec
- logIndex *prometheus.GaugeVec
- logTimestamp *prometheus.GaugeVec
- certificateAlert *prometheus.GaugeVec
- errorCounter prometheus.Counter
- needRestart prometheus.Gauge
+ errorCounter prometheus.Counter
+ logIndex *prometheus.GaugeVec
+ logSize *prometheus.GaugeVec
+ logTimestamp *prometheus.GaugeVec
+ needRestart prometheus.Gauge
+ unexpectedCertificateCount *prometheus.GaugeVec
}
func NewMetrics(registry *prometheus.Registry) *Metrics {
m := &Metrics{
- logSize: prometheus.NewGaugeVec(
- prometheus.GaugeOpts{
- Name: "silentct_log_size",
- Help: "The number of entries in the log.",
+ errorCounter: prometheus.NewCounter(
+ prometheus.CounterOpts{
+ Name: "silentct_error_counter",
+ Help: "The number of errors propagated to the main loop.",
},
- []string{"id"},
),
logIndex: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "silentct_log_index",
Help: "The next log entry to be downloaded.",
},
- []string{"id"},
+ []string{"log_id"},
),
- logTimestamp: prometheus.NewGaugeVec(
+ logSize: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
- Name: "silentct_log_timestamp",
- Help: "The log's UNIX timestamp in ms.",
+ Name: "silentct_log_size",
+ Help: "The number of entries in the log.",
},
- []string{"id"},
+ []string{"log_id"},
),
- certificateAlert: prometheus.NewGaugeVec(
+ logTimestamp: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
- Name: "silentct_certificate_alert",
- Help: "The time the certificate without allowlisting was found.",
- },
- []string{"stored_at"},
- ),
- errorCounter: prometheus.NewCounter(
- prometheus.CounterOpts{
- Name: "silentct_error_counter",
- Help: "The number of errors propagated to the main loop.",
+ Name: "silentct_log_timestamp",
+ Help: "The log's UNIX timestamp in ms.",
},
+ []string{"log_id"},
),
needRestart: prometheus.NewGauge(
prometheus.GaugeOpts{
@@ -57,30 +53,48 @@ func NewMetrics(registry *prometheus.Registry) *Metrics {
Help: "A non-zero value if the monitor needs restarting.",
},
),
+ unexpectedCertificateCount: prometheus.NewGaugeVec(
+ prometheus.GaugeOpts{
+ Name: "silentct_unexpected_certificate_count",
+ Help: "Number of certificates without any allowlisting",
+ },
+ []string{"log_id", "log_index", "crt_sans"},
+ ),
}
-
- registry.MustRegister(m.logSize, m.logIndex, m.logTimestamp, m.certificateAlert, m.errorCounter, m.needRestart)
+ registry.MustRegister(
+ m.errorCounter,
+ m.logIndex,
+ m.logSize,
+ m.logTimestamp,
+ m.needRestart,
+ m.unexpectedCertificateCount,
+ )
return m
}
func (m *Metrics) LogState(state monitor.State) {
- id := state.LogID.Base64String()
+ id := fmt.Sprintf("%x", state.LogID)
m.logIndex.WithLabelValues(id).Set(float64(state.NextIndex))
m.logSize.WithLabelValues(id).Set(float64(state.TreeSize))
m.logTimestamp.WithLabelValues(id).Set(float64(state.Timestamp))
}
func (m *Metrics) RemoveLogState(state monitor.State) {
- id := state.LogID.Base64String()
+ id := fmt.Sprintf("%x", state.LogID)
m.logIndex.Delete(prometheus.Labels{"id": id})
m.logSize.Delete(prometheus.Labels{"id": id})
m.logTimestamp.Delete(prometheus.Labels{"id": id})
}
-func (m *Metrics) CertificateAlert(alerts []index.CertificateInfo) {
- m.certificateAlert.Reset()
+func (m *Metrics) UnexpectedCertificateCount(alerts []index.CertificateInfo) {
+ m.unexpectedCertificateCount.Reset()
for _, alert := range alerts {
- m.certificateAlert.WithLabelValues(alert.StoredAt).Set(float64(alert.ObservedAt.Unix()))
+ labels := prometheus.Labels{
+ "crt_sans": strings.Join(alert.SANs, " "),
+ "log_id": fmt.Sprintf("%x", alert.LogID),
+ "log_index": fmt.Sprintf("%d", alert.LogIndex),
+ }
+ m.unexpectedCertificateCount.With(labels).Set(1)
}
}