diff options
author | Rasmus Dahlberg <rgdd@glasklarteknik.se> | 2025-05-11 20:07:41 +0200 |
---|---|---|
committer | Rasmus Dahlberg <rgdd@glasklarteknik.se> | 2025-05-11 20:23:33 +0200 |
commit | f073493c3d11a4d743f0ee1c3f4b423c51f60e29 (patch) | |
tree | 3aafdeb2d3ddd1ec898468897a6e759bf2a7e709 | |
parent | 5285be7f31ea8c043566e4e9f1ed5acd50168837 (diff) |
metrics: Add human-meaningful log_name label (main)
Use log metadata description if available, otherwise fall back on log
URL without the https:// prefix.
Keeping log_id for now at least - seems useful for scripts.
-rwxr-xr-x | contrib/silentct-check | 16 | ||||
-rw-r--r-- | docs/metrics.md | 16 | ||||
-rw-r--r-- | internal/manager/manager.go | 28 | ||||
-rw-r--r-- | internal/metrics/metrics.go | 39 | ||||
-rw-r--r-- | pkg/storage/loglist/loglist.go | 4 | ||||
-rw-r--r-- | pkg/storage/loglist/metadata.go | 26 |
6 files changed, 95 insertions, 34 deletions
diff --git a/contrib/silentct-check b/contrib/silentct-check index 97aa2b2..712517d 100755 --- a/contrib/silentct-check +++ b/contrib/silentct-check @@ -64,17 +64,17 @@ while IFS= read -r line; do case "$line" in silentct_log_index*) - key=$(extract_label "$line" "log_id") + key=$(extract_label "$line" "log_name") value=$(to_integer "$(extract_value "$line")") log_index["$key"]=$value ;; silentct_log_size*) - key=$(extract_label "$line" "log_id") + key=$(extract_label "$line" "log_name") value=$(to_integer "$(extract_value "$line")") log_size["$key"]=$value ;; silentct_log_timestamp*) - key=$(extract_label "$line" "log_id") + key=$(extract_label "$line" "log_name") value=$(to_integer "$(extract_value "$line")") log_timestamp["$key"]=$((value / 1000)) ;; @@ -92,12 +92,12 @@ done <"$metrics_file" #------------------------------------------------------------------------------- now=$(date +%s) -for log_id in "${!log_size[@]}"; do - backlog=$((log_size[$log_id] - log_index[$log_id])) - elapsed=$((now - log_timestamp[$log_id])) +for log_name in "${!log_size[@]}"; do + backlog=$((log_size[$log_name] - log_index[$log_name])) + elapsed=$((now - log_timestamp[$log_name])) - ((backlog < ALERT_BACKLOG)) || notice "log $log_id -- backlog is at $backlog" - ((elapsed < ALERT_FRESHNESS)) || notice "log $log_id -- latest timestamp at $(date -d @"${log_timestamp[$log_id]}")" + ((backlog < ALERT_BACKLOG)) || notice "$log_name -- backlog is at $backlog" + ((elapsed < ALERT_FRESHNESS)) || notice "$log_name -- latest timestamp at $(date -d @"${log_timestamp[$log_name]}")" done [[ $need_restart == 0 ]] || notice "silentct-mon needs to be restarted" diff --git a/docs/metrics.md b/docs/metrics.md index aac873e..627776a 100644 --- a/docs/metrics.md +++ b/docs/metrics.md @@ -27,31 +27,37 @@ Do not use for alerting, this metric is too noisy and currently used for debug. ``` # HELP silentct_log_index The next log entry to be downloaded. 
# TYPE silentct_log_index gauge -silentct_log_index{log_id="4e75a3275c9a10c3385b6cd4df3f52eb1df0e08e1b8d69c0b1fa64b1629a39df"} 7.30980064e+08 +silentct_log_index{log_id="4e75a3275c9a10c3385b6cd4df3f52eb1df0e08e1b8d69c0b1fa64b1629a39df",log_name="Google 'Argon2025h1'"} 7.30980064e+08 ``` `log_id` is a unique log identifier in hex, computed as in RFC 6962 §3.2. +`log_name` is a human-meaningful name of the log. + ## `"silentct_log_size"` ``` # HELP silentct_log_size The number of entries in the log. # TYPE silentct_log_size gauge -silentct_log_size{log_id="4e75a3275c9a10c3385b6cd4df3f52eb1df0e08e1b8d69c0b1fa64b1629a39df"} 7.31044085e+08 +silentct_log_size{log_id="4e75a3275c9a10c3385b6cd4df3f52eb1df0e08e1b8d69c0b1fa64b1629a39df",log_name="Google 'Argon2025h1'"} 7.31044085e+08 ``` `log_id` is a unique log identifier in hex, computed as in RFC 6962 §3.2. +`log_name` is a human-meaningful name of the log. + ## `"silentct_log_timestamp"` ``` # HELP silentct_log_timestamp The log's UNIX timestamp in ms. # TYPE silentct_log_timestamp gauge -silentct_log_timestamp{log_id="4e75a3275c9a10c3385b6cd4df3f52eb1df0e08e1b8d69c0b1fa64b1629a39df"} 1.737202578179e+12 +silentct_log_timestamp{log_id="4e75a3275c9a10c3385b6cd4df3f52eb1df0e08e1b8d69c0b1fa64b1629a39df",log_name="Google 'Argon2025h1'"} 1.737202578179e+12 ``` `log_id` is a unique log identifier in hex, computed as in RFC 6962 §3.2. +`log_name` is a human-meaningful name of the log. + ## `"silentct_need_restart"` ``` @@ -68,7 +74,7 @@ implementation can assure that all corner-cases are handled without restarts. 
``` # HELP silentct_unexpected_certificate_count Number of certificates without any allowlisting # TYPE silentct_unexpected_certificate_count gauge -silentct_unexpected_certificate_count{crt_sans="example.org www.example.org",log_id="4e75a3275c9a10c3385b6cd4df3f52eb1df0e08e1b8d69c0b1fa64b1629a39df",log_index="1234"} 1 +silentct_unexpected_certificate_count{crt_sans="example.org www.example.org",log_id="4e75a3275c9a10c3385b6cd4df3f52eb1df0e08e1b8d69c0b1fa64b1629a39df",log_index="1234",log_name="Google 'Argon2025h1'"} 1 ``` `crt_sans` are the subject alternative names in the unexpected certificate, @@ -78,6 +84,8 @@ space separated. `log_index` specifies the log entry that contains the unexpected certificate. +`log_name` is a human-meaningful name of the log. + See `STATE_DIRECTORY/crt_found/<log_id>-<log_index>.*` for further details. The `.json` file contains the downloaded log entry. The `.ascii` file contains the parsed leaf certificate in a human-readable format to make debugging easier. 
diff --git a/internal/manager/manager.go b/internal/manager/manager.go index d63d52d..b839502 100644 --- a/internal/manager/manager.go +++ b/internal/manager/manager.go @@ -15,6 +15,7 @@ import ( "rgdd.se/silentct/internal/monitor" "rgdd.se/silentct/pkg/policy" "rgdd.se/silentct/pkg/storage" + "rgdd.se/silentct/pkg/storage/loglist" ) type Config struct { @@ -146,7 +147,7 @@ func (mgr *Manager) startupConfig() error { return err } mgr.mconfigCh <- monitor.MonitoredLog{Config: log, State: state} - mgr.Metrics.LogState(state) + mgr.Metrics.LogState(loglist.FormatLogName(log), state) } return nil } @@ -171,7 +172,7 @@ func (mgr *Manager) removeLogs(logs []metadata.Log) { state, _ := mgr.GetMonitorState(log) mgr.Logger.Infof("removing log %s with %d entries in its backlog\n", log.URL, state.TreeSize-state.NextIndex) mgr.mconfigCh <- monitor.MonitoredLog{Config: log} - mgr.Metrics.RemoveLogState(state) + mgr.Metrics.RemoveLogState(loglist.FormatLogName(log), state) } } @@ -188,7 +189,7 @@ func (mgr *Manager) addLogs(ctx context.Context, logs []metadata.Log) { mgr.Logger.Infof("bootstrapping log %s at next index 0\n", log.URL) } mgr.mconfigCh <- monitor.MonitoredLog{Config: log, State: state} - mgr.Metrics.LogState(state) + mgr.Metrics.LogState(loglist.FormatLogName(log), state) } } @@ -210,14 +211,20 @@ func (mgr *Manager) monitorJob(msg monitor.Event) error { if err := mgr.SetMonitorState(msg.State.LogID, msg.State); err != nil { return err } - mgr.Metrics.LogState(msg.State) for _, err := range msg.Errors { mgr.errorJob(err) } + + // no metrics update if the log has just been removed (final event) + name, err := mgr.Storage.LogList.LogName(msg.State.SignedTreeHead.LogID) + if err == nil { + mgr.Metrics.LogState(name, msg.State) + } return nil } func (mgr *Manager) alertJob() error { + // See if there are any new unexpected certificates alerts, err := mgr.Index.TriggerAlerts() if err != nil { return err @@ -225,7 +232,18 @@ func (mgr *Manager) alertJob() error { for _, 
alert := range alerts { mgr.Logger.Noticef("unexpected certificate: no allowlisting for crt_sans=\"%s\", see log_id=\"%x\" log_index=\"%d\"\n", strings.Join(alert.SANs, " "), alert.LogID, alert.LogIndex) } - mgr.Metrics.UnexpectedCertificateCount(mgr.Storage.Index.Alerting()) + + // Update metrics for the current unexpected certificates + alerting := mgr.Storage.Index.Alerting() + var names []string + for _, alert := range alerting { + name, err := mgr.Storage.LogList.LogName(alert.LogID) + if err != nil { + name = "historic log" + } + names = append(names, name) + } + mgr.Metrics.UnexpectedCertificateCount(names, mgr.Storage.Index.Alerting()) return nil } diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go index 7a67a9c..aae46cd 100644 --- a/internal/metrics/metrics.go +++ b/internal/metrics/metrics.go @@ -31,21 +31,21 @@ func NewMetrics(registry *prometheus.Registry) *Metrics { Name: "silentct_log_index", Help: "The next log entry to be downloaded.", }, - []string{"log_id"}, + []string{"log_id", "log_name"}, ), logSize: prometheus.NewGaugeVec( prometheus.GaugeOpts{ Name: "silentct_log_size", Help: "The number of entries in the log.", }, - []string{"log_id"}, + []string{"log_id", "log_name"}, ), logTimestamp: prometheus.NewGaugeVec( prometheus.GaugeOpts{ Name: "silentct_log_timestamp", Help: "The log's UNIX timestamp in ms.", }, - []string{"log_id"}, + []string{"log_id", "log_name"}, ), needRestart: prometheus.NewGauge( prometheus.GaugeOpts{ @@ -58,7 +58,7 @@ func NewMetrics(registry *prometheus.Registry) *Metrics { Name: "silentct_unexpected_certificate_count", Help: "Number of certificates without any allowlisting", }, - []string{"log_id", "log_index", "crt_sans"}, + []string{"log_id", "log_name", "log_index", "crt_sans"}, ), } registry.MustRegister( @@ -72,26 +72,33 @@ func NewMetrics(registry *prometheus.Registry) *Metrics { return m } -func (m *Metrics) LogState(state monitor.State) { - id := fmt.Sprintf("%x", state.LogID) - 
m.logIndex.WithLabelValues(id).Set(float64(state.NextIndex)) - m.logSize.WithLabelValues(id).Set(float64(state.TreeSize)) - m.logTimestamp.WithLabelValues(id).Set(float64(state.Timestamp)) +func (m *Metrics) LogState(logName string, state monitor.State) { + labels := prometheus.Labels{ + "log_id": fmt.Sprintf("%x", state.LogID[:]), + "log_name": logName, + } + m.logIndex.With(labels).Set(float64(state.NextIndex)) + m.logSize.With(labels).Set(float64(state.TreeSize)) + m.logTimestamp.With(labels).Set(float64(state.Timestamp)) } -func (m *Metrics) RemoveLogState(state monitor.State) { - id := fmt.Sprintf("%x", state.LogID) - m.logIndex.Delete(prometheus.Labels{"id": id}) - m.logSize.Delete(prometheus.Labels{"id": id}) - m.logTimestamp.Delete(prometheus.Labels{"id": id}) +func (m *Metrics) RemoveLogState(logName string, state monitor.State) { + labels := prometheus.Labels{ + "log_id": fmt.Sprintf("%x", state.LogID[:]), + "log_name": logName, + } + m.logIndex.Delete(labels) + m.logSize.Delete(labels) + m.logTimestamp.Delete(labels) } -func (m *Metrics) UnexpectedCertificateCount(alerts []index.CertificateInfo) { +func (m *Metrics) UnexpectedCertificateCount(logNames []string, alerts []index.CertificateInfo) { m.unexpectedCertificateCount.Reset() - for _, alert := range alerts { + for i, alert := range alerts { labels := prometheus.Labels{ "crt_sans": strings.Join(alert.SANs, " "), "log_id": fmt.Sprintf("%x", alert.LogID), + "log_name": logNames[i], "log_index": fmt.Sprintf("%d", alert.LogIndex), } m.unexpectedCertificateCount.With(labels).Set(1) diff --git a/pkg/storage/loglist/loglist.go b/pkg/storage/loglist/loglist.go index a37cb32..f282113 100644 --- a/pkg/storage/loglist/loglist.go +++ b/pkg/storage/loglist/loglist.go @@ -72,6 +72,10 @@ func New(cfg Config) (LogList, error) { return ll, nil } +func (ll *LogList) LogName(logID [32]byte) (string, error) { + return metadataLogName(ll.md, logID) +} + func (ll *LogList) IsRecent() bool { return 
time.Now().Before(ll.md.CreatedAt.Add(ll.cfg.MetadataIsRecent)) } diff --git a/pkg/storage/loglist/metadata.go b/pkg/storage/loglist/metadata.go index adacf81..96d035c 100644 --- a/pkg/storage/loglist/metadata.go +++ b/pkg/storage/loglist/metadata.go @@ -1,6 +1,11 @@ package loglist -import "gitlab.torproject.org/rgdd/ct/pkg/metadata" +import ( + "fmt" + "strings" + + "gitlab.torproject.org/rgdd/ct/pkg/metadata" +) // FIXME: helpers that should probably be in the upstream package @@ -13,6 +18,25 @@ func metadataFindLog(md metadata.Metadata, target metadata.Log) bool { return false } +func metadataLogName(md metadata.Metadata, targetID [32]byte) (string, error) { + for _, operator := range md.Operators { + for _, log := range operator.Logs { + id, _ := log.Key.ID() + if id == targetID { + return FormatLogName(log), nil + } + } + } + return "", fmt.Errorf("no match for log ID: %x", targetID[:]) +} + +func FormatLogName(log metadata.Log) string { + if log.Description != nil { + return *log.Description + } + return strings.TrimSuffix("https://", string(log.URL)) +} + func findLog(logs []metadata.Log, target metadata.Log) bool { targetID, _ := target.Key.ID() for _, log := range logs { |