aboutsummaryrefslogtreecommitdiff
path: root/internal
diff options
context:
space:
mode:
Diffstat (limited to 'internal')
-rw-r--r--internal/manager/manager.go28
-rw-r--r--internal/metrics/metrics.go39
-rw-r--r--internal/monitor/backoff.go83
3 files changed, 126 insertions, 24 deletions
diff --git a/internal/manager/manager.go b/internal/manager/manager.go
index d63d52d..b839502 100644
--- a/internal/manager/manager.go
+++ b/internal/manager/manager.go
@@ -15,6 +15,7 @@ import (
"rgdd.se/silentct/internal/monitor"
"rgdd.se/silentct/pkg/policy"
"rgdd.se/silentct/pkg/storage"
+ "rgdd.se/silentct/pkg/storage/loglist"
)
type Config struct {
@@ -146,7 +147,7 @@ func (mgr *Manager) startupConfig() error {
return err
}
mgr.mconfigCh <- monitor.MonitoredLog{Config: log, State: state}
- mgr.Metrics.LogState(state)
+ mgr.Metrics.LogState(loglist.FormatLogName(log), state)
}
return nil
}
@@ -171,7 +172,7 @@ func (mgr *Manager) removeLogs(logs []metadata.Log) {
state, _ := mgr.GetMonitorState(log)
mgr.Logger.Infof("removing log %s with %d entries in its backlog\n", log.URL, state.TreeSize-state.NextIndex)
mgr.mconfigCh <- monitor.MonitoredLog{Config: log}
- mgr.Metrics.RemoveLogState(state)
+ mgr.Metrics.RemoveLogState(loglist.FormatLogName(log), state)
}
}
@@ -188,7 +189,7 @@ func (mgr *Manager) addLogs(ctx context.Context, logs []metadata.Log) {
mgr.Logger.Infof("bootstrapping log %s at next index 0\n", log.URL)
}
mgr.mconfigCh <- monitor.MonitoredLog{Config: log, State: state}
- mgr.Metrics.LogState(state)
+ mgr.Metrics.LogState(loglist.FormatLogName(log), state)
}
}
@@ -210,14 +211,20 @@ func (mgr *Manager) monitorJob(msg monitor.Event) error {
if err := mgr.SetMonitorState(msg.State.LogID, msg.State); err != nil {
return err
}
- mgr.Metrics.LogState(msg.State)
for _, err := range msg.Errors {
mgr.errorJob(err)
}
+
+ // Skip the metrics update if the log has just been removed (this is its final event)
+ name, err := mgr.Storage.LogList.LogName(msg.State.SignedTreeHead.LogID)
+ if err == nil {
+ mgr.Metrics.LogState(name, msg.State)
+ }
return nil
}
func (mgr *Manager) alertJob() error {
+ // See if there are any new unexpected certificates
alerts, err := mgr.Index.TriggerAlerts()
if err != nil {
return err
@@ -225,7 +232,18 @@ func (mgr *Manager) alertJob() error {
for _, alert := range alerts {
mgr.Logger.Noticef("unexpected certificate: no allowlisting for crt_sans=\"%s\", see log_id=\"%x\" log_index=\"%d\"\n", strings.Join(alert.SANs, " "), alert.LogID, alert.LogIndex)
}
- mgr.Metrics.UnexpectedCertificateCount(mgr.Storage.Index.Alerting())
+
+ // Update metrics for the current unexpected certificates
+ alerting := mgr.Storage.Index.Alerting()
+ var names []string
+ for _, alert := range alerting {
+ name, err := mgr.Storage.LogList.LogName(alert.LogID)
+ if err != nil {
+ name = "historic log"
+ }
+ names = append(names, name)
+ }
+ mgr.Metrics.UnexpectedCertificateCount(names, mgr.Storage.Index.Alerting())
return nil
}
diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go
index 7a67a9c..aae46cd 100644
--- a/internal/metrics/metrics.go
+++ b/internal/metrics/metrics.go
@@ -31,21 +31,21 @@ func NewMetrics(registry *prometheus.Registry) *Metrics {
Name: "silentct_log_index",
Help: "The next log entry to be downloaded.",
},
- []string{"log_id"},
+ []string{"log_id", "log_name"},
),
logSize: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "silentct_log_size",
Help: "The number of entries in the log.",
},
- []string{"log_id"},
+ []string{"log_id", "log_name"},
),
logTimestamp: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "silentct_log_timestamp",
Help: "The log's UNIX timestamp in ms.",
},
- []string{"log_id"},
+ []string{"log_id", "log_name"},
),
needRestart: prometheus.NewGauge(
prometheus.GaugeOpts{
@@ -58,7 +58,7 @@ func NewMetrics(registry *prometheus.Registry) *Metrics {
Name: "silentct_unexpected_certificate_count",
Help: "Number of certificates without any allowlisting",
},
- []string{"log_id", "log_index", "crt_sans"},
+ []string{"log_id", "log_name", "log_index", "crt_sans"},
),
}
registry.MustRegister(
@@ -72,26 +72,33 @@ func NewMetrics(registry *prometheus.Registry) *Metrics {
return m
}
-func (m *Metrics) LogState(state monitor.State) {
- id := fmt.Sprintf("%x", state.LogID)
- m.logIndex.WithLabelValues(id).Set(float64(state.NextIndex))
- m.logSize.WithLabelValues(id).Set(float64(state.TreeSize))
- m.logTimestamp.WithLabelValues(id).Set(float64(state.Timestamp))
+func (m *Metrics) LogState(logName string, state monitor.State) {
+ labels := prometheus.Labels{
+ "log_id": fmt.Sprintf("%x", state.LogID[:]),
+ "log_name": logName,
+ }
+ m.logIndex.With(labels).Set(float64(state.NextIndex))
+ m.logSize.With(labels).Set(float64(state.TreeSize))
+ m.logTimestamp.With(labels).Set(float64(state.Timestamp))
}
-func (m *Metrics) RemoveLogState(state monitor.State) {
- id := fmt.Sprintf("%x", state.LogID)
- m.logIndex.Delete(prometheus.Labels{"id": id})
- m.logSize.Delete(prometheus.Labels{"id": id})
- m.logTimestamp.Delete(prometheus.Labels{"id": id})
+func (m *Metrics) RemoveLogState(logName string, state monitor.State) {
+ labels := prometheus.Labels{
+ "log_id": fmt.Sprintf("%x", state.LogID[:]),
+ "log_name": logName,
+ }
+ m.logIndex.Delete(labels)
+ m.logSize.Delete(labels)
+ m.logTimestamp.Delete(labels)
}
-func (m *Metrics) UnexpectedCertificateCount(alerts []index.CertificateInfo) {
+func (m *Metrics) UnexpectedCertificateCount(logNames []string, alerts []index.CertificateInfo) {
m.unexpectedCertificateCount.Reset()
- for _, alert := range alerts {
+ for i, alert := range alerts {
labels := prometheus.Labels{
"crt_sans": strings.Join(alert.SANs, " "),
"log_id": fmt.Sprintf("%x", alert.LogID),
+ "log_name": logNames[i],
"log_index": fmt.Sprintf("%d", alert.LogIndex),
}
m.unexpectedCertificateCount.With(labels).Set(1)
diff --git a/internal/monitor/backoff.go b/internal/monitor/backoff.go
index 63c5f55..3bfff7e 100644
--- a/internal/monitor/backoff.go
+++ b/internal/monitor/backoff.go
@@ -2,6 +2,7 @@ package monitor
import (
"context"
+ "time"
ct "github.com/google/certificate-transparency-go"
"github.com/google/certificate-transparency-go/client"
@@ -15,6 +16,10 @@ import (
//
// For reference on this issue, see:
// https://github.com/google/certificate-transparency-go/issues/898
+//
+// Update: retries were added for get-sth and proof fetching, but only
+// because we need 3x queries that succeed in a row, and some logs seem to
+// rate limit globally for all endpoints rather than per endpoint.
type backoffClient struct {
cli *client.LogClient
}
@@ -24,15 +29,75 @@ func (bc *backoffClient) BaseURI() string {
}
func (bc *backoffClient) GetSTH(ctx context.Context) (*ct.SignedTreeHead, error) {
- return bc.cli.GetSTH(ctx)
+ backoff := 1
+ for {
+ rsp, err := bc.cli.GetSTH(ctx)
+ if err == nil {
+ return rsp, nil
+ }
+
+ jcErr, ok := err.(jsonclient.RspError)
+ if !ok {
+ return rsp, err
+ }
+ if jcErr.StatusCode != 429 {
+ return rsp, err
+ }
+ if err := sleep(ctx, time.Duration(backoff)*time.Second); err != nil {
+ return nil, err
+ }
+ if backoff < 32 {
+ backoff = 2 * backoff
+ }
+ }
}
func (bc *backoffClient) GetSTHConsistency(ctx context.Context, first, second uint64) ([][]byte, error) {
- return bc.cli.GetSTHConsistency(ctx, first, second)
+ backoff := 1
+ for {
+ rsp, err := bc.cli.GetSTHConsistency(ctx, first, second)
+ if err == nil {
+ return rsp, nil
+ }
+
+ jcErr, ok := err.(jsonclient.RspError)
+ if !ok {
+ return rsp, err
+ }
+ if jcErr.StatusCode != 429 {
+ return rsp, err
+ }
+ if err := sleep(ctx, time.Duration(backoff)*time.Second); err != nil {
+ return nil, err
+ }
+ if backoff < 32 {
+ backoff = 2 * backoff
+ }
+ }
}
func (bc *backoffClient) GetProofByHash(ctx context.Context, hash []byte, treeSize uint64) (*ct.GetProofByHashResponse, error) {
- return bc.cli.GetProofByHash(ctx, hash, treeSize)
+ backoff := 1
+ for {
+ rsp, err := bc.cli.GetProofByHash(ctx, hash, treeSize)
+ if err == nil {
+ return rsp, nil
+ }
+
+ jcErr, ok := err.(jsonclient.RspError)
+ if !ok {
+ return rsp, err
+ }
+ if jcErr.StatusCode != 429 {
+ return rsp, err
+ }
+ if err := sleep(ctx, time.Duration(backoff)*time.Second); err != nil {
+ return nil, err
+ }
+ if backoff < 32 {
+ backoff = 2 * backoff
+ }
+ }
}
func (bc *backoffClient) GetRawEntries(ctx context.Context, start, end int64) (*ct.GetEntriesResponse, error) {
@@ -54,3 +119,15 @@ func (bc *backoffClient) GetRawEntries(ctx context.Context, start, end int64) (*
}
return rsp, err
}
+
+func sleep(ctx context.Context, d time.Duration) error {
+ timer := time.NewTimer(d)
+ defer timer.Stop()
+
+ select {
+ case <-ctx.Done():
+ return ctx.Err()
+ case <-timer.C:
+ return nil
+ }
+}