aboutsummaryrefslogtreecommitdiff
path: root/internal
diff options
context:
space:
mode:
Diffstat (limited to 'internal')
-rw-r--r--internal/manager/manager.go31
-rw-r--r--internal/metrics/metrics.go105
-rw-r--r--internal/monitor/backoff.go56
-rw-r--r--internal/monitor/monitor.go3
4 files changed, 146 insertions, 49 deletions
diff --git a/internal/manager/manager.go b/internal/manager/manager.go
index 90f6507..b839502 100644
--- a/internal/manager/manager.go
+++ b/internal/manager/manager.go
@@ -5,6 +5,7 @@ import (
"errors"
"fmt"
"os"
+ "strings"
"time"
"gitlab.torproject.org/rgdd/ct/pkg/metadata"
@@ -14,6 +15,7 @@ import (
"rgdd.se/silentct/internal/monitor"
"rgdd.se/silentct/pkg/policy"
"rgdd.se/silentct/pkg/storage"
+ "rgdd.se/silentct/pkg/storage/loglist"
)
type Config struct {
@@ -145,7 +147,7 @@ func (mgr *Manager) startupConfig() error {
return err
}
mgr.mconfigCh <- monitor.MonitoredLog{Config: log, State: state}
- mgr.Metrics.LogState(state)
+ mgr.Metrics.LogState(loglist.FormatLogName(log), state)
}
return nil
}
@@ -170,7 +172,7 @@ func (mgr *Manager) removeLogs(logs []metadata.Log) {
state, _ := mgr.GetMonitorState(log)
mgr.Logger.Infof("removing log %s with %d entries in its backlog\n", log.URL, state.TreeSize-state.NextIndex)
mgr.mconfigCh <- monitor.MonitoredLog{Config: log}
- mgr.Metrics.RemoveLogState(state)
+ mgr.Metrics.RemoveLogState(loglist.FormatLogName(log), state)
}
}
@@ -187,7 +189,7 @@ func (mgr *Manager) addLogs(ctx context.Context, logs []metadata.Log) {
mgr.Logger.Infof("bootstrapping log %s at next index 0\n", log.URL)
}
mgr.mconfigCh <- monitor.MonitoredLog{Config: log, State: state}
- mgr.Metrics.LogState(state)
+ mgr.Metrics.LogState(loglist.FormatLogName(log), state)
}
}
@@ -209,22 +211,39 @@ func (mgr *Manager) monitorJob(msg monitor.Event) error {
if err := mgr.SetMonitorState(msg.State.LogID, msg.State); err != nil {
return err
}
- mgr.Metrics.LogState(msg.State)
for _, err := range msg.Errors {
mgr.errorJob(err)
}
+
+ // no metrics update if the log has just been removed (final event)
+ name, err := mgr.Storage.LogList.LogName(msg.State.SignedTreeHead.LogID)
+ if err == nil {
+ mgr.Metrics.LogState(name, msg.State)
+ }
return nil
}
func (mgr *Manager) alertJob() error {
+ // See if there are any new unexpected certificates
alerts, err := mgr.Index.TriggerAlerts()
if err != nil {
return err
}
for _, alert := range alerts {
- mgr.Logger.Noticef("certificate mis-issuance? No allowlisting for %s\n", alert.StoredAt)
+ mgr.Logger.Noticef("unexpected certificate: no allowlisting for crt_sans=\"%s\", see log_id=\"%x\" log_index=\"%d\"\n", strings.Join(alert.SANs, " "), alert.LogID, alert.LogIndex)
+ }
+
+ // Update metrics for the current unexpected certificates
+ alerting := mgr.Storage.Index.Alerting()
+ var names []string
+ for _, alert := range alerting {
+ name, err := mgr.Storage.LogList.LogName(alert.LogID)
+ if err != nil {
+ name = "historic log"
+ }
+ names = append(names, name)
}
- mgr.Metrics.CertificateAlert(mgr.Storage.Index.Alerting())
+ mgr.Metrics.UnexpectedCertificateCount(names, mgr.Storage.Index.Alerting())
return nil
}
diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go
index c5ff0d6..aae46cd 100644
--- a/internal/metrics/metrics.go
+++ b/internal/metrics/metrics.go
@@ -1,55 +1,51 @@
package metrics
import (
+ "fmt"
+ "strings"
+
"github.com/prometheus/client_golang/prometheus"
"rgdd.se/silentct/internal/monitor"
"rgdd.se/silentct/pkg/storage/index"
)
// Metrics groups the Prometheus collectors that NewMetrics creates and
// registers: one error counter, per-log gauges (index, size, timestamp), a
// restart gauge, and a gauge vector for unexpected certificates.
type Metrics struct {
- logSize *prometheus.GaugeVec
- logIndex *prometheus.GaugeVec
- logTimestamp *prometheus.GaugeVec
- certificateAlert *prometheus.GaugeVec
- errorCounter prometheus.Counter
- needRestart prometheus.Gauge
+ errorCounter prometheus.Counter
+ logIndex *prometheus.GaugeVec
+ logSize *prometheus.GaugeVec
+ logTimestamp *prometheus.GaugeVec
+ needRestart prometheus.Gauge
+ unexpectedCertificateCount *prometheus.GaugeVec
}
func NewMetrics(registry *prometheus.Registry) *Metrics {
m := &Metrics{
- logSize: prometheus.NewGaugeVec(
- prometheus.GaugeOpts{
- Name: "silentct_log_size",
- Help: "The number of entries in the log.",
+ errorCounter: prometheus.NewCounter(
+ prometheus.CounterOpts{
+ Name: "silentct_error_counter",
+ Help: "The number of errors propagated to the main loop.",
},
- []string{"id"},
),
logIndex: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "silentct_log_index",
Help: "The next log entry to be downloaded.",
},
- []string{"id"},
+ []string{"log_id", "log_name"},
),
- logTimestamp: prometheus.NewGaugeVec(
+ logSize: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
- Name: "silentct_log_timestamp",
- Help: "The log's UNIX timestamp in ms.",
+ Name: "silentct_log_size",
+ Help: "The number of entries in the log.",
},
- []string{"id"},
+ []string{"log_id", "log_name"},
),
- certificateAlert: prometheus.NewGaugeVec(
+ logTimestamp: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
- Name: "silentct_certificate_alert",
- Help: "The time the certificate without allowlisting was found.",
- },
- []string{"stored_at"},
- ),
- errorCounter: prometheus.NewCounter(
- prometheus.CounterOpts{
- Name: "silentct_error_counter",
- Help: "The number of errors propagated to the main loop.",
+ Name: "silentct_log_timestamp",
+ Help: "The log's UNIX timestamp in ms.",
},
+ []string{"log_id", "log_name"},
),
needRestart: prometheus.NewGauge(
prometheus.GaugeOpts{
@@ -57,30 +53,55 @@ func NewMetrics(registry *prometheus.Registry) *Metrics {
Help: "A non-zero value if the monitor needs restarting.",
},
),
+ unexpectedCertificateCount: prometheus.NewGaugeVec(
+ prometheus.GaugeOpts{
+ Name: "silentct_unexpected_certificate_count",
+ Help: "Number of certificates without any allowlisting",
+ },
+ []string{"log_id", "log_name", "log_index", "crt_sans"},
+ ),
}
-
- registry.MustRegister(m.logSize, m.logIndex, m.logTimestamp, m.certificateAlert, m.errorCounter, m.needRestart)
+ registry.MustRegister(
+ m.errorCounter,
+ m.logIndex,
+ m.logSize,
+ m.logTimestamp,
+ m.needRestart,
+ m.unexpectedCertificateCount,
+ )
return m
}
-func (m *Metrics) LogState(state monitor.State) {
- id := state.LogID.Base64String()
- m.logIndex.WithLabelValues(id).Set(float64(state.NextIndex))
- m.logSize.WithLabelValues(id).Set(float64(state.TreeSize))
- m.logTimestamp.WithLabelValues(id).Set(float64(state.Timestamp))
+// LogState publishes the next index, tree size and timestamp of a log's
+// monitor state. The series are labelled with the hex-encoded log ID and the
+// given log name.
+func (m *Metrics) LogState(logName string, state monitor.State) {
+ // Label values are positional: log_id first, then log_name.
+ id := fmt.Sprintf("%x", state.LogID[:])
+ m.logIndex.WithLabelValues(id, logName).Set(float64(state.NextIndex))
+ m.logSize.WithLabelValues(id, logName).Set(float64(state.TreeSize))
+ m.logTimestamp.WithLabelValues(id, logName).Set(float64(state.Timestamp))
+}
-func (m *Metrics) RemoveLogState(state monitor.State) {
- id := state.LogID.Base64String()
- m.logIndex.Delete(prometheus.Labels{"id": id})
- m.logSize.Delete(prometheus.Labels{"id": id})
- m.logTimestamp.Delete(prometheus.Labels{"id": id})
+// RemoveLogState drops the index, size and timestamp series that belong to
+// the log identified by the hex-encoded log ID and the given log name.
+func (m *Metrics) RemoveLogState(logName string, state monitor.State) {
+ // Label values are positional: log_id first, then log_name.
+ id := fmt.Sprintf("%x", state.LogID[:])
+ m.logIndex.DeleteLabelValues(id, logName)
+ m.logSize.DeleteLabelValues(id, logName)
+ m.logTimestamp.DeleteLabelValues(id, logName)
+}
-func (m *Metrics) CertificateAlert(alerts []index.CertificateInfo) {
- m.certificateAlert.Reset()
- for _, alert := range alerts {
- m.certificateAlert.WithLabelValues(alert.StoredAt).Set(float64(alert.ObservedAt.Unix()))
+// UnexpectedCertificateCount replaces all unexpected-certificate series with
+// one series (value 1) per entry in alerts. logNames[i] is the log name used
+// for alerts[i]; if logNames is shorter than alerts, the remaining series are
+// labelled with the log name "unknown" instead of panicking.
+func (m *Metrics) UnexpectedCertificateCount(logNames []string, alerts []index.CertificateInfo) {
+ // Start from a clean slate so that resolved alerts disappear.
+ m.unexpectedCertificateCount.Reset()
+ for i, alert := range alerts {
+ // Guard against a caller passing slices of different lengths.
+ logName := "unknown"
+ if i < len(logNames) {
+ logName = logNames[i]
+ }
+ labels := prometheus.Labels{
+ "crt_sans": strings.Join(alert.SANs, " "),
+ "log_id": fmt.Sprintf("%x", alert.LogID),
+ "log_name": logName,
+ "log_index": fmt.Sprintf("%d", alert.LogIndex),
+ }
+ m.unexpectedCertificateCount.With(labels).Set(1)
}
}
diff --git a/internal/monitor/backoff.go b/internal/monitor/backoff.go
new file mode 100644
index 0000000..63c5f55
--- /dev/null
+++ b/internal/monitor/backoff.go
@@ -0,0 +1,56 @@
+package monitor
+
+import (
+ "context"
+ "errors"
+
+ ct "github.com/google/certificate-transparency-go"
+ "github.com/google/certificate-transparency-go/client"
+ "github.com/google/certificate-transparency-go/jsonclient"
+ "github.com/google/trillian/client/backoff"
+)
+
+// backoffClient wraps client.LogClient so that we always back off on
+// get-entries 4XX and 5XX responses. Backoff is on by default for get-sth
+// already, and our silentct usage is guaranteed to not do any hammering on
+// any of the proof endpoints.
+//
+// For reference on this issue, see:
+// https://github.com/google/certificate-transparency-go/issues/898
+type backoffClient struct {
+ cli *client.LogClient // wrapped log client that every method delegates to
+}
+
+// BaseURI reports the base URI of the wrapped log client.
+func (bc *backoffClient) BaseURI() string {
+ uri := bc.cli.BaseURI()
+ return uri
+}
+
+// GetSTH forwards the request to the wrapped log client without changing the
+// result or the error.
+func (bc *backoffClient) GetSTH(ctx context.Context) (*ct.SignedTreeHead, error) {
+ sth, err := bc.cli.GetSTH(ctx)
+ return sth, err
+}
+
+// GetSTHConsistency forwards the request to the wrapped log client without
+// changing the result or the error.
+func (bc *backoffClient) GetSTHConsistency(ctx context.Context, first, second uint64) ([][]byte, error) {
+ proof, err := bc.cli.GetSTHConsistency(ctx, first, second)
+ return proof, err
+}
+
+// GetProofByHash forwards the request to the wrapped log client without
+// changing the result or the error.
+func (bc *backoffClient) GetProofByHash(ctx context.Context, hash []byte, treeSize uint64) (*ct.GetProofByHashResponse, error) {
+ rsp, err := bc.cli.GetProofByHash(ctx, hash, treeSize)
+ return rsp, err
+}
+
+func (bc *backoffClient) GetRawEntries(ctx context.Context, start, end int64) (*ct.GetEntriesResponse, error) {
+ rsp, err := bc.cli.GetRawEntries(ctx, start, end)
+ if err != nil {
+ jcErr, ok := err.(jsonclient.RspError)
+ if !ok {
+ return rsp, err
+ }
+ if jcErr.StatusCode < 400 || jcErr.StatusCode >= 600 {
+ return rsp, err
+ }
+ // This ensures we never start hammering when the status code is 4XX or
+ // 5XX. Probably not the right thing to do in all cases, but since the
+ // download library we're using starts hammering if the log suddenly
+ // serves something unexpected this seems like a good safety precaution.
+ // Users of the silentct monitor eventually notice they get no entries.
+ return rsp, backoff.RetriableErrorf("get-entries: %v", err)
+ }
+ return rsp, err
+}
diff --git a/internal/monitor/monitor.go b/internal/monitor/monitor.go
index 1f068b2..2575977 100644
--- a/internal/monitor/monitor.go
+++ b/internal/monitor/monitor.go
@@ -173,7 +173,8 @@ func (mon *Monitor) newTailRFC6962(log MonitoredLog) (tail, error) {
return tail{}, err
}
- return tail{cfg: mon.cfg, scanner: cli, checker: cli, matcher: mon.matcher}, nil
+ bc := &backoffClient{cli: cli}
+ return tail{cfg: mon.cfg, scanner: bc, checker: bc, matcher: mon.matcher}, nil
}
func (mon *Monitor) newTailTile(cfg MonitoredLog) (tail, error) {