aboutsummaryrefslogtreecommitdiff
path: root/pkg/monitor
diff options
context:
space:
mode:
authorRasmus Dahlberg <rasmus@rgdd.se>2023-12-31 09:39:25 +0100
committerRasmus Dahlberg <rasmus@rgdd.se>2024-01-07 20:22:23 +0100
commite18d36ebae30536c77c61cd5da123991e0ca1629 (patch)
treebf4880c0019a6009ab1b671e23ef4a1a4a5e8e08 /pkg/monitor
parent54d980afcbd6f0011d6a162e0003587d26a3e311 (diff)
Add drafty prototype
Diffstat (limited to 'pkg/monitor')
-rw-r--r--pkg/monitor/chunks.go88
-rw-r--r--pkg/monitor/errors.go41
-rw-r--r--pkg/monitor/matcher.go90
-rw-r--r--pkg/monitor/messages.go40
-rw-r--r--pkg/monitor/monitor.go286
5 files changed, 0 insertions, 545 deletions
diff --git a/pkg/monitor/chunks.go b/pkg/monitor/chunks.go
deleted file mode 100644
index 87871b9..0000000
--- a/pkg/monitor/chunks.go
+++ /dev/null
@@ -1,88 +0,0 @@
-package monitor
-
-//
-// A min heap of chunks, oredered on each chunk's start index.
-//
-// Credit: inspiration to use a heap from Aaron Gable, see
-// https://github.com/aarongable/ctaudit
-//
-
-import (
- "container/heap"
- "crypto/sha256"
-)
-
-type chunk struct {
- startIndex uint64 // Index of the first leaf
- leafHashes [][sha256.Size]byte // List of consecutive leaf hashes
- matches []LogEntry // Leaves that matches some criteria
- errors []error // Errors that ocurred while parsing leaves
-}
-
-type chunks []*chunk
-
-func newChunks() *chunks {
- var h chunks
- heap.Init((*internal)(&h))
- return &h
-}
-
-func (h *chunks) push(c *chunk) {
- heap.Push((*internal)(h), c)
-}
-
-func (h *chunks) pop() *chunk {
- x := heap.Pop((*internal)(h))
- return x.(*chunk)
-}
-
-// gap returns true if there's a gap between the provided start index and the
-// top most chunk. If the top most chunk is in sequence, it is merged with
-// any following chunks that are also in sequence to form one larger chunk.
-func (h *chunks) gap(start uint64) bool {
- if len(*h) == 0 {
- return true
- }
-
- top := h.pop()
- if start != top.startIndex {
- h.push(top)
- return true
- }
-
- for len(*h) > 0 {
- c := h.pop()
- if c.startIndex != top.startIndex+uint64(len(top.leafHashes)) {
- h.push(c)
- break
- }
-
- top.leafHashes = append(top.leafHashes, c.leafHashes...)
- top.matches = append(top.matches, c.matches...)
- top.errors = append(top.errors, c.errors...)
- }
-
- h.push(top)
- return false
-}
-
-// internal implements the heap interface, see example:
-// https://cs.opensource.google/go/go/+/refs/tags/go1.21.5:src/container/heap/example_intheap_test.go
-type internal chunks
-
-func (h internal) Len() int { return len(h) }
-func (h internal) Less(i, j int) bool { return h[i].startIndex < h[j].startIndex }
-func (h internal) Swap(i, j int) { h[i], h[j] = h[j], h[i] }
-
-func (h *internal) Push(x any) {
- *h = append(*h, x.(*chunk))
-}
-
-func (h *internal) Pop() any {
- old := *h
- n := len(old)
- x := old[n-1]
- old[n-1] = nil // avoid memory leak
- *h = old[:n-1]
- return x
-}
diff --git a/pkg/monitor/errors.go b/pkg/monitor/errors.go
deleted file mode 100644
index 4d676af..0000000
--- a/pkg/monitor/errors.go
+++ /dev/null
@@ -1,41 +0,0 @@
-package monitor
-
-import (
- "fmt"
-
- ct "github.com/google/certificate-transparency-go"
-)
-
-// ErrorFetch occurs if there's a problem hitting the log's HTTP API. An STH is
-// provided if available, since it might carry evidence of some log misbehavior.
-type ErrorFetch struct {
- URL string
- Msg string
- Err error
- STH *ct.SignedTreeHead
-}
-
-func (e ErrorFetch) Error() string {
- return fmt.Sprintf("%s: %s: %v", e.URL, e.Msg, e.Err)
-}
-
-// ErrorMerkleTree occurs if the log's Merkle tree can't be verified. An STH is
-// provided if available (i.e., won't be available for internal tree building).
-type ErrorMerkleTree struct {
- URL string
- Msg string
- Err error
- STH *ct.SignedTreeHead
-}
-
-func (e ErrorMerkleTree) Error() string {
- return fmt.Sprintf("%s: %s: %v", e.URL, e.Msg, e.Err)
-}
-
-// TODO: MMD violations
-// TODO: Growing read-only logs
-
-// noout implements the Logger interface to discard unwanted output
-type noout struct{}
-
-func (n *noout) Printf(string, ...interface{}) {}
diff --git a/pkg/monitor/matcher.go b/pkg/monitor/matcher.go
deleted file mode 100644
index fa3a894..0000000
--- a/pkg/monitor/matcher.go
+++ /dev/null
@@ -1,90 +0,0 @@
-package monitor
-
-import (
- "fmt"
- "strings"
-
- ct "github.com/google/certificate-transparency-go"
-)
-
-type Matcher interface {
- Match(leafInput, extraData []byte) (bool, error)
-}
-
-// MatchAll matches all certificates
-type MatchAll struct{}
-
-func (m *MatchAll) Match(leafInput, extraData []byte) (bool, error) {
- return true, nil
-}
-
-// MatchWildcards matches a list of wildcards, see the MatchWildcard type
-type MatchWildcards []MatchWildcard
-
-func (m *MatchWildcards) Match(leafInput, extraData []byte) (bool, error) {
- sans, err := getSANs(ct.LeafEntry{LeafInput: leafInput, ExtraData: extraData})
- if err != nil {
- return false, err
- }
- return m.match(sans), nil
-}
-
-func (m *MatchWildcards) match(sans []string) bool {
- for _, mw := range (*m)[:] {
- if mw.match(sans) {
- return true
- }
- }
- return false
-}
-
-// MatchWildcard exclude matches for `.*<Exclude>\.<Wildcard>`, but will
-// otherwise match on any `.*\.<Wildcard>` as well as SANs equal to <Wildcard>.
-//
-// For example, let <Wildcard> be example.org and Exclude be [foo, bar]. Then
-// example.org and www.example.org would match, whereas foo.example.org,
-// sub.foo.example.org, and bar.example.org. would not match.
-type MatchWildcard struct {
- Wildcard string `json:"wildcard"`
- Excludes []string `json:"excludes"`
-}
-
-func (m *MatchWildcard) match(sans []string) bool {
- for _, san := range sans {
- if san == m.Wildcard {
- return true
- }
- if strings.HasSuffix(san, "."+m.Wildcard) && !m.exclude(san) {
- return true
- }
- }
- return false
-}
-
-func (m *MatchWildcard) exclude(san string) bool {
- for _, exclude := range m.Excludes {
- suffix := exclude + "." + m.Wildcard
- if strings.HasSuffix(san, suffix) {
- return true
- }
- }
- return false
-}
-
-func getSANs(entry ct.LeafEntry) ([]string, error) {
- // Warning: here be dragons, parsing of DNS names in certificates...
- e, err := ct.LogEntryFromLeaf(0, &entry)
- if err != nil {
- return nil, fmt.Errorf("parse leaf: %v", err)
- }
- if e.Precert == nil && e.X509Cert == nil {
- return nil, fmt.Errorf("neither precertificate nor certificate in leaf")
- }
- if e.Precert != nil && e.X509Cert != nil {
- return nil, fmt.Errorf("both certificate and precertificate in leaf")
- }
- if e.Precert != nil {
- return e.Precert.TBSCertificate.DNSNames, nil
- }
- return e.X509Cert.DNSNames, nil
-}
diff --git a/pkg/monitor/messages.go b/pkg/monitor/messages.go
deleted file mode 100644
index 717aae6..0000000
--- a/pkg/monitor/messages.go
+++ /dev/null
@@ -1,40 +0,0 @@
-package monitor
-
-import (
- ct "github.com/google/certificate-transparency-go"
- "gitlab.torproject.org/rgdd/ct/pkg/metadata"
-)
-
-// MessageLogConfig provides information about a log the monitor is downloading
-type MessageLogConfig struct {
- Metadata metadata.Log
- State MonitorState
-}
-
-// MessageLogProgress is the next log state and any encountered leaves that were
-// considered matching since the previous log state. Parse errors are included.
-type MessageLogProgress struct {
- State MonitorState
- Matches []LogEntry
- Errors []error
-}
-
-// MonitorState describes the monitor's state for a particular log. The signed tree
-// head is the latest verified append-only state that was observed. The index
-// is the next leaf which will be downloaded and processed by the monitor.
-type MonitorState struct {
- LogState
- NextIndex uint64
-}
-
-// LogState describes the state of a log
-type LogState struct {
- ct.SignedTreeHead
-}
-
-// LogEntry is a leaf in a log's Merkle tree
-type LogEntry struct {
- LeafIndex uint64
- LeafData []byte
- ExtraData []byte
-}
diff --git a/pkg/monitor/monitor.go b/pkg/monitor/monitor.go
deleted file mode 100644
index 5f7a629..0000000
--- a/pkg/monitor/monitor.go
+++ /dev/null
@@ -1,286 +0,0 @@
-// Package monitor provides a Certificate Transparency monitor that tails a list
-// of logs which can be updated dynamically while running. All emitted progress
-// messages have been verified by the monitor to be included in the log's
-// append-only Merkle tree with regard to the initial start-up state. It is up
-// to the user to process the monitor's progress, errors, and persist state.
-//
-// Implement the Matcher interface to customize which certificates should be
-// included in a log's progress messages, or use any of the existing matchers
-// provided by this package (see for example MatchAll and MatchWildcards).
-package monitor
-
-import (
- "context"
- "crypto/sha256"
- "fmt"
- "net/http"
- "sync"
- "time"
-
- ct "github.com/google/certificate-transparency-go"
- "github.com/google/certificate-transparency-go/client"
- "github.com/google/certificate-transparency-go/jsonclient"
- "github.com/google/certificate-transparency-go/scanner"
- "rgdd.se/silent-ct/internal/merkle"
-)
-
-const (
- UserAgentPrefix = "rgdd.se/silent-ct"
- DefaultContact = "unknown-user"
- DefaultChunkSize = 256 // TODO: increase me
- DefaultBatchSize = 128 // TODO: increase me
- DefaultNumWorkers = 2
-)
-
-type Config struct {
- Contact string // Something that help log operators get in touch
- ChunkSize int // Min number of leaves to propagate a chunk without matches
- BatchSize int // Max number of certificates to accept per worker
- NumWorkers int // Number of parallel workers to use for each log
-
- // Callback determines which certificates are interesting to detect
- Callback Matcher
-}
-
-type Monitor struct {
- Config
-}
-
-func New(cfg Config) (Monitor, error) {
- if cfg.Contact == "" {
- cfg.Contact = "unknown-user"
- }
- if cfg.ChunkSize <= 0 {
- cfg.ChunkSize = DefaultChunkSize
- }
- if cfg.BatchSize <= 0 {
- cfg.BatchSize = DefaultBatchSize
- }
- if cfg.NumWorkers <= 0 {
- cfg.NumWorkers = DefaultNumWorkers
- }
- if cfg.Callback == nil {
- cfg.Callback = &MatchAll{}
- }
- return Monitor{Config: cfg}, nil
-}
-
-func (mon *Monitor) Run(ctx context.Context, metadataCh chan []MessageLogConfig, eventCh chan MessageLogProgress, errorCh chan error) {
- var wg sync.WaitGroup
- var sctx context.Context
- var cancel context.CancelFunc
-
- for {
- select {
- case <-ctx.Done():
- return
- case metadata := <-metadataCh:
- fmt.Printf("DEBUG: received new list with %d logs\n", len(metadata))
- if cancel != nil {
- fmt.Printf("DEBUG: stopping all log tailers\n")
- cancel()
- wg.Wait()
- }
-
- sctx, cancel = context.WithCancel(ctx)
- for _, md := range metadata {
- fmt.Printf("DEBUG: starting log tailer %s\n", md.Metadata.URL)
- wg.Add(1)
- go func(lcfg MessageLogConfig) {
- defer wg.Done()
-
- opts := jsonclient.Options{Logger: &noout{}, UserAgent: UserAgentPrefix + ":" + mon.Contact}
- cli, err := client.New(string(lcfg.Metadata.URL), &http.Client{}, opts)
- if err != nil {
- errorCh <- fmt.Errorf("unable to configure %s: %v", lcfg.Metadata.URL, err)
- return
- }
-
- chunkCh := make(chan *chunk)
- defer close(chunkCh)
-
- t := tail{mon.Config, *cli, chunkCh, eventCh, errorCh}
- if err := t.run(sctx, lcfg.State); err != nil {
- errorCh <- fmt.Errorf("unable to continue tailing %s: %v", lcfg.Metadata.URL, err)
- }
- }(md)
- }
- }
- }
-}
-
-type tail struct {
- mcfg Config
- cli client.LogClient
-
- chunkCh chan *chunk
- eventCh chan MessageLogProgress
- errorCh chan error
-}
-
-func (t *tail) run(ctx context.Context, state MonitorState) error {
- var wg sync.WaitGroup
- defer wg.Wait()
-
- mctx, cancel := context.WithCancel(ctx)
- defer cancel()
-
- wg.Add(1)
- go func() {
- defer wg.Done()
- defer cancel()
- t.sequence(mctx, state)
- }()
-
- fetcher := scanner.NewFetcher(&t.cli, &scanner.FetcherOptions{
- BatchSize: t.mcfg.BatchSize,
- StartIndex: int64(state.NextIndex),
- ParallelFetch: t.mcfg.NumWorkers,
- Continuous: true,
- })
- callback := func(eb scanner.EntryBatch) {
- c := chunk{startIndex: uint64(eb.Start)}
- for i := 0; i < len(eb.Entries); i++ {
- c.leafHashes = append(c.leafHashes, merkle.HashLeafNode(eb.Entries[i].LeafInput))
- match, err := t.mcfg.Callback.Match(eb.Entries[i].LeafInput, eb.Entries[i].ExtraData)
- if err != nil {
- c.errors = append(c.errors, fmt.Errorf("while processing index %d for %s: %v", i, t.cli.BaseURI(), err))
- } else if match {
- c.matches = append(c.matches, LogEntry{
- LeafIndex: uint64(i),
- LeafData: eb.Entries[i].LeafInput,
- ExtraData: eb.Entries[i].ExtraData,
- })
- }
- }
- t.chunkCh <- &c
- }
- return fetcher.Run(mctx, callback)
-}
-
-func (t *tail) sequence(ctx context.Context, state MonitorState) {
- heap := newChunks()
- for {
- select {
- case <-ctx.Done():
- return
- case c := <-t.chunkCh:
- heap.push(c)
- if heap.gap(state.NextIndex) {
- continue
- }
- c = heap.pop()
- if len(c.matches) == 0 && len(c.leafHashes) < t.mcfg.ChunkSize {
- heap.push(c)
- continue // TODO: don't trigger if we havn't run nextState for too long
- }
- nextState, err := t.nextState(ctx, state, c)
- if err != nil {
- t.errorCh <- err
- heap.push(c)
- continue
- }
-
- state = nextState
- t.eventCh <- MessageLogProgress{State: state, Matches: c.matches, Errors: c.errors}
- }
- }
-}
-
-func (t *tail) nextState(ctx context.Context, state MonitorState, c *chunk) (MonitorState, error) {
- newState, err := t.nextConsistentState(ctx, state)
- if err != nil {
- return MonitorState{}, err
- }
- newState, err = t.nextIncludedState(ctx, state, c)
- if err != nil {
- return MonitorState{}, err
- }
- return newState, nil
-}
-
-func (t *tail) nextConsistentState(ctx context.Context, state MonitorState) (MonitorState, error) {
- sth, err := getSignedTreeHead(ctx, &t.cli)
- if err != nil {
- return MonitorState{}, ErrorFetch{URL: t.cli.BaseURI(), Msg: "get-sth", Err: err}
- }
- oldSize := state.TreeSize
- oldRoot := state.SHA256RootHash
- newSize := sth.TreeSize
- newRoot := sth.SHA256RootHash
-
- proof, err := getConsistencyProof(ctx, &t.cli, oldSize, newSize)
- if err != nil {
- return MonitorState{}, ErrorFetch{URL: t.cli.BaseURI(), Msg: "get-consistency", STH: sth, Err: err}
- }
- if err := merkle.VerifyConsistency(oldSize, newSize, oldRoot, newRoot, unslice(proof)); err != nil {
- return MonitorState{}, ErrorMerkleTree{URL: t.cli.BaseURI(), Msg: "consistency", STH: sth, Err: err}
- }
-
- fmt.Printf("DEBUG: consistently updated STH from size %d to %d\n", oldSize, newSize)
- return MonitorState{LogState: LogState{*sth}, NextIndex: state.NextIndex}, nil
-}
-
-func (t *tail) nextIncludedState(ctx context.Context, state MonitorState, c *chunk) (MonitorState, error) {
- leafHash := c.leafHashes[0]
- oldSize := state.NextIndex + uint64(len(c.leafHashes))
- iproof, err := getInclusionProof(ctx, &t.cli, leafHash, oldSize)
- if err != nil {
- err = fmt.Errorf("leaf hash %x and tree size %d: %v", leafHash[:], oldSize, err)
- return MonitorState{}, ErrorFetch{URL: t.cli.BaseURI(), Msg: "get-inclusion", Err: err}
- }
- if got, want := uint64(iproof.LeafIndex), state.NextIndex; got != want {
- err := fmt.Errorf("leaf hash %x and tree size %d: expected leaf index %d but got %d", leafHash[:], oldSize, got, want)
- return MonitorState{}, ErrorMerkleTree{URL: t.cli.BaseURI(), Msg: "proof-index", Err: err}
- }
- oldRoot, err := merkle.TreeHeadFromRangeProof(c.leafHashes, state.NextIndex, unslice(iproof.AuditPath))
- if err != nil {
- return MonitorState{}, ErrorMerkleTree{URL: t.cli.BaseURI(), Msg: "inclusion", Err: err}
- }
-
- newSize := state.TreeSize
- newRoot := state.SHA256RootHash
- cproof, err := getConsistencyProof(ctx, &t.cli, oldSize, newSize)
- if err != nil {
- err = fmt.Errorf("from size %d to %d: %v", oldSize, newSize, err)
- return MonitorState{}, ErrorFetch{URL: t.cli.BaseURI(), Msg: "get-consistency", Err: err}
- }
- if err := merkle.VerifyConsistency(oldSize, newSize, oldRoot, newRoot, unslice(cproof)); err != nil {
- err = fmt.Errorf("from size %d to %d: %v", oldSize, newSize, err)
- return MonitorState{}, ErrorMerkleTree{URL: t.cli.BaseURI(), Msg: "consistency", Err: err}
- }
-
- state.NextIndex += uint64(len(c.leafHashes))
- return state, nil
-}
-
-func getInclusionProof(ctx context.Context, cli *client.LogClient, leafHash [sha256.Size]byte, size uint64) (*ct.GetProofByHashResponse, error) {
- rctx, cancel := context.WithTimeout(ctx, 10*time.Second)
- defer cancel()
- return cli.GetProofByHash(rctx, leafHash[:], size)
-}
-
-func getConsistencyProof(ctx context.Context, cli *client.LogClient, oldSize, newSize uint64) ([][]byte, error) {
- if oldSize == 0 || oldSize >= newSize {
- return [][]byte{}, nil
- }
- rctx, cancel := context.WithTimeout(ctx, 10*time.Second)
- defer cancel()
- return cli.GetSTHConsistency(rctx, oldSize, newSize)
-}
-
-func getSignedTreeHead(ctx context.Context, cli *client.LogClient) (*ct.SignedTreeHead, error) {
- rctx, cancel := context.WithTimeout(ctx, 10*time.Second)
- defer cancel()
- return cli.GetSTH(rctx)
-}
-
-func unslice(hashes [][]byte) [][sha256.Size]byte {
- var ret [][sha256.Size]byte
- for _, hash := range hashes {
- var h [sha256.Size]byte
- copy(h[:], hash)
- ret = append(ret, h)
- }
- return ret
-}