From e18d36ebae30536c77c61cd5da123991e0ca1629 Mon Sep 17 00:00:00 2001 From: Rasmus Dahlberg Date: Sun, 31 Dec 2023 09:39:25 +0100 Subject: Add drafty prototype --- pkg/monitor/chunks.go | 88 --------------- pkg/monitor/errors.go | 41 ------- pkg/monitor/matcher.go | 90 --------------- pkg/monitor/messages.go | 40 ------- pkg/monitor/monitor.go | 286 ------------------------------------------------ 5 files changed, 545 deletions(-) delete mode 100644 pkg/monitor/chunks.go delete mode 100644 pkg/monitor/errors.go delete mode 100644 pkg/monitor/matcher.go delete mode 100644 pkg/monitor/messages.go delete mode 100644 pkg/monitor/monitor.go (limited to 'pkg/monitor') diff --git a/pkg/monitor/chunks.go b/pkg/monitor/chunks.go deleted file mode 100644 index 87871b9..0000000 --- a/pkg/monitor/chunks.go +++ /dev/null @@ -1,88 +0,0 @@ -package monitor - -// -// A min heap of chunks, oredered on each chunk's start index. -// -// Credit: inspiration to use a heap from Aaron Gable, see -// https://github.com/aarongable/ctaudit -// - -import ( - "container/heap" - "crypto/sha256" -) - -type chunk struct { - startIndex uint64 // Index of the first leaf - leafHashes [][sha256.Size]byte // List of consecutive leaf hashes - matches []LogEntry // Leaves that matches some criteria - errors []error // Errors that ocurred while parsing leaves -} - -type chunks []*chunk - -func newChunks() *chunks { - var h chunks - heap.Init((*internal)(&h)) - return &h -} - -func (h *chunks) push(c *chunk) { - heap.Push((*internal)(h), c) -} - -func (h *chunks) pop() *chunk { - x := heap.Pop((*internal)(h)) - return x.(*chunk) -} - -// gap returns true if there's a gap between the provided start index and the -// top most chunk. If the top most chunk is in sequence, it is merged with -// any following chunks that are also in sequence to form one larger chunk. -func (h *chunks) gap(start uint64) bool { - if len(*h) == 0 { - return true - } - - top := h.pop() - if start != top.startIndex { - h.push(top) - return true - } - - for len(*h) > 0 { - c := h.pop() - if c.startIndex != top.startIndex+uint64(len(top.leafHashes)) { - h.push(c) - break - } - - top.leafHashes = append(top.leafHashes, c.leafHashes...) - top.matches = append(top.matches, c.matches...) - top.errors = append(top.errors, c.errors...) - } - - h.push(top) - return false -} - -// internal implements the heap interface, see example: -// https://cs.opensource.google/go/go/+/refs/tags/go1.21.5:src/container/heap/example_intheap_test.go -type internal chunks - -func (h internal) Len() int { return len(h) } -func (h internal) Less(i, j int) bool { return h[i].startIndex < h[j].startIndex } -func (h internal) Swap(i, j int) { h[i], h[j] = h[j], h[i] } - -func (h *internal) Push(x any) { - *h = append(*h, x.(*chunk)) -} - -func (h *internal) Pop() any { - old := *h - n := len(old) - x := old[n-1] - old[n-1] = nil // avoid memory leak - *h = old[:n-1] - return x -} diff --git a/pkg/monitor/errors.go b/pkg/monitor/errors.go deleted file mode 100644 index 4d676af..0000000 --- a/pkg/monitor/errors.go +++ /dev/null @@ -1,41 +0,0 @@ -package monitor - -import ( - "fmt" - - ct "github.com/google/certificate-transparency-go" -) - -// ErrorFetch occurs if there's a problem hitting the log's HTTP API. An STH is -// provided if available, since it might carry evidence of some log misbehavior. -type ErrorFetch struct { - URL string - Msg string - Err error - STH *ct.SignedTreeHead -} - -func (e ErrorFetch) Error() string { - return fmt.Sprintf("%s: %s: %v", e.URL, e.Msg, e.Err) -} - -// ErrorMerkleTree occurs if the log's Merkle tree can't be verified. An STH is -// provided if available (i.e., won't be available for internal tree building). -type ErrorMerkleTree struct { - URL string - Msg string - Err error - STH *ct.SignedTreeHead -} - -func (e ErrorMerkleTree) Error() string { - return fmt.Sprintf("%s: %s: %v", e.URL, e.Msg, e.Err) -} - -// TODO: MMD violations -// TODO: Growing read-only logs - -// noout implements the Logger interface to discard unwanted output -type noout struct{} - -func (n *noout) Printf(string, ...interface{}) {} diff --git a/pkg/monitor/matcher.go b/pkg/monitor/matcher.go deleted file mode 100644 index fa3a894..0000000 --- a/pkg/monitor/matcher.go +++ /dev/null @@ -1,90 +0,0 @@ -package monitor - -import ( - "fmt" - "strings" - - ct "github.com/google/certificate-transparency-go" -) - -type Matcher interface { - Match(leafInput, extraData []byte) (bool, error) -} - -// MatchAll matches all certificates -type MatchAll struct{} - -func (m *MatchAll) Match(leafInput, extraData []byte) (bool, error) { - return true, nil -} - -// MatchWildcards matches a list of wildcards, see the MatchWildcard type -type MatchWildcards []MatchWildcard - -func (m *MatchWildcards) Match(leafInput, extraData []byte) (bool, error) { - sans, err := getSANs(ct.LeafEntry{LeafInput: leafInput, ExtraData: extraData}) - if err != nil { - return false, err - } - return m.match(sans), nil -} - -func (m *MatchWildcards) match(sans []string) bool { - for _, mw := range (*m)[:] { - if mw.match(sans) { - return true - } - } - return false -} - -// MatchWildcard exclude matches for `.*\.`, but will -// otherwise match on any `.*\.` as well as SANs equal to . -// -// For example, let be example.org and Exclude be [foo, bar]. Then -// example.org and www.example.org would match, whereas foo.example.org, -// sub.foo.example.org, and bar.example.org. would not match. -type MatchWildcard struct { - Wildcard string `json:"wildcard"` - Excludes []string `json:"excludes"` -} - -func (m *MatchWildcard) match(sans []string) bool { - for _, san := range sans { - if san == m.Wildcard { - return true - } - if strings.HasSuffix(san, "."+m.Wildcard) && !m.exclude(san) { - return true - } - } - return false -} - -func (m *MatchWildcard) exclude(san string) bool { - for _, exclude := range m.Excludes { - suffix := exclude + "." + m.Wildcard - if strings.HasSuffix(san, suffix) { - return true - } - } - return false -} - -func getSANs(entry ct.LeafEntry) ([]string, error) { - // Warning: here be dragons, parsing of DNS names in certificates... - e, err := ct.LogEntryFromLeaf(0, &entry) - if err != nil { - return nil, fmt.Errorf("parse leaf: %v", err) - } - if e.Precert == nil && e.X509Cert == nil { - return nil, fmt.Errorf("neither precertificate nor certificate in leaf") - } - if e.Precert != nil && e.X509Cert != nil { - return nil, fmt.Errorf("both certificate and precertificate in leaf") - } - if e.Precert != nil { - return e.Precert.TBSCertificate.DNSNames, nil - } - return e.X509Cert.DNSNames, nil -} diff --git a/pkg/monitor/messages.go b/pkg/monitor/messages.go deleted file mode 100644 index 717aae6..0000000 --- a/pkg/monitor/messages.go +++ /dev/null @@ -1,40 +0,0 @@ -package monitor - -import ( - ct "github.com/google/certificate-transparency-go" - "gitlab.torproject.org/rgdd/ct/pkg/metadata" -) - -// MessageLogConfig provides information about a log the monitor is downloading -type MessageLogConfig struct { - Metadata metadata.Log - State MonitorState -} - -// MessageLogProgress is the next log state and any encountered leaves that were -// considered matching since the previous log state. Parse errors are included. -type MessageLogProgress struct { - State MonitorState - Matches []LogEntry - Errors []error -} - -// MonitorState describes the monitor's state for a particular log. The signed tree -// head is the latest verified append-only state that was observed. The index -// is the next leaf which will be downloaded and processed by the monitor. -type MonitorState struct { - LogState - NextIndex uint64 -} - -// LogState describes the state of a log -type LogState struct { - ct.SignedTreeHead -} - -// LogEntry is a leaf in a log's Merkle tree -type LogEntry struct { - LeafIndex uint64 - LeafData []byte - ExtraData []byte -} diff --git a/pkg/monitor/monitor.go b/pkg/monitor/monitor.go deleted file mode 100644 index 5f7a629..0000000 --- a/pkg/monitor/monitor.go +++ /dev/null @@ -1,286 +0,0 @@ -// Package monitor provides a Certificate Transparency monitor that tails a list -// of logs which can be updated dynamically while running. All emitted progress -// messages have been verified by the monitor to be included in the log's -// append-only Merkle tree with regard to the initial start-up state. It is up -// to the user to process the monitor's progress, errors, and persist state. -// -// Implement the Matcher interface to customize which certificates should be -// included in a log's progress messages, or use any of the existing matchers -// provided by this package (see for example MatchAll and MatchWildcards). -package monitor - -import ( - "context" - "crypto/sha256" - "fmt" - "net/http" - "sync" - "time" - - ct "github.com/google/certificate-transparency-go" - "github.com/google/certificate-transparency-go/client" - "github.com/google/certificate-transparency-go/jsonclient" - "github.com/google/certificate-transparency-go/scanner" - "rgdd.se/silent-ct/internal/merkle" -) - -const ( - UserAgentPrefix = "rgdd.se/silent-ct" - DefaultContact = "unknown-user" - DefaultChunkSize = 256 // TODO: increase me - DefaultBatchSize = 128 // TODO: increase me - DefaultNumWorkers = 2 -) - -type Config struct { - Contact string // Something that help log operators get in touch - ChunkSize int // Min number of leaves to propagate a chunk without matches - BatchSize int // Max number of certificates to accept per worker - NumWorkers int // Number of parallel workers to use for each log - - // Callback determines which certificates are interesting to detect - Callback Matcher -} - -type Monitor struct { - Config -} - -func New(cfg Config) (Monitor, error) { - if cfg.Contact == "" { - cfg.Contact = "unknown-user" - } - if cfg.ChunkSize <= 0 { - cfg.ChunkSize = DefaultChunkSize - } - if cfg.BatchSize <= 0 { - cfg.BatchSize = DefaultBatchSize - } - if cfg.NumWorkers <= 0 { - cfg.NumWorkers = DefaultNumWorkers - } - if cfg.Callback == nil { - cfg.Callback = &MatchAll{} - } - return Monitor{Config: cfg}, nil -} - -func (mon *Monitor) Run(ctx context.Context, metadataCh chan []MessageLogConfig, eventCh chan MessageLogProgress, errorCh chan error) { - var wg sync.WaitGroup - var sctx context.Context - var cancel context.CancelFunc - - for { - select { - case <-ctx.Done(): - return - case metadata := <-metadataCh: - fmt.Printf("DEBUG: received new list with %d logs\n", len(metadata)) - if cancel != nil { - fmt.Printf("DEBUG: stopping all log tailers\n") - cancel() - wg.Wait() - } - - sctx, cancel = context.WithCancel(ctx) - for _, md := range metadata { - fmt.Printf("DEBUG: starting log tailer %s\n", md.Metadata.URL) - wg.Add(1) - go func(lcfg MessageLogConfig) { - defer wg.Done() - - opts := jsonclient.Options{Logger: &noout{}, UserAgent: UserAgentPrefix + ":" + mon.Contact} - cli, err := client.New(string(lcfg.Metadata.URL), &http.Client{}, opts) - if err != nil { - errorCh <- fmt.Errorf("unable to configure %s: %v", lcfg.Metadata.URL, err) - return - } - - chunkCh := make(chan *chunk) - defer close(chunkCh) - - t := tail{mon.Config, *cli, chunkCh, eventCh, errorCh} - if err := t.run(sctx, lcfg.State); err != nil { - errorCh <- fmt.Errorf("unable to continue tailing %s: %v", lcfg.Metadata.URL, err) - } - }(md) - } - } - } -} - -type tail struct { - mcfg Config - cli client.LogClient - - chunkCh chan *chunk - eventCh chan MessageLogProgress - errorCh chan error -} - -func (t *tail) run(ctx context.Context, state MonitorState) error { - var wg sync.WaitGroup - defer wg.Wait() - - mctx, cancel := context.WithCancel(ctx) - defer cancel() - - wg.Add(1) - go func() { - defer wg.Done() - defer cancel() - t.sequence(mctx, state) - }() - - fetcher := scanner.NewFetcher(&t.cli, &scanner.FetcherOptions{ - BatchSize: t.mcfg.BatchSize, - StartIndex: int64(state.NextIndex), - ParallelFetch: t.mcfg.NumWorkers, - Continuous: true, - }) - callback := func(eb scanner.EntryBatch) { - c := chunk{startIndex: uint64(eb.Start)} - for i := 0; i < len(eb.Entries); i++ { - c.leafHashes = append(c.leafHashes, merkle.HashLeafNode(eb.Entries[i].LeafInput)) - match, err := t.mcfg.Callback.Match(eb.Entries[i].LeafInput, eb.Entries[i].ExtraData) - if err != nil { - c.errors = append(c.errors, fmt.Errorf("while processing index %d for %s: %v", i, t.cli.BaseURI(), err)) - } else if match { - c.matches = append(c.matches, LogEntry{ - LeafIndex: uint64(i), - LeafData: eb.Entries[i].LeafInput, - ExtraData: eb.Entries[i].ExtraData, - }) - } - } - t.chunkCh <- &c - } - return fetcher.Run(mctx, callback) -} - -func (t *tail) sequence(ctx context.Context, state MonitorState) { - heap := newChunks() - for { - select { - case <-ctx.Done(): - return - case c := <-t.chunkCh: - heap.push(c) - if heap.gap(state.NextIndex) { - continue - } - c = heap.pop() - if len(c.matches) == 0 && len(c.leafHashes) < t.mcfg.ChunkSize { - heap.push(c) - continue // TODO: don't trigger if we havn't run nextState for too long - } - nextState, err := t.nextState(ctx, state, c) - if err != nil { - t.errorCh <- err - heap.push(c) - continue - } - - state = nextState - t.eventCh <- MessageLogProgress{State: state, Matches: c.matches, Errors: c.errors} - } - } -} - -func (t *tail) nextState(ctx context.Context, state MonitorState, c *chunk) (MonitorState, error) { - newState, err := t.nextConsistentState(ctx, state) - if err != nil { - return MonitorState{}, err - } - newState, err = t.nextIncludedState(ctx, state, c) - if err != nil { - return MonitorState{}, err - } - return newState, nil -} - -func (t *tail) nextConsistentState(ctx context.Context, state MonitorState) (MonitorState, error) { - sth, err := getSignedTreeHead(ctx, &t.cli) - if err != nil { - return MonitorState{}, ErrorFetch{URL: t.cli.BaseURI(), Msg: "get-sth", Err: err} - } - oldSize := state.TreeSize - oldRoot := state.SHA256RootHash - newSize := sth.TreeSize - newRoot := sth.SHA256RootHash - - proof, err := getConsistencyProof(ctx, &t.cli, oldSize, newSize) - if err != nil { - return MonitorState{}, ErrorFetch{URL: t.cli.BaseURI(), Msg: "get-consistency", STH: sth, Err: err} - } - if err := merkle.VerifyConsistency(oldSize, newSize, oldRoot, newRoot, unslice(proof)); err != nil { - return MonitorState{}, ErrorMerkleTree{URL: t.cli.BaseURI(), Msg: "consistency", STH: sth, Err: err} - } - - fmt.Printf("DEBUG: consistently updated STH from size %d to %d\n", oldSize, newSize) - return MonitorState{LogState: LogState{*sth}, NextIndex: state.NextIndex}, nil -} - -func (t *tail) nextIncludedState(ctx context.Context, state MonitorState, c *chunk) (MonitorState, error) { - leafHash := c.leafHashes[0] - oldSize := state.NextIndex + uint64(len(c.leafHashes)) - iproof, err := getInclusionProof(ctx, &t.cli, leafHash, oldSize) - if err != nil { - err = fmt.Errorf("leaf hash %x and tree size %d: %v", leafHash[:], oldSize, err) - return MonitorState{}, ErrorFetch{URL: t.cli.BaseURI(), Msg: "get-inclusion", Err: err} - } - if got, want := uint64(iproof.LeafIndex), state.NextIndex; got != want { - err := fmt.Errorf("leaf hash %x and tree size %d: expected leaf index %d but got %d", leafHash[:], oldSize, got, want) - return MonitorState{}, ErrorMerkleTree{URL: t.cli.BaseURI(), Msg: "proof-index", Err: err} - } - oldRoot, err := merkle.TreeHeadFromRangeProof(c.leafHashes, state.NextIndex, unslice(iproof.AuditPath)) - if err != nil { - return MonitorState{}, ErrorMerkleTree{URL: t.cli.BaseURI(), Msg: "inclusion", Err: err} - } - - newSize := state.TreeSize - newRoot := state.SHA256RootHash - cproof, err := getConsistencyProof(ctx, &t.cli, oldSize, newSize) - if err != nil { - err = fmt.Errorf("from size %d to %d: %v", oldSize, newSize, err) - return MonitorState{}, ErrorFetch{URL: t.cli.BaseURI(), Msg: "get-consistency", Err: err} - } - if err := merkle.VerifyConsistency(oldSize, newSize, oldRoot, newRoot, unslice(cproof)); err != nil { - err = fmt.Errorf("from size %d to %d: %v", oldSize, newSize, err) - return MonitorState{}, ErrorMerkleTree{URL: t.cli.BaseURI(), Msg: "consistency", Err: err} - } - - state.NextIndex += uint64(len(c.leafHashes)) - return state, nil -} - -func getInclusionProof(ctx context.Context, cli *client.LogClient, leafHash [sha256.Size]byte, size uint64) (*ct.GetProofByHashResponse, error) { - rctx, cancel := context.WithTimeout(ctx, 10*time.Second) - defer cancel() - return cli.GetProofByHash(rctx, leafHash[:], size) -} - -func getConsistencyProof(ctx context.Context, cli *client.LogClient, oldSize, newSize uint64) ([][]byte, error) { - if oldSize == 0 || oldSize >= newSize { - return [][]byte{}, nil - } - rctx, cancel := context.WithTimeout(ctx, 10*time.Second) - defer cancel() - return cli.GetSTHConsistency(rctx, oldSize, newSize) -} - -func getSignedTreeHead(ctx context.Context, cli *client.LogClient) (*ct.SignedTreeHead, error) { - rctx, cancel := context.WithTimeout(ctx, 10*time.Second) - defer cancel() - return cli.GetSTH(rctx) -} - -func unslice(hashes [][]byte) [][sha256.Size]byte { - var ret [][sha256.Size]byte - for _, hash := range hashes { - var h [sha256.Size]byte - copy(h[:], hash) - ret = append(ret, h) - } - return ret -} -- cgit v1.2.3