diff options
| author | Rasmus Dahlberg <rasmus@rgdd.se> | 2023-12-09 17:08:45 +0100 | 
|---|---|---|
| committer | Rasmus Dahlberg <rasmus@rgdd.se> | 2023-12-10 20:38:21 +0100 | 
| commit | 895d5fea41177e444c18f4fdc820fffa5f67d5bf (patch) | |
| tree | 42fd1e9507384abcd9c6e82f18ccca813f8b6296 /pkg/monitor | |
| parent | f124940f75e1f49100fe5381019d2f65d6915304 (diff) | |
Add drafty skeleton
Diffstat (limited to 'pkg/monitor')
| -rw-r--r-- | pkg/monitor/chunks.go | 88 | ||||
| -rw-r--r-- | pkg/monitor/errors.go | 41 | ||||
| -rw-r--r-- | pkg/monitor/matcher.go | 90 | ||||
| -rw-r--r-- | pkg/monitor/messages.go | 40 | ||||
| -rw-r--r-- | pkg/monitor/monitor.go | 286 | 
5 files changed, 545 insertions, 0 deletions
| diff --git a/pkg/monitor/chunks.go b/pkg/monitor/chunks.go new file mode 100644 index 0000000..87871b9 --- /dev/null +++ b/pkg/monitor/chunks.go @@ -0,0 +1,88 @@ +package monitor + +// +// A min heap of chunks, oredered on each chunk's start index. +// +// Credit: inspiration to use a heap from Aaron Gable, see +// https://github.com/aarongable/ctaudit +// + +import ( +	"container/heap" +	"crypto/sha256" +) + +type chunk struct { +	startIndex uint64              // Index of the first leaf +	leafHashes [][sha256.Size]byte // List of consecutive leaf hashes +	matches    []LogEntry          // Leaves that matches some criteria +	errors     []error             // Errors that ocurred while parsing leaves +} + +type chunks []*chunk + +func newChunks() *chunks { +	var h chunks +	heap.Init((*internal)(&h)) +	return &h +} + +func (h *chunks) push(c *chunk) { +	heap.Push((*internal)(h), c) +} + +func (h *chunks) pop() *chunk { +	x := heap.Pop((*internal)(h)) +	return x.(*chunk) +} + +// gap returns true if there's a gap between the provided start index and the +// top most chunk.  If the top most chunk is in sequence, it is merged with +// any following chunks that are also in sequence to form one larger chunk. +func (h *chunks) gap(start uint64) bool { +	if len(*h) == 0 { +		return true +	} + +	top := h.pop() +	if start != top.startIndex { +		h.push(top) +		return true +	} + +	for len(*h) > 0 { +		c := h.pop() +		if c.startIndex != top.startIndex+uint64(len(top.leafHashes)) { +			h.push(c) +			break +		} + +		top.leafHashes = append(top.leafHashes, c.leafHashes...) +		top.matches = append(top.matches, c.matches...) +		top.errors = append(top.errors, c.errors...) +	} + +	h.push(top) +	return false +} + +// internal implements the heap interface, see example: +// https://cs.opensource.google/go/go/+/refs/tags/go1.21.5:src/container/heap/example_intheap_test.go +type internal chunks + +func (h internal) Len() int           { return len(h) } +func (h internal) Less(i, j int) bool { return h[i].startIndex < h[j].startIndex } +func (h internal) Swap(i, j int)      { h[i], h[j] = h[j], h[i] } + +func (h *internal) Push(x any) { +	*h = append(*h, x.(*chunk)) +} + +func (h *internal) Pop() any { +	old := *h +	n := len(old) +	x := old[n-1] +	old[n-1] = nil // avoid memory leak +	*h = old[:n-1] +	return x +} diff --git a/pkg/monitor/errors.go b/pkg/monitor/errors.go new file mode 100644 index 0000000..4d676af --- /dev/null +++ b/pkg/monitor/errors.go @@ -0,0 +1,41 @@ +package monitor + +import ( +	"fmt" + +	ct "github.com/google/certificate-transparency-go" +) + +// ErrorFetch occurs if there's a problem hitting the log's HTTP API.  An STH is +// provided if available, since it might carry evidence of some log misbehavior. +type ErrorFetch struct { +	URL string +	Msg string +	Err error +	STH *ct.SignedTreeHead +} + +func (e ErrorFetch) Error() string { +	return fmt.Sprintf("%s: %s: %v", e.URL, e.Msg, e.Err) +} + +// ErrorMerkleTree occurs if the log's Merkle tree can't be verified.  An STH is +// provided if available (i.e., won't be available for internal tree building). +type ErrorMerkleTree struct { +	URL string +	Msg string +	Err error +	STH *ct.SignedTreeHead +} + +func (e ErrorMerkleTree) Error() string { +	return fmt.Sprintf("%s: %s: %v", e.URL, e.Msg, e.Err) +} + +// TODO: MMD violations +// TODO: Growing read-only logs + +// noout implements the Logger interface to discard unwanted output +type noout struct{} + +func (n *noout) Printf(string, ...interface{}) {} diff --git a/pkg/monitor/matcher.go b/pkg/monitor/matcher.go new file mode 100644 index 0000000..fa3a894 --- /dev/null +++ b/pkg/monitor/matcher.go @@ -0,0 +1,90 @@ +package monitor + +import ( +	"fmt" +	"strings" + +	ct "github.com/google/certificate-transparency-go" +) + +type Matcher interface { +	Match(leafInput, extraData []byte) (bool, error) +} + +// MatchAll matches all certificates +type MatchAll struct{} + +func (m *MatchAll) Match(leafInput, extraData []byte) (bool, error) { +	return true, nil +} + +// MatchWildcards matches a list of wildcards, see the MatchWildcard type +type MatchWildcards []MatchWildcard + +func (m *MatchWildcards) Match(leafInput, extraData []byte) (bool, error) { +	sans, err := getSANs(ct.LeafEntry{LeafInput: leafInput, ExtraData: extraData}) +	if err != nil { +		return false, err +	} +	return m.match(sans), nil +} + +func (m *MatchWildcards) match(sans []string) bool { +	for _, mw := range (*m)[:] { +		if mw.match(sans) { +			return true +		} +	} +	return false +} + +// MatchWildcard exclude matches for `.*<Exclude>\.<Wildcard>`, but will +// otherwise match on any `.*\.<Wildcard>` as well as SANs equal to <Wildcard>. +// +// For example, let <Wildcard> be example.org and Exclude be [foo, bar].  Then +// example.org and www.example.org would match, whereas foo.example.org, +// sub.foo.example.org, and bar.example.org. would not match. +type MatchWildcard struct { +	Wildcard string   `json:"wildcard"` +	Excludes []string `json:"excludes"` +} + +func (m *MatchWildcard) match(sans []string) bool { +	for _, san := range sans { +		if san == m.Wildcard { +			return true +		} +		if strings.HasSuffix(san, "."+m.Wildcard) && !m.exclude(san) { +			return true +		} +	} +	return false +} + +func (m *MatchWildcard) exclude(san string) bool { +	for _, exclude := range m.Excludes { +		suffix := exclude + "." + m.Wildcard +		if strings.HasSuffix(san, suffix) { +			return true +		} +	} +	return false +} + +func getSANs(entry ct.LeafEntry) ([]string, error) { +	// Warning: here be dragons, parsing of DNS names in certificates... +	e, err := ct.LogEntryFromLeaf(0, &entry) +	if err != nil { +		return nil, fmt.Errorf("parse leaf: %v", err) +	} +	if e.Precert == nil && e.X509Cert == nil { +		return nil, fmt.Errorf("neither precertificate nor certificate in leaf") +	} +	if e.Precert != nil && e.X509Cert != nil { +		return nil, fmt.Errorf("both certificate and precertificate in leaf") +	} +	if e.Precert != nil { +		return e.Precert.TBSCertificate.DNSNames, nil +	} +	return e.X509Cert.DNSNames, nil +} diff --git a/pkg/monitor/messages.go b/pkg/monitor/messages.go new file mode 100644 index 0000000..717aae6 --- /dev/null +++ b/pkg/monitor/messages.go @@ -0,0 +1,40 @@ +package monitor + +import ( +	ct "github.com/google/certificate-transparency-go" +	"gitlab.torproject.org/rgdd/ct/pkg/metadata" +) + +// MessageLogConfig provides information about a log the monitor is downloading +type MessageLogConfig struct { +	Metadata metadata.Log +	State    MonitorState +} + +// MessageLogProgress is the next log state and any encountered leaves that were +// considered matching since the previous log state.  Parse errors are included. +type MessageLogProgress struct { +	State   MonitorState +	Matches []LogEntry +	Errors  []error +} + +// MonitorState describes the monitor's state for a particular log.  The signed tree +// head is the latest verified append-only state that was observed.  The index +// is the next leaf which will be downloaded and processed by the monitor. +type MonitorState struct { +	LogState +	NextIndex uint64 +} + +// LogState describes the state of a log +type LogState struct { +	ct.SignedTreeHead +} + +// LogEntry is a leaf in a log's Merkle tree +type LogEntry struct { +	LeafIndex uint64 +	LeafData  []byte +	ExtraData []byte +} diff --git a/pkg/monitor/monitor.go b/pkg/monitor/monitor.go new file mode 100644 index 0000000..5f7a629 --- /dev/null +++ b/pkg/monitor/monitor.go @@ -0,0 +1,286 @@ +// Package monitor provides a Certificate Transparency monitor that tails a list +// of logs which can be updated dynamically while running.  All emitted progress +// messages have been verified by the monitor to be included in the log's +// append-only Merkle tree with regard to the initial start-up state.  It is up +// to the user to process the monitor's progress, errors, and persist state. +// +// Implement the Matcher interface to customize which certificates should be +// included in a log's progress messages, or use any of the existing matchers +// provided by this package (see for example MatchAll and MatchWildcards). +package monitor + +import ( +	"context" +	"crypto/sha256" +	"fmt" +	"net/http" +	"sync" +	"time" + +	ct "github.com/google/certificate-transparency-go" +	"github.com/google/certificate-transparency-go/client" +	"github.com/google/certificate-transparency-go/jsonclient" +	"github.com/google/certificate-transparency-go/scanner" +	"rgdd.se/silent-ct/internal/merkle" +) + +const ( +	UserAgentPrefix   = "rgdd.se/silent-ct" +	DefaultContact    = "unknown-user" +	DefaultChunkSize  = 256 // TODO: increase me +	DefaultBatchSize  = 128 // TODO: increase me +	DefaultNumWorkers = 2 +) + +type Config struct { +	Contact    string // Something that help log operators get in touch +	ChunkSize  int    // Min number of leaves to propagate a chunk without matches +	BatchSize  int    // Max number of certificates to accept per worker +	NumWorkers int    // Number of parallel workers to use for each log + +	// Callback determines which certificates are interesting to detect +	Callback Matcher +} + +type Monitor struct { +	Config +} + +func New(cfg Config) (Monitor, error) { +	if cfg.Contact == "" { +		cfg.Contact = "unknown-user" +	} +	if cfg.ChunkSize <= 0 { +		cfg.ChunkSize = DefaultChunkSize +	} +	if cfg.BatchSize <= 0 { +		cfg.BatchSize = DefaultBatchSize +	} +	if cfg.NumWorkers <= 0 { +		cfg.NumWorkers = DefaultNumWorkers +	} +	if cfg.Callback == nil { +		cfg.Callback = &MatchAll{} +	} +	return Monitor{Config: cfg}, nil +} + +func (mon *Monitor) Run(ctx context.Context, metadataCh chan []MessageLogConfig, eventCh chan MessageLogProgress, errorCh chan error) { +	var wg sync.WaitGroup +	var sctx context.Context +	var cancel context.CancelFunc + +	for { +		select { +		case <-ctx.Done(): +			return +		case metadata := <-metadataCh: +			fmt.Printf("DEBUG: received new list with %d logs\n", len(metadata)) +			if cancel != nil { +				fmt.Printf("DEBUG: stopping all log tailers\n") +				cancel() +				wg.Wait() +			} + +			sctx, cancel = context.WithCancel(ctx) +			for _, md := range metadata { +				fmt.Printf("DEBUG: starting log tailer %s\n", md.Metadata.URL) +				wg.Add(1) +				go func(lcfg MessageLogConfig) { +					defer wg.Done() + +					opts := jsonclient.Options{Logger: &noout{}, UserAgent: UserAgentPrefix + ":" + mon.Contact} +					cli, err := client.New(string(lcfg.Metadata.URL), &http.Client{}, opts) +					if err != nil { +						errorCh <- fmt.Errorf("unable to configure %s: %v", lcfg.Metadata.URL, err) +						return +					} + +					chunkCh := make(chan *chunk) +					defer close(chunkCh) + +					t := tail{mon.Config, *cli, chunkCh, eventCh, errorCh} +					if err := t.run(sctx, lcfg.State); err != nil { +						errorCh <- fmt.Errorf("unable to continue tailing %s: %v", lcfg.Metadata.URL, err) +					} +				}(md) +			} +		} +	} +} + +type tail struct { +	mcfg Config +	cli  client.LogClient + +	chunkCh chan *chunk +	eventCh chan MessageLogProgress +	errorCh chan error +} + +func (t *tail) run(ctx context.Context, state MonitorState) error { +	var wg sync.WaitGroup +	defer wg.Wait() + +	mctx, cancel := context.WithCancel(ctx) +	defer cancel() + +	wg.Add(1) +	go func() { +		defer wg.Done() +		defer cancel() +		t.sequence(mctx, state) +	}() + +	fetcher := scanner.NewFetcher(&t.cli, &scanner.FetcherOptions{ +		BatchSize:     t.mcfg.BatchSize, +		StartIndex:    int64(state.NextIndex), +		ParallelFetch: t.mcfg.NumWorkers, +		Continuous:    true, +	}) +	callback := func(eb scanner.EntryBatch) { +		c := chunk{startIndex: uint64(eb.Start)} +		for i := 0; i < len(eb.Entries); i++ { +			c.leafHashes = append(c.leafHashes, merkle.HashLeafNode(eb.Entries[i].LeafInput)) +			match, err := t.mcfg.Callback.Match(eb.Entries[i].LeafInput, eb.Entries[i].ExtraData) +			if err != nil { +				c.errors = append(c.errors, fmt.Errorf("while processing index %d for %s: %v", i, t.cli.BaseURI(), err)) +			} else if match { +				c.matches = append(c.matches, LogEntry{ +					LeafIndex: uint64(i), +					LeafData:  eb.Entries[i].LeafInput, +					ExtraData: eb.Entries[i].ExtraData, +				}) +			} +		} +		t.chunkCh <- &c +	} +	return fetcher.Run(mctx, callback) +} + +func (t *tail) sequence(ctx context.Context, state MonitorState) { +	heap := newChunks() +	for { +		select { +		case <-ctx.Done(): +			return +		case c := <-t.chunkCh: +			heap.push(c) +			if heap.gap(state.NextIndex) { +				continue +			} +			c = heap.pop() +			if len(c.matches) == 0 && len(c.leafHashes) < t.mcfg.ChunkSize { +				heap.push(c) +				continue // TODO: don't trigger if we havn't run nextState for too long +			} +			nextState, err := t.nextState(ctx, state, c) +			if err != nil { +				t.errorCh <- err +				heap.push(c) +				continue +			} + +			state = nextState +			t.eventCh <- MessageLogProgress{State: state, Matches: c.matches, Errors: c.errors} +		} +	} +} + +func (t *tail) nextState(ctx context.Context, state MonitorState, c *chunk) (MonitorState, error) { +	newState, err := t.nextConsistentState(ctx, state) +	if err != nil { +		return MonitorState{}, err +	} +	newState, err = t.nextIncludedState(ctx, state, c) +	if err != nil { +		return MonitorState{}, err +	} +	return newState, nil +} + +func (t *tail) nextConsistentState(ctx context.Context, state MonitorState) (MonitorState, error) { +	sth, err := getSignedTreeHead(ctx, &t.cli) +	if err != nil { +		return MonitorState{}, ErrorFetch{URL: t.cli.BaseURI(), Msg: "get-sth", Err: err} +	} +	oldSize := state.TreeSize +	oldRoot := state.SHA256RootHash +	newSize := sth.TreeSize +	newRoot := sth.SHA256RootHash + +	proof, err := getConsistencyProof(ctx, &t.cli, oldSize, newSize) +	if err != nil { +		return MonitorState{}, ErrorFetch{URL: t.cli.BaseURI(), Msg: "get-consistency", STH: sth, Err: err} +	} +	if err := merkle.VerifyConsistency(oldSize, newSize, oldRoot, newRoot, unslice(proof)); err != nil { +		return MonitorState{}, ErrorMerkleTree{URL: t.cli.BaseURI(), Msg: "consistency", STH: sth, Err: err} +	} + +	fmt.Printf("DEBUG: consistently updated STH from size %d to %d\n", oldSize, newSize) +	return MonitorState{LogState: LogState{*sth}, NextIndex: state.NextIndex}, nil +} + +func (t *tail) nextIncludedState(ctx context.Context, state MonitorState, c *chunk) (MonitorState, error) { +	leafHash := c.leafHashes[0] +	oldSize := state.NextIndex + uint64(len(c.leafHashes)) +	iproof, err := getInclusionProof(ctx, &t.cli, leafHash, oldSize) +	if err != nil { +		err = fmt.Errorf("leaf hash %x and tree size %d: %v", leafHash[:], oldSize, err) +		return MonitorState{}, ErrorFetch{URL: t.cli.BaseURI(), Msg: "get-inclusion", Err: err} +	} +	if got, want := uint64(iproof.LeafIndex), state.NextIndex; got != want { +		err := fmt.Errorf("leaf hash %x and tree size %d: expected leaf index %d but got %d", leafHash[:], oldSize, got, want) +		return MonitorState{}, ErrorMerkleTree{URL: t.cli.BaseURI(), Msg: "proof-index", Err: err} +	} +	oldRoot, err := merkle.TreeHeadFromRangeProof(c.leafHashes, state.NextIndex, unslice(iproof.AuditPath)) +	if err != nil { +		return MonitorState{}, ErrorMerkleTree{URL: t.cli.BaseURI(), Msg: "inclusion", Err: err} +	} + +	newSize := state.TreeSize +	newRoot := state.SHA256RootHash +	cproof, err := getConsistencyProof(ctx, &t.cli, oldSize, newSize) +	if err != nil { +		err = fmt.Errorf("from size %d to %d: %v", oldSize, newSize, err) +		return MonitorState{}, ErrorFetch{URL: t.cli.BaseURI(), Msg: "get-consistency", Err: err} +	} +	if err := merkle.VerifyConsistency(oldSize, newSize, oldRoot, newRoot, unslice(cproof)); err != nil { +		err = fmt.Errorf("from size %d to %d: %v", oldSize, newSize, err) +		return MonitorState{}, ErrorMerkleTree{URL: t.cli.BaseURI(), Msg: "consistency", Err: err} +	} + +	state.NextIndex += uint64(len(c.leafHashes)) +	return state, nil +} + +func getInclusionProof(ctx context.Context, cli *client.LogClient, leafHash [sha256.Size]byte, size uint64) (*ct.GetProofByHashResponse, error) { +	rctx, cancel := context.WithTimeout(ctx, 10*time.Second) +	defer cancel() +	return cli.GetProofByHash(rctx, leafHash[:], size) +} + +func getConsistencyProof(ctx context.Context, cli *client.LogClient, oldSize, newSize uint64) ([][]byte, error) { +	if oldSize == 0 || oldSize >= newSize { +		return [][]byte{}, nil +	} +	rctx, cancel := context.WithTimeout(ctx, 10*time.Second) +	defer cancel() +	return cli.GetSTHConsistency(rctx, oldSize, newSize) +} + +func getSignedTreeHead(ctx context.Context, cli *client.LogClient) (*ct.SignedTreeHead, error) { +	rctx, cancel := context.WithTimeout(ctx, 10*time.Second) +	defer cancel() +	return cli.GetSTH(rctx) +} + +func unslice(hashes [][]byte) [][sha256.Size]byte { +	var ret [][sha256.Size]byte +	for _, hash := range hashes { +		var h [sha256.Size]byte +		copy(h[:], hash) +		ret = append(ret, h) +	} +	return ret +} | 
