aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- cmd_assemble.go 51
-rw-r--r-- cmd_collect.go 29
-rw-r--r-- internal/chunk/chunk.go 2
3 files changed, 54 insertions, 28 deletions
diff --git a/cmd_assemble.go b/cmd_assemble.go
index ae6af50..69a7173 100644
--- a/cmd_assemble.go
+++ b/cmd_assemble.go
@@ -26,7 +26,8 @@ func assemble(opts options) error {
if err := json.Unmarshal(metadataBytes, &md); err != nil {
return err
}
- var files []string
+ var sanFiles []string
+ var noticeFiles []string
var sths []ct.SignedTreeHead
for _, log := range logs(md) {
id, _ := log.Key.ID()
@@ -45,17 +46,18 @@ func assemble(opts options) error {
return fmt.Errorf("%s: root hash mismatch")
}
- files = append(files, fmt.Sprintf("%s/%x/%s", opts.logDirectory, id[:], opts.sansFile))
+ sanFiles = append(sanFiles, fmt.Sprintf("%s/%x/%s", opts.logDirectory, id[:], opts.sansFile))
+ noticeFiles = append(noticeFiles, fmt.Sprintf("%s/%x/%s", opts.logDirectory, id[:], opts.noticeFile))
sths = append(sths, sth)
}
- logger.Printf("INFO: merging and de-duplicating %d input files with GNU sort", len(files))
+ logger.Printf("INFO: merging and de-duplicating %d input files with GNU sort", len(sanFiles))
archiveDir := fmt.Sprintf("%s/%s-ct-sans", opts.archiveDirectory, now.Format("2006-01-02"))
if err := os.MkdirAll(archiveDir, os.ModePerm); err != nil {
return err
}
sansFile := fmt.Sprintf("%s/%s", archiveDir, opts.sansFile)
- if err := dedup(opts, sansFile, files); err != nil {
+ if err := dedup(opts, sansFile, sanFiles); err != nil {
return err
}
size, err := fileSize(sansFile)
@@ -64,8 +66,26 @@ func assemble(opts options) error {
}
logger.Printf("INFO: created %s (%s)", sansFile, size)
+ logger.Printf("INFO: adding notice file")
+ var notes []byte
+ for _, noticeFile := range noticeFiles {
+ b, err := os.ReadFile(noticeFile)
+ if errors.Is(err, os.ErrNotExist) {
+ continue // no notes, great
+ } else if err != nil {
+ return err
+ }
+
+ notes = append(notes, b...)
+ }
+ if err := os.WriteFile(fmt.Sprintf("%s/%s", archiveDir, opts.noticeFile), notes, 0644); err != nil {
+ return err
+ }
+ numNotes := len(bytes.Split(notes, []byte("\n"))) - 1
+ logger.Printf("INFO: %d notes in total\n", numNotes)
+
logger.Printf("INFO: adding README")
- readme, err := makeREADME(opts, sths, now)
+ readme, err := makeREADME(opts, sths, numNotes, now)
if err != nil {
return err
}
@@ -116,24 +136,11 @@ func dedup(opts options, outputFile string, inputFiles []string) error {
return nil
}
-func makeREADME(opts options, sths []ct.SignedTreeHead, now time.Time) (string, error) {
+func makeREADME(opts options, sths []ct.SignedTreeHead, numNotes int, now time.Time) (string, error) {
snapshotTime, err := readSnapshotTime(opts)
if err != nil {
return "", err
}
-
- noticeFile := opts.Directory + "/" + opts.noticeFile
- notice, err := noticeReport(noticeFile)
- if err != nil {
- // TODO: start writing notice prints to a separate file in data/
- // by default, then make this a hard error. This needs to be
- // done manually now by grepping for NOTICE in collect.stdout.
- logger.Printf("WARNING: could not find notice file, skipping")
- notice = "UNKNOWN"
- } else {
- // TODO: save notice file
- }
-
return fmt.Sprintf(`# ct-sans dataset
Dataset assembled at %s. Contents:
@@ -151,12 +158,12 @@ The signed [metadata file][] and tree heads were downloaded at
[metadata file]: https://groups.google.com/a/chromium.org/g/ct-policy/c/IdbrdAcDQto
In total, %d certificates were downloaded from %d CT logs;
-%s certificates contained SANs that could not be parsed.
+%d certificates contained SANs that could not be parsed.
For more information about these errors, see %s.
The SANs data set is sorted and de-duplicated, one SAN per line.
-`, now.Format(time.UnixDate), opts.metadataFile, opts.metadataSignatureFile, opts.sthsFile, opts.sansFile, opts.noticeFile,
- snapshotTime.Format(time.UnixDate), numCertificates(sths), len(sths), notice, noticeFile), nil
+`, now.Format(time.UnixDate), opts.metadataFile, opts.metadataSignatureFile, opts.sthsFile, opts.noticeFile, opts.sansFile,
+ snapshotTime.Format(time.UnixDate), numCertificates(sths), len(sths), numNotes, opts.noticeFile), nil
}
func fileSize(name string) (string, error) {
diff --git a/cmd_collect.go b/cmd_collect.go
index 4d93271..742884a 100644
--- a/cmd_collect.go
+++ b/cmd_collect.go
@@ -3,7 +3,6 @@ package main
import (
"container/heap"
"context"
- "crypto/sha256"
"encoding/json"
"fmt"
logger "log"
@@ -121,15 +120,18 @@ func collect(opts options) error {
// chunk that a single sequencer can verify and persist
//
callback := func(eb scanner.EntryBatch) {
- leafHashes := [][sha256.Size]byte{}
+ c := &chunk.Chunk{Start: eb.Start}
for i := 0; i < len(eb.Entries); i++ {
- leafHashes = append(leafHashes, merkle.HashLeafNode(eb.Entries[i].LeafInput))
+ c.LeafHashes = append(c.LeafHashes, merkle.HashLeafNode(eb.Entries[i].LeafInput))
}
- sans, errs := x509.SANsFromLeafEntries(eb.Start, eb.Entries)
+
+ var errs []error
+ c.SANs, errs = x509.SANsFromLeafEntries(eb.Start, eb.Entries)
for _, err := range errs {
- logger.Printf("NOTICE: %s: %v", *log.Description, err)
+ c.Notes = append(c.Notes, fmt.Sprintf("NOTICE: %s: %v", *log.Description, err))
}
- chunksCh <- &chunk.Chunk{eb.Start, leafHashes, sans}
+
+ chunksCh <- c
}
if err := fetcher.Run(ctx, callback); err != nil {
@@ -275,6 +277,21 @@ func persist(c *chunk.Chunk,
return false, err
}
+ // Persist notes to disk
+ if len(c.Notes) > 0 {
+ fpn, err := os.OpenFile(fmt.Sprintf("%s/%x/%s", opts.logDirectory, logID, opts.noticeFile), os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0644)
+ if err != nil {
+ return false, err
+ }
+ defer fpn.Close()
+ if _, err := fpn.WriteString(strings.Join(c.Notes, "\n") + "\n"); err != nil {
+ return false, err
+ }
+ if err := fpn.Sync(); err != nil {
+ return false, err
+ }
+ }
+
// Persist new tree state to disk
b, err := json.Marshal(&newTH)
if err != nil {
diff --git a/internal/chunk/chunk.go b/internal/chunk/chunk.go
index 7fccc9b..64adc76 100644
--- a/internal/chunk/chunk.go
+++ b/internal/chunk/chunk.go
@@ -18,6 +18,7 @@ type Chunk struct {
Start int64 // index of first leaf in this chunk
LeafHashes [][sha256.Size]byte // in-order leaf hashes in this chunk
SANs []string // sans of all leaves in this chunk
+ Notes []string // notes about this chunk, e.g., errors
}
// ChunkHeap is a min-heap of chunks wrt. to start indices. Use TPush() and
@@ -60,6 +61,7 @@ func (h *ChunkHeap) Sequence(start int64) bool {
s.LeafHashes = append(s.LeafHashes, c.LeafHashes...)
s.SANs = append(s.SANs, c.SANs...)
+ s.Notes = append(s.Notes, c.Notes...)
}
// Put back the largest in-order chunk we have