aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRasmus Dahlberg <rasmus@rgdd.se>2023-03-23 11:09:54 +0100
committerRasmus Dahlberg <rasmus@rgdd.se>2023-03-23 11:58:16 +0100
commitad9fb49670e28414637761bac4b8e8940e2d6770 (patch)
tree920b0d7a266dbeed0ff0aa442752adb60d585748
parent38df474cf30b0b1d077c8d53b353a859af99c7d6 (diff)
Automate handling of notice file
Here's a hacky tool to migrate our ongoing v0.0.1 measurement once it's done. I.e., just split-up the NOTICE prints we have in collect.stdout, putting them in per-log notice files that happens automatically now. ``` // Package main provides a hacky tool that extracts NOTICE: <log desc> prints // from a file collect.stdout, putting them in the logs data directories as // notice.txt. Only meant to migrate away from v0.0.1 that did not store // per-log notice files automatically, which makes things less error-prone. package main import ( "bytes" "encoding/json" "fmt" logger "log" "os" "strings" "gitlab.torproject.org/rgdd/ct/pkg/metadata" ) func main() { directory := "../data" logDirectory := fmt.Sprintf("%s/logs", directory) noticeFile := "../collect.stdout" b, err := os.ReadFile(fmt.Sprintf("%s/metadata.json", directory)) if err != nil { logger.Fatal(err) } var md metadata.Metadata if err := json.Unmarshal(b, &md); err != nil { logger.Fatal(err) } if b, err = os.ReadFile(noticeFile); err != nil { logger.Fatal(err) } lines := bytes.Split(b, []byte("\n")) for _, log := range logs(md) { id, _ := log.Key.ID() desc := *log.Description var notes []byte var numNotes int for _, line := range lines[:len(lines)-1] { if strings.Contains(string(line), fmt.Sprintf("NOTICE: %s", desc)) { notes = append(notes, line...) notes = append(notes, []byte("\n")...) numNotes += 1 } } if len(notes) == 0 { logger.Printf("%s: no notices", desc) continue } logger.Printf("%s: %d notices", desc, numNotes) if err := os.WriteFile(fmt.Sprintf("%s/%x/notice.txt", logDirectory, id[:]), notes, 0644); err != nil { logger.Fatal(err) } } } func logs(md metadata.Metadata) (logs []metadata.Log) { for _, operators := range md.Operators { for _, log := range operators.Logs { if log.Description == nil { logger.Printf("WARNING: skipping log without description") continue } if log.State == nil { continue // skip logs with unknown states } if log.State.Name == metadata.LogStatePending { continue // pending logs do not count towards CT-compliance } if log.State.Name == metadata.LogStateRetired { continue // retired logs are not necessarily reachable } if log.State.Name == metadata.LogStateRejected { continue // rejected logs do not count towards CT-compliance } logs = append(logs, log) } } return } ```
-rw-r--r--cmd_assemble.go51
-rw-r--r--cmd_collect.go29
-rw-r--r--internal/chunk/chunk.go2
3 files changed, 54 insertions, 28 deletions
diff --git a/cmd_assemble.go b/cmd_assemble.go
index ae6af50..69a7173 100644
--- a/cmd_assemble.go
+++ b/cmd_assemble.go
@@ -26,7 +26,8 @@ func assemble(opts options) error {
if err := json.Unmarshal(metadataBytes, &md); err != nil {
return err
}
- var files []string
+ var sanFiles []string
+ var noticeFiles []string
var sths []ct.SignedTreeHead
for _, log := range logs(md) {
id, _ := log.Key.ID()
@@ -45,17 +46,18 @@ func assemble(opts options) error {
return fmt.Errorf("%s: root hash mismatch")
}
- files = append(files, fmt.Sprintf("%s/%x/%s", opts.logDirectory, id[:], opts.sansFile))
+ sanFiles = append(sanFiles, fmt.Sprintf("%s/%x/%s", opts.logDirectory, id[:], opts.sansFile))
+ noticeFiles = append(noticeFiles, fmt.Sprintf("%s/%x/%s", opts.logDirectory, id[:], opts.noticeFile))
sths = append(sths, sth)
}
- logger.Printf("INFO: merging and de-duplicating %d input files with GNU sort", len(files))
+ logger.Printf("INFO: merging and de-duplicating %d input files with GNU sort", len(sanFiles))
archiveDir := fmt.Sprintf("%s/%s-ct-sans", opts.archiveDirectory, now.Format("2006-01-02"))
if err := os.MkdirAll(archiveDir, os.ModePerm); err != nil {
return err
}
sansFile := fmt.Sprintf("%s/%s", archiveDir, opts.sansFile)
- if err := dedup(opts, sansFile, files); err != nil {
+ if err := dedup(opts, sansFile, sanFiles); err != nil {
return err
}
size, err := fileSize(sansFile)
@@ -64,8 +66,26 @@ func assemble(opts options) error {
}
logger.Printf("INFO: created %s (%s)", sansFile, size)
+ logger.Printf("INFO: adding notice file")
+ var notes []byte
+ for _, noticeFile := range noticeFiles {
+ b, err := os.ReadFile(noticeFile)
+ if errors.Is(err, os.ErrNotExist) {
+ continue // no notes, great
+ } else if err != nil {
+ return err
+ }
+
+ notes = append(notes, b...)
+ }
+ if err := os.WriteFile(fmt.Sprintf("%s/%s", archiveDir, opts.noticeFile), notes, 0644); err != nil {
+ return err
+ }
+ numNotes := len(bytes.Split(notes, []byte("\n"))) - 1
+ logger.Printf("INFO: %d notes in total\n", numNotes)
+
logger.Printf("INFO: adding README")
- readme, err := makeREADME(opts, sths, now)
+ readme, err := makeREADME(opts, sths, numNotes, now)
if err != nil {
return err
}
@@ -116,24 +136,11 @@ func dedup(opts options, outputFile string, inputFiles []string) error {
return nil
}
-func makeREADME(opts options, sths []ct.SignedTreeHead, now time.Time) (string, error) {
+func makeREADME(opts options, sths []ct.SignedTreeHead, numNotes int, now time.Time) (string, error) {
snapshotTime, err := readSnapshotTime(opts)
if err != nil {
return "", err
}
-
- noticeFile := opts.Directory + "/" + opts.noticeFile
- notice, err := noticeReport(noticeFile)
- if err != nil {
- // TODO: start writing notice prints to a separate file in data/
- // by default, then make this a hard error. This needs to be
- // done manually now by grepping for NOTICE in collect.stdout.
- logger.Printf("WARNING: could not find notice file, skipping")
- notice = "UNKNOWN"
- } else {
- // TODO: save notice file
- }
-
return fmt.Sprintf(`# ct-sans dataset
Dataset assembled at %s. Contents:
@@ -151,12 +158,12 @@ The signed [metadata file][] and tree heads were downloaded at
[metadata file]: https://groups.google.com/a/chromium.org/g/ct-policy/c/IdbrdAcDQto
In total, %d certificates were downloaded from %d CT logs;
-%s certificates contained SANs that could not be parsed.
+%d certificates contained SANs that could not be parsed.
For more information about these errors, see %s.
The SANs data set is sorted and de-duplicated, one SAN per line.
-`, now.Format(time.UnixDate), opts.metadataFile, opts.metadataSignatureFile, opts.sthsFile, opts.sansFile, opts.noticeFile,
- snapshotTime.Format(time.UnixDate), numCertificates(sths), len(sths), notice, noticeFile), nil
+`, now.Format(time.UnixDate), opts.metadataFile, opts.metadataSignatureFile, opts.sthsFile, opts.noticeFile, opts.sansFile,
+ snapshotTime.Format(time.UnixDate), numCertificates(sths), len(sths), numNotes, opts.noticeFile), nil
}
func fileSize(name string) (string, error) {
diff --git a/cmd_collect.go b/cmd_collect.go
index 4d93271..742884a 100644
--- a/cmd_collect.go
+++ b/cmd_collect.go
@@ -3,7 +3,6 @@ package main
import (
"container/heap"
"context"
- "crypto/sha256"
"encoding/json"
"fmt"
logger "log"
@@ -121,15 +120,18 @@ func collect(opts options) error {
// chunk that a single sequencer can verify and persist
//
callback := func(eb scanner.EntryBatch) {
- leafHashes := [][sha256.Size]byte{}
+ c := &chunk.Chunk{Start: eb.Start}
for i := 0; i < len(eb.Entries); i++ {
- leafHashes = append(leafHashes, merkle.HashLeafNode(eb.Entries[i].LeafInput))
+ c.LeafHashes = append(c.LeafHashes, merkle.HashLeafNode(eb.Entries[i].LeafInput))
}
- sans, errs := x509.SANsFromLeafEntries(eb.Start, eb.Entries)
+
+ var errs []error
+ c.SANs, errs = x509.SANsFromLeafEntries(eb.Start, eb.Entries)
for _, err := range errs {
- logger.Printf("NOTICE: %s: %v", *log.Description, err)
+ c.Notes = append(c.Notes, fmt.Sprintf("NOTICE: %s: %v", *log.Description, err))
}
- chunksCh <- &chunk.Chunk{eb.Start, leafHashes, sans}
+
+ chunksCh <- c
}
if err := fetcher.Run(ctx, callback); err != nil {
@@ -275,6 +277,21 @@ func persist(c *chunk.Chunk,
return false, err
}
+ // Persist notes to disk
+ if len(c.Notes) > 0 {
+ fpn, err := os.OpenFile(fmt.Sprintf("%s/%x/%s", opts.logDirectory, logID, opts.noticeFile), os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0644)
+ if err != nil {
+ return false, err
+ }
+ defer fpn.Close()
+ if _, err := fpn.WriteString(strings.Join(c.Notes, "\n") + "\n"); err != nil {
+ return false, err
+ }
+ if err := fpn.Sync(); err != nil {
+ return false, err
+ }
+ }
+
// Persist new tree state to disk
b, err := json.Marshal(&newTH)
if err != nil {
diff --git a/internal/chunk/chunk.go b/internal/chunk/chunk.go
index 7fccc9b..64adc76 100644
--- a/internal/chunk/chunk.go
+++ b/internal/chunk/chunk.go
@@ -18,6 +18,7 @@ type Chunk struct {
Start int64 // index of first leaf in this chunk
LeafHashes [][sha256.Size]byte // in-order leaf hashes in this chunk
SANs []string // sans of all leaves in this chunk
+ Notes []string // notes about this chunk, e.g., errors
}
// ChunkHeap is a min-heap of chunks wrt. to start indices. Use TPush() and
@@ -60,6 +61,7 @@ func (h *ChunkHeap) Sequence(start int64) bool {
s.LeafHashes = append(s.LeafHashes, c.LeafHashes...)
s.SANs = append(s.SANs, c.SANs...)
+ s.Notes = append(s.Notes, c.Notes...)
}
// Put back the largest in-order chunk we have