aboutsummaryrefslogtreecommitdiff
path: root/cmd_assemble.go
diff options
context:
space:
mode:
authorRasmus Dahlberg <rasmus@rgdd.se>2023-03-23 11:09:54 +0100
committerRasmus Dahlberg <rasmus@rgdd.se>2023-03-23 11:58:16 +0100
commitad9fb49670e28414637761bac4b8e8940e2d6770 (patch)
tree920b0d7a266dbeed0ff0aa442752adb60d585748 /cmd_assemble.go
parent38df474cf30b0b1d077c8d53b353a859af99c7d6 (diff)
Automate handling of notice file
Here's a hacky tool to migrate our ongoing v0.0.1 measurement once it's done. I.e., just split up the NOTICE prints we have in collect.stdout, putting them in per-log notice files, which now happens automatically. ``` // Package main provides a hacky tool that extracts NOTICE: <log desc> prints // from a file collect.stdout, putting them in the logs data directories as // notice.txt. Only meant to migrate away from v0.0.1 that did not store // per-log notice files automatically, which makes things less error-prone. package main import ( "bytes" "encoding/json" "fmt" logger "log" "os" "strings" "gitlab.torproject.org/rgdd/ct/pkg/metadata" ) func main() { directory := "../data" logDirectory := fmt.Sprintf("%s/logs", directory) noticeFile := "../collect.stdout" b, err := os.ReadFile(fmt.Sprintf("%s/metadata.json", directory)) if err != nil { logger.Fatal(err) } var md metadata.Metadata if err := json.Unmarshal(b, &md); err != nil { logger.Fatal(err) } if b, err = os.ReadFile(noticeFile); err != nil { logger.Fatal(err) } lines := bytes.Split(b, []byte("\n")) for _, log := range logs(md) { id, _ := log.Key.ID() desc := *log.Description var notes []byte var numNotes int for _, line := range lines[:len(lines)-1] { if strings.Contains(string(line), fmt.Sprintf("NOTICE: %s", desc)) { notes = append(notes, line...) notes = append(notes, []byte("\n")...) 
numNotes += 1 } } if len(notes) == 0 { logger.Printf("%s: no notices", desc) continue } logger.Printf("%s: %d notices", desc, numNotes) if err := os.WriteFile(fmt.Sprintf("%s/%x/notice.txt", logDirectory, id[:]), notes, 0644); err != nil { logger.Fatal(err) } } } func logs(md metadata.Metadata) (logs []metadata.Log) { for _, operators := range md.Operators { for _, log := range operators.Logs { if log.Description == nil { logger.Printf("WARNING: skipping log without description") continue } if log.State == nil { continue // skip logs with unknown states } if log.State.Name == metadata.LogStatePending { continue // pending logs do not count towards CT-compliance } if log.State.Name == metadata.LogStateRetired { continue // retired logs are not necessarily reachable } if log.State.Name == metadata.LogStateRejected { continue // rejected logs do not count towards CT-compliance } logs = append(logs, log) } } return } ```
Diffstat (limited to 'cmd_assemble.go')
-rw-r--r--cmd_assemble.go51
1 files changed, 29 insertions, 22 deletions
diff --git a/cmd_assemble.go b/cmd_assemble.go
index ae6af50..69a7173 100644
--- a/cmd_assemble.go
+++ b/cmd_assemble.go
@@ -26,7 +26,8 @@ func assemble(opts options) error {
if err := json.Unmarshal(metadataBytes, &md); err != nil {
return err
}
- var files []string
+ var sanFiles []string
+ var noticeFiles []string
var sths []ct.SignedTreeHead
for _, log := range logs(md) {
id, _ := log.Key.ID()
@@ -45,17 +46,18 @@ func assemble(opts options) error {
return fmt.Errorf("%s: root hash mismatch")
}
- files = append(files, fmt.Sprintf("%s/%x/%s", opts.logDirectory, id[:], opts.sansFile))
+ sanFiles = append(sanFiles, fmt.Sprintf("%s/%x/%s", opts.logDirectory, id[:], opts.sansFile))
+ noticeFiles = append(noticeFiles, fmt.Sprintf("%s/%x/%s", opts.logDirectory, id[:], opts.noticeFile))
sths = append(sths, sth)
}
- logger.Printf("INFO: merging and de-duplicating %d input files with GNU sort", len(files))
+ logger.Printf("INFO: merging and de-duplicating %d input files with GNU sort", len(sanFiles))
archiveDir := fmt.Sprintf("%s/%s-ct-sans", opts.archiveDirectory, now.Format("2006-01-02"))
if err := os.MkdirAll(archiveDir, os.ModePerm); err != nil {
return err
}
sansFile := fmt.Sprintf("%s/%s", archiveDir, opts.sansFile)
- if err := dedup(opts, sansFile, files); err != nil {
+ if err := dedup(opts, sansFile, sanFiles); err != nil {
return err
}
size, err := fileSize(sansFile)
@@ -64,8 +66,26 @@ func assemble(opts options) error {
}
logger.Printf("INFO: created %s (%s)", sansFile, size)
+ logger.Printf("INFO: adding notice file")
+ var notes []byte
+ for _, noticeFile := range noticeFiles {
+ b, err := os.ReadFile(noticeFile)
+ if errors.Is(err, os.ErrNotExist) {
+ continue // no notes, great
+ } else if err != nil {
+ return err
+ }
+
+ notes = append(notes, b...)
+ }
+ if err := os.WriteFile(fmt.Sprintf("%s/%s", archiveDir, opts.noticeFile), notes, 0644); err != nil {
+ return err
+ }
+ numNotes := len(bytes.Split(notes, []byte("\n"))) - 1
+ logger.Printf("INFO: %d notes in total\n", numNotes)
+
logger.Printf("INFO: adding README")
- readme, err := makeREADME(opts, sths, now)
+ readme, err := makeREADME(opts, sths, numNotes, now)
if err != nil {
return err
}
@@ -116,24 +136,11 @@ func dedup(opts options, outputFile string, inputFiles []string) error {
return nil
}
-func makeREADME(opts options, sths []ct.SignedTreeHead, now time.Time) (string, error) {
+func makeREADME(opts options, sths []ct.SignedTreeHead, numNotes int, now time.Time) (string, error) {
snapshotTime, err := readSnapshotTime(opts)
if err != nil {
return "", err
}
-
- noticeFile := opts.Directory + "/" + opts.noticeFile
- notice, err := noticeReport(noticeFile)
- if err != nil {
- // TODO: start writing notice prints to a separate file in data/
- // by default, then make this a hard error. This needs to be
- // done manually now by grepping for NOTICE in collect.stdout.
- logger.Printf("WARNING: could not find notice file, skipping")
- notice = "UNKNOWN"
- } else {
- // TODO: save notice file
- }
-
return fmt.Sprintf(`# ct-sans dataset
Dataset assembled at %s. Contents:
@@ -151,12 +158,12 @@ The signed [metadata file][] and tree heads were downloaded at
[metadata file]: https://groups.google.com/a/chromium.org/g/ct-policy/c/IdbrdAcDQto
In total, %d certificates were downloaded from %d CT logs;
-%s certificates contained SANs that could not be parsed.
+%d certificates contained SANs that could not be parsed.
For more information about these errors, see %s.
The SANs data set is sorted and de-duplicated, one SAN per line.
-`, now.Format(time.UnixDate), opts.metadataFile, opts.metadataSignatureFile, opts.sthsFile, opts.sansFile, opts.noticeFile,
- snapshotTime.Format(time.UnixDate), numCertificates(sths), len(sths), notice, noticeFile), nil
+`, now.Format(time.UnixDate), opts.metadataFile, opts.metadataSignatureFile, opts.sthsFile, opts.noticeFile, opts.sansFile,
+ snapshotTime.Format(time.UnixDate), numCertificates(sths), len(sths), numNotes, opts.noticeFile), nil
}
func fileSize(name string) (string, error) {