From ad9fb49670e28414637761bac4b8e8940e2d6770 Mon Sep 17 00:00:00 2001 From: Rasmus Dahlberg Date: Thu, 23 Mar 2023 11:09:54 +0100 Subject: Automate handling of notice file Here's a hacky tool to migrate our ongoing v0.0.1 measurement once it's done. I.e., just split up the NOTICE prints we have in collect.stdout, putting them in per-log notice files, which happens automatically now. ``` // Package main provides a hacky tool that extracts NOTICE: prints // from a file collect.stdout, putting them in the logs data directories as // notice.txt. Only meant to migrate away from v0.0.1 that did not store // per-log notice files automatically, which makes things less error-prone. package main import ( "bytes" "encoding/json" "fmt" logger "log" "os" "strings" "gitlab.torproject.org/rgdd/ct/pkg/metadata" ) func main() { directory := "../data" logDirectory := fmt.Sprintf("%s/logs", directory) noticeFile := "../collect.stdout" b, err := os.ReadFile(fmt.Sprintf("%s/metadata.json", directory)) if err != nil { logger.Fatal(err) } var md metadata.Metadata if err := json.Unmarshal(b, &md); err != nil { logger.Fatal(err) } if b, err = os.ReadFile(noticeFile); err != nil { logger.Fatal(err) } lines := bytes.Split(b, []byte("\n")) for _, log := range logs(md) { id, _ := log.Key.ID() desc := *log.Description var notes []byte var numNotes int for _, line := range lines[:len(lines)-1] { if strings.Contains(string(line), fmt.Sprintf("NOTICE: %s", desc)) { notes = append(notes, line...) notes = append(notes, []byte("\n")...) 
numNotes += 1 } } if len(notes) == 0 { logger.Printf("%s: no notices", desc) continue } logger.Printf("%s: %d notices", desc, numNotes) if err := os.WriteFile(fmt.Sprintf("%s/%x/notice.txt", logDirectory, id[:]), notes, 0644); err != nil { logger.Fatal(err) } } } func logs(md metadata.Metadata) (logs []metadata.Log) { for _, operators := range md.Operators { for _, log := range operators.Logs { if log.Description == nil { logger.Printf("WARNING: skipping log without description") continue } if log.State == nil { continue // skip logs with unknown states } if log.State.Name == metadata.LogStatePending { continue // pending logs do not count towards CT-compliance } if log.State.Name == metadata.LogStateRetired { continue // retired logs are not necessarily reachable } if log.State.Name == metadata.LogStateRejected { continue // rejected logs do not count towards CT-compliance } logs = append(logs, log) } } return } ``` --- internal/chunk/chunk.go | 2 ++ 1 file changed, 2 insertions(+) (limited to 'internal') diff --git a/internal/chunk/chunk.go b/internal/chunk/chunk.go index 7fccc9b..64adc76 100644 --- a/internal/chunk/chunk.go +++ b/internal/chunk/chunk.go @@ -18,6 +18,7 @@ type Chunk struct { Start int64 // index of first leaf in this chunk LeafHashes [][sha256.Size]byte // in-order leaf hashes in this chunk SANs []string // sans of all leaves in this chunk + Notes []string // notes about this chunk, e.g., errors } // ChunkHeap is a min-heap of chunks wrt. to start indices. Use TPush() and @@ -60,6 +61,7 @@ func (h *ChunkHeap) Sequence(start int64) bool { s.LeafHashes = append(s.LeafHashes, c.LeafHashes...) s.SANs = append(s.SANs, c.SANs...) + s.Notes = append(s.Notes, c.Notes...) } // Put back the largest in-order chunk we have -- cgit v1.2.3