about summary refs log tree commit diff
path: root/cmd_assemble.go
diff options
context:
space:
mode:
Diffstat (limited to 'cmd_assemble.go')
-rw-r--r-- cmd_assemble.go 110
1 files changed, 16 insertions, 94 deletions
diff --git a/cmd_assemble.go b/cmd_assemble.go
index f084193..ae6af50 100644
--- a/cmd_assemble.go
+++ b/cmd_assemble.go
@@ -10,10 +10,8 @@ import (
"os"
"os/exec"
"strconv"
- "strings"
"time"
- "git.cs.kau.se/rasmoste/ct-sans/internal/sanitize"
ct "github.com/google/certificate-transparency-go"
"gitlab.torproject.org/rgdd/ct/pkg/metadata"
)
@@ -66,31 +64,8 @@ func assemble(opts options) error {
}
logger.Printf("INFO: created %s (%s)", sansFile, size)
- logger.Printf("INFO: sanitizing SANs to visit")
- visitFile := fmt.Sprintf("%s/%s", archiveDir, opts.visitFile)
- skip, tweak, err := sanitizeSANs(sansFile, visitFile)
- if err != nil {
- return err
- }
- logger.Printf("INFO: ruled out %d lines while sanitizing domains", skip)
- logger.Printf("INFO: tweaked %d domains with \"*.\" prefixes", tweak)
- logger.Printf("INFO: created %s", visitFile)
-
- logger.Printf("INFO: de-duplicating %s with GNU sort", visitFile)
- wipFile := fmt.Sprintf("%s.tmp", visitFile)
- if err := dedup(opts, wipFile, []string{visitFile}); err != nil {
- return err
- }
- if err := os.Rename(wipFile, visitFile); err != nil {
- return err
- }
- if size, err = fileSize(visitFile); err != nil {
- return err
- }
- logger.Printf("INFO: %s is down to %s", visitFile, size)
-
logger.Printf("INFO: adding README")
- readme, err := makeREADME(opts, sths, skip, tweak, now)
+ readme, err := makeREADME(opts, sths, now)
if err != nil {
return err
}
@@ -115,7 +90,7 @@ func assemble(opts options) error {
if err != nil {
return err
}
- if err := os.WriteFile(fmt.Sprintf("%s/sths.json", archiveDir), sthsBytes, 0644); err != nil {
+ if err := os.WriteFile(fmt.Sprintf("%s/%s", archiveDir, opts.sthsFile), sthsBytes, 0644); err != nil {
return err
}
@@ -141,45 +116,7 @@ func dedup(opts options, outputFile string, inputFiles []string) error {
return nil
}
-func sanitizeSANs(sansFile, visitFile string) (int, int, error) {
- fpSANs, err := os.OpenFile(sansFile, os.O_RDONLY, 0644)
- if err != nil {
- return 0, 0, err
- }
- defer fpSANs.Close()
- scanner := bufio.NewScanner(fpSANs)
- max := 128 * 1024 * 1024
- buf := make([]byte, 0, max)
- scanner.Buffer(buf, max)
-
- fpVisit, err := os.OpenFile(visitFile, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
- if err != nil {
- return 0, 0, err
- }
- defer fpVisit.Close()
-
- skip := 0
- tweak := 0
- for scanner.Scan() {
- domain, err := sanitize.SanitizeDNSName(scanner.Text())
- if err != nil {
- skip += 1
- }
- if strings.HasPrefix(domain, "*.") {
- tweak += 1
- domain = domain[2:]
- }
- if _, err := fpVisit.WriteString(domain + "\n"); err != nil {
- return 0, 0, err
- }
- }
- if err := fpVisit.Sync(); err != nil {
- return 0, 0, err
- }
- return skip, tweak, nil
-}
-
-func makeREADME(opts options, sths []ct.SignedTreeHead, skip, tweak int, now time.Time) (string, error) {
+func makeREADME(opts options, sths []ct.SignedTreeHead, now time.Time) (string, error) {
snapshotTime, err := readSnapshotTime(opts)
if err != nil {
return "", err
@@ -202,39 +139,24 @@ func makeREADME(opts options, sths []ct.SignedTreeHead, skip, tweak int, now tim
Dataset assembled at %s. Contents:
- README.md
- - metadata.json
- - metadata.sig
- - sths.json
- - sans.lst (see below)
- - visit.lst (see below)
- - notice.txt (see below)
+ - %s
+ - %s
+ - %s
+ - %s
+ - %s
-The signed [metadata file][] and tree heads were downloaded at %s.
+The signed [metadata file][] and tree heads were downloaded at
+%s.
[metadata file]: https://groups.google.com/a/chromium.org/g/ct-policy/c/IdbrdAcDQto
-## sans.lst
-
-The result of downloading %d certificates from %d CT logs.
-One SAN per line. These lines are sorted and de-duplicated.
-
-While downloading, %s certificates contained SANs that could not be parsed.
-See %s for the exact details.
-
-## visit.lst
-
-A rewrite of the sans.lst dataset to be more suitable for visits:
-
- - Leading and trailing white space is removed
- - Trailing dots are removed
- - The prefixes "http://" and "https://" are removed
- - Lines that contain non-printable ASCII characters are dropped. The
- definition of a printable ascii character is '\t' and numerical values
- 32-126. In total, %d lines were dropped.
- - Make wildcard domains into normal domains by removing the first two bytes.
- In total, %d lines were rewritten like this.
+In total, %d certificates were downloaded from %d CT logs;
+%s certificates contained SANs that could not be parsed.
+For more information about these errors, see %s.
-`, now.Format(time.UnixDate), snapshotTime.Format(time.UnixDate), numCertificates(sths), len(sths), notice, noticeFile, skip, tweak), nil
+The SANs data set is sorted and de-duplicated, one SAN per line.
+`, now.Format(time.UnixDate), opts.metadataFile, opts.metadataSignatureFile, opts.sthsFile, opts.sansFile, opts.noticeFile,
+ snapshotTime.Format(time.UnixDate), numCertificates(sths), len(sths), notice, noticeFile), nil
}
func fileSize(name string) (string, error) {