aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--cmd_collect.go2
-rw-r--r--main.go6
-rw-r--r--utils_ct.go26
-rw-r--r--utils_housekeep.go2
4 files changed, 31 insertions, 5 deletions
diff --git a/cmd_collect.go b/cmd_collect.go
index 2444d95..4402905 100644
--- a/cmd_collect.go
+++ b/cmd_collect.go
@@ -95,7 +95,7 @@ func collect(opts options) error {
BatchSize: int(opts.BatchSize),
StartIndex: th.TreeSize,
EndIndex: int64(sth.TreeSize),
- ParallelFetch: int(opts.WorkersPerLog),
+ ParallelFetch: maxWorkers(log, opts.WorkersPerLog),
})
if uint64(th.TreeSize) == sth.TreeSize {
metricsCh <- metrics{Description: *log.Description, End: th.TreeSize, Done: true}
diff --git a/main.go b/main.go
index 40c3e82..95ec97b 100644
--- a/main.go
+++ b/main.go
@@ -40,9 +40,9 @@ Help:
Options:
-d, --directory: The ct-sans working directory (Default: "data")
- -w, --workers: Number of parallel download workers per log (Default: 2)
+ -w, --workers: Max number of parallel download workers per log (Default: 2).
-k, --batch-disk: Certificate batch size before persisting (Default: 16384)
- -q, --batch-req: Certificate batch size to use in request (Default: 512)
+ -q, --batch-req: Certificate batch size to use in request (Default: 2048)
-a, --http-agent: HTTP agent to use in all request (Default: "git.cs.kau.se/rasmoste/ct-sans")
-m, --metrics: How often to emit metrics to stderr (Default: 15s)
@@ -79,7 +79,7 @@ func main() {
ctflag.String(&fs, &opts.Directory, "directory", "d", "data")
ctflag.Uint64(&fs, &opts.WorkersPerLog, "workers", "w", 2)
ctflag.Uint64(&fs, &opts.PersistSize, "batch-disk", "k", 16384)
- ctflag.Uint64(&fs, &opts.BatchSize, "batch-req", "q", 512)
+ ctflag.Uint64(&fs, &opts.BatchSize, "batch-req", "q", 2048)
ctflag.String(&fs, &opts.HTTPAgent, "http-agent", "a", "git.cs.kau.se/rasmoste/ct-sans")
ctflag.Duration(&fs, &opts.MetricsInterval, "metrics", "m", 15*time.Second)
diff --git a/utils_ct.go b/utils_ct.go
index 3c40ad0..eed3f6c 100644
--- a/utils_ct.go
+++ b/utils_ct.go
@@ -4,6 +4,7 @@ import (
"crypto/sha256"
"fmt"
"os"
+ "strings"
"gitlab.torproject.org/rgdd/ct/pkg/metadata"
)
@@ -36,6 +37,31 @@ func logs(md metadata.Metadata) (logs []metadata.Log) {
return
}
+// maxWorkers returns the number of parallel fetch workers to use for log: the
+// requested workers, capped for logs that don't appreciate too much parallel
+// fetching (errors), or for which performance is equal or worse.
+//
+// The first pattern found in the log's description decides the cap, and no
+// later pattern is consulted.  Order therefore matters: "Argon" is listed
+// before "Google" so that a description containing both is governed by the
+// Argon limit and is never capped by the lower Google limit.
+//
+// A log without a description, or one matching no pattern, is not capped.
+//
+// Warning: this may be system-dependent, determined "by-hand" on 2023-03-18.
+func maxWorkers(log metadata.Log, workers uint64) int {
+	if log.Description == nil {
+		return int(workers) // nothing to match against; keep the request
+	}
+
+	// Checked in order; the first matching pattern wins.
+	caps := []struct {
+		pattern string
+		limit   int
+	}{
+		{"Argon", 40},
+		{"Google", 16},
+		{"Cloudflare", 4},
+		{"Let's Encrypt", 12},
+		{"Sectigo", 5},
+		{"Trust Asia", 2},
+	}
+	for _, c := range caps {
+		if !strings.Contains(*log.Description, c.pattern) {
+			continue
+		}
+		// Compare as uint64 so a huge request cannot wrap to a negative int
+		// and slip under the limit.
+		if workers > uint64(c.limit) {
+			return c.limit
+		}
+		return int(workers)
+	}
+	return int(workers)
+}
+
// proof formats hashes so that they can be passed to the merkle package
func proof(hashes [][]byte) (p [][sha256.Size]byte) {
for _, hash := range hashes {
diff --git a/utils_housekeep.go b/utils_housekeep.go
index d34292f..7a60b27 100644
--- a/utils_housekeep.go
+++ b/utils_housekeep.go
@@ -23,7 +23,7 @@ type metrics struct {
}
func (m metrics) String() string {
- format := " %32s | %6.1f entries/s | Estimated done in %6.1f hours | Working on [%d, %d)\n"
+ format := " %32s | %6.1f entries/s | Estimated done in %6.2f hours | Working on [%d, %d)\n"
if m.Done {
return fmt.Sprintf(format, m.Description, float64(0), float64(0), m.End, m.End)
}