From 5c12f6ade6ebd444f05175a658e6b70bdde5046f Mon Sep 17 00:00:00 2001 From: Rasmus Dahlberg Date: Sat, 18 Mar 2023 16:55:50 +0000 Subject: Tune number of workers --- cmd_collect.go | 2 +- main.go | 6 +++--- utils_ct.go | 26 ++++++++++++++++++++++++++ utils_housekeep.go | 2 +- 4 files changed, 31 insertions(+), 5 deletions(-) diff --git a/cmd_collect.go b/cmd_collect.go index 2444d95..4402905 100644 --- a/cmd_collect.go +++ b/cmd_collect.go @@ -95,7 +95,7 @@ func collect(opts options) error { BatchSize: int(opts.BatchSize), StartIndex: th.TreeSize, EndIndex: int64(sth.TreeSize), - ParallelFetch: int(opts.WorkersPerLog), + ParallelFetch: maxWorkers(log, opts.WorkersPerLog), }) if uint64(th.TreeSize) == sth.TreeSize { metricsCh <- metrics{Description: *log.Description, End: th.TreeSize, Done: true} diff --git a/main.go b/main.go index 40c3e82..95ec97b 100644 --- a/main.go +++ b/main.go @@ -40,9 +40,9 @@ Help: Options: -d, --directory: The ct-sans working directory (Default: "data") - -w, --workers: Number of parallel download workers per log (Default: 2) + -w, --workers: Max number of parallel download workers per log (Default: 2). -k, --batch-disk: Certificate batch size before persisting (Default: 16384) - -q, --batch-req: Certificate batch size to use in request (Default: 512) + -q, --batch-req: Certificate batch size to use in request (Default: 2048) -a, --http-agent: HTTP agent to use in all request (Default: "git.cs.kau.se/rasmoste/ct-sans") -m, --metrics: How often to emit metrics to stderr (Default: 15s) @@ -79,7 +79,7 @@ func main() { ctflag.String(&fs, &opts.Directory, "directory", "d", "data") ctflag.Uint64(&fs, &opts.WorkersPerLog, "workers", "w", 2) ctflag.Uint64(&fs, &opts.PersistSize, "batch-disk", "k", 16384) - ctflag.Uint64(&fs, &opts.BatchSize, "batch-req", "q", 512) + ctflag.Uint64(&fs, &opts.BatchSize, "batch-req", "q", 2048) ctflag.String(&fs, &opts.HTTPAgent, "http-agent", "a", "git.cs.kau.se/rasmoste/ct-sans") ctflag.Duration(&fs, &opts.MetricsInterval, "metrics", "m", 15*time.Second) diff --git a/utils_ct.go b/utils_ct.go index 3c40ad0..eed3f6c 100644 --- a/utils_ct.go +++ b/utils_ct.go @@ -4,6 +4,7 @@ import ( "crypto/sha256" "fmt" "os" + "strings" "gitlab.torproject.org/rgdd/ct/pkg/metadata" ) @@ -36,6 +37,31 @@ func logs(md metadata.Metadata) (logs []metadata.Log) { return } +// maxWorkers reduces the number of workers for logs that don't appreciate too +// much parallel fetching (errors), or for which performance is equal or worse. +// Warning: this may be system-dependent, determined "by-hand" on 2023-03-18. +func maxWorkers(log metadata.Log, workers uint64) int { + if max := 40; strings.Contains(*log.Description, "Argon") && int(workers) > max { + return max + } + if max := 16; strings.Contains(*log.Description, "Google") && int(workers) > max { + return max + } + if max := 4; strings.Contains(*log.Description, "Cloudflare") && int(workers) > max { + return max + } + if max := 12; strings.Contains(*log.Description, "Let's Encrypt") && int(workers) > max { + return max + } + if max := 5; strings.Contains(*log.Description, "Sectigo") && int(workers) > max { + return max + } + if max := 2; strings.Contains(*log.Description, "Trust Asia") && int(workers) > max { + return max + } + return int(workers) +} + // proof formats hashes so that they can be passed to the merkle package func proof(hashes [][]byte) (p [][sha256.Size]byte) { for _, hash := range hashes { diff --git a/utils_housekeep.go b/utils_housekeep.go index d34292f..7a60b27 100644 --- a/utils_housekeep.go +++ b/utils_housekeep.go @@ -23,7 +23,7 @@ type metrics struct { } func (m metrics) String() string { - format := " %32s | %6.1f entries/s | Estimated done in %6.1f hours | Working on [%d, %d)\n" + format := " %32s | %6.1f entries/s | Estimated done in %6.2f hours | Working on [%d, %d)\n" if m.Done { return fmt.Sprintf(format, m.Description, float64(0), float64(0), m.End, m.End) } -- cgit v1.2.3