From f14b185dfe1fed4fdc951b91f3338359538c4832 Mon Sep 17 00:00:00 2001 From: Rasmus Dahlberg Date: Sun, 26 Mar 2023 18:31:10 +0200 Subject: Add option to specify [START,END) lines --- README.md | 2 ++ internal/options/options.go | 10 ++++++---- main.go | 22 +++++++++++++--------- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 04aafa9..8c9635b 100644 --- a/README.md +++ b/README.md @@ -135,6 +135,8 @@ More debug notes: - My system is not fully utilized wrt. CPU/MEM/BW; an odd thing is that it seems to work fine to run multiple onion-grab instances as separate commands, e.g., 3x `-w 280` to get up to ~225 Mbps utilization (max). + Added options `-s START` and `-e END` to specify that only lines `[START, + END)` should be processed in the input file to make this less clunky. - Tinkering with options in http.Transport doesn't seem to help. - Using multiple http.Client doesn't help (e.g., one per worker) - An odd thing is that after errors, it appears that only DNS is dead. 
E.g., diff --git a/internal/options/options.go b/internal/options/options.go index b00d3f9..d01ba83 100644 --- a/internal/options/options.go +++ b/internal/options/options.go @@ -7,9 +7,10 @@ import ( type Options struct { // Input file - InputFile string - MaxFileBuffer int - NextLine int64 + InputFile string + MaxFileBuffer int + StartLineInclusive int64 + EndLineExclusive int64 // Website visits NumWorkers int @@ -25,7 +26,8 @@ type Options struct { func Parse() (opts Options) { flag.StringVar(&opts.InputFile, "i", "", "input file, one domain name per line") flag.IntVar(&opts.MaxFileBuffer, "b", 512, "max bytes to read from input file at once in MiB") - flag.Int64Var(&opts.NextLine, "n", 0, "next line to start reading the input file from") + flag.Int64Var(&opts.StartLineInclusive, "s", 0, "first line to read in input file, inclusive and zero-based index") + flag.Int64Var(&opts.EndLineExclusive, "e", 0, "last line to read in input file, exclusive and zero-based; 0 to disable") flag.IntVar(&opts.NumWorkers, "w", 2, "number of parallel workers") flag.DurationVar(&opts.Timeout, "t", 10*time.Second, "timeout for each website visit") diff --git a/main.go b/main.go index e4d2027..55e7989 100644 --- a/main.go +++ b/main.go @@ -90,11 +90,11 @@ func main() { log.Printf("INFO: generating work\n") nextLine, readAll := workGenerator(ctx, opts, fp, questionCh) if !readAll { - warn := fmt.Sprintf("only read up until line %d", nextLine) - if opts.NextLine != 0 { - warn += fmt.Sprintf(" (line %d relative to start)", nextLine-opts.NextLine) + notice := fmt.Sprintf("only read up until line %d", nextLine) + if opts.StartLineInclusive != 0 { + notice += fmt.Sprintf(" (line %d relative to start)", nextLine-opts.StartLineInclusive) } - log.Printf("NOTICE: %s\n", warn) + log.Printf("NOTICE: %s\n", notice) } } @@ -218,18 +218,18 @@ func workGenerator(ctx context.Context, opts options.Options, fp *os.File, quest scanner.Buffer(buf, opts.MaxFileBuffer*1024*1024) nextLine := int64(0) - if 
opts.NextLine > nextLine { + if opts.StartLineInclusive > nextLine { for scanner.Scan() { + nextLine++ select { case <-ctx.Done(): return nextLine, false default: } - if nextLine+1 == opts.NextLine { + if nextLine == opts.StartLineInclusive { break } - nextLine++ } } @@ -238,8 +238,12 @@ func workGenerator(ctx context.Context, opts options.Options, fp *os.File, quest startTime := time.Now().Unix() latestTime := startTime - latestCount := opts.NextLine + latestCount := opts.StartLineInclusive for scanner.Scan() { + if opts.EndLineExclusive > 0 && nextLine == opts.EndLineExclusive { + break + } + select { case <-ctx.Done(): return nextLine, false @@ -247,7 +251,7 @@ func workGenerator(ctx context.Context, opts options.Options, fp *os.File, quest now := time.Now().Unix() log.Printf("INFO: currently %.1f sites/s, %.1f sites/s since start, at line %d\n", float64(nextLine-latestCount)/float64(now-latestTime), - float64(nextLine-opts.NextLine)/float64(now-startTime), + float64(nextLine-opts.StartLineInclusive)/float64(now-startTime), nextLine, ) -- cgit v1.2.3