aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--README.md2
-rw-r--r--internal/options/options.go10
-rw-r--r--main.go22
3 files changed, 21 insertions, 13 deletions
diff --git a/README.md b/README.md
index 04aafa9..8c9635b 100644
--- a/README.md
+++ b/README.md
@@ -135,6 +135,8 @@ More debug notes:
- My system is not fully utilized wrt. CPU/MEM/BW; an odd thing is that it
seems to work fine to run multiple onion-grab instances as separate
commands, e.g., 3x `-w 280` to get up to ~225 Mbps utilization (max).
+ Added options `-s START` and `-e END` to specify that only lines `[START,
+ END)` should be processed in the input file to make this less clunky.
- Tinkering with options in http.Transport doesn't seem to help.
- Using multiple http.Client doesn't help (e.g., one per worker)
- An odd thing is that after errors, it appears that only DNS is dead. E.g.,
diff --git a/internal/options/options.go b/internal/options/options.go
index b00d3f9..d01ba83 100644
--- a/internal/options/options.go
+++ b/internal/options/options.go
@@ -7,9 +7,10 @@ import (
type Options struct {
// Input file
- InputFile string
- MaxFileBuffer int
- NextLine int64
+ InputFile string
+ MaxFileBuffer int
+ StartLineInclusive int64
+ EndLineExclusive int64
// Website visits
NumWorkers int
@@ -25,7 +26,8 @@ type Options struct {
func Parse() (opts Options) {
flag.StringVar(&opts.InputFile, "i", "", "input file, one domain name per line")
flag.IntVar(&opts.MaxFileBuffer, "b", 512, "max bytes to read from input file at once in MiB")
- flag.Int64Var(&opts.NextLine, "n", 0, "next line to start reading the input file from")
+ flag.Int64Var(&opts.StartLineInclusive, "s", 0, "first line to read in input file, inclusive and zero-based index")
+ flag.Int64Var(&opts.EndLineExclusive, "e", 0, "last line to read in input file, exclusive and zero-based; 0 to disable")
flag.IntVar(&opts.NumWorkers, "w", 2, "number of parallel workers")
flag.DurationVar(&opts.Timeout, "t", 10*time.Second, "timeout for each website visit")
diff --git a/main.go b/main.go
index e4d2027..55e7989 100644
--- a/main.go
+++ b/main.go
@@ -90,11 +90,11 @@ func main() {
log.Printf("INFO: generating work\n")
nextLine, readAll := workGenerator(ctx, opts, fp, questionCh)
if !readAll {
- warn := fmt.Sprintf("only read up until line %d", nextLine)
- if opts.NextLine != 0 {
- warn += fmt.Sprintf(" (line %d relative to start)", nextLine-opts.NextLine)
+ notice := fmt.Sprintf("only read up until line %d", nextLine)
+ if opts.StartLineInclusive != 0 {
+ notice += fmt.Sprintf(" (line %d relative to start)", nextLine-opts.StartLineInclusive)
}
- log.Printf("NOTICE: %s\n", warn)
+ log.Printf("NOTICE: %s\n", notice)
}
}
@@ -218,18 +218,18 @@ func workGenerator(ctx context.Context, opts options.Options, fp *os.File, quest
scanner.Buffer(buf, opts.MaxFileBuffer*1024*1024)
nextLine := int64(0)
- if opts.NextLine > nextLine {
+ if opts.StartLineInclusive > nextLine {
for scanner.Scan() {
+ nextLine++
select {
case <-ctx.Done():
return nextLine, false
default:
}
- if nextLine+1 == opts.NextLine {
+ if nextLine == opts.StartLineInclusive {
break
}
- nextLine++
}
}
@@ -238,8 +238,12 @@ func workGenerator(ctx context.Context, opts options.Options, fp *os.File, quest
startTime := time.Now().Unix()
latestTime := startTime
- latestCount := opts.NextLine
+ latestCount := opts.StartLineInclusive
for scanner.Scan() {
+ if opts.EndLineExclusive > 0 && nextLine == opts.EndLineExclusive {
+ break
+ }
+
select {
case <-ctx.Done():
return nextLine, false
@@ -247,7 +251,7 @@ func workGenerator(ctx context.Context, opts options.Options, fp *os.File, quest
now := time.Now().Unix()
log.Printf("INFO: currently %.1f sites/s, %.1f sites/s since start, at line %d\n",
float64(nextLine-latestCount)/float64(now-latestTime),
- float64(nextLine-opts.NextLine)/float64(now-startTime),
+ float64(nextLine-opts.StartLineInclusive)/float64(now-startTime),
nextLine,
)