From af5be4fbd0c3824478b9cfc261b7a34e98a36e0f Mon Sep 17 00:00:00 2001 From: Rasmus Dahlberg Date: Sat, 25 Mar 2023 14:11:29 +0100 Subject: Clean-up skeleton --- go.mod | 2 +- internal/options/options.go | 24 ++++-- internal/qna/qna.go | 6 +- lists/small.lst | 4 +- main.go | 176 ++++++++++++++++++++++++-------------------- 5 files changed, 119 insertions(+), 93 deletions(-) diff --git a/go.mod b/go.mod index ea65ea5..8db4fb6 100644 --- a/go.mod +++ b/go.mod @@ -1,4 +1,4 @@ -module git.cs.kau.se/rasmoste/find-onion +module git.cs.kau.se/rasmoste/onion-grab go 1.19 diff --git a/internal/options/options.go b/internal/options/options.go index 8e10686..c988713 100644 --- a/internal/options/options.go +++ b/internal/options/options.go @@ -6,19 +6,31 @@ import ( ) type Options struct { - InputFile string - NumWorkers int + // Input file + InputFile string + MaxFileBuffer int + NextLine int64 + + // Website visits + NumWorkers int + Timeout time.Duration + MaxResponse int64 + + // Health and metrics MetricsInterval time.Duration - Timeout time.Duration - MaxResponse int64 } func Parse() (opts Options) { flag.StringVar(&opts.InputFile, "i", "lists/small.lst", "input file, one domain name per line") - flag.IntVar(&opts.NumWorkers, "w", 10, "number of parallel workers") + flag.IntVar(&opts.MaxFileBuffer, "b", 512, "max bytes to read from input file at once in MiB") + flag.Int64Var(&opts.NextLine, "n", 0, "next line to start reading the input file from") + + flag.IntVar(&opts.NumWorkers, "w", 2, "number of parallel workers") flag.DurationVar(&opts.Timeout, "t", 10*time.Second, "timeout for each website visit") + flag.Int64Var(&opts.MaxResponse, "r", 128, "max response body size to accept in MiB") + flag.DurationVar(&opts.MetricsInterval, "m", 5*time.Second, "how often to emit metrics") - flag.Int64Var(&opts.MaxResponse, "r", 128*1024*1024, "max response body size to accept") + flag.Parse() return } diff --git a/internal/qna/qna.go b/internal/qna/qna.go index 5336811..bd2078d 100644 --- a/internal/qna/qna.go +++ b/internal/qna/qna.go @@ -7,8 +7,6 @@ type Question struct { type Answer struct { Domain string // domain name of the visited HTTPS site OK bool // true if HTTP GET request succeeded - - HTTP bool // true if onion location was found via HTTP header - HTML bool // true if onion location was found via HTML attribute - Onion string // the site's onion location URL (if any) + HTTP string // value set in the Onion-Location HTTP header (if any) + HTML string // value set in the Onion-Location HTML attribute (if any) } diff --git a/lists/small.lst b/lists/small.lst index f161918..ea1b9f7 100644 --- a/lists/small.lst +++ b/lists/small.lst @@ -1,5 +1,5 @@ -blog.torproject.org www.eff.org www.kau.se -www.torproject.org www.nytimes.com +www.torproject.org +www.qubes-os.org diff --git a/main.go b/main.go index 0efac30..f04ea17 100644 --- a/main.go +++ b/main.go @@ -1,3 +1,13 @@ +// Package main provides onion-grab, a tool that visits a list of domains +// concurrently over HTTPS to see if they have Onion-Location configured. +// +// Install: +// +// $ go install git.cs.kau.se/rasmoste/onion-grab@latest +// +// Usage: +// +// $ onion-grab -h package main import ( @@ -12,9 +22,9 @@ import ( "syscall" "time" - "git.cs.kau.se/rasmoste/find-onion/internal/onionloc" - "git.cs.kau.se/rasmoste/find-onion/internal/options" - "git.cs.kau.se/rasmoste/find-onion/internal/qna" + "git.cs.kau.se/rasmoste/onion-grab/internal/onionloc" + "git.cs.kau.se/rasmoste/onion-grab/internal/options" + "git.cs.kau.se/rasmoste/onion-grab/internal/qna" ) func main() { @@ -22,19 +32,28 @@ func main() { cli := &http.Client{ Transport: &http.Transport{ DisableKeepAlives: true, - MaxResponseHeaderBytes: opts.MaxResponse, + MaxResponseHeaderBytes: opts.MaxResponse * 1024 * 1024, }, } - questionCh := make(chan qna.Question, 2*opts.NumWorkers) + fp, err := os.OpenFile(opts.InputFile, os.O_RDONLY, 0644) + if err != nil { + log.Printf("ERROR: %v", err) + os.Exit(1) + } + defer fp.Close() + + questionCh := make(chan qna.Question) defer close(questionCh) - answerCh := make(chan qna.Answer, 2*opts.NumWorkers) + answerCh := make(chan qna.Answer) defer close(answerCh) var wg sync.WaitGroup defer wg.Wait() + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() log.Printf("INFO: starting await handler, ctrl+C to exit\n") go func() { @@ -56,29 +75,17 @@ func main() { go func() { wg.Add(1) defer wg.Done() - workAggregator(ctx, cancel, opts, answerCh) + workAggregator(ctx, opts, answerCh) }() log.Printf("INFO: generating work\n") - workGenerator(ctx, cancel, opts, questionCh) - time.Sleep(time.Second) - - defer cancel() - defer time.Sleep(2 * opts.Timeout) - defer log.Printf("INFO: about to exit in %v", 2*opts.Timeout) - for { - select { - case <-ctx.Done(): - log.Printf("INFO: context cancelled") - return - case <-time.After(time.Second): + nextLine, readAll := workGenerator(ctx, opts, fp, questionCh) + if !readAll { + warn := fmt.Sprintf("only read up until line %d", nextLine) + if opts.NextLine != 0 { + warn += fmt.Sprintf(" (line %d relative to start)", nextLine-opts.NextLine) } - - numMessages := len(questionCh) + len(answerCh) - if numMessages == 0 { - return - } - log.Printf("DEBUG: waiting for %d messages to be processed before exit", numMessages) + log.Printf("WARNING: %s\n", warn) } } @@ -89,9 +96,9 @@ func await(ctx context.Context, cancel context.CancelFunc) { signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM) select { case <-sigs: - cancel() case <-ctx.Done(): } + cancel() } func workHandler(ctx context.Context, opts options.Options, cli *http.Client, questionCh chan qna.Question, answerCh chan qna.Answer) { @@ -109,96 +116,105 @@ func workHandler(ctx context.Context, opts options.Options, cli *http.Client, qu return } + answer := qna.Answer{Domain: question.Domain} rsp, err := cli.Do(req) if err != nil { - answerCh <- qna.Answer{question.Domain, false, false, false, ""} + answerCh <- answer return } defer rsp.Body.Close() + answer.OK = true - v, ok := onionloc.HTTP(rsp) + onion, ok := onionloc.HTTP(rsp) if ok { - answerCh <- qna.Answer{question.Domain, true, true, false, v} - return + answer.HTTP = onion } - v, ok = onionloc.HTML(rsp) + onion, ok = onionloc.HTML(rsp) if ok { - answerCh <- qna.Answer{question.Domain, true, false, true, v} - return + answer.HTML = onion } - answerCh <- qna.Answer{question.Domain, true, false, false, ""} + answerCh <- answer }() } } } -func workAggregator(ctx context.Context, _ context.CancelFunc, opts options.Options, answerCh chan qna.Answer) { - ticker := time.NewTicker(opts.MetricsInterval) - defer ticker.Stop() - - numConnected := 0 - numOnionLocation := 0 - numVisits := 0 +func workAggregator(ctx context.Context, opts options.Options, answerCh chan qna.Answer) { + numConnect := 0 + numOnions := 0 + numAll := 0 output := func() { - log.Printf("INFO: %d/%d connected, %d matched\n", numConnected, numVisits, numOnionLocation) + log.Printf("INFO: %d/%d connected, %d sites configured Onion-Location\n", numConnect, numAll, numOnions) + } + + handleAnswer := func(a qna.Answer) { + numAll += 1 + if !a.OK { + return + } + + numConnect += 1 + if a.HTTP != "" || a.HTML != "" { + numOnions += 1 + fmt.Printf("%s header=%s attribute=%s\n", a.Domain, a.HTTP, a.HTML) + } } - defer output() + ticker := time.NewTicker(opts.MetricsInterval) + defer ticker.Stop() + defer output() for { select { case <-ctx.Done(): - return - case a := <-answerCh: - numVisits += 1 - if !a.OK { - continue - } - - numConnected += 1 - if a.HTTP || a.HTML { - numOnionLocation += 1 - fmt.Printf("http:%v html:%v domain:%s onion:%s \n", a.HTTP, a.HTML, a.Domain, a.Onion) + log.Printf("INFO: about to exit, reading remaining answers\n") + for { + select { + case a := <-answerCh: + handleAnswer(a) + case <-time.After(opts.Timeout): + return + } } + case a := <-answerCh: + handleAnswer(a) case <-ticker.C: output() } } } -func workGenerator(ctx context.Context, cancel context.CancelFunc, opts options.Options, questionCh chan qna.Question) { - fp, err := os.OpenFile(opts.InputFile, os.O_RDONLY, 0644) - if err != nil { - log.Printf("ERROR: %v", err) - cancel() - return - } - - defer fp.Close() +func workGenerator(ctx context.Context, opts options.Options, fp *os.File, questionCh chan qna.Question) (int64, bool) { scanner := bufio.NewScanner(fp) - max := 2 * 256 * opts.NumWorkers - buf := make([]byte, 0, max) - scanner.Buffer(buf, max) + buf := make([]byte, 0, opts.MaxFileBuffer*1024*1024) + scanner.Buffer(buf, opts.MaxFileBuffer*1024*1024) - // TODO: track which line we would have to start from to be sure that - // we're not missing any domains on ctrl+C, OK if we go back too much? + nextLine := int64(0) for scanner.Scan() { select { case <-ctx.Done(): - return + return nextLine, false default: } - for { - if len(questionCh) < cap(questionCh) { - questionCh <- qna.Question{Domain: scanner.Text()} - break - } - select { - case <-ctx.Done(): - return - case <-time.After(time.Second): - continue - } + if nextLine == opts.NextLine { + break + } + scanner.Text() + nextLine++ + } + + for scanner.Scan() { + select { + case <-ctx.Done(): + return nextLine, false + case questionCh <- qna.Question{Domain: scanner.Text()}: + nextLine++ } } + + select { + case <-ctx.Done(): + case <-time.After(opts.Timeout): + } + return nextLine, true } -- cgit v1.2.3