From af5be4fbd0c3824478b9cfc261b7a34e98a36e0f Mon Sep 17 00:00:00 2001 From: Rasmus Dahlberg Date: Sat, 25 Mar 2023 14:11:29 +0100 Subject: Clean-up skeleton --- main.go | 176 +++++++++++++++++++++++++++++++++++----------------------------- 1 file changed, 96 insertions(+), 80 deletions(-) (limited to 'main.go') diff --git a/main.go b/main.go index 0efac30..f04ea17 100644 --- a/main.go +++ b/main.go @@ -1,3 +1,13 @@ +// Package main provides onion-grab, a tool that visits a list of domains +// concurrently over HTTPS to see if they have Onion-Location configured. +// +// Install: +// +// $ go install git.cs.kau.se/rasmoste/onion-grab@latest +// +// Usage: +// +// $ onion-grab -h package main import ( @@ -12,9 +22,9 @@ import ( "syscall" "time" - "git.cs.kau.se/rasmoste/find-onion/internal/onionloc" - "git.cs.kau.se/rasmoste/find-onion/internal/options" - "git.cs.kau.se/rasmoste/find-onion/internal/qna" + "git.cs.kau.se/rasmoste/onion-grab/internal/onionloc" + "git.cs.kau.se/rasmoste/onion-grab/internal/options" + "git.cs.kau.se/rasmoste/onion-grab/internal/qna" ) func main() { @@ -22,19 +32,28 @@ func main() { cli := &http.Client{ Transport: &http.Transport{ DisableKeepAlives: true, - MaxResponseHeaderBytes: opts.MaxResponse, + MaxResponseHeaderBytes: opts.MaxResponse * 1024 * 1024, }, } - questionCh := make(chan qna.Question, 2*opts.NumWorkers) + fp, err := os.OpenFile(opts.InputFile, os.O_RDONLY, 0644) + if err != nil { + log.Printf("ERROR: %v", err) + os.Exit(1) + } + defer fp.Close() + + questionCh := make(chan qna.Question) defer close(questionCh) - answerCh := make(chan qna.Answer, 2*opts.NumWorkers) + answerCh := make(chan qna.Answer) defer close(answerCh) var wg sync.WaitGroup defer wg.Wait() + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() log.Printf("INFO: starting await handler, ctrl+C to exit\n") go func() { @@ -56,29 +75,17 @@ func main() { go func() { wg.Add(1) defer wg.Done() - workAggregator(ctx, cancel, opts, answerCh) + workAggregator(ctx, opts, answerCh) }() log.Printf("INFO: generating work\n") - workGenerator(ctx, cancel, opts, questionCh) - time.Sleep(time.Second) - - defer cancel() - defer time.Sleep(2 * opts.Timeout) - defer log.Printf("INFO: about to exit in %v", 2*opts.Timeout) - for { - select { - case <-ctx.Done(): - log.Printf("INFO: context cancelled") - return - case <-time.After(time.Second): + nextLine, readAll := workGenerator(ctx, opts, fp, questionCh) + if !readAll { + warn := fmt.Sprintf("only read up until line %d", nextLine) + if opts.NextLine != 0 { + warn += fmt.Sprintf(" (line %d relative to start)", nextLine-opts.NextLine) } - - numMessages := len(questionCh) + len(answerCh) - if numMessages == 0 { - return - } - log.Printf("DEBUG: waiting for %d messages to be processed before exit", numMessages) + log.Printf("WARNING: %s\n", warn) } } @@ -89,9 +96,9 @@ func await(ctx context.Context, cancel context.CancelFunc) { signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM) select { case <-sigs: - cancel() case <-ctx.Done(): } + cancel() } func workHandler(ctx context.Context, opts options.Options, cli *http.Client, questionCh chan qna.Question, answerCh chan qna.Answer) { @@ -109,96 +116,105 @@ func workHandler(ctx context.Context, opts options.Options, cli *http.Client, qu return } + answer := qna.Answer{Domain: question.Domain} rsp, err := cli.Do(req) if err != nil { - answerCh <- qna.Answer{question.Domain, false, false, false, ""} + answerCh <- answer return } defer rsp.Body.Close() + answer.OK = true - v, ok := 
onionloc.HTTP(rsp) + onion, ok := onionloc.HTTP(rsp) if ok { - answerCh <- qna.Answer{question.Domain, true, true, false, v} - return + answer.HTTP = onion } - v, ok = onionloc.HTML(rsp) + onion, ok = onionloc.HTML(rsp) if ok { - answerCh <- qna.Answer{question.Domain, true, false, true, v} - return + answer.HTML = onion } - answerCh <- qna.Answer{question.Domain, true, false, false, ""} + answerCh <- answer }() } } } -func workAggregator(ctx context.Context, _ context.CancelFunc, opts options.Options, answerCh chan qna.Answer) { - ticker := time.NewTicker(opts.MetricsInterval) - defer ticker.Stop() - - numConnected := 0 - numOnionLocation := 0 - numVisits := 0 +func workAggregator(ctx context.Context, opts options.Options, answerCh chan qna.Answer) { + numConnect := 0 + numOnions := 0 + numAll := 0 output := func() { - log.Printf("INFO: %d/%d connected, %d matched\n", numConnected, numVisits, numOnionLocation) + log.Printf("INFO: %d/%d connected, %d sites configured Onion-Location\n", numConnect, numAll, numOnions) + } + + handleAnswer := func(a qna.Answer) { + numAll += 1 + if !a.OK { + return + } + + numConnect += 1 + if a.HTTP != "" || a.HTML != "" { + numOnions += 1 + fmt.Printf("%s header=%s attribute=%s\n", a.Domain, a.HTTP, a.HTML) + } } - defer output() + ticker := time.NewTicker(opts.MetricsInterval) + defer ticker.Stop() + defer output() for { select { case <-ctx.Done(): - return - case a := <-answerCh: - numVisits += 1 - if !a.OK { - continue - } - - numConnected += 1 - if a.HTTP || a.HTML { - numOnionLocation += 1 - fmt.Printf("http:%v html:%v domain:%s onion:%s \n", a.HTTP, a.HTML, a.Domain, a.Onion) + log.Printf("INFO: about to exit, reading remaining answers\n") + for { + select { + case a := <-answerCh: + handleAnswer(a) + case <-time.After(opts.Timeout): + return + } } + case a := <-answerCh: + handleAnswer(a) case <-ticker.C: output() } } } -func workGenerator(ctx context.Context, cancel context.CancelFunc, opts options.Options, questionCh chan qna.Question) { - fp, err := os.OpenFile(opts.InputFile, os.O_RDONLY, 0644) - if err != nil { - log.Printf("ERROR: %v", err) - cancel() - return - } - - defer fp.Close() +func workGenerator(ctx context.Context, opts options.Options, fp *os.File, questionCh chan qna.Question) (int64, bool) { scanner := bufio.NewScanner(fp) - max := 2 * 256 * opts.NumWorkers - buf := make([]byte, 0, max) - scanner.Buffer(buf, max) + buf := make([]byte, 0, opts.MaxFileBuffer*1024*1024) + scanner.Buffer(buf, opts.MaxFileBuffer*1024*1024) - // TODO: track which line we would have to start from to be sure that - // we're not missing any domains on ctrl+C, OK if we go back too much? + nextLine := int64(0) for scanner.Scan() { select { case <-ctx.Done(): - return + return nextLine, false default: } - for { - if len(questionCh) < cap(questionCh) { - questionCh <- qna.Question{Domain: scanner.Text()} - break - } - select { - case <-ctx.Done(): - return - case <-time.After(time.Second): - continue - } + if nextLine == opts.NextLine { + break + } + scanner.Text() + nextLine++ + } + + for scanner.Scan() { + select { + case <-ctx.Done(): + return nextLine, false + case questionCh <- qna.Question{Domain: scanner.Text()}: + nextLine++ } } + + select { + case <-ctx.Done(): + case <-time.After(opts.Timeout): + } + return nextLine, true } -- cgit v1.2.3
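
Note on the message types: the patch replaces positional qna.Answer literals such as qna.Answer{question.Domain, true, true, false, v} with named fields, and the HTTP/HTML fields now carry the discovered onion URL instead of a boolean. The internal/qna package is not part of this diff, so the following is only a sketch of the shape the updated main.go appears to assume; the field names come from the usage above, while the exact types and comments are assumptions.

// Sketch of git.cs.kau.se/rasmoste/onion-grab/internal/qna as implied by
// the new main.go. Not part of this patch; names inferred from usage,
// types assumed.
package qna

// Question asks a worker to visit one domain over HTTPS.
type Question struct {
	Domain string
}

// Answer reports the outcome of visiting one domain.
type Answer struct {
	Domain string // domain that was visited
	OK     bool   // true if the HTTPS request completed
	HTTP   string // onion URL from the Onion-Location header, "" if none
	HTML   string // onion URL from the HTML attribute, "" if none
}

With strings rather than booleans, the aggregator can print both the header and the attribute value on one line (header=%s attribute=%s) without carrying a separate Onion field as the old code did.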
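
The internal/options package referenced above (InputFile, NextLine, NumWorkers, MaxResponse, MaxFileBuffer, Timeout, MetricsInterval) is also outside this diff. A rough sketch of the fields the new main.go depends on, with MaxResponse and MaxFileBuffer read as MiB since both are multiplied by 1024*1024 before use, might look as follows; types and comments are assumptions, and flag names and defaults are omitted because the patch does not show them.

// Sketch of git.cs.kau.se/rasmoste/onion-grab/internal/options as implied
// by the new main.go. Not part of this patch; field names taken from
// usage, types assumed.
package options

import "time"

type Options struct {
	InputFile       string        // file with one domain per line
	NextLine        int64         // input lines to skip before generating work
	NumWorkers      int           // number of concurrent HTTPS workers
	MaxResponse     int64         // response header limit, in MiB
	MaxFileBuffer   int           // bufio.Scanner buffer size, in MiB
	Timeout         time.Duration // grace period for draining in-flight work
	MetricsInterval time.Duration // how often the aggregator logs progress
}

NextLine is what makes an interrupted run resumable: workGenerator now returns the next unread line number, main logs it as a warning on early exit, and a later invocation can skip that many lines before it starts sending questions.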