-rw-r--r--  internal/options/options.go |  2
-rw-r--r--  internal/qna/qna.go         | 14
-rw-r--r--  main.go                     | 99
3 files changed, 59 insertions(+), 56 deletions(-)
diff --git a/internal/options/options.go b/internal/options/options.go
index f7510ba..8e10686 100644
--- a/internal/options/options.go
+++ b/internal/options/options.go
@@ -10,6 +10,7 @@ type Options struct {
 	NumWorkers      int
 	MetricsInterval time.Duration
 	Timeout         time.Duration
+	MaxResponse     int64
 }
 
 func Parse() (opts Options) {
@@ -17,6 +18,7 @@ func Parse() (opts Options) {
 	flag.IntVar(&opts.NumWorkers, "w", 10, "number of parallel workers")
 	flag.DurationVar(&opts.Timeout, "t", 10*time.Second, "timeout for each website visit")
 	flag.DurationVar(&opts.MetricsInterval, "m", 5*time.Second, "how often to emit metrics")
+	flag.Int64Var(&opts.MaxResponse, "r", 128*1024*1024, "max response body size to accept")
 	flag.Parse()
 	return
 }
diff --git a/internal/qna/qna.go b/internal/qna/qna.go
new file mode 100644
index 0000000..5336811
--- /dev/null
+++ b/internal/qna/qna.go
@@ -0,0 +1,14 @@
+package qna
+
+type Question struct {
+	Domain string // domain name to visit via HTTPS
+}
+
+type Answer struct {
+	Domain string // domain name of the visited HTTPS site
+	OK     bool   // true if HTTP GET request succeeded
+
+	HTTP  bool   // true if onion location was found via HTTP header
+	HTML  bool   // true if onion location was found via HTML attribute
+	Onion string // the site's onion location URL (if any)
+}
diff --git a/main.go b/main.go
--- a/main.go
+++ b/main.go
@@ -3,8 +3,8 @@ package main
 import (
 	"bufio"
 	"context"
+	"fmt"
 	"log"
-	logger "log"
 	"net/http"
 	"os"
 	"os/signal"
@@ -14,41 +14,28 @@ import (
 	"git.cs.kau.se/rasmoste/find-onion/internal/onionloc"
 	"git.cs.kau.se/rasmoste/find-onion/internal/options"
+	"git.cs.kau.se/rasmoste/find-onion/internal/qna"
 )
 
-type match struct {
-	dnsName    string
-	onionName  string
-	httpHeader bool
-	htmlAttr   bool
-}
-
 func main() {
 	opts := options.Parse()
 	cli := &http.Client{
 		Transport: &http.Transport{
-			DisableKeepAlives: true,
+			DisableKeepAlives:      true,
+			MaxResponseHeaderBytes: opts.MaxResponse,
 		},
 	}
 
-	// sanCh is a channel of names that our workers should visit
-	sanCh := make(chan string, 2*opts.NumWorkers)
-	defer close(sanCh)
-
-	// matchCh is a channel of matches, i.e., sites that had Onion-Location
-	matchCh := make(chan match, 2*opts.NumWorkers)
-	defer close(matchCh)
+	questionCh := make(chan qna.Question, 2*opts.NumWorkers)
+	defer close(questionCh)
 
-	// missCh is a channel of misses, i.e., the number of sites that had not
-	// configured Onion-Location. True means HTTP GET succeeded.
-	missCh := make(chan bool, 2*opts.NumWorkers)
-	defer close(missCh)
+	answerCh := make(chan qna.Answer, 2*opts.NumWorkers)
+	defer close(answerCh)
 
 	var wg sync.WaitGroup
 	defer wg.Wait()
 
 	ctx, cancel := context.WithCancel(context.Background())
-	defer log.Printf("INFO: completed")
 
 	log.Printf("INFO: starting await handler, ctrl+C to exit\n")
 	go func() {
 		wg.Add(1)
@@ -61,37 +48,37 @@ func main() {
 		go func() {
 			wg.Add(1)
 			defer wg.Done()
-			workHandler(ctx, opts, cli, sanCh, matchCh, missCh)
+			workHandler(ctx, opts, cli, questionCh, answerCh)
 		}()
 	}
 
-	log.Printf("INFO: starting work receiver\n")
+	log.Printf("INFO: starting work aggregator\n")
 	go func() {
 		wg.Add(1)
 		defer wg.Done()
-		workAggregator(ctx, cancel, opts, matchCh, missCh)
+		workAggregator(ctx, cancel, opts, answerCh)
 	}()
 
 	log.Printf("INFO: generating work\n")
-	workGenerator(ctx, cancel, opts, sanCh)
+	workGenerator(ctx, cancel, opts, questionCh)
 	time.Sleep(time.Second)
 	defer cancel()
+	defer time.Sleep(2 * opts.Timeout)
+	defer log.Printf("INFO: about to exit in %v", 2*opts.Timeout)
 
 	for {
 		select {
 		case <-ctx.Done():
-			logger.Printf("DEBUG: context cancelled, exiting...\n")
+			log.Printf("INFO: context cancelled")
 			return
-		case <-time.After(1 * time.Second):
+		case <-time.After(time.Second):
 		}
 
-		numMessages := len(sanCh) + len(matchCh) + len(missCh)
+		numMessages := len(questionCh) + len(answerCh)
 		if numMessages == 0 {
-			logger.Printf("Exit scheduled in %v from now", 2*opts.Timeout)
-			time.Sleep(2 * opts.Timeout)
-			return // TODO: fix a cleaner exit
+			return
 		}
-		logger.Printf("DEBUG: waiting for %d messages to be processed before exit", numMessages)
+		log.Printf("DEBUG: waiting for %d messages to be processed before exit", numMessages)
 	}
 }
 
@@ -107,47 +94,45 @@ func await(ctx context.Context, cancel context.CancelFunc) {
 	}
 }
 
-func workHandler(ctx context.Context, opts options.Options, cli *http.Client,
-	sanCh chan string, matchCh chan match, missCh chan bool) {
+func workHandler(ctx context.Context, opts options.Options, cli *http.Client, questionCh chan qna.Question, answerCh chan qna.Answer) {
 	for {
 		select {
 		case <-ctx.Done():
 			return
-		case san := <-sanCh:
+		case question := <-questionCh:
 			func() {
 				cctx, cancel := context.WithTimeout(ctx, opts.Timeout)
 				defer cancel()
 
-				req, err := http.NewRequestWithContext(cctx, http.MethodGet, "https://"+san, nil)
+				req, err := http.NewRequestWithContext(cctx, http.MethodGet, "https://"+question.Domain, nil)
 				if err != nil {
-					logger.Printf("DEBUG: %s: %v", san, err)
 					return
 				}
 
 				rsp, err := cli.Do(req)
 				if err != nil {
-					missCh <- false
+					answerCh <- qna.Answer{question.Domain, false, false, false, ""}
 					return
 				}
 				defer rsp.Body.Close()
 
 				v, ok := onionloc.HTTP(rsp)
 				if ok {
-					matchCh <- match{dnsName: san, onionName: v, httpHeader: true}
+					answerCh <- qna.Answer{question.Domain, true, true, false, v}
 					return
 				}
 				v, ok = onionloc.HTML(rsp)
 				if ok {
-					matchCh <- match{dnsName: san, onionName: v, htmlAttr: true}
+					answerCh <- qna.Answer{question.Domain, true, false, true, v}
 					return
 				}
-				missCh <- true
+				answerCh <- qna.Answer{question.Domain, true, false, false, ""}
 			}()
 		}
	}
 }
 
-func workAggregator(ctx context.Context, _ context.CancelFunc, opts options.Options, matchCh chan match, missCh chan bool) {
+func workAggregator(ctx context.Context, _ context.CancelFunc, opts options.Options, answerCh chan qna.Answer) {
 	ticker := time.NewTicker(opts.MetricsInterval)
 	defer ticker.Stop()
 
@@ -155,7 +140,7 @@ func workAggregator(ctx context.Context, _ context.CancelFunc, opts options.Opti
 	numOnionLocation := 0
 	numVisits := 0
 	output := func() {
-		logger.Printf("INFO: %d/%d connected, %d matched\n", numConnected, numVisits, numOnionLocation)
+		log.Printf("INFO: %d/%d connected, %d matched\n", numConnected, numVisits, numOnionLocation)
 	}
 	defer output()
 
@@ -163,16 +148,16 @@
 		select {
 		case <-ctx.Done():
 			return
-		case m := <-matchCh:
+		case a := <-answerCh:
 			numVisits += 1
-			numConnected += 1
-			numOnionLocation += 1
+			if !a.OK {
+				continue
+			}
 
-			log.Printf("INFO: found match %s (HTTP:%v HTML:%v)\n", m.dnsName, m.httpHeader, m.htmlAttr)
-		case connected := <-missCh:
-			numVisits += 1
-			if connected {
-				numConnected += 1
+			numConnected += 1
+			if a.HTTP || a.HTML {
+				numOnionLocation += 1
+				fmt.Printf("http:%v html:%v domain:%s onion:%s \n", a.HTTP, a.HTML, a.Domain, a.Onion)
 			}
 		case <-ticker.C:
 			output()
@@ -180,20 +165,22 @@
 	}
 }
 
-func workGenerator(ctx context.Context, cancel context.CancelFunc, opts options.Options, sanCh chan string) {
+func workGenerator(ctx context.Context, cancel context.CancelFunc, opts options.Options, questionCh chan qna.Question) {
 	fp, err := os.OpenFile(opts.InputFile, os.O_RDONLY, 0644)
 	if err != nil {
-		logger.Printf("ERROR: %v", err)
+		log.Printf("ERROR: %v", err)
 		cancel()
 		return
 	}
 	defer fp.Close()
 
 	scanner := bufio.NewScanner(fp)
-	max := 1024 * 1024
+	max := 2 * 256 * opts.NumWorkers
 	buf := make([]byte, 0, max)
 	scanner.Buffer(buf, max)
 
+	// TODO: track which line we would have to start from to be sure that
+	// we're not missing any domains on ctrl+C, OK if we go back too much?
 	for scanner.Scan() {
 		select {
 		case <-ctx.Done():
@@ -202,8 +189,8 @@ func workGenerator(ctx context.Context, cancel context.CancelFunc, opts options.
 		}
 
 		for {
-			if len(sanCh) < cap(sanCh) {
-				sanCh <- scanner.Text()
+			if len(questionCh) < cap(questionCh) {
+				questionCh <- qna.Question{Domain: scanner.Text()}
				break
 			}
 			select {
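
A note on the new -r flag wired up above: Go's http.Transport.MaxResponseHeaderBytes limits how many bytes of the server's response *headers* the client accepts, so the flag's help text ("max response body size to accept") promises more than the transport enforces; the body that onionloc.HTML scans stays unbounded. Below is a minimal sketch of how the body could be capped as well, using io.LimitReader. The capBody helper and the http.Get usage are illustrative only and not part of this commit:

package main

import (
	"fmt"
	"io"
	"net/http"
)

// capBody wraps rsp.Body so that at most max bytes can be read from it,
// while keeping the original Body as the Closer.
func capBody(rsp *http.Response, max int64) *http.Response {
	rsp.Body = struct {
		io.Reader
		io.Closer
	}{io.LimitReader(rsp.Body, max), rsp.Body}
	return rsp
}

func main() {
	rsp, err := http.Get("https://example.com")
	if err != nil {
		fmt.Println("ERROR:", err)
		return
	}
	defer rsp.Body.Close()

	// Read at most 128 MiB, mirroring the -r default above.
	body, err := io.ReadAll(capBody(rsp, 128*1024*1024))
	if err != nil {
		fmt.Println("ERROR:", err)
		return
	}
	fmt.Printf("read %d bytes\n", len(body))
}

io.LimitReader truncates silently rather than returning an error, which is arguably the right behavior here: the worker only needs a bounded prefix of the page to look for an Onion-Location header or HTML attribute.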