// Package main provides onion-grab, a tool that visits a list of domains over // HTTPS to see if they have Onion-Location configured. // // Install: // // $ go install git.cs.kau.se/rasmoste/onion-grab@latest // // Usage: // // $ onion-grab -h package main import ( "bufio" "context" "fmt" "log" "net/http" "os" "os/signal" "sync" "syscall" "time" "git.cs.kau.se/rasmoste/onion-grab/internal/onionloc" "git.cs.kau.se/rasmoste/onion-grab/internal/options" "git.cs.kau.se/rasmoste/onion-grab/internal/qna" ) func main() { opts := options.Parse() cli := &http.Client{ Transport: &http.Transport{ DisableKeepAlives: true, MaxResponseHeaderBytes: opts.MaxResponse * 1024 * 1024, }, } fp, err := os.OpenFile(opts.InputFile, os.O_RDONLY, 0644) if err != nil { log.Printf("ERROR: %v", err) os.Exit(1) } defer fp.Close() questionCh := make(chan qna.Question) defer close(questionCh) answerCh := make(chan qna.Answer) defer close(answerCh) var wg sync.WaitGroup defer wg.Wait() ctx, cancel := context.WithCancel(context.Background()) defer cancel() log.Printf("INFO: starting await handler, ctrl+C to exit\n") go func() { wg.Add(1) defer wg.Done() await(ctx, cancel) }() if opts.CheckerDomain != "" { log.Printf("INFO: starting checker\n") go func() { wg.Add(1) defer wg.Done() checker(ctx, cancel, &opts, cli) }() } log.Printf("INFO: starting %d workers\n", opts.NumWorkers) for i := 0; i < opts.NumWorkers; i++ { go func() { wg.Add(1) defer wg.Done() workHandler(ctx, opts, cli, questionCh, answerCh) }() } log.Printf("INFO: starting work aggregator\n") go func() { wg.Add(1) defer wg.Done() workAggregator(ctx, opts, answerCh) }() log.Printf("INFO: generating work\n") nextLine, readAll := workGenerator(ctx, opts, fp, questionCh) if !readAll { notice := fmt.Sprintf("only read up until line %d", nextLine) if opts.StartLineInclusive != 0 { notice += fmt.Sprintf(" (line %d relative to start)", nextLine-opts.StartLineInclusive) } log.Printf("NOTICE: %s\n", notice) } } func await(ctx context.Context, cancel context.CancelFunc) { sigs := make(chan os.Signal, 1) defer close(sigs) signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM) select { case <-sigs: case <-ctx.Done(): } cancel() } func checker(ctx context.Context, cancel context.CancelFunc, opts *options.Options, cli *http.Client) { question := qna.Question{opts.CheckerDomain} answerCh := make(chan qna.Answer, 1) defer close(answerCh) for { select { case <-ctx.Done(): return case <-time.After(opts.CheckerInterval): work(ctx, opts, cli, question, answerCh) answer := <-answerCh if answer.HTTP == "" && answer.HTML == "" { log.Printf("ERROR: checker expected onion for %+v", answer) cancel() return } } } } func workHandler(ctx context.Context, opts options.Options, cli *http.Client, questionCh chan qna.Question, answerCh chan qna.Answer) { for { select { case <-ctx.Done(): return case question := <-questionCh: work(ctx, &opts, cli, question, answerCh) } } } func work(ctx context.Context, opts *options.Options, cli *http.Client, question qna.Question, answerCh chan qna.Answer) { cctx, cancel := context.WithTimeout(ctx, opts.Timeout) defer cancel() answer := qna.Answer{Domain: question.Domain} req, err := http.NewRequestWithContext(cctx, http.MethodGet, "https://"+question.Domain, nil) if err != nil { answerCh <- answer return } rsp, err := cli.Do(req) if err != nil { answerCh <- answer return } defer rsp.Body.Close() answer.OK = true onion, ok := onionloc.HTTP(rsp) if ok { answer.HTTP = onion } onion, ok = onionloc.HTML(rsp) if ok { answer.HTML = onion } answerCh <- answer } func workAggregator(ctx context.Context, opts options.Options, answerCh chan qna.Answer) { numConnect := 0 numOnions := 0 numAll := 0 output := func() { log.Printf("SUMMARY: %d/%d connected, %d sites configured Onion-Location\n", numConnect, numAll, numOnions) } handleAnswer := func(a qna.Answer) { numAll += 1 if !a.OK { return } numConnect += 1 if a.HTTP != "" || a.HTML != "" { numOnions += 1 fmt.Printf("%s header=%s attribute=%s\n", a.Domain, a.HTTP, a.HTML) } } defer output() for { select { case <-ctx.Done(): log.Printf("INFO: about to exit, reading remaining answers\n") for { select { case a := <-answerCh: handleAnswer(a) case <-time.After(opts.Timeout + time.Second): return } } case a := <-answerCh: handleAnswer(a) } } } func workGenerator(ctx context.Context, opts options.Options, fp *os.File, questionCh chan qna.Question) (int64, bool) { scanner := bufio.NewScanner(fp) buf := make([]byte, 0, opts.MaxFileBuffer*1024*1024) scanner.Buffer(buf, opts.MaxFileBuffer*1024*1024) // roll-up to the requested start line nextLine := int64(0) if opts.StartLineInclusive > nextLine { for scanner.Scan() { nextLine++ select { case <-ctx.Done(): return nextLine, false default: } if nextLine == opts.StartLineInclusive { break } } } // initialize ticker ticker := time.NewTicker(opts.MetricsInterval) defer ticker.Stop() startTime := time.Now().Unix() latestTime := startTime latestCount := opts.StartLineInclusive // initialize rate-limit limit := time.NewTicker(time.Second) defer limit.Stop() readCount := 0 for scanner.Scan() { if opts.EndLineExclusive > 0 && nextLine == opts.EndLineExclusive { break } if readCount == opts.Limit { select { case <-ctx.Done(): return nextLine, false case <-limit.C: readCount = 0 } } select { case <-ctx.Done(): return nextLine, false case <-ticker.C: now := time.Now().Unix() log.Printf("INFO: currently %.1f sites/s, %.1f sites/s since start, at line %d\n", float64(nextLine-latestCount)/float64(now-latestTime), float64(nextLine-opts.StartLineInclusive)/float64(now-startTime), nextLine, ) latestCount = nextLine latestTime = now case questionCh <- qna.Question{Domain: scanner.Text()}: nextLine++ readCount++ } } select { case <-ctx.Done(): case <-time.After(opts.Timeout + time.Second): } return nextLine, true }