// Package main provides onion-grab, a tool that visits a list of domains over // HTTPS to see if they have Onion-Location configured. // // Install: // // $ go install git.cs.kau.se/rasmoste/onion-grab@latest // // Usage: // // $ onion-grab -h package main import ( "bufio" "context" "fmt" "log" "net/http" "os" "os/signal" "sync" "syscall" "time" "git.cs.kau.se/rasmoste/onion-grab/internal/onionloc" "git.cs.kau.se/rasmoste/onion-grab/internal/options" "git.cs.kau.se/rasmoste/onion-grab/internal/qna" ) func main() { opts := options.Parse() cli := &http.Client{ Transport: &http.Transport{ DisableKeepAlives: true, MaxResponseHeaderBytes: opts.MaxResponse * 1024 * 1024, }, } fp, err := os.OpenFile(opts.InputFile, os.O_RDONLY, 0644) if err != nil { log.Printf("ERROR: %v", err) os.Exit(1) } defer fp.Close() questionCh := make(chan qna.Question) defer close(questionCh) answerCh := make(chan qna.Answer) defer close(answerCh) var wg sync.WaitGroup defer wg.Wait() ctx, cancel := context.WithCancel(context.Background()) defer cancel() log.Printf("INFO: starting await handler, ctrl+C to exit\n") go func() { wg.Add(1) defer wg.Done() awaitHandler(ctx, cancel) }() log.Printf("INFO: starting %d workers with rate-limit %d requests/s\n", opts.NumWorkers, opts.Limit) for i := 0; i < opts.NumWorkers; i++ { go func() { wg.Add(1) defer wg.Done() workHandler(ctx, opts, cli, questionCh, answerCh) }() } log.Printf("INFO: starting work aggregator\n") go func() { wg.Add(1) defer wg.Done() workAggregator(ctx, opts, answerCh) }() log.Printf("INFO: generating work\n") nextLine, readAll := workGenerator(ctx, opts, fp, questionCh) if !readAll { notice := fmt.Sprintf("only read up until line %d", nextLine) if opts.StartLineInclusive != 0 { notice += fmt.Sprintf(" (line %d relative to start)", nextLine-opts.StartLineInclusive) } log.Printf("NOTICE: %s\n", notice) } } func awaitHandler(ctx context.Context, cancel context.CancelFunc) { sigs := make(chan os.Signal, 1) defer close(sigs) signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM) select { case <-sigs: case <-ctx.Done(): } cancel() } func workHandler(ctx context.Context, opts options.Options, cli *http.Client, questionCh chan qna.Question, answerCh chan qna.Answer) { for { select { case <-ctx.Done(): return case question := <-questionCh: work(ctx, &opts, cli, question, answerCh) } } } func work(ctx context.Context, opts *options.Options, cli *http.Client, question qna.Question, answerCh chan qna.Answer) { cctx, cancel := context.WithTimeout(ctx, opts.Timeout) defer cancel() answer := qna.Answer{Domain: question.Domain} req, err := http.NewRequestWithContext(cctx, http.MethodGet, "https://"+question.Domain, nil) if err != nil { answer.ReqErr = err answerCh <- answer return } rsp, err := cli.Do(req) if err != nil { answer.DoErr = err answerCh <- answer return } defer rsp.Body.Close() onion, ok := onionloc.HTTP(rsp) if ok { answer.HTTP = onion } onion, ok = onionloc.HTML(rsp) if ok { answer.HTML = onion } answerCh <- answer } func workAggregator(ctx context.Context, opts options.Options, answerCh chan qna.Answer) { p := qna.Progress{} handleAnswer := func(a qna.Answer) { p.AddAnswer(a) if a.HTTP != "" || a.HTML != "" { fmt.Printf("%s\n", a.String()) } } metrics := time.NewTicker(opts.MetricsInterval) defer metrics.Stop() for { select { case <-ctx.Done(): log.Printf("INFO: about to exit, reading remaining answers\n") for { select { case a := <-answerCh: handleAnswer(a) case <-time.After(opts.Timeout + time.Second): log.Printf("INFO: metrics@aggregator: summary: \n\n%s\n\n", p.String()) return } } case a := <-answerCh: handleAnswer(a) case <-metrics.C: log.Printf("INFO: metrics@aggregator: \n\n%s\n\n", p.String()) } } } func workGenerator(ctx context.Context, opts options.Options, fp *os.File, questionCh chan qna.Question) (int64, bool) { scanner := bufio.NewScanner(fp) buf := make([]byte, 0, opts.MaxFileBuffer*1024*1024) scanner.Buffer(buf, opts.MaxFileBuffer*1024*1024) // roll-up to the requested start line nextLine := int64(0) if opts.StartLineInclusive > nextLine { for scanner.Scan() { nextLine++ select { case <-ctx.Done(): return nextLine, false default: } if nextLine == opts.StartLineInclusive { break } } } // initialize metrics metrics := time.NewTicker(opts.MetricsInterval) defer metrics.Stop() startTime := time.Now().Unix() latestTime := startTime latestCount := opts.StartLineInclusive // initialize rate-limit limit := time.NewTicker(time.Second) defer limit.Stop() readCount := 0 for scanner.Scan() { if opts.EndLineExclusive > 0 && nextLine == opts.EndLineExclusive { break } if readCount == opts.Limit { select { case <-ctx.Done(): return nextLine, false case <-limit.C: readCount = 0 } } select { case <-ctx.Done(): return nextLine, false case questionCh <- qna.Question{Domain: trimWildcard(scanner.Text())}: nextLine++ readCount++ case <-metrics.C: now := time.Now().Unix() currRate := float64(nextLine-latestCount) / float64(now-latestTime) avgRate := float64(nextLine-opts.StartLineInclusive) / float64(now-startTime) str := fmt.Sprintf(" Current rate: %.1f sites/s\n", currRate) str += fmt.Sprintf(" Average rate: %.1f sites/s\n", avgRate) str += fmt.Sprintf(" Next line: %d\n", nextLine) log.Printf("INFO: metrics@generator:\n\n%s\n\n", str) latestCount = nextLine latestTime = now } } select { case <-ctx.Done(): case <-time.After(opts.Timeout + time.Second): } return nextLine, true } func trimWildcard(san string) string { if len(san) >= 2 && san[:2] == "*." { return san[2:] } return san }