// Package main provides onion-grab, a tool that visits a list of domains over
// HTTPS to see if they have Onion-Location configured.
//
// Install:
//
//	$ go install git.cs.kau.se/rasmoste/onion-grab@latest
//
// Usage:
//
//	$ onion-grab -h
package main

import (
	"bufio"
	"context"
	"fmt"
	"log"
	"net/http"
	"os"
	"os/signal"
	"sync"
	"syscall"
	"time"

	"git.cs.kau.se/rasmoste/onion-grab/internal/line"
	"git.cs.kau.se/rasmoste/onion-grab/internal/onionloc"
	"git.cs.kau.se/rasmoste/onion-grab/internal/options"
	"git.cs.kau.se/rasmoste/onion-grab/internal/qna"
)

func main() {
	opts := options.Parse()
	cli := &http.Client{
		Transport: &http.Transport{
			DisableKeepAlives:      true,
			MaxResponseHeaderBytes: opts.MaxResponse * 1024 * 1024,
		},
	}

	fp, err := os.OpenFile(opts.InputFile, os.O_RDONLY, 0644)
	if err != nil {
		log.Printf("ERROR: %v", err)
		os.Exit(1)
	}
	defer fp.Close()

	questionCh := make(chan qna.Question)
	defer close(questionCh)
	answerCh := make(chan qna.Answer)
	defer close(answerCh)

	var wg sync.WaitGroup
	defer wg.Wait()
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	log.Printf("INFO: starting await handler, ctrl+C to exit\n")
	wg.Add(1)
	go func() {
		defer wg.Done()
		awaitHandler(ctx, cancel)
	}()

	log.Printf("INFO: starting %d workers with rate-limit %d requests/s\n", opts.NumWorkers, opts.Limit)
	for i := 0; i < opts.NumWorkers; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			workHandler(ctx, opts, cli, questionCh, answerCh)
		}()
	}

	log.Printf("INFO: starting work aggregator\n")
	wg.Add(1)
	go func() {
		defer wg.Done()
		workAggregator(ctx, opts, answerCh)
	}()

	start := time.Now().Round(time.Second)
	defer func() {
		end := time.Now().Round(time.Second)
		log.Printf("INFO: measurement duration was %v\n", end.Sub(start))
	}()

	log.Printf("INFO: generating work\n")
	nextLine, readAll := workGenerator(ctx, opts, fp, questionCh)
	if !readAll {
		notice := fmt.Sprintf("only read up until line %d", nextLine)
		if opts.StartLineInclusive != 0 {
			notice += fmt.Sprintf(" (line %d relative to start)", nextLine-opts.StartLineInclusive)
		}
		log.Printf("NOTICE: %s\n", notice)
	}
}

// awaitHandler waits for SIGINT/SIGTERM (or context cancellation) and then
// cancels the measurement so that all other goroutines shut down.
func awaitHandler(ctx context.Context, cancel context.CancelFunc) {
	sigs := make(chan os.Signal, 1)
	defer close(sigs)
	signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)

	select {
	case <-sigs:
	case <-ctx.Done():
	}
	cancel()
}

// workHandler answers questions from questionCh until the context is done.
func workHandler(ctx context.Context, opts options.Options, cli *http.Client, questionCh chan qna.Question, answerCh chan qna.Answer) {
	for {
		select {
		case <-ctx.Done():
			return
		case question := <-questionCh:
			work(ctx, &opts, cli, question, answerCh)
		}
	}
}

// work visits a single domain over HTTPS and reports whether Onion-Location
// is configured, either as an HTTP header or as an HTML meta attribute.
func work(ctx context.Context, opts *options.Options, cli *http.Client, question qna.Question, answerCh chan qna.Answer) {
	cctx, cancel := context.WithTimeout(ctx, opts.Timeout)
	defer cancel()

	answer := qna.Answer{Domain: question.Domain}
	req, err := http.NewRequestWithContext(cctx, http.MethodGet, "https://"+question.Domain, nil)
	if err != nil {
		answer.ReqErr = err
		answerCh <- answer
		return
	}
	rsp, err := cli.Do(req)
	if err != nil {
		answer.DoErr = err
		answerCh <- answer
		return
	}
	defer rsp.Body.Close()

	onion, ok := onionloc.HTTP(rsp)
	if ok {
		answer.HTTP = onion
	}
	onion, ok = onionloc.HTML(rsp)
	if ok {
		answer.HTML = onion
	}
	answerCh <- answer
}

// workAggregator collects answers, prints domains that have Onion-Location
// configured to stdout, and periodically logs progress metrics.  On shutdown
// it drains any remaining answers before emitting a final summary.
func workAggregator(ctx context.Context, opts options.Options, answerCh chan qna.Answer) {
	p := qna.Progress{}
	handleAnswer := func(a qna.Answer) {
		p.AddAnswer(a)
		if a.HTTP != "" || a.HTML != "" {
			fmt.Printf("%s\n", a.String())
		}
	}

	metrics := time.NewTicker(opts.MetricsInterval)
	defer metrics.Stop()
	for {
		select {
		case <-ctx.Done():
			log.Printf("INFO: about to exit, reading remaining answers\n")
			for {
				select {
				case a := <-answerCh:
					handleAnswer(a)
				case <-time.After(opts.Timeout + time.Second):
					log.Printf("INFO: metrics@aggregator: summary:\n\n%s\n\n", p.String())
					return
				}
			}
		case a := <-answerCh:
			handleAnswer(a)
		case <-metrics.C:
			log.Printf("INFO: metrics@aggregator:\n\n%s\n\n", p.String())
		}
	}
}

// workGenerator reads domains from fp and feeds them to the workers on
// questionCh, honoring the configured start line, end line, and rate limit.
// It returns the next line to be read and whether all requested input was read.
func workGenerator(ctx context.Context, opts options.Options, fp *os.File, questionCh chan qna.Question) (int64, bool) {
	var wg sync.WaitGroup
	defer wg.Wait()
	cctx, cancel := context.WithCancel(ctx)
	defer cancel()

	// TODO: clean this up so that the Line type with a mutex can be
	// eliminated, and so that all metrics are emitted in one place.
	var nextLine line.Line
	wg.Add(1)
	go func() {
		defer wg.Done()
		rateMetrics(cctx, opts, &nextLine)
	}()

	scanner := bufio.NewScanner(fp)
	buf := make([]byte, 0, opts.MaxFileBuffer*1024*1024)
	scanner.Buffer(buf, opts.MaxFileBuffer*1024*1024)

	// Skip lines until the configured start line is reached.
	if opts.StartLineInclusive > nextLine.Num() {
		for scanner.Scan() {
			nextLine.Inc()

			select {
			case <-ctx.Done():
				return nextLine.Num(), false
			default:
			}

			if nextLine.Num() == opts.StartLineInclusive {
				break
			}
		}
	}

	limit := time.NewTicker(time.Second)
	defer limit.Stop()
	readCount := 0
	for scanner.Scan() {
		if opts.EndLineExclusive > 0 && nextLine.Num() == opts.EndLineExclusive {
			break
		}
		if readCount == opts.Limit {
			select {
			case <-ctx.Done():
				return nextLine.Num(), false
			case <-limit.C:
				readCount = 0
			}
		}

		select {
		case <-ctx.Done():
			return nextLine.Num(), false
		case questionCh <- qna.Question{Domain: trimWildcard(scanner.Text())}:
			nextLine.Inc()
			readCount++
		}
	}

	// Give in-flight requests a chance to complete before shutting down.
	select {
	case <-ctx.Done():
	case <-time.After(opts.Timeout + time.Second):
	}
	return nextLine.Num(), true
}

// rateMetrics periodically logs the current and average read rate of the
// work generator, as well as the next line to be read from the input file.
func rateMetrics(ctx context.Context, opts options.Options, nextLine *line.Line) {
	startTime := time.Now().Unix()
	latestTime := startTime
	latestCount := opts.StartLineInclusive

	metrics := time.NewTicker(opts.MetricsInterval)
	defer metrics.Stop()
	for {
		select {
		case <-ctx.Done():
			return
		case <-metrics.C:
			nextLineNum := nextLine.Num()
			now := time.Now().Unix()
			currRate := float64(nextLineNum-latestCount) / float64(now-latestTime)
			avgRate := float64(nextLineNum-opts.StartLineInclusive) / float64(now-startTime)

			str := fmt.Sprintf(" Current rate: %.1f sites/s\n", currRate)
			str += fmt.Sprintf(" Average rate: %.1f sites/s\n", avgRate)
			str += fmt.Sprintf(" Next line: %d\n", nextLineNum)
			log.Printf("INFO: metrics@generator:\n\n%s\n\n", str)

			latestCount = nextLineNum
			latestTime = now
		}
	}
}

// trimWildcard removes a leading "*." so that a wildcard SAN entry like
// "*.example.org" is visited as "example.org".
func trimWildcard(san string) string {
	if len(san) >= 2 && san[:2] == "*." {
		return san[2:]
	}
	return san
}