diff options
author | Rasmus Dahlberg <rasmus@rgdd.se> | 2023-03-23 20:41:26 +0100 |
---|---|---|
committer | Rasmus Dahlberg <rasmus@rgdd.se> | 2023-03-23 20:41:26 +0100 |
commit | 505a4d9b91fe2597ef1fee845f8451055d2cda02 (patch) | |
tree | dc253593f84fa8f466731c2fe80bcc9dc037c7b3 | |
parent | b6b809207b1bf0730fc010d4cc08a030b636fe89 (diff) |
Add drafty skeleton
-rw-r--r-- | lists/small.lst | 5 | ||||
-rw-r--r-- | main.go | 217 |
2 files changed, 222 insertions, 0 deletions
diff --git a/lists/small.lst b/lists/small.lst new file mode 100644 index 0000000..f161918 --- /dev/null +++ b/lists/small.lst @@ -0,0 +1,5 @@ +blog.torproject.org +www.eff.org +www.kau.se +www.torproject.org +www.nytimes.com @@ -0,0 +1,217 @@ +package main + +import ( + "bufio" + "context" + "log" + logger "log" + "net/http" + "os" + "os/signal" + "sync" + "syscall" + "time" + + "git.cs.kau.se/rasmoste/find-onion/internal/onionloc" + "git.cs.kau.se/rasmoste/find-onion/internal/options" +) + +type match struct { + dnsName string + onionName string + httpHeader bool + htmlAttr bool +} + +func main() { + opts := options.Parse() + cli := &http.Client{ + Transport: &http.Transport{ + DisableKeepAlives: true, + }, + } + + // sanCh is a channel of names that our workers should visit + sanCh := make(chan string, 2*opts.NumWorkers) + defer close(sanCh) + + // matchCh is a channel of matches, i.e., sites that had Onion-Location + matchCh := make(chan match, 2*opts.NumWorkers) + defer close(matchCh) + + // missCh is a channel of misses, i.e., the number of sites that had not + // configured Onion-Location. True means HTTP GET succeeded. + missCh := make(chan bool, 2*opts.NumWorkers) + defer close(missCh) + + var wg sync.WaitGroup + defer wg.Wait() + ctx, cancel := context.WithCancel(context.Background()) + + defer log.Printf("INFO: completed") + log.Printf("INFO: starting await handler, ctrl+C to exit\n") + go func() { + wg.Add(1) + defer wg.Done() + await(ctx, cancel) + }() + + log.Printf("INFO: starting %d workers\n", opts.NumWorkers) + for i := 0; i < opts.NumWorkers; i++ { + go func() { + wg.Add(1) + defer wg.Done() + workHandler(ctx, opts, cli, sanCh, matchCh, missCh) + }() + } + + log.Printf("INFO: starting work receiver\n") + go func() { + wg.Add(1) + defer wg.Done() + workAggregator(ctx, cancel, opts, matchCh, missCh) + }() + + log.Printf("INFO: generating work\n") + workGenerator(ctx, cancel, opts, sanCh) + time.Sleep(time.Second) + + defer cancel() + for { + select { + case <-ctx.Done(): + logger.Printf("DEBUG: context cancelled, exiting...\n") + return + case <-time.After(1 * time.Second): + } + + numMessages := len(sanCh) + len(matchCh) + len(missCh) + if numMessages == 0 { + logger.Printf("Exit scheduled in %v from now", 2*opts.Timeout) + time.Sleep(2 * opts.Timeout) + return // TODO: fix a cleaner exit + } + logger.Printf("DEBUG: waiting for %d messages to be processed before exit", numMessages) + } +} + +func await(ctx context.Context, cancel context.CancelFunc) { + sigs := make(chan os.Signal, 1) + defer close(sigs) + + signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM) + select { + case <-sigs: + cancel() + case <-ctx.Done(): + } +} + +func workHandler(ctx context.Context, opts options.Options, cli *http.Client, + sanCh chan string, matchCh chan match, missCh chan bool) { + for { + select { + case <-ctx.Done(): + return + case san := <-sanCh: + func() { + cctx, cancel := context.WithTimeout(ctx, opts.Timeout) + defer cancel() + + req, err := http.NewRequestWithContext(cctx, http.MethodGet, "https://"+san, nil) + if err != nil { + logger.Printf("DEBUG: %s: %v", san, err) + return + } + + rsp, err := cli.Do(req) + if err != nil { + missCh <- false + return + } + defer rsp.Body.Close() + + v, ok := onionloc.HTTP(rsp) + if ok { + matchCh <- match{dnsName: san, onionName: v, httpHeader: true} + return + } + v, ok = onionloc.HTML(rsp) + if ok { + matchCh <- match{dnsName: san, onionName: v, htmlAttr: true} + return + } + missCh <- true + }() + } + } +} + +func workAggregator(ctx context.Context, _ context.CancelFunc, opts options.Options, matchCh chan match, missCh chan bool) { + ticker := time.NewTicker(opts.MetricsInterval) + defer ticker.Stop() + + numConnected := 0 + numOnionLocation := 0 + numVisits := 0 + output := func() { + logger.Printf("INFO: %d/%d connected, %d matched\n", numConnected, numVisits, numOnionLocation) + } + defer output() + + for { + select { + case <-ctx.Done(): + return + case m := <-matchCh: + numVisits += 1 + numConnected += 1 + numOnionLocation += 1 + + log.Printf("INFO: found match %s (HTTP:%v HTML:%v)\n", m.dnsName, m.httpHeader, m.htmlAttr) + case connected := <-missCh: + numVisits += 1 + if connected { + numConnected += 1 + } + case <-ticker.C: + output() + } + } +} + +func workGenerator(ctx context.Context, cancel context.CancelFunc, opts options.Options, sanCh chan string) { + fp, err := os.OpenFile(opts.InputFile, os.O_RDONLY, 0644) + if err != nil { + logger.Printf("ERROR: %v", err) + cancel() + return + } + + defer fp.Close() + scanner := bufio.NewScanner(fp) + max := 1024 * 1024 + buf := make([]byte, 0, max) + scanner.Buffer(buf, max) + + for scanner.Scan() { + select { + case <-ctx.Done(): + return + default: + } + + for { + if len(sanCh) < cap(sanCh) { + sanCh <- scanner.Text() + break + } + select { + case <-ctx.Done(): + return + case <-time.After(time.Second): + continue + } + } + } +} |