aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lists/small.lst5
-rw-r--r--main.go217
2 files changed, 222 insertions, 0 deletions
diff --git a/lists/small.lst b/lists/small.lst
new file mode 100644
index 0000000..f161918
--- /dev/null
+++ b/lists/small.lst
@@ -0,0 +1,5 @@
+blog.torproject.org
+www.eff.org
+www.kau.se
+www.torproject.org
+www.nytimes.com
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..a68da1e
--- /dev/null
+++ b/main.go
@@ -0,0 +1,217 @@
+package main
+
+import (
+ "bufio"
+ "context"
+ "log"
+ logger "log"
+ "net/http"
+ "os"
+ "os/signal"
+ "sync"
+ "syscall"
+ "time"
+
+ "git.cs.kau.se/rasmoste/find-onion/internal/onionloc"
+ "git.cs.kau.se/rasmoste/find-onion/internal/options"
+)
+
+// match describes one visited site for which an Onion-Location was found.
+type match struct {
+	// dnsName is the name that was visited, without the "https://" scheme.
+	dnsName string
+	// onionName is the value returned by the onionloc lookup that matched.
+	onionName string
+
+	// httpHeader and htmlAttr record which of the two lookups produced the
+	// match: onionloc.HTTP or onionloc.HTML, respectively.
+	httpHeader, htmlAttr bool
+}
+
+// main parses the options, starts the signal handler, opts.NumWorkers workers
+// and the aggregator, feeds the workers with names read from the input file,
+// and then waits for the queues to drain (or for cancellation) before
+// shutting everything down.
+func main() {
+	opts := options.Parse()
+	cli := &http.Client{
+		Transport: &http.Transport{
+			DisableKeepAlives: true,
+		},
+	}
+
+	// Deferred calls run last-in first-out.  Registering this one first makes
+	// it run last, i.e., after all goroutines have actually returned.
+	defer log.Printf("INFO: completed")
+
+	// sanCh is a channel of names that our workers should visit
+	sanCh := make(chan string, 2*opts.NumWorkers)
+	defer close(sanCh)
+
+	// matchCh is a channel of matches, i.e., sites that had Onion-Location
+	matchCh := make(chan match, 2*opts.NumWorkers)
+	defer close(matchCh)
+
+	// missCh is a channel of misses, i.e., the number of sites that had not
+	// configured Onion-Location. True means HTTP GET succeeded.
+	missCh := make(chan bool, 2*opts.NumWorkers)
+	defer close(missCh)
+
+	// Shutdown order on return: cancel the context, wait for every goroutine,
+	// and only then close the channels they were using.
+	var wg sync.WaitGroup
+	defer wg.Wait()
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	// wg.Add must happen before the goroutine is started; calling it from
+	// inside the goroutine races with wg.Wait.
+	log.Printf("INFO: starting await handler, ctrl+C to exit\n")
+	wg.Add(1)
+	go func() {
+		defer wg.Done()
+		await(ctx, cancel)
+	}()
+
+	log.Printf("INFO: starting %d workers\n", opts.NumWorkers)
+	for i := 0; i < opts.NumWorkers; i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			workHandler(ctx, opts, cli, sanCh, matchCh, missCh)
+		}()
+	}
+
+	log.Printf("INFO: starting work receiver\n")
+	wg.Add(1)
+	go func() {
+		defer wg.Done()
+		workAggregator(ctx, cancel, opts, matchCh, missCh)
+	}()
+
+	log.Printf("INFO: generating work\n")
+	workGenerator(ctx, cancel, opts, sanCh)
+	time.Sleep(time.Second)
+
+	for {
+		select {
+		case <-ctx.Done():
+			logger.Printf("DEBUG: context cancelled, exiting...\n")
+			return
+		case <-time.After(1 * time.Second):
+		}
+
+		numMessages := len(sanCh) + len(matchCh) + len(missCh)
+		if numMessages == 0 {
+			// Empty channels say nothing about requests that workers are
+			// still performing, so give them a grace period.  The wait is
+			// interruptible so that ctrl+C is honoured here as well.
+			logger.Printf("Exit scheduled in %v from now", 2*opts.Timeout)
+			select {
+			case <-ctx.Done():
+				logger.Printf("DEBUG: context cancelled, exiting...\n")
+			case <-time.After(2 * opts.Timeout):
+			}
+			return // TODO: fix a cleaner exit
+		}
+		logger.Printf("DEBUG: waiting for %d messages to be processed before exit", numMessages)
+	}
+}
+
+// await blocks until SIGINT or SIGTERM is received, in which case it calls
+// cancel, or until ctx is done, whichever happens first.
+func await(ctx context.Context, cancel context.CancelFunc) {
+	sigs := make(chan os.Signal, 1)
+	signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
+
+	// Unregister the channel rather than closing it: closing a channel that
+	// is still registered makes the signal package panic with "send on closed
+	// channel" when the next signal arrives.  Once unregistered, signals are
+	// no longer relayed to sigs.
+	defer signal.Stop(sigs)
+
+	select {
+	case <-sigs:
+		cancel()
+	case <-ctx.Done():
+	}
+}
+
+// workHandler receives names from sanCh until ctx is done or sanCh is closed.
+// For every name it performs an HTTPS GET that is limited by opts.Timeout and
+// reports the outcome:
+//
+//   - a match on matchCh if onionloc.HTTP or onionloc.HTML finds a value,
+//   - true on missCh if the GET succeeded but nothing was found,
+//   - false on missCh if the GET failed.
+//
+// A name for which no request could be constructed is only logged.
+func workHandler(ctx context.Context, opts options.Options, cli *http.Client,
+	sanCh chan string, matchCh chan match, missCh chan bool) {
+	// The report helpers give up once ctx is done.  An unconditional send
+	// would block forever if the receiving side has already stopped and the
+	// channel buffer is full, which in turn would keep this worker alive.
+	reportMatch := func(m match) {
+		select {
+		case matchCh <- m:
+		case <-ctx.Done():
+		}
+	}
+	reportMiss := func(connected bool) {
+		select {
+		case missCh <- connected:
+		case <-ctx.Done():
+		}
+	}
+
+	for {
+		select {
+		case <-ctx.Done():
+			return
+		case san, ok := <-sanCh:
+			if !ok {
+				return // closed channel: no more work will arrive
+			}
+
+			// Run each visit in its own function so that the deferred
+			// cancel and body close fire per name rather than at exit.
+			func() {
+				cctx, cancel := context.WithTimeout(ctx, opts.Timeout)
+				defer cancel()
+
+				req, err := http.NewRequestWithContext(cctx, http.MethodGet, "https://"+san, nil)
+				if err != nil {
+					logger.Printf("DEBUG: %s: %v", san, err)
+					return
+				}
+
+				rsp, err := cli.Do(req)
+				if err != nil {
+					reportMiss(false)
+					return
+				}
+				defer rsp.Body.Close()
+
+				if v, ok := onionloc.HTTP(rsp); ok {
+					reportMatch(match{dnsName: san, onionName: v, httpHeader: true})
+					return
+				}
+				if v, ok := onionloc.HTML(rsp); ok {
+					reportMatch(match{dnsName: san, onionName: v, htmlAttr: true})
+					return
+				}
+				reportMiss(true)
+			}()
+		}
+	}
+}
+
+func workAggregator(ctx context.Context, _ context.CancelFunc, opts options.Options, matchCh chan match, missCh chan bool) {
+ ticker := time.NewTicker(opts.MetricsInterval)
+ defer ticker.Stop()
+
+ numConnected := 0
+ numOnionLocation := 0
+ numVisits := 0
+ output := func() {
+ logger.Printf("INFO: %d/%d connected, %d matched\n", numConnected, numVisits, numOnionLocation)
+ }
+ defer output()
+
+ for {
+ select {
+ case <-ctx.Done():
+ return
+ case m := <-matchCh:
+ numVisits += 1
+ numConnected += 1
+ numOnionLocation += 1
+
+ log.Printf("INFO: found match %s (HTTP:%v HTML:%v)\n", m.dnsName, m.httpHeader, m.htmlAttr)
+ case connected := <-missCh:
+ numVisits += 1
+ if connected {
+ numConnected += 1
+ }
+ case <-ticker.C:
+ output()
+ }
+ }
+}
+
+func workGenerator(ctx context.Context, cancel context.CancelFunc, opts options.Options, sanCh chan string) {
+ fp, err := os.OpenFile(opts.InputFile, os.O_RDONLY, 0644)
+ if err != nil {
+ logger.Printf("ERROR: %v", err)
+ cancel()
+ return
+ }
+
+ defer fp.Close()
+ scanner := bufio.NewScanner(fp)
+ max := 1024 * 1024
+ buf := make([]byte, 0, max)
+ scanner.Buffer(buf, max)
+
+ for scanner.Scan() {
+ select {
+ case <-ctx.Done():
+ return
+ default:
+ }
+
+ for {
+ if len(sanCh) < cap(sanCh) {
+ sanCh <- scanner.Text()
+ break
+ }
+ select {
+ case <-ctx.Done():
+ return
+ case <-time.After(time.Second):
+ continue
+ }
+ }
+ }
+}