about summary refs log tree commit diff
diff options
context:
space:
mode:
author Rasmus Dahlberg <rasmus@rgdd.se> 2023-03-25 14:11:29 +0100
committer Rasmus Dahlberg <rasmus@rgdd.se> 2023-03-25 14:13:23 +0100
commit af5be4fbd0c3824478b9cfc261b7a34e98a36e0f (patch)
tree a26d8d2c226827f0ccc7db54be3a70f99f420c59
parent 388caec0b6ed6b9cccb15de7ee2f093361772764 (diff)
Clean-up skeleton
-rw-r--r-- go.mod 2
-rw-r--r-- internal/options/options.go 24
-rw-r--r-- internal/qna/qna.go 6
-rw-r--r-- lists/small.lst 4
-rw-r--r-- main.go 176
5 files changed, 119 insertions, 93 deletions
diff --git a/go.mod b/go.mod
index ea65ea5..8db4fb6 100644
--- a/go.mod
+++ b/go.mod
@@ -1,4 +1,4 @@
-module git.cs.kau.se/rasmoste/find-onion
+module git.cs.kau.se/rasmoste/onion-grab
go 1.19
diff --git a/internal/options/options.go b/internal/options/options.go
index 8e10686..c988713 100644
--- a/internal/options/options.go
+++ b/internal/options/options.go
@@ -6,19 +6,31 @@ import (
)
type Options struct {
- InputFile string
- NumWorkers int
+ // Input file
+ InputFile string
+ MaxFileBuffer int
+ NextLine int64
+
+ // Website visits
+ NumWorkers int
+ Timeout time.Duration
+ MaxResponse int64
+
+ // Health and metrics
MetricsInterval time.Duration
- Timeout time.Duration
- MaxResponse int64
}
func Parse() (opts Options) {
flag.StringVar(&opts.InputFile, "i", "lists/small.lst", "input file, one domain name per line")
- flag.IntVar(&opts.NumWorkers, "w", 10, "number of parallel workers")
+ flag.IntVar(&opts.MaxFileBuffer, "b", 512, "max bytes to read from input file at once in MiB")
+ flag.Int64Var(&opts.NextLine, "n", 0, "next line to start reading the input file from")
+
+ flag.IntVar(&opts.NumWorkers, "w", 2, "number of parallel workers")
flag.DurationVar(&opts.Timeout, "t", 10*time.Second, "timeout for each website visit")
+ flag.Int64Var(&opts.MaxResponse, "r", 128, "max response body size to accept in MiB")
+
flag.DurationVar(&opts.MetricsInterval, "m", 5*time.Second, "how often to emit metrics")
- flag.Int64Var(&opts.MaxResponse, "r", 128*1024*1024, "max response body size to accept")
+
flag.Parse()
return
}
diff --git a/internal/qna/qna.go b/internal/qna/qna.go
index 5336811..bd2078d 100644
--- a/internal/qna/qna.go
+++ b/internal/qna/qna.go
@@ -7,8 +7,6 @@ type Question struct {
type Answer struct {
Domain string // domain name of the visited HTTPS site
OK bool // true if HTTP GET request succeeded
-
- HTTP bool // true if onion location was found via HTTP header
- HTML bool // true if onion location was found via HTML attribute
- Onion string // the site's onion location URL (if any)
+ HTTP string // value set in the Onion-Location HTTP header (if any)
+ HTML string // value set in the Onion-Location HTML attribute (if any)
}
diff --git a/lists/small.lst b/lists/small.lst
index f161918..ea1b9f7 100644
--- a/lists/small.lst
+++ b/lists/small.lst
@@ -1,5 +1,5 @@
-blog.torproject.org
www.eff.org
www.kau.se
-www.torproject.org
www.nytimes.com
+www.torproject.org
+www.qubes-os.org
diff --git a/main.go b/main.go
index 0efac30..f04ea17 100644
--- a/main.go
+++ b/main.go
@@ -1,3 +1,13 @@
+// Package main provides onion-grab, a tool that visits a list of domains
+// concurrently over HTTPS to see if they have Onion-Location configured.
+//
+// Install:
+//
+// $ go install git.cs.kau.se/rasmoste/onion-grab@latest
+//
+// Usage:
+//
+// $ onion-grab -h
package main
import (
@@ -12,9 +22,9 @@ import (
"syscall"
"time"
- "git.cs.kau.se/rasmoste/find-onion/internal/onionloc"
- "git.cs.kau.se/rasmoste/find-onion/internal/options"
- "git.cs.kau.se/rasmoste/find-onion/internal/qna"
+ "git.cs.kau.se/rasmoste/onion-grab/internal/onionloc"
+ "git.cs.kau.se/rasmoste/onion-grab/internal/options"
+ "git.cs.kau.se/rasmoste/onion-grab/internal/qna"
)
func main() {
@@ -22,19 +32,28 @@ func main() {
cli := &http.Client{
Transport: &http.Transport{
DisableKeepAlives: true,
- MaxResponseHeaderBytes: opts.MaxResponse,
+ MaxResponseHeaderBytes: opts.MaxResponse * 1024 * 1024,
},
}
- questionCh := make(chan qna.Question, 2*opts.NumWorkers)
+ fp, err := os.OpenFile(opts.InputFile, os.O_RDONLY, 0644)
+ if err != nil {
+ log.Printf("ERROR: %v", err)
+ os.Exit(1)
+ }
+ defer fp.Close()
+
+ questionCh := make(chan qna.Question)
defer close(questionCh)
- answerCh := make(chan qna.Answer, 2*opts.NumWorkers)
+ answerCh := make(chan qna.Answer)
defer close(answerCh)
var wg sync.WaitGroup
defer wg.Wait()
+
ctx, cancel := context.WithCancel(context.Background())
+ defer cancel()
log.Printf("INFO: starting await handler, ctrl+C to exit\n")
go func() {
@@ -56,29 +75,17 @@ func main() {
go func() {
wg.Add(1)
defer wg.Done()
- workAggregator(ctx, cancel, opts, answerCh)
+ workAggregator(ctx, opts, answerCh)
}()
log.Printf("INFO: generating work\n")
- workGenerator(ctx, cancel, opts, questionCh)
- time.Sleep(time.Second)
-
- defer cancel()
- defer time.Sleep(2 * opts.Timeout)
- defer log.Printf("INFO: about to exit in %v", 2*opts.Timeout)
- for {
- select {
- case <-ctx.Done():
- log.Printf("INFO: context cancelled")
- return
- case <-time.After(time.Second):
+ nextLine, readAll := workGenerator(ctx, opts, fp, questionCh)
+ if !readAll {
+ warn := fmt.Sprintf("only read up until line %d", nextLine)
+ if opts.NextLine != 0 {
+ warn += fmt.Sprintf(" (line %d relative to start)", nextLine-opts.NextLine)
}
-
- numMessages := len(questionCh) + len(answerCh)
- if numMessages == 0 {
- return
- }
- log.Printf("DEBUG: waiting for %d messages to be processed before exit", numMessages)
+ log.Printf("WARNING: %s\n", warn)
}
}
@@ -89,9 +96,9 @@ func await(ctx context.Context, cancel context.CancelFunc) {
signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
select {
case <-sigs:
- cancel()
case <-ctx.Done():
}
+ cancel()
}
func workHandler(ctx context.Context, opts options.Options, cli *http.Client, questionCh chan qna.Question, answerCh chan qna.Answer) {
@@ -109,96 +116,105 @@ func workHandler(ctx context.Context, opts options.Options, cli *http.Client, qu
return
}
+ answer := qna.Answer{Domain: question.Domain}
rsp, err := cli.Do(req)
if err != nil {
- answerCh <- qna.Answer{question.Domain, false, false, false, ""}
+ answerCh <- answer
return
}
defer rsp.Body.Close()
+ answer.OK = true
- v, ok := onionloc.HTTP(rsp)
+ onion, ok := onionloc.HTTP(rsp)
if ok {
- answerCh <- qna.Answer{question.Domain, true, true, false, v}
- return
+ answer.HTTP = onion
}
- v, ok = onionloc.HTML(rsp)
+ onion, ok = onionloc.HTML(rsp)
if ok {
- answerCh <- qna.Answer{question.Domain, true, false, true, v}
- return
+ answer.HTML = onion
}
- answerCh <- qna.Answer{question.Domain, true, false, false, ""}
+ answerCh <- answer
}()
}
}
}
-func workAggregator(ctx context.Context, _ context.CancelFunc, opts options.Options, answerCh chan qna.Answer) {
- ticker := time.NewTicker(opts.MetricsInterval)
- defer ticker.Stop()
-
- numConnected := 0
- numOnionLocation := 0
- numVisits := 0
+func workAggregator(ctx context.Context, opts options.Options, answerCh chan qna.Answer) {
+ numConnect := 0
+ numOnions := 0
+ numAll := 0
output := func() {
- log.Printf("INFO: %d/%d connected, %d matched\n", numConnected, numVisits, numOnionLocation)
+ log.Printf("INFO: %d/%d connected, %d sites configured Onion-Location\n", numConnect, numAll, numOnions)
+ }
+
+ handleAnswer := func(a qna.Answer) {
+ numAll += 1
+ if !a.OK {
+ return
+ }
+
+ numConnect += 1
+ if a.HTTP != "" || a.HTML != "" {
+ numOnions += 1
+ fmt.Printf("%s header=%s attribute=%s\n", a.Domain, a.HTTP, a.HTML)
+ }
}
- defer output()
+ ticker := time.NewTicker(opts.MetricsInterval)
+ defer ticker.Stop()
+ defer output()
for {
select {
case <-ctx.Done():
- return
- case a := <-answerCh:
- numVisits += 1
- if !a.OK {
- continue
- }
-
- numConnected += 1
- if a.HTTP || a.HTML {
- numOnionLocation += 1
- fmt.Printf("http:%v html:%v domain:%s onion:%s \n", a.HTTP, a.HTML, a.Domain, a.Onion)
+ log.Printf("INFO: about to exit, reading remaining answers\n")
+ for {
+ select {
+ case a := <-answerCh:
+ handleAnswer(a)
+ case <-time.After(opts.Timeout):
+ return
+ }
}
+ case a := <-answerCh:
+ handleAnswer(a)
case <-ticker.C:
output()
}
}
}
-func workGenerator(ctx context.Context, cancel context.CancelFunc, opts options.Options, questionCh chan qna.Question) {
- fp, err := os.OpenFile(opts.InputFile, os.O_RDONLY, 0644)
- if err != nil {
- log.Printf("ERROR: %v", err)
- cancel()
- return
- }
-
- defer fp.Close()
+func workGenerator(ctx context.Context, opts options.Options, fp *os.File, questionCh chan qna.Question) (int64, bool) {
scanner := bufio.NewScanner(fp)
- max := 2 * 256 * opts.NumWorkers
- buf := make([]byte, 0, max)
- scanner.Buffer(buf, max)
+ buf := make([]byte, 0, opts.MaxFileBuffer*1024*1024)
+ scanner.Buffer(buf, opts.MaxFileBuffer*1024*1024)
- // TODO: track which line we would have to start from to be sure that
- // we're not missing any domains on ctrl+C, OK if we go back too much?
+ nextLine := int64(0)
for scanner.Scan() {
select {
case <-ctx.Done():
- return
+ return nextLine, false
default:
}
- for {
- if len(questionCh) < cap(questionCh) {
- questionCh <- qna.Question{Domain: scanner.Text()}
- break
- }
- select {
- case <-ctx.Done():
- return
- case <-time.After(time.Second):
- continue
- }
+ if nextLine == opts.NextLine {
+ break
+ }
+ scanner.Text()
+ nextLine++
+ }
+
+ for scanner.Scan() {
+ select {
+ case <-ctx.Done():
+ return nextLine, false
+ case questionCh <- qna.Question{Domain: scanner.Text()}:
+ nextLine++
}
}
+
+ select {
+ case <-ctx.Done():
+ case <-time.After(opts.Timeout):
+ }
+ return nextLine, true
}