From cdfc220d5df547bc4fbe3157ddfa22c614eb737c Mon Sep 17 00:00:00 2001 From: Rasmus Dahlberg Date: Thu, 23 Mar 2023 19:33:21 +0100 Subject: Add onion location parsing --- go.mod | 5 + go.sum | 2 + internal/onionloc/onionloc.go | 74 ++++++++++ internal/onionloc/onionloc_test.go | 277 +++++++++++++++++++++++++++++++++++++ 4 files changed, 358 insertions(+) create mode 100644 go.mod create mode 100644 go.sum create mode 100644 internal/onionloc/onionloc.go create mode 100644 internal/onionloc/onionloc_test.go diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..ea65ea5 --- /dev/null +++ b/go.mod @@ -0,0 +1,5 @@ +module git.cs.kau.se/rasmoste/find-onion + +go 1.19 + +require golang.org/x/net v0.8.0 diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..6c4f79e --- /dev/null +++ b/go.sum @@ -0,0 +1,2 @@ +golang.org/x/net v0.8.0 h1:Zrh2ngAOFYneWTAIAPethzeaQLuHwhuBkuV6ZiRnUaQ= +golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc= diff --git a/internal/onionloc/onionloc.go b/internal/onionloc/onionloc.go new file mode 100644 index 0000000..63e512a --- /dev/null +++ b/internal/onionloc/onionloc.go @@ -0,0 +1,74 @@ +package onionloc + +import ( + "net/http" + "strings" + + "golang.org/x/net/html" +) + +const ( + HTTPHeaderName = "Onion-Location" +) + +func HTTP(rsp *http.Response) (string, bool) { + v, ok := rsp.Header[HTTPHeaderName] + if !ok { + return "", false + } + if len(v) != 1 { + return "", false + } + return v[0], true +} + +func HTML(rsp *http.Response) (string, bool) { + z := html.NewTokenizer(rsp.Body) + for { + tt := z.Next() + if tt == html.ErrorToken { + return "", false // EOF and other errors + } + + switch tt { + case html.StartTagToken, html.SelfClosingTagToken: + t := z.Token() + + // Looking for the html meta tag, see: + // https://www.w3schools.com/tags/tag_meta.asp + // + // We expect exactly two attributes: "http-equiv" then "content" + if strings.ToLower(t.Data) != "meta" { + break + } + if len(t.Attr) != 
2 { + break + } + + // We're looking for the "http-equiv" key, see: + // https://www.w3schools.com/tags/att_meta_http_equiv.asp + // + // In particular with the value "onion-location", see: + // https://community.torproject.org/onion-services/advanced/onion-location/ + // + // If we have all this and a following content + // attribute, that is the Onion-Location URL that this + // page advertises. We make no attempt to validate + // whether the content value is really an onion address. + attr := t.Attr[0] + if strings.ToLower(attr.Key) != "http-equiv" { + break + } + if strings.ToLower(attr.Val) != "onion-location" { + break + } + attr = t.Attr[1] + if strings.ToLower(attr.Key) != "content" { + break + } + + return attr.Val, true + default: + } + } +} diff --git a/internal/onionloc/onionloc_test.go b/internal/onionloc/onionloc_test.go new file mode 100644 index 0000000..4d20876 --- /dev/null +++ b/internal/onionloc/onionloc_test.go @@ -0,0 +1,277 @@ +package onionloc + +import ( + "io" + "net/http" + "strings" + "testing" +) + +func TestHTML(t *testing.T) { + for _, table := range []struct { + desc string + html string + want string + }{ + {"onion location unset", htmlNoOnionLocation, ""}, + {"onion location set", htmlOnionLocation, htmlOnionLocationValue}, + } { + var r http.Response + r.Body = io.NopCloser(strings.NewReader(table.html)) + + v, ok := HTML(&r) + if got, want := ok, table.want != ""; got != want { + t.Errorf("%s: got ok %v but wanted %v", table.desc, got, want) + continue + } + if !ok { + continue + } + if got, want := v, table.want; got != want { + t.Errorf("%s: got %s but wanted %s", table.desc, got, want) + } + } +} + +// curl -s https://www.rgdd.se/ +const htmlNoOnionLocation = ` + + + + + + rgdd.se + + + + + + + + + + + + + + + + + + +
+ +
+
+
+
+
avatar +
+ +

PhD student at Karlstad University. Software engineer at Glasklar Teknik. +I am passionate about transparency logs, anonymity networks, and Linux. I have +a keen interest for the overlap between research, engineering, and operations. +Off-work I cook, walk, socialize, lift weights, and spoil my cat.

+

Project involvement

+

I am a core member of the following projects:

+
    +
  • Sigsum: a free and open source software project that makes a signer’s +key-usage transparent. It can be used as a building block to secure the +supply chain and more.
  • +
  • System Transparency: an open source project that provides a security +architecture for bare metal servers. A system’s entire boot chain becomes +protected and auditable.
  • +
  • Tor: a 501(c)(3) US nonprofit that advance human rights and defend online +privacy through free software and open networks.
  • +
+

Selected publications

+ +

Contact

+

Feel free to reach out on email or other platforms as you see fit.

+ +
+ +
+ +` + +const htmlOnionLocationValue = "http://cyigahm4clwlimo6mfl4yjie5fwfbhlbuag57xo3kimk6invht6ffrad.onion/" + +// curl -s https://www.rgdd.se/ +// then add <meta http-equiv="onion-location" content="http://cyigahm4clwlimo6mfl4yjie5fwfbhlbuag57xo3kimk6invht6ffrad.onion/" /> +// somewhere in the head portion +const htmlOnionLocation = ` + + + + + + rgdd.se + + + + + + + + + + + + + + + + + + + +
+ +
+
+
+
+
avatar +
+ +

PhD student at Karlstad University. Software engineer at Glasklar Teknik. +I am passionate about transparency logs, anonymity networks, and Linux. I have +a keen interest for the overlap between research, engineering, and operations. +Off-work I cook, walk, socialize, lift weights, and spoil my cat.

+

Project involvement

+

I am a core member of the following projects:

+
    +
  • Sigsum: a free and open source software project that makes a signer’s +key-usage transparent. It can be used as a building block to secure the +supply chain and more.
  • +
  • System Transparency: an open source project that provides a security +architecture for bare metal servers. A system’s entire boot chain becomes +protected and auditable.
  • +
  • Tor: a 501(c)(3) US nonprofit that advance human rights and defend online +privacy through free software and open networks.
  • +
+

Selected publications

+ +

Contact

+

Feel free to reach out on email or other platforms as you see fit.

+ +
+ +
+ +` -- cgit v1.2.3