diff options
author | Rasmus Dahlberg <rasmus@rgdd.se> | 2024-08-22 19:42:20 +0200 |
---|---|---|
committer | Rasmus Dahlberg <rasmus@rgdd.se> | 2024-08-22 19:45:31 +0200 |
commit | 942e8d24d75eed2473576c22c5894a72ea1f9b30 (patch) | |
tree | e825c33fb25931126d2367a97ac7e67e35d207ec | |
parent | fc6866e229ba8b4278ed1dce79351b40481e6c2a (diff) |
Add another digest script
$ ./scripts/digest2.py -i /home/rgdd/Downloads/2023-04-03-ct-sans/au-mel/*.stdout /home/rgdd/Downloads/2023-04-03-ct-sans/us-nyc/*.stdout /home/rgdd/Downloads/2023-04-03-ct-sans/de-fra/*.stdout 2>&1
digest2.py:26 INFO: found 3330 onions via Onion-Location
digest2.py:27 INFO: found 3077 via HTTP headers
digest2.py:28 INFO: found 281 via HTML tags
digest2.py:29 INFO: found 28 via both HTTP and HTML
-rwxr-xr-x | scripts/digest2.py | 161 |
1 file changed, 161 insertions, 0 deletions
#!/usr/bin/env python3

__program_description = '''
A script that digests the output of onion-grab. Meant to be used for sorting
out the number of onion addresses and how they were discovered via O-L. It
is digest "2" because this was added after discovering a redirect bug. So,
this output gives a better view of how common HTTP and HTML config really is.
'''

import argparse
import base64
import hashlib
import logging
import sys

log = logging.getLogger(__name__)


def main(args):
    '''
    Read every input file, digest the collected onion-grab output, and log a
    summary of how many onion addresses were found and via which mechanism.
    '''
    input_lines = []
    for input_file in args.input_file:
        with open(input_file) as fp:
            input_lines += [line for line in fp]

    num_onion, num_http, num_html = parse_input(input_lines)
    log.info(f'found {num_onion} onions via Onion-Location')
    log.info(f'found {num_http} via HTTP headers')
    log.info(f'found {num_html} via HTML tags')
    # Inclusion-exclusion: |HTTP ∩ HTML| = |HTTP| + |HTML| - |HTTP ∪ HTML|,
    # where num_onion is the size of the union (distinct onion addresses).
    log.info(f'found {num_http + num_html - num_onion} via both HTTP and HTML')


def parse_input(lines):
    '''
    Digest a list of onion-grab output lines.  Returns a tuple of three
    counts: (distinct onion addresses, addresses seen via an HTTP header,
    addresses seen via an HTML tag).  Lines that fail to parse — e.g.,
    malformed format or an invalid onion address — are logged at debug
    level and skipped.
    '''
    onion2method = {}
    for line in lines:
        try:
            # Bug fix: the old code sliced off the final character
            # unconditionally, corrupting a last line without a newline.
            line = line.rstrip('\n')
            for addr, is_http in parse_line(line):
                addr = trim_onion(trimPath(trimScheme(addr)))
                methods = onion2method.setdefault(addr, {})
                methods["http" if is_http else "html"] = True
        except Exception as e:
            log.debug(f'"{line}": {e}')

    num_http = sum(1 for d in onion2method.values() if "http" in d)
    num_html = sum(1 for d in onion2method.values() if "html" in d)
    return len(onion2method), num_http, num_html


def parse_line(line):
    '''
    Line format is:

        <domain> http=[value] html=[value]

    where at least one of http or html should have a value.  Note: there has
    been no vetting of what <value> is.  Outputs a list of (value, is_http)
    tuples, where is_http is a boolean indicating whether the value came
    from an HTTP header (True) or an HTML tag (False).

    Raises Exception if the line does not have exactly three space-separated
    fields or if the http/html fields lack a "=" separator.
    '''
    ret = []

    s = line.split(" ")
    if len(s) != 3:
        raise Exception('invalid line split')

    _domain, http2onion, html2onion = s

    # Bug fix: split at most once so values containing "=" (e.g. URLs with
    # query strings) are not truncated at the first "=".
    s = http2onion.split("=", 1)
    if len(s) < 2:
        raise Exception('invalid http split')
    if len(s[1]) > 0:
        ret += [(s[1], True)]

    s = html2onion.split("=", 1)
    if len(s) < 2:
        raise Exception('invalid html split')
    if len(s[1]) > 0:
        ret += [(s[1], False)]

    return ret


def trimScheme(url):
    '''
    Removes required http:// or https:// scheme from url.  Raises Exception
    if neither scheme is present.
    '''
    for scheme in ["http://", "https://"]:
        if url.startswith(scheme):
            return url[len(scheme):]

    raise Exception('no http or https scheme')


def trimPath(url):
    '''
    Trims the path off from the url, returning everything before the first
    "/" (the whole url if it has no path).
    '''
    return url.split("/")[0]


def trim_onion(host):
    '''
    Parses host as a v3 onion address; ports and subdomains are trimmed.
    Returns the bare "<56-char-label>.onion" string.  Raises Exception (or
    ValueError for a non-numeric port) on anything that is not a valid v3
    onion address with an optional valid port.
    '''
    s = host.split(":")
    if len(s) > 2:
        raise Exception('invalid host name')
    if len(s) == 2:
        port = int(s[1])
        if port < 1 or port > 2**16 - 1:
            raise Exception(f'port number not in [1, {2**16 - 1}]')

    labels = s[0].split(".")
    if len(labels) < 2:
        raise Exception('too few labels to be an onion address')
    if labels[-1] != "onion":
        raise Exception('the final DNS label must be "onion"')
    if len(labels[-2]) != 56:
        raise Exception('the DNS label before ".onion" must be 56 bytes')

    # Base32 is case-insensitive but Python's decoder wants upper-case.
    assert_v3(base64.b32decode(labels[-2].upper().encode('UTF-8')))
    return ".".join(labels[-2:])


def assert_v3(blob):
    '''
    Checks that blob is a valid decoded v3 onion address, i.e., the 35-byte
    concatenation pubkey (32) || checksum (2) || version (1), see:
    https://gitweb.torproject.org/torspec.git/tree/rend-spec-v3.txt#n2240

    Raises Exception if the blob is malformed.
    '''
    # Defensive length check: the old code raised a bare IndexError for a
    # short blob instead of a meaningful error.
    if len(blob) != 35:
        raise Exception(f'invalid blob length: {len(blob)}')

    pubkey = blob[:32]
    checksum = blob[32:34]
    version = blob[34:35]
    if version[0] != 3:
        raise Exception(f'invalid version: {version[0]}')

    h = hashlib.sha3_256()
    h.update(b'.onion checksum')
    h.update(pubkey)
    h.update(version)
    c = h.digest()
    if checksum[0] != c[0] or checksum[1] != c[1]:
        raise Exception('invalid checksum')


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description=__program_description)
    parser.add_argument("-v", "--verbosity", type=str, default="info",
                        help="logging verbosity, select from debug, info, warning, error, and critical")
    parser.add_argument('-i', '--input-file', nargs='+', required=True,
                        help='input file with collected data')

    args = parser.parse_args()
    logging.basicConfig(level=logging.__dict__[args.verbosity.upper()],
                        format='%(filename)s:%(lineno)d %(levelname)s: %(message)s')

    sys.exit(main(args))