#!/bin/bash
#
# A script that tests how different onion-grab configurations perform with
# regard to successful connections, connection errors, found onions, etc. To
# conduct these tests from multiple vantage points, Mullvad VPN is used.
#

###
# Options you will likely want to tune yourself
###
relays=(
    se-sto-wg-001
    us-nyc-wg-501
    au-mel-wg-301
)
limits=(
    5
    10
)
num_workers=16
input_file=example.lst # sites to visit, one per line

###
# Other options
###
tmp_dir=$(mktemp -d)
out_dir=data/$(date +%Y%m%d-%H%M%S)
mullvad_wait_s=5 # time to wait after connecting to a relay
timeout_s=30     # per-site timeout, in seconds

set -eu

function main() {
    num_sites=$(wc -l "$input_file" | cut -d' ' -f1)

    debug "relays=${#relays[@]} (${relays[*]})"
    debug "limits=${#limits[@]} (${limits[*]})"
    debug "num_workers=$num_workers"
    debug "input_file=$input_file ($num_sites sites)"
    debug "tmp_dir=$tmp_dir"
    debug "out_dir=$out_dir"
    debug "mullvad_wait_s=$mullvad_wait_s"
    debug "timeout_s=$timeout_s"

    num_test=$(( ${#limits[@]} * ${#relays[@]} ))
    debug "about to start $num_test tests"

    # Estimate the total runtime: each test visits $num_sites sites at a rate
    # of $limit sites/s, plus time spent switching relays and some slack for
    # in-flight requests to time out after the last site has been scheduled.
    runtime_s=0
    wait_mullvad_s=$(( $num_test * $mullvad_wait_s ))
    wait_onion_grab_s=$(( $num_test * $timeout_s * 2 ))
    for limit in "${limits[@]}"; do
        for relay in "${relays[@]}"; do
            seconds=$(( $num_sites / $limit ))
            runtime_s=$(( $runtime_s + $seconds ))
        done
    done
    now=$(date +%s)
    estimated_done=$(( $now + $runtime_s + $wait_mullvad_s + $wait_onion_grab_s ))
    debug "estimated to be done at the earliest $(date -d "@$estimated_done" +"%Y-%m-%d %H:%M:%S %Z")"

    # Run one onion-grab test per (relay, limit) pair, reconnecting Mullvad to
    # the right relay before each test and shuffling the input list to avoid
    # ordering effects.
    i=1
    for limit in "${limits[@]}"; do
        for relay in "${relays[@]}"; do
            mullvad disconnect >/dev/null
            mullvad relay set hostname "$relay" >/dev/null
            mullvad connect >/dev/null
            sleep "$mullvad_wait_s"
            ip=$(curl -s https://am.i.mullvad.net)

            debug "working on test $i/$num_test at relay $relay ($ip) with limit $limit sites/s"
            shuf -o "$tmp_dir/input.lst" "$input_file"
            stdout_file=$tmp_dir/$relay-$limit.stdout
            stderr_file=$tmp_dir/$relay-$limit.stderr
            onion-grab -i "$tmp_dir/input.lst" -t "$timeout_s"s -l "$limit" -w "$num_workers" >"$stdout_file" 2>"$stderr_file"

            i=$(( $i + 1 ))
        done
    done

    # Summarize each test from onion-grab's stderr, then digest the combined
    # per-limit results ($all_file is the concatenation of all relays' output
    # for a given limit, duplicates included).
    mkdir -p "$out_dir"
    for limit in "${limits[@]}"; do
        all_file=$tmp_dir/dup.txt
        rm -f "$all_file"
        for relay in "${relays[@]}"; do
            stdout_file=$tmp_dir/$relay-$limit.stdout
            stderr_file=$tmp_dir/$relay-$limit.stderr

            cat "$stdout_file" >> "$all_file"

            avg_rate=$(grep 'Average rate:' "$stderr_file" | tail -n1 | xargs | cut -d' ' -f3)
            if [[ -z "$avg_rate" ]]; then
                avg_rate="n/a"
            else
                avg_rate="${avg_rate}s"
            fi
            num_onion=$(wc -l "$stdout_file" | cut -d' ' -f1)
            processed=$(grep 'Processed:' "$stderr_file" | tail -n1 | xargs | cut -d' ' -f2)
            success=$(grep 'Success:' "$stderr_file" | tail -n1 | xargs | cut -d' ' -f2)
            failure=$(grep 'Failure:' "$stderr_file" | tail -n1 | xargs | cut -d' ' -f2)
            failure_dns=$(grep 'DNS:' "$stderr_file" | tail -n1 | xargs | cut -d' ' -f2)
            failure_dns_detailed=$(grep 'DNS:' "$stderr_file" | tail -n1 | xargs | cut -d' ' -f3-)
            info "relay:$relay limit:$limit/s avg-rate:$avg_rate onions:$num_onion connected:$success/$processed dns-error:$failure_dns/$failure $failure_dns_detailed"

            output_file=$out_dir/$relay-l$limit.txt
            mv "$stdout_file" "$output_file"
        done

        num_sites=$(cut -d' ' -f1 "$all_file" | sort -Vu | wc -l | cut -d' ' -f1)
        info "all relays with limit $limit/s: $num_sites unique sites set an onion location header or attribute, digested results below"
        ./digest.py -v info -i "$all_file" -o "$out_dir/onion-l$limit.txt" -d "$out_dir/domain-l$limit.txt"
    done

    info "stored output in $out_dir"
    rm -rf "$tmp_dir"
}

function debug() { echo "$(date +"%Y-%m-%d %H:%M:%S %Z") [DEBU] $*" >&2; }
function info()  { echo "$(date +"%Y-%m-%d %H:%M:%S %Z") [INFO] $*" >&2; }
main "$@"