#!/bin/bash

#
# A script that debugs how different onion-grab configurations perform with
# regard to successful connections, connection errors, found onions, etc.  To
# conduct these tests from multiple vantage points, Mullvad VPN is used.
#

###
# Options you will likely want to tune yourself
###
# Mullvad relay hostnames to use as vantage points, one test run per relay.
relays=(
	se-sto-wg-001
	us-nyc-wg-501
	au-mel-wg-301
)
# Rate limits (sites/s) to test; every limit is tested from every relay.
limits=(
	5
	10
)
num_workers=16          # concurrent onion-grab workers
input_file=example.lst  # list of sites to visit, one per line
response_max_mib=8      # cap on response body size passed to onion-grab -r
metrics_interval=10s    # how often onion-grab reports metrics on stderr

###
# Other options
###
tmp_dir=$(mktemp -d)
out_dir=data/$(date +%Y%m%d-%H%M%S)
mullvad_wait_s=5        # grace period for the VPN tunnel to come up
timeout_s=30            # per-site timeout passed to onion-grab -t

set -eu
# Remove the scratch directory on every exit path; without this, set -e
# leaves it behind whenever a step fails mid-run.
trap 'rm -rf -- "$tmp_dir"' EXIT
function main() {
	# Number of sites in the input list; drives the runtime estimate.
	# $(( )) normalizes any whitespace padding that wc may emit.
	num_sites=$(( $(wc -l < "$input_file") ))
	debug "relays=${#relays[@]} (${relays[*]})"
	debug "limits=${#limits[@]} (${limits[*]})"
	debug "num_workers=$num_workers"
	debug "input_file=$input_file ($num_sites sites)"
	debug "response_max_mib=$response_max_mib"
	debug "metrics_interval=$metrics_interval"
	debug "tmp_dir=$tmp_dir"
	debug "out_dir=$out_dir"
	debug "mullvad_wait_s=$mullvad_wait_s"
	debug "timeout_s=$timeout_s"

	# One test per (limit, relay) combination.
	num_test=$(( ${#limits[@]} * ${#relays[@]} ))
	debug "about to start $num_test tests"

	# Estimate total runtime: crawl time per test is sites/limit, plus
	# fixed per-test waits for Mullvad to connect and for onion-grab to
	# drain its workers (worst case assumed to be two timeouts).
	runtime_s=0
	wait_mullvad_s=$(( num_test * mullvad_wait_s ))
	wait_onion_grab_s=$(( num_test * timeout_s * 2 ))
	for limit in "${limits[@]}"; do
		for relay in "${relays[@]}"; do
			seconds=$(( num_sites / limit ))
			runtime_s=$(( runtime_s + seconds ))
		done
	done
	now=$(date +%s)
	estimated_done=$(( now + runtime_s + wait_mullvad_s + wait_onion_grab_s ))
	debug "estimated done? approximately $(date -d @"$estimated_done" +"%Y-%m-%d %H:%M:%S %Z")"

	i=1
	for limit in "${limits[@]}"; do
		for relay in "${relays[@]}"; do
			# Hop to the next vantage point and give the tunnel a
			# moment to come up before probing.
			mullvad disconnect >/dev/null
			mullvad relay set hostname "$relay" >/dev/null
			mullvad connect >/dev/null
			sleep "$mullvad_wait_s"

			# Log the exit IP so the vantage point can be verified.
			ip=$(curl -s https://ifconfig.me)
			debug "working on test $i/$num_test at relay $relay ($ip) with limit $limit sites/s"

			# Shuffle the input so every test visits sites in a
			# different order.
			shuf -o "$tmp_dir/input.lst" "$input_file"
			stdout_file=$tmp_dir/$relay-$limit.stdout
			stderr_file=$tmp_dir/$relay-$limit.stderr
			onion-grab -i "$tmp_dir/input.lst" -t "$timeout_s"s -l "$limit" -w "$num_workers"\
				-r "$response_max_mib" -m "$metrics_interval" >"$stdout_file" 2>"$stderr_file"

			i=$(( i + 1 ))
		done
	done

	mkdir -p "$out_dir"
	for limit in "${limits[@]}"; do
		all_file=$tmp_dir/all.txt
		lim_file=$tmp_dir/lim.txt
		rm -f "$lim_file"

		for relay in "${relays[@]}"; do
			stdout_file=$tmp_dir/$relay-$limit.stdout
			stderr_file=$tmp_dir/$relay-$limit.stderr
			# Aggregate results per limit and across all tests.
			cat "$stdout_file" >> "$all_file"
			cat "$stdout_file" >> "$lim_file"

			# Parse the last metrics report that onion-grab wrote
			# to stderr; xargs squeezes runs of whitespace so that
			# the cut fields line up.
			avg_rate=$(grep 'Average rate:' "$stderr_file" | tail -n1 | xargs | cut -d' ' -f3)
			if [[ -z "$avg_rate" ]]; then
				avg_rate="n/a"
			else
				avg_rate="$avg_rate"/s
			fi

			num_onion=$(( $(wc -l < "$stdout_file") ))
			processed=$(grep 'Processed:' "$stderr_file" | tail -n1 | xargs | cut -d' ' -f2)
			success=$(grep 'Success:' "$stderr_file" | tail -n1 | xargs | cut -d' ' -f2)
			failure=$(grep 'Failure:' "$stderr_file" | tail -n1 | xargs | cut -d' ' -f2)
			failure_req=$(grep 'Req:' "$stderr_file" | tail -n1 | xargs | cut -d' ' -f2)
			failure_dns=$(grep 'DNS:' "$stderr_file" | tail -n1 | xargs | cut -d' ' -f2)
			failure_tcp=$(grep 'TCP:' "$stderr_file" | tail -n1 | xargs | cut -d' ' -f2)
			failure_tls=$(grep 'TLS:' "$stderr_file" | tail -n1 | xargs | cut -d' ' -f2)
			failure_3xx=$(grep '3xx:' "$stderr_file" | tail -n1 | xargs | cut -d' ' -f2)
			failure_eof=$(grep 'EOF:' "$stderr_file" | tail -n1 | xargs | cut -d' ' -f2)
			failure_ctx=$(grep 'CTX:' "$stderr_file" | tail -n1 | xargs | cut -d' ' -f2)
			failure_qqq=$(grep '???:' "$stderr_file" | tail -n1 | xargs | cut -d' ' -f2)
			failure_dns_detailed=$(grep 'DNS:' "$stderr_file" | tail -n1 | xargs | cut -d' ' -f3-)
			failure_tcp_detailed=$(grep 'TCP:' "$stderr_file" | tail -n1 | xargs | cut -d' ' -f3-)
			failure_tls_detailed=$(grep 'TLS:' "$stderr_file" | tail -n1 | xargs | cut -d' ' -f3-)
			info "relay:$relay limit:$limit/s rate:$avg_rate onions:$num_onion"\
				"connected:$success/$processed freq:$failure_req"\
				"fdns:$failure_dns $failure_dns_detailed"\
				"ftcp:$failure_tcp $failure_tcp_detailed"\
				"ftls:$failure_tls $failure_tls_detailed"\
				"f3xx:$failure_3xx feof:$failure_eof fctx:$failure_ctx f???:$failure_qqq"

			output_file=$out_dir/$relay-l$limit.txt
			mv "$stdout_file" "$output_file"
		done

		# Count unique sites (first column) across all relays at this
		# limit, then digest the per-limit aggregate.
		num_sites=$(( $(cut -d' ' -f1 "$lim_file" | sort -Vu | wc -l) ))
		info "all relays with limit $limit/s: $num_sites unique sites set onion location header or attribute, digested results below"
		./digest.py -v info -i "$lim_file" -o "$out_dir/onion-l$limit.txt" -d "$out_dir/domain-l$limit.txt"
	done

	info "digested results for all tests below"
	./digest.py -v info -i "$all_file" -o "$out_dir/onion-all.txt" -d "$out_dir/domain-all.txt"

	info "stored output in $out_dir"
	rm -rf "$tmp_dir"
}

# Log a timestamped debug message to stderr. Uses "$*" rather than "$@"
# inside the quoted string (SC2145): all arguments join into one word.
function debug() { echo "$(date +"%Y-%m-%d %H:%M:%S %Z") [DEBU] $*" >&2; }
# Log a timestamped info message to stderr. Uses "$*" rather than "$@"
# inside the quoted string (SC2145): all arguments join into one word.
function info()  { echo "$(date +"%Y-%m-%d %H:%M:%S %Z") [INFO] $*" >&2; }

# Quote "$@" so arguments with spaces survive intact (SC2068).
main "$@"