aboutsummaryrefslogtreecommitdiff
path: root/scripts/test.sh
blob: 886d3f88bf204b8495ea62f3ef524ae6c944db3a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#!/bin/bash

#
# A script that debugs how different onion-grab configurations perform with
# regards to successful connections, connection errors, found onions, etc.  To
# conduct these tests from multiple vantage points, Mullvad VPN is used.
#

# Fail fast: exit on unhandled errors (-e) and on unset variables (-u).
# Set before anything else runs so failures in the setup below (mktemp,
# date) abort the script instead of going unnoticed.
set -eu

###
# Options you will likely want to tune yourself
###
relays=(            # Mullvad relay hostnames, one vantage point per test
	se-sto-wg-001
	us-nyc-wg-501
	au-mel-wg-301
)
limits=(            # rate limits (sites/second) to test at every relay
	5
	10
)
num_workers=16      # onion-grab worker pool size
input_file=example.lst

###
# Other options
###
tmp_dir=$(mktemp -d)                # scratch space, removed on success
out_dir=data/$(date +%Y%m%d-%H%M%S) # timestamped results directory
mullvad_wait_s=5                    # grace period after 'mullvad connect'
timeout_s=30                        # per-site timeout passed to onion-grab
# Run one onion-grab pass per (relay, limit) pair through Mullvad, then
# digest the collected results into per-limit and overall summaries.
# Globals (read): relays, limits, num_workers, input_file, tmp_dir,
#                 out_dir, mullvad_wait_s, timeout_s
# Outputs: result files under $out_dir; progress logging on stderr.
function main() {
	# wc emits "<count> <filename>"; keep only the count.
	num_sites=$(wc -l "$input_file" | cut -d' ' -f1)
	debug "relays=${#relays[@]} (${relays[*]})"
	debug "limits=${#limits[@]} (${limits[*]})"
	debug "num_workers=$num_workers"
	debug "input_file=$input_file ($num_sites sites)"
	debug "tmp_dir=$tmp_dir"
	debug "out_dir=$out_dir"
	debug "mullvad_wait_s=$mullvad_wait_s"
	debug "timeout_s=$timeout_s"

	num_test=$(( ${#limits[@]} * ${#relays[@]} ))
	debug "about to start $num_test tests"

	# Estimate total runtime: per-test crawl time (sites / rate limit)
	# plus fixed waits for Mullvad reconnects and onion-grab's shutdown
	# grace period (2x the per-site timeout).
	runtime_s=0
	wait_mullvad_s=$(( num_test * mullvad_wait_s ))
	wait_onion_grab_s=$(( num_test * timeout_s * 2 ))
	for limit in "${limits[@]}"; do
		for relay in "${relays[@]}"; do
			seconds=$(( num_sites / limit ))
			runtime_s=$(( runtime_s + seconds ))
		done
	done
	now=$(date +%s)
	estimated_done=$(( now + runtime_s + wait_mullvad_s + wait_onion_grab_s ))
	debug "estimated done? earliest $(date -d @$estimated_done +"%Y-%m-%d %H:%M:%S %Z")"

	i=1
	for limit in "${limits[@]}"; do
		for relay in "${relays[@]}"; do
			# Reconnect through the next relay and give the tunnel
			# time to come up before starting the crawl.
			mullvad disconnect >/dev/null
			mullvad relay set hostname "$relay" >/dev/null
			mullvad connect >/dev/null
			sleep "$mullvad_wait_s"

			ip=$(curl -s https://am.i.mullvad.net)
			debug "working on test $i/$num_test at relay $relay ($ip) with limit $limit sites/s"

			# Shuffle the input so ordering effects don't skew runs.
			shuf -o "$tmp_dir/input.lst" "$input_file"
			stdout_file=$tmp_dir/$relay-$limit.stdout
			stderr_file=$tmp_dir/$relay-$limit.stderr
			onion-grab -i "$tmp_dir/input.lst" -t "$timeout_s"s -l "$limit" -w "$num_workers" >"$stdout_file" 2>"$stderr_file"

			i=$(( i + 1 ))
		done
	done

	mkdir -p "$out_dir"
	all_file=$tmp_dir/all.txt # accumulates every relay/limit's stdout
	for limit in "${limits[@]}"; do
		lim_file=$tmp_dir/lim.txt # accumulates this limit's stdout
		rm -f "$lim_file"

		for relay in "${relays[@]}"; do
			stdout_file=$tmp_dir/$relay-$limit.stdout
			stderr_file=$tmp_dir/$relay-$limit.stderr
			cat "$stdout_file" >> "$all_file"
			cat "$stdout_file" >> "$lim_file"

			# Pull the last occurrence of each summary line from
			# onion-grab's stderr; xargs squeezes whitespace so
			# cut's single-space delimiter works reliably.
			avg_rate=$(grep 'Average rate:' "$stderr_file" | tail -n1 | xargs | cut -d' ' -f3)
			if [[ -z "$avg_rate" ]]; then
				avg_rate="n/a"
			else
				avg_rate="$avg_rate"/s
			fi

			num_onion=$(wc -l "$stdout_file" | cut -d' ' -f1)
			processed=$(grep 'Processed:' "$stderr_file" | tail -n1 | xargs | cut -d' ' -f2)
			success=$(grep 'Success:' "$stderr_file" | tail -n1 | xargs | cut -d' ' -f2)
			failure=$(grep 'Failure:' "$stderr_file" | tail -n1 | xargs | cut -d' ' -f2)
			failure_req=$(grep 'Req:' "$stderr_file" | tail -n1 | xargs | cut -d' ' -f2)
			failure_dns=$(grep 'DNS:' "$stderr_file" | tail -n1 | xargs | cut -d' ' -f2)
			failure_tcp=$(grep 'TCP:' "$stderr_file" | tail -n1 | xargs | cut -d' ' -f2)
			failure_tls=$(grep 'TLS:' "$stderr_file" | tail -n1 | xargs | cut -d' ' -f2)
			failure_3xx=$(grep '3xx:' "$stderr_file" | tail -n1 | xargs | cut -d' ' -f2)
			failure_ctx=$(grep 'CTX:' "$stderr_file" | tail -n1 | xargs | cut -d' ' -f2)
			failure_qqq=$(grep '???:' "$stderr_file" | tail -n1 | xargs | cut -d' ' -f2)
			failure_dns_detailed=$(grep 'DNS:' "$stderr_file" | tail -n1 | xargs | cut -d' ' -f3-)
			failure_tls_detailed=$(grep 'TLS:' "$stderr_file" | tail -n1 | xargs | cut -d' ' -f3-)
			info "relay:$relay limit:$limit/s rate:$avg_rate onions:$num_onion connected:$success/$processed"\
				"freq:$failure_req fdns:$failure_dns $failure_dns_detailed ftcp:$failure_tcp"\
				"ftls:$failure_tls $failure_tls_detailed f3xx:$failure_3xx fctx:$failure_ctx f???:$failure_qqq"

			output_file=$out_dir/$relay-l$limit.txt
			mv "$stdout_file" "$output_file"
		done

		# Count unique sites across all relays at this limit; -V sorts
		# version-style so mixed-format names still deduplicate.
		num_sites=$(cut -d' ' -f1 "$lim_file" | sort -Vu | wc -l | cut -d' ' -f1)
		info "all relays with limit $limit/s: $num_sites unique sites set onion location header or attribute, digested results below"
		./digest.py -v info -i "$lim_file" -o "$out_dir/onion-l$limit.txt" -d "$out_dir/domain-l$limit.txt"
	done

	info "digested results for all tests below"
	./digest.py -v info -i "$all_file" -o "$out_dir/onion-all.txt" -d "$out_dir/domain-all.txt"

	info "stored output in $out_dir"
	rm -rf "$tmp_dir"
}

# Log a timestamped debug line to stderr; "$*" joins all args into one message
# (the original's "$@" inside a quoted string is SC2145 — same output here,
# but $* is the correct form for string interpolation).
function debug() { echo "$(date +"%Y-%m-%d %H:%M:%S %Z") [DEBU] $*" >&2; }
# Log a timestamped info line to stderr; "$*" joins all args into one message
# (fixes SC2145: "$@" inside a larger quoted string).
function info()  { echo "$(date +"%Y-%m-%d %H:%M:%S %Z") [INFO] $*" >&2; }

# Quote "$@" so arguments with spaces/globs reach main() intact (SC2086).
main "$@"