#!/bin/bash
#
# A script that debugs how different onion-grab configurations perform with
# regards to successful connections, connection errors, found onions, etc. To
# conduct these tests from multiple vantage points, Mullvad VPN is used.
#
###
# Options you will likely want to tune yourself
###
relays=(\
se-sto-wg-001\
us-nyc-wg-501\
au-mel-wg-301\
)
limits=(\
5\
10\
)
num_workers=16
input_file=example.lst
###
# Other options
###
tmp_dir=$(mktemp -d)
out_dir=data/$(date +%Y%m%d-%H%M%S)
mullvad_wait_s=5
timeout_s=30
set -eu
# Extract field(s) from the last onion-grab stderr line matching "<$1>:".
# $1 - label to match (a literal ':' is appended)
# $2 - stderr file to search
# $3 - cut field spec, e.g. "2" or "3-"
# Prints nothing if no line matches; xargs normalizes whitespace first.
function stat_field() {
  grep "$1:" "$2" | tail -n1 | xargs | cut -d' ' -f"$3"
}

# Run every (relay, limit) combination of onion-grab through Mullvad, then
# aggregate per-limit and overall results into $out_dir.
# Reads the configuration globals set at the top of the script: relays,
# limits, num_workers, input_file, tmp_dir, out_dir, mullvad_wait_s,
# timeout_s.
function main() {
  # Read via stdin so wc prints a bare count: "wc -l file" appends the
  # filename (and BSD wc pads with spaces), which breaks naive parsing.
  num_sites=$(( $(wc -l < "$input_file") ))
  debug "relays=${#relays[@]} (${relays[*]})"
  debug "limits=${#limits[@]} (${limits[*]})"
  debug "num_workers=$num_workers"
  debug "input_file=$input_file ($num_sites sites)"
  debug "tmp_dir=$tmp_dir"
  debug "out_dir=$out_dir"
  debug "mullvad_wait_s=$mullvad_wait_s"
  debug "timeout_s=$timeout_s"

  num_test=$(( ${#limits[@]} * ${#relays[@]} ))
  debug "about to start $num_test tests"

  # Rough wall-clock estimate: rate-limited crawl time per test, plus fixed
  # waits for VPN reconnects and onion-grab shutdown timeouts.
  runtime_s=0
  wait_mullvad_s=$(( num_test * mullvad_wait_s ))
  wait_onion_grab_s=$(( num_test * timeout_s * 2 ))
  for limit in "${limits[@]}"; do
    for relay in "${relays[@]}"; do
      runtime_s=$(( runtime_s + num_sites / limit ))
    done
  done
  now=$(date +%s)
  estimated_done=$(( now + runtime_s + wait_mullvad_s + wait_onion_grab_s ))
  debug "estimated done? approximately $(date -d @"$estimated_done" +"%Y-%m-%d %H:%M:%S %Z")"

  i=1
  for limit in "${limits[@]}"; do
    for relay in "${relays[@]}"; do
      # Switch Mullvad exit relay and give the tunnel time to come up.
      mullvad disconnect >/dev/null
      mullvad relay set hostname "$relay" >/dev/null
      mullvad connect >/dev/null
      sleep "$mullvad_wait_s"
      ip=$(curl -s https://ifconfig.me)
      debug "working on test $i/$num_test at relay $relay ($ip) with limit $limit sites/s"
      # Shuffle the input so site ordering doesn't bias the results.
      shuf -o "$tmp_dir/input.lst" "$input_file"
      stdout_file=$tmp_dir/$relay-$limit.stdout
      stderr_file=$tmp_dir/$relay-$limit.stderr
      onion-grab -i "$tmp_dir/input.lst" -t "$timeout_s"s -l "$limit" -w "$num_workers" >"$stdout_file" 2>"$stderr_file"
      i=$(( i + 1 ))
    done
  done

  mkdir -p "$out_dir"
  for limit in "${limits[@]}"; do
    all_file=$tmp_dir/all.txt
    lim_file=$tmp_dir/lim.txt
    # lim.txt accumulates per-limit; all.txt accumulates across all limits.
    rm -f "$lim_file"
    for relay in "${relays[@]}"; do
      stdout_file=$tmp_dir/$relay-$limit.stdout
      stderr_file=$tmp_dir/$relay-$limit.stderr
      cat "$stdout_file" >> "$all_file"
      cat "$stdout_file" >> "$lim_file"
      avg_rate=$(stat_field 'Average rate' "$stderr_file" 3)
      if [[ -z "$avg_rate" ]]; then
        avg_rate="n/a"
      else
        avg_rate="$avg_rate"/s
      fi
      num_onion=$(( $(wc -l < "$stdout_file") ))
      processed=$(stat_field 'Processed' "$stderr_file" 2)
      success=$(stat_field 'Success' "$stderr_file" 2)
      failure_req=$(stat_field 'Req' "$stderr_file" 2)
      failure_dns=$(stat_field 'DNS' "$stderr_file" 2)
      failure_tcp=$(stat_field 'TCP' "$stderr_file" 2)
      failure_tls=$(stat_field 'TLS' "$stderr_file" 2)
      failure_3xx=$(stat_field '3xx' "$stderr_file" 2)
      failure_eof=$(stat_field 'EOF' "$stderr_file" 2)
      failure_ctx=$(stat_field 'CTX' "$stderr_file" 2)
      failure_qqq=$(stat_field '???' "$stderr_file" 2)
      failure_dns_detailed=$(stat_field 'DNS' "$stderr_file" '3-')
      failure_tcp_detailed=$(stat_field 'TCP' "$stderr_file" '3-')
      failure_tls_detailed=$(stat_field 'TLS' "$stderr_file" '3-')
      info "relay:$relay limit:$limit/s rate:$avg_rate onions:$num_onion"\
        "connected:$success/$processed freq:$failure_req"\
        "fdns:$failure_dns $failure_dns_detailed"\
        "ftcp:$failure_tcp $failure_tcp_detailed"\
        "ftls:$failure_tls $failure_tls_detailed"\
        "f3xx:$failure_3xx feof:$failure_eof fctx:$failure_ctx f???:$failure_qqq"
      output_file=$out_dir/$relay-l$limit.txt
      mv "$stdout_file" "$output_file"
    done
    num_sites=$(( $(cut -d' ' -f1 "$lim_file" | sort -Vu | wc -l) ))
    info "all relays with limit $limit/s: $num_sites unique sites set onion location header or attribute, digested results below"
    ./digest.py -v info -i "$lim_file" -o "$out_dir/onion-l$limit.txt" -d "$out_dir/domain-l$limit.txt"
  done
  info "digested results for all tests below"
  ./digest.py -v info -i "$all_file" -o "$out_dir/onion-all.txt" -d "$out_dir/domain-all.txt"
  info "stored output in $out_dir"
  rm -rf "$tmp_dir"
}
# Log a timestamped debug message (all arguments, space-joined) to stderr.
# Uses $* rather than $@: inside a quoted string, "$@" splits into multiple
# words, which is flagged by ShellCheck (SC2145).
function debug() { echo "$(date +"%Y-%m-%d %H:%M:%S %Z") [DEBU] $*" >&2; }
# Log a timestamped info message (all arguments, space-joined) to stderr.
# Uses $* rather than $@ inside the quoted string (ShellCheck SC2145).
function info() { echo "$(date +"%Y-%m-%d %H:%M:%S %Z") [INFO] $*" >&2; }
# Quote "$@" so arguments containing whitespace are forwarded intact.
main "$@"