xref: /aosp_15_r20/external/rappor/bin/test.sh (revision 2abb31345f6c95944768b5222a9a5ed3fc68cc00)
1#!/bin/bash
# Print the help text for this script to stdout: what it tests, the calling
# convention (a function name as the first argument), and example invocations.
usage() {
  # Quoted delimiter: the text is emitted literally, with no expansion.
  cat <<'EOF'


 Simple smoke test for the decode-dist tool.  This will fail if your machine
 doesn't have the right R libraries.

 Usage:
   ./test.sh <function name>

 Example:
   ./test.sh decode-assoc-R-smoke       # test pure R implementation
   ./test.sh decode-assoc-cpp-smoke     # test with analysis/cpp/fast_em.cc
   ./test.sh decode-assoc-cpp-converge  # run for longer with C++
   ./test.sh decode-assoc-tensorflow

EOF
}
18
# Strict mode: unset variables are errors, a pipeline fails if any stage
# fails, and any unhandled non-zero status aborts the script.
set -o nounset
set -o pipefail
set -o errexit

# Directory containing this script, and the repository root one level above
# it.  Assignment is kept separate from 'readonly' so that a failing
# dirname/cd is not masked by the exit status of 'readonly' itself, and every
# expansion is quoted so checkouts under paths with spaces keep working.
THIS_DIR=$(dirname -- "$0")
readonly THIS_DIR
RAPPOR_SRC=$(cd -- "$THIS_DIR/.." && pwd)
readonly RAPPOR_SRC

# Binary passed as --em-executable by decode-assoc-cpp-smoke.
readonly EM_CPP_EXECUTABLE="$RAPPOR_SRC/analysis/cpp/_tmp/fast_em"

# Shared shell helpers from the repository root.
source "$RAPPOR_SRC/util.sh"

# Scratch directories (relative to the current working directory) that hold
# the generated test inputs.
readonly ASSOC_TESTDATA_DIR=_tmp/decode-assoc-test
readonly DIST_TESTDATA_DIR=_tmp/decode-dist-test
31
# Clear the R cache for the map files.
#
# Arguments:
#   $1 - directory to search (recursively) for '*.rda' files
# Outputs:
#   One line per removed file, as printed by 'rm --verbose'.
# Returns:
#   Non-zero if find or rm fails.
clear-cached-files() {
  local dir=$1
  # 'find -exec ... {} +' passes each path to rm as a single argument, so
  # names containing spaces or newlines are handled correctly, and rm is not
  # invoked at all when nothing matches.
  find "$dir" -name '*.rda' -exec rm --verbose -- {} +
}
37
# Populate $DIST_TESTDATA_DIR/input with the counts, map and params CSVs that
# decode-dist reads.
#
# Globals:
#   DIST_TESTDATA_DIR (read), RAPPOR_SRC (read)
# Outputs:
#   'cp --verbose' lines for each copied file.
write-dist-testdata() {
  local input_dir=$DIST_TESTDATA_DIR/input

  mkdir -p "$input_dir"

  clear-cached-files "$DIST_TESTDATA_DIR"

  # Right now, we copy a case from regtest.sh.  (./demo.sh quick-python creates
  # just this case)
  local case_dir=$RAPPOR_SRC/_tmp/python/demo3

  # Paths are quoted so a checkout located under a directory with spaces
  # still works.
  cp --verbose "$case_dir/1/case_counts.csv" "$input_dir/counts.csv"
  cp --verbose "$case_dir/case_map.csv" "$input_dir/map.csv"
  cp --verbose "$case_dir/case_params.csv" "$input_dir/params.csv"
}
53
# Regenerate the decode-dist inputs, run bin/decode-dist on them (timed), and
# print the first lines of results.csv followed by metrics.json.
#
# Globals:
#   DIST_TESTDATA_DIR (read), RAPPOR_SRC (read)
decode-dist() {
  write-dist-testdata

  local output_dir=$DIST_TESTDATA_DIR

  local input_dir=$DIST_TESTDATA_DIR/input

  # Uses the ./demo.sh regtest files
  # The executable path is quoted so a checkout under a path with spaces runs.
  time "$RAPPOR_SRC/bin/decode-dist" \
    --counts "$input_dir/counts.csv" \
    --map "$input_dir/map.csv" \
    --params "$input_dir/params.csv" \
    --output-dir "$output_dir"

  echo
  head "$output_dir/results.csv"
  echo
  cat "$output_dir/metrics.json"
}
73
# Generate every input file used by the decode-assoc tests.
#
# Files written under $ASSOC_TESTDATA_DIR/build (intermediate):
#   true_values.csv, bad_rows.txt, domain_candidates.csv
# Files written under $ASSOC_TESTDATA_DIR/input (consumed by decode-assoc):
#   reports.csv, bad_rows.csv, reports_bad_rows.csv, rappor-vars.csv,
#   m_params.csv, domain_map.csv
#
# Globals:
#   ASSOC_TESTDATA_DIR (read), RAPPOR_SRC (read)
write-assoc-testdata() {
  # 'build' has intermediate build files, 'input' is the final input to the
  # decode-assoc tool.
  local build_dir=$ASSOC_TESTDATA_DIR/build
  local input_dir=$ASSOC_TESTDATA_DIR/input

  mkdir -p "$build_dir" "$input_dir"

  clear-cached-files "$ASSOC_TESTDATA_DIR"

  cat >"$build_dir/true_values.csv" <<EOF
domain,flag..HTTPS
google.com,1
google.com,1
google.com,1
google.com,1
google.com,0
yahoo.com,1
yahoo.com,0
bing.com,1
bing.com,1
bing.com,0
EOF

  local num_bits=8
  local num_hashes=1
  local num_cohorts=128

  local prob_p=0.25
  local prob_q=0.75
  local prob_f=0.5

  # The true-values file above has 10 data rows; this is the count handed to
  # the simulator through --assoc-testdata.
  # NOTE(review): the previous comment claimed "50,000 items is enough to
  # eyeball accuracy of results" while the value was 5000.  The value is kept
  # unchanged here -- confirm which one was intended.
  local assoc_testdata_count=5000

  PYTHONPATH="$RAPPOR_SRC/client/python" \
    "$RAPPOR_SRC/tests/rappor_sim.py" \
    --assoc-testdata "$assoc_testdata_count" \
    --num-bits "$num_bits" \
    --num-hashes "$num_hashes" \
    --num-cohorts "$num_cohorts" \
    -p "$prob_p" \
    -q "$prob_q" \
    -f "$prob_f" \
    < "$build_dir/true_values.csv" \
    > "$input_dir/reports.csv"

  # Output two bad rows: each row is missing one of the columns.
  cat >"$build_dir/bad_rows.txt" <<EOF
c0,0,10101010,
c0,0,,0
EOF

  # Make CSV file with the header ('-' is the here-document, so the header
  # line comes first, followed by the bad rows).
  cat - "$build_dir/bad_rows.txt" > "$input_dir/bad_rows.csv" <<EOF
client,cohort,domain,flag..HTTPS
EOF

  # Make reports file with bad rows
  cat "$input_dir/reports.csv" "$build_dir/bad_rows.txt" \
    > "$input_dir/reports_bad_rows.csv"

  # Define a string variable and a boolean variable.
  cat >"$input_dir/rappor-vars.csv" <<EOF
metric, var, var_type, params
m,domain,string,m_params
m,flag..HTTPS,boolean,m_params
EOF

  # Unquoted delimiter on purpose: the parameter values are expanded.
  cat >"$input_dir/m_params.csv" <<EOF
k,h,m,p,q,f
$num_bits,$num_hashes,$num_cohorts,$prob_p,$prob_q,$prob_f
EOF

  # Add a string with a double quote to test quoting behavior
  cat >"$build_dir/domain_candidates.csv" <<EOF
google.com
yahoo.com
bing.com
q"q
EOF

  # Hash candidates to create map.
  "$RAPPOR_SRC/bin/hash-candidates" "$input_dir/m_params.csv" \
    < "$build_dir/domain_candidates.csv" \
    > "$input_dir/domain_map.csv"

  banner "Wrote testdata in $input_dir (intermediate files in $build_dir)"
}
163
# Helper function to run decode-assoc with testdata.
#
# Arguments:
#   $1    - output directory (also passed as --tmp-dir)
#   $2... - extra flags appended after the defaults on the decode-assoc
#           command line
# Globals:
#   ASSOC_TESTDATA_DIR (read), RAPPOR_SRC (read)
# Outputs:
#   The head of each $1/assoc-* file, then the true values for comparison.
decode-assoc-helper() {
  # Consume the required argument first so a missing one fails (under
  # 'set -o nounset') before the test data is regenerated.
  local output_dir=$1
  shift

  write-assoc-testdata

  local build_dir=$ASSOC_TESTDATA_DIR/build
  local input_dir=$ASSOC_TESTDATA_DIR/input

  time "$RAPPOR_SRC/bin/decode-assoc" \
    --metric-name m \
    --schema "$input_dir/rappor-vars.csv" \
    --reports "$input_dir/reports.csv" \
    --params-dir "$input_dir" \
    --var1 domain \
    --var2 flag..HTTPS \
    --map1 "$input_dir/domain_map.csv" \
    --create-bool-map \
    --max-em-iters 10 \
    --num-cores 2 \
    --output-dir "$output_dir" \
    --tmp-dir "$output_dir" \
    "$@"

  # Only the directory part is quoted; the glob must stay unquoted to expand.
  head "$output_dir"/assoc-*

  # Print true values for comparison
  echo
  echo "$build_dir/true_values.csv:"
  cat "$build_dir/true_values.csv"
}
196
# Quick smoke test for the R version: runs decode-assoc-helper with no extra
# flags and collects the output under _tmp/R.
decode-assoc-R-smoke() {
  local -r results_dir='_tmp/R'
  mkdir -p "$results_dir"
  decode-assoc-helper "$results_dir"
}
203
# Test what happens when there are bad rows.
#
# Runs decode-assoc twice with --remove-bad-rows: once on the good reports
# with the bad rows appended, and once on a file that contains only the bad
# rows.  Output goes to _tmp/bad.
#
# Arguments:
#   $@ - extra flags forwarded to decode-assoc-helper
# Globals:
#   ASSOC_TESTDATA_DIR (read)
decode-assoc-bad-rows() {
  local output_dir=_tmp/bad
  mkdir -p "$output_dir"

  # write-assoc-testdata (run by decode-assoc-helper) writes both bad-row
  # files under the test data 'input' directory, so they must be referenced
  # there rather than directly under _tmp/.
  local input_dir=$ASSOC_TESTDATA_DIR/input

  # Later flags override earlier ones

  # Reports + bad rows
  decode-assoc-helper "$output_dir" \
    --reports "$input_dir/reports_bad_rows.csv" \
    --remove-bad-rows \
    "$@"

  # ONLY bad rows
  decode-assoc-helper "$output_dir" \
    --reports "$input_dir/bad_rows.csv" \
    --remove-bad-rows \
    "$@"
}
223
# Build the C++ EM binary by running './run.sh build-fast-em' inside
# $RAPPOR_SRC/analysis/cpp, then return to the previous working directory.
#
# Globals:
#   RAPPOR_SRC (read)
build-em-executable() {
  # Quoted so a checkout under a path containing spaces can be entered.
  pushd "$RAPPOR_SRC/analysis/cpp" >/dev/null
  ./run.sh build-fast-em
  popd >/dev/null
}
229
# Smoke test using the C++ EM binary: builds it, then runs
# decode-assoc-helper with --em-executable pointing at $EM_CPP_EXECUTABLE.
# Output goes to _tmp/cpp.
#
# Arguments:
#   $@ - extra flags forwarded to decode-assoc-helper
decode-assoc-cpp-smoke() {
  local -r results_dir='_tmp/cpp'
  mkdir -p "$results_dir"

  build-em-executable

  # Helper arguments: output dir first, then the flags; caller flags go last.
  local -a helper_args=(
    "$results_dir"
    --em-executable "$EM_CPP_EXECUTABLE"
    "$@"
  )
  decode-assoc-helper "${helper_args[@]}"
}
239
# Run the C++ smoke test with a much higher EM iteration cap.
decode-assoc-cpp-converge() {
  # With the data we have, this converges and exits before 1000 iterations.
  local -r iteration_cap=1000
  decode-assoc-cpp-smoke --max-em-iters "$iteration_cap"
}
244
# Run decode-assoc-helper with analysis/tensorflow/fast_em.sh as the
# --em-executable.  Output goes to _tmp/tensorflow.
#
# Arguments:
#   $@ - extra flags forwarded to decode-assoc-helper
# Globals:
#   RAPPOR_SRC (read)
decode-assoc-tensorflow() {
  local output_dir=_tmp/tensorflow
  mkdir -p "$output_dir"

  # The executable path is quoted (as in decode-assoc-cpp-smoke) so it stays a
  # single argument even when $RAPPOR_SRC contains spaces.
  decode-assoc-helper "$output_dir" \
    --em-executable "$RAPPOR_SRC/analysis/tensorflow/fast_em.sh" "$@"
}
252
# Run the TensorFlow variant with the EM iteration cap raised to 1000.
decode-assoc-tensorflow-converge() {
  local -r iteration_cap=1000
  decode-assoc-tensorflow --max-em-iters "$iteration_cap"
}
256
# Entry point: with no arguments print the help text; otherwise treat the
# first argument as the name of a function in this file and call it with the
# remaining arguments.
case $# in
  0) usage ;;
  *) "$@" ;;
esac
262