xref: /aosp_15_r20/external/rappor/bin/test.sh (revision 2abb31345f6c95944768b5222a9a5ed3fc68cc00)
1*2abb3134SXin Li#!/bin/bash
# Print the help text for this script on stdout: what it is for, how to invoke
# it, and a few example function names that can be passed as the first
# argument.
usage() {
  # Quoted delimiter: the text is emitted verbatim, with no expansion.
  cat <<'EOF'


 Simple smoke test for the decode-dist tool.  This will fail if your machine
 doesn't have the right R libraries.

 Usage:
   ./test.sh <function name>

 Example:
   ./test.sh decode-assoc-R-smoke       # test pure R implementation
   ./test.sh decode-assoc-cpp-smoke     # test with analysis/cpp/fast_em.cc
   ./test.sh decode-assoc-cpp-converge  # run for longer with C++
   ./test.sh decode-assoc-tensorflow

EOF
}
18*2abb3134SXin Li
# Strict mode: abort on use of an unset variable, on a failure in any stage of
# a pipeline, and on any unhandled non-zero exit status.
set -o nounset
set -o pipefail
set -o errexit

# Directory containing this script, as derived from $0.
# The assignment is kept separate from 'readonly' so that a failing command
# substitution is not masked by the exit status of the 'readonly' builtin.
THIS_DIR=$(dirname -- "$0")
readonly THIS_DIR

# Absolute path of the parent directory of THIS_DIR.
RAPPOR_SRC=$(cd -- "$THIS_DIR/.." && pwd)
readonly RAPPOR_SRC

# Path passed as --em-executable by the C++ smoke test.
readonly EM_CPP_EXECUTABLE="$RAPPOR_SRC/analysis/cpp/_tmp/fast_em"
26*2abb3134SXin Li
# Shared helpers.  'banner', used further down, is not defined in this file;
# presumably it comes from util.sh (verify there).  The path is quoted so a
# checkout location containing whitespace still works.
source "$RAPPOR_SRC/util.sh"

# Where generated test data is written, relative to the current directory.
readonly ASSOC_TESTDATA_DIR=_tmp/decode-assoc-test
readonly DIST_TESTDATA_DIR=_tmp/decode-dist-test
31*2abb3134SXin Li
# Clear the R cache for the map files: delete every '*.rda' file found under
# the given directory (recursively), printing each removal.
#
# Arguments: $1 - directory to search
clear-cached-files() {
  local dir=$1

  # 'find -exec ... {} +' hands the names to rm directly, so paths containing
  # whitespace or quotes are not split, and rm is not run at all when nothing
  # matches.
  find "$dir" -name '*.rda' -exec rm --verbose -- {} +
}
37*2abb3134SXin Li
# Populate $DIST_TESTDATA_DIR/input with the counts, map and params CSV files
# consumed by decode-dist, after clearing any cached '*.rda' files under
# $DIST_TESTDATA_DIR.
#
# Globals: DIST_TESTDATA_DIR (read), RAPPOR_SRC (read)
write-dist-testdata() {
  local input_dir=$DIST_TESTDATA_DIR/input

  mkdir -p "$input_dir"

  clear-cached-files "$DIST_TESTDATA_DIR"

  # Right now, we copy a case from regtest.sh.  (./demo.sh quick-python creates
  # just this case)
  local case_dir=$RAPPOR_SRC/_tmp/python/demo3

  # Paths are quoted because RAPPOR_SRC is an absolute path that may contain
  # whitespace.
  cp --verbose "$case_dir/1/case_counts.csv" "$input_dir/counts.csv"
  cp --verbose "$case_dir/case_map.csv" "$input_dir/map.csv"
  cp --verbose "$case_dir/case_params.csv" "$input_dir/params.csv"
}
53*2abb3134SXin Li
# Smoke test for bin/decode-dist: regenerate the input files, run the tool on
# them (timed), then print the head of results.csv and all of metrics.json
# from the output directory.
#
# Globals: DIST_TESTDATA_DIR (read), RAPPOR_SRC (read)
decode-dist() {
  write-dist-testdata

  local output_dir=$DIST_TESTDATA_DIR

  local input_dir=$DIST_TESTDATA_DIR/input

  # Uses the ./demo.sh regtest files
  time "$RAPPOR_SRC/bin/decode-dist" \
    --counts "$input_dir/counts.csv" \
    --map "$input_dir/map.csv" \
    --params "$input_dir/params.csv" \
    --output-dir "$output_dir"

  echo
  head "$output_dir/results.csv"
  echo
  cat "$output_dir/metrics.json"
}
73*2abb3134SXin Li
# Generate the test data for the decode-assoc tests under $ASSOC_TESTDATA_DIR:
#   build/  true_values.csv, bad_rows.txt, domain_candidates.csv
#   input/  reports.csv, bad_rows.csv, reports_bad_rows.csv, rappor-vars.csv,
#           m_params.csv, domain_map.csv
#
# Globals: ASSOC_TESTDATA_DIR (read), RAPPOR_SRC (read)
write-assoc-testdata() {
  # 'build' has intermediate build files, 'input' is the final input to the
  # decode-assoc tool.
  local build_dir=$ASSOC_TESTDATA_DIR/build
  local input_dir=$ASSOC_TESTDATA_DIR/input

  mkdir -p "$build_dir" "$input_dir"

  clear-cached-files "$ASSOC_TESTDATA_DIR"

  # Ten true (domain, flag) pairs fed to the simulator below.
  cat >"$build_dir/true_values.csv" <<'EOF'
domain,flag..HTTPS
google.com,1
google.com,1
google.com,1
google.com,1
google.com,0
yahoo.com,1
yahoo.com,0
bing.com,1
bing.com,1
bing.com,0
EOF

  local num_bits=8
  local num_hashes=1
  local num_cohorts=128

  local prob_p=0.25
  local prob_q=0.75
  local prob_f=0.5

  # 10 items in the input; this is the value passed to --assoc-testdata.
  # NOTE(review): the previous comment said 50,000 items is enough to eyeball
  # accuracy of results, but the value used here is 5000 -- confirm which one
  # is intended.
  local assoc_testdata_count=5000

  PYTHONPATH="$RAPPOR_SRC/client/python" \
    "$RAPPOR_SRC/tests/rappor_sim.py" \
    --assoc-testdata "$assoc_testdata_count" \
    --num-bits "$num_bits" \
    --num-hashes "$num_hashes" \
    --num-cohorts "$num_cohorts" \
    -p "$prob_p" \
    -q "$prob_q" \
    -f "$prob_f" \
    < "$build_dir/true_values.csv" \
    > "$input_dir/reports.csv"

  # Output two bad rows: each row is missing one of the columns.
  cat >"$build_dir/bad_rows.txt" <<'EOF'
c0,0,10101010,
c0,0,,0
EOF

  # Make CSV file with the header ('-' is the here-doc on stdin, so the header
  # line comes first, followed by the bad rows).
  cat - "$build_dir/bad_rows.txt" > "$input_dir/bad_rows.csv" <<'EOF'
client,cohort,domain,flag..HTTPS
EOF

  # Make reports file with bad rows
  cat "$input_dir/reports.csv" "$build_dir/bad_rows.txt" \
    > "$input_dir/reports_bad_rows.csv"

  # Define a string variable and a boolean variable.
  cat >"$input_dir/rappor-vars.csv" <<'EOF'
metric, var, var_type, params
m,domain,string,m_params
m,flag..HTTPS,boolean,m_params
EOF

  # Unquoted delimiter on purpose: the parameter values are expanded here.
  cat >"$input_dir/m_params.csv" <<EOF
k,h,m,p,q,f
$num_bits,$num_hashes,$num_cohorts,$prob_p,$prob_q,$prob_f
EOF

  # Add a string with a double quote to test quoting behavior
  cat >"$build_dir/domain_candidates.csv" <<'EOF'
google.com
yahoo.com
bing.com
q"q
EOF

  # Hash candidates to create map.
  "$RAPPOR_SRC/bin/hash-candidates" "$input_dir/m_params.csv" \
    < "$build_dir/domain_candidates.csv" \
    > "$input_dir/domain_map.csv"

  banner "Wrote testdata in $input_dir (intermediate files in $build_dir)"
}
163*2abb3134SXin Li
# Helper function to run decode-assoc with testdata.
#
# Regenerates the test data, runs bin/decode-assoc on it (timed), prints the
# head of every 'assoc-*' file in the output directory, and finally prints the
# true values for comparison.
#
# Arguments: $1   - output directory (also passed as --tmp-dir)
#            $2.. - extra flags appended to the decode-assoc command line
# Globals:   ASSOC_TESTDATA_DIR (read), RAPPOR_SRC (read)
decode-assoc-helper() {
  write-assoc-testdata

  local output_dir=$1
  shift

  local build_dir=$ASSOC_TESTDATA_DIR/build
  local input_dir=$ASSOC_TESTDATA_DIR/input

  # Caller-supplied flags come last on the command line.
  time "$RAPPOR_SRC/bin/decode-assoc" \
    --metric-name m \
    --schema "$input_dir/rappor-vars.csv" \
    --reports "$input_dir/reports.csv" \
    --params-dir "$input_dir" \
    --var1 domain \
    --var2 flag..HTTPS \
    --map1 "$input_dir/domain_map.csv" \
    --create-bool-map \
    --max-em-iters 10 \
    --num-cores 2 \
    --output-dir "$output_dir" \
    --tmp-dir "$output_dir" \
    "$@"

  # Only the directory part is quoted; the glob must stay unquoted to expand.
  head "$output_dir"/assoc-*

  # Print true values for comparison
  echo
  echo "$build_dir/true_values.csv:"
  cat "$build_dir/true_values.csv"
}
196*2abb3134SXin Li
# Quick smoke test for R version.
#
# Writes its output under _tmp/R.  Any extra arguments are forwarded to
# decode-assoc-helper, matching the other decode-assoc-* entry points, instead
# of being silently dropped.
decode-assoc-R-smoke() {
  local output_dir=_tmp/R
  mkdir -p "$output_dir"
  decode-assoc-helper "$output_dir" "$@"
}
203*2abb3134SXin Li
# Test what happens when there are bad rows.
#
# Runs decode-assoc twice with --remove-bad-rows: once on the regular reports
# with the bad rows appended, and once on a file that contains only the bad
# rows.  Output goes to _tmp/bad; extra arguments are forwarded to both runs.
#
# Globals: ASSOC_TESTDATA_DIR (read)
decode-assoc-bad-rows() {
  local output_dir=_tmp/bad
  # write-assoc-testdata (run by the helper) writes reports_bad_rows.csv and
  # bad_rows.csv into this directory, so the --reports paths must point here
  # rather than at _tmp/ directly.
  local input_dir=$ASSOC_TESTDATA_DIR/input
  mkdir -p "$output_dir"

  # Later flags override earlier ones

  # Reports + bad rows
  decode-assoc-helper "$output_dir" \
    --reports "$input_dir/reports_bad_rows.csv" \
    --remove-bad-rows \
    "$@"

  # ONLY bad rows
  decode-assoc-helper "$output_dir" \
    --reports "$input_dir/bad_rows.csv" \
    --remove-bad-rows \
    "$@"
}
223*2abb3134SXin Li
# Build the C++ EM executable by running './run.sh build-fast-em' from
# $RAPPOR_SRC/analysis/cpp, then return to the previous directory.
#
# Globals: RAPPOR_SRC (read)
build-em-executable() {
  # Quoted so a checkout path containing whitespace is handled; pushd/popd
  # output (the directory stack) is discarded.
  pushd "$RAPPOR_SRC/analysis/cpp" >/dev/null
  ./run.sh build-fast-em
  popd >/dev/null
}
229*2abb3134SXin Li
# Smoke test for the C++ EM path: build the executable, then run the
# decode-assoc test data through it with output under _tmp/cpp.  Extra
# arguments are appended to the helper's command line.
decode-assoc-cpp-smoke() {
  local cpp_out=_tmp/cpp
  local -a em_flags=(--em-executable "$EM_CPP_EXECUTABLE")

  mkdir -p "$cpp_out"
  build-em-executable

  decode-assoc-helper "$cpp_out" "${em_flags[@]}" "$@"
}
239*2abb3134SXin Li
# Longer C++ run with a higher EM iteration cap.
decode-assoc-cpp-converge() {
  # With the data we have, this converges and exits before 1000 iterations.
  local iteration_cap=1000
  decode-assoc-cpp-smoke --max-em-iters "$iteration_cap"
}
244*2abb3134SXin Li
# Run the decode-assoc test data with analysis/tensorflow/fast_em.sh as the
# EM executable; output goes to _tmp/tensorflow.  Extra arguments are
# forwarded to the helper.
#
# Globals: RAPPOR_SRC (read)
decode-assoc-tensorflow() {
  local output_dir=_tmp/tensorflow
  mkdir -p "$output_dir"

  # The executable path is quoted because RAPPOR_SRC may contain whitespace.
  decode-assoc-helper "$output_dir" \
    --em-executable "$RAPPOR_SRC/analysis/tensorflow/fast_em.sh" "$@"
}
252*2abb3134SXin Li
# Same as decode-assoc-tensorflow, but with the EM iteration cap raised to
# 1000.
decode-assoc-tensorflow-converge() {
  local iteration_cap=1000
  decode-assoc-tensorflow --max-em-iters "$iteration_cap"
}
256*2abb3134SXin Li
# Entry point: with no arguments print the help text; otherwise run the
# command line as given, i.e. the first argument names the function to call
# and the rest are passed to it.
case $# in
  0) usage ;;
  *) "$@" ;;
esac
262