#!/bin/bash
#
# Smoke tests for the decode-dist and decode-assoc tools.  Run with the name
# of one of the functions below as the first argument; see usage().

# Print the help text.
usage() {
echo "

  Simple smoke test for the decode-dist tool. This will fail if your machine
  doesn't have the right R libraries.

  Usage:
    ./test.sh <function name>

  Example:
    ./test.sh decode-assoc-R-smoke # test pure R implementation
    ./test.sh decode-assoc-cpp-smoke # test with analysis/cpp/fast_em.cc
    ./test.sh decode-assoc-cpp-converge # run for longer with C++
    ./test.sh decode-assoc-tensorflow
"
}

set -o nounset
set -o pipefail
set -o errexit

# Assign first and mark readonly afterwards: 'readonly X=$(cmd)' returns the
# status of 'readonly', so errexit would not notice a failing 'cd'.
THIS_DIR=$(dirname "$0")
readonly THIS_DIR
RAPPOR_SRC=$(cd "$THIS_DIR/.." && pwd)
readonly RAPPOR_SRC
readonly EM_CPP_EXECUTABLE=$RAPPOR_SRC/analysis/cpp/_tmp/fast_em

# Provides 'banner', used by write-assoc-testdata.
source "$RAPPOR_SRC/util.sh"

# Test data locations, relative to the current working directory.
readonly ASSOC_TESTDATA_DIR=_tmp/decode-assoc-test
readonly DIST_TESTDATA_DIR=_tmp/decode-dist-test

# Clear the R cache for the map files.
#
# Arguments: $1 - directory to search (recursively) for *.rda files.
clear-cached-files() {
  local dir=$1
  # NUL-delimited so that paths containing whitespace are removed correctly.
  find "$dir" -name '*.rda' -print0 \
    | xargs -0 --no-run-if-empty -- rm --verbose
}

# Populate $DIST_TESTDATA_DIR/input with counts, map and params CSV files.
write-dist-testdata() {
  local input_dir=$DIST_TESTDATA_DIR/input

  mkdir -p "$input_dir"

  clear-cached-files "$DIST_TESTDATA_DIR"

  # Right now, we copy a case from regtest.sh.  (./demo.sh quick-python creates
  # just this case)
  local case_dir=$RAPPOR_SRC/_tmp/python/demo3

  cp --verbose "$case_dir/1/case_counts.csv" "$input_dir/counts.csv"
  cp --verbose "$case_dir/case_map.csv" "$input_dir/map.csv"
  cp --verbose "$case_dir/case_params.csv" "$input_dir/params.csv"
}

# Regenerate the decode-dist test data, run bin/decode-dist on it, and print
# the head of results.csv followed by metrics.json.
decode-dist() {
  write-dist-testdata

  local output_dir=$DIST_TESTDATA_DIR
  local input_dir=$DIST_TESTDATA_DIR/input

  # Uses the ./demo.sh regtest files
  time "$RAPPOR_SRC/bin/decode-dist" \
    --counts "$input_dir/counts.csv" \
    --map "$input_dir/map.csv" \
    --params "$input_dir/params.csv" \
    --output-dir "$output_dir"

  echo
  head "$output_dir/results.csv"
  echo
  cat "$output_dir/metrics.json"
}

# Generate every input file the decode-assoc tests read: simulated reports,
# variants containing malformed rows, the schema, the params and the domain
# map.
write-assoc-testdata() {
  # 'build' has intermediate build files, 'input' is the final input to the
  # decode-assoc tool.
  local build_dir=$ASSOC_TESTDATA_DIR/build
  local input_dir=$ASSOC_TESTDATA_DIR/input

  mkdir -p "$build_dir" "$input_dir"

  clear-cached-files "$ASSOC_TESTDATA_DIR"

  cat >"$build_dir/true_values.csv" <<EOF
domain,flag..HTTPS
google.com,1
google.com,1
google.com,1
google.com,1
google.com,0
yahoo.com,1
yahoo.com,0
bing.com,1
bing.com,1
bing.com,0
EOF

  local num_bits=8
  local num_hashes=1
  local num_cohorts=128

  local prob_p=0.25
  local prob_q=0.75
  local prob_f=0.5

  # 10 items in the input.  50,000 items is enough to eyeball accuracy of
  # results.
  # NOTE(review): presumably rappor_sim.py repeats the 10 input rows this many
  # times (10 x 5000 = 50,000) -- confirm against --assoc-testdata.
  local assoc_testdata_count=5000

  PYTHONPATH="$RAPPOR_SRC/client/python" \
    "$RAPPOR_SRC/tests/rappor_sim.py" \
    --assoc-testdata "$assoc_testdata_count" \
    --num-bits "$num_bits" \
    --num-hashes "$num_hashes" \
    --num-cohorts "$num_cohorts" \
    -p "$prob_p" \
    -q "$prob_q" \
    -f "$prob_f" \
    < "$build_dir/true_values.csv" \
    > "$input_dir/reports.csv"

  # Output two bad rows: each row is missing one of the columns.
  cat >"$build_dir/bad_rows.txt" <<EOF
c0,0,10101010,
c0,0,,0
EOF

  # Make CSV file with the header ('-' is the here-document below).
  cat - "$build_dir/bad_rows.txt" > "$input_dir/bad_rows.csv" <<EOF
client,cohort,domain,flag..HTTPS
EOF

  # Make reports file with bad rows
  cat "$input_dir/reports.csv" "$build_dir/bad_rows.txt" \
    > "$input_dir/reports_bad_rows.csv"

  # Define a string variable and a boolean variable.
  cat >"$input_dir/rappor-vars.csv" <<EOF
metric, var, var_type, params
m,domain,string,m_params
m,flag..HTTPS,boolean,m_params
EOF

  cat >"$input_dir/m_params.csv" <<EOF
k,h,m,p,q,f
$num_bits,$num_hashes,$num_cohorts,$prob_p,$prob_q,$prob_f
EOF

  # Add a string with a double quote to test quoting behavior
  cat >"$build_dir/domain_candidates.csv" <<EOF
google.com
yahoo.com
bing.com
q"q
EOF

  # Hash candidates to create map.
  "$RAPPOR_SRC/bin/hash-candidates" "$input_dir/m_params.csv" \
    < "$build_dir/domain_candidates.csv" \
    > "$input_dir/domain_map.csv"

  banner "Wrote testdata in $input_dir (intermediate files in $build_dir)"
}

# Helper function to run decode-assoc with testdata.
#
# Arguments:
#   $1   - output directory (also passed as --tmp-dir)
#   $2.. - extra flags, appended after the defaults below
decode-assoc-helper() {
  write-assoc-testdata

  local output_dir=$1
  shift

  local build_dir=$ASSOC_TESTDATA_DIR/build
  local input_dir=$ASSOC_TESTDATA_DIR/input

  time "$RAPPOR_SRC/bin/decode-assoc" \
    --metric-name m \
    --schema "$input_dir/rappor-vars.csv" \
    --reports "$input_dir/reports.csv" \
    --params-dir "$input_dir" \
    --var1 domain \
    --var2 flag..HTTPS \
    --map1 "$input_dir/domain_map.csv" \
    --create-bool-map \
    --max-em-iters 10 \
    --num-cores 2 \
    --output-dir "$output_dir" \
    --tmp-dir "$output_dir" \
    "$@"

  head "$output_dir"/assoc-*

  # Print true values for comparison
  echo
  echo "$build_dir/true_values.csv:"
  cat "$build_dir/true_values.csv"
}

# Quick smoke test for R version.
decode-assoc-R-smoke() {
  # Runs decode-assoc-helper with its default flags, writing to _tmp/R.
  local output_dir=_tmp/R
  mkdir -p "$output_dir"
  decode-assoc-helper "$output_dir"
}

# Test what happens when there are bad rows.
#
# Arguments: extra flags forwarded to decode-assoc-helper.
decode-assoc-bad-rows() {
  local output_dir=_tmp/bad
  # write-assoc-testdata (run by decode-assoc-helper) writes
  # reports_bad_rows.csv and bad_rows.csv into this directory, not into _tmp/.
  local input_dir=$ASSOC_TESTDATA_DIR/input
  mkdir -p "$output_dir"

  # Later flags override earlier ones

  # Reports + bad rows
  decode-assoc-helper "$output_dir" \
    --reports "$input_dir/reports_bad_rows.csv" \
    --remove-bad-rows \
    "$@"

  # ONLY bad rows
  decode-assoc-helper "$output_dir" \
    --reports "$input_dir/bad_rows.csv" \
    --remove-bad-rows \
    "$@"
}

# Build the C++ EM binary by running './run.sh build-fast-em' inside
# analysis/cpp.
build-em-executable() {
  # Subshell keeps the caller's working directory untouched, and '&&' makes
  # sure run.sh is never started from the wrong directory.
  (cd "$RAPPOR_SRC/analysis/cpp" >/dev/null && ./run.sh build-fast-em)
}

# Smoke test using the C++ EM executable.
#
# Arguments: extra flags forwarded to decode-assoc-helper.
decode-assoc-cpp-smoke() {
  local output_dir=_tmp/cpp
  mkdir -p "$output_dir"

  build-em-executable

  decode-assoc-helper "$output_dir" \
    --em-executable "$EM_CPP_EXECUTABLE" "$@"
}

decode-assoc-cpp-converge() {
  # With the data we have, this converges and exits before 1000 iterations.
  decode-assoc-cpp-smoke --max-em-iters 1000
}

# Run decode-assoc with analysis/tensorflow/fast_em.sh as the EM executable.
#
# Arguments: extra flags forwarded to decode-assoc-helper.
decode-assoc-tensorflow() {
  local output_dir=_tmp/tensorflow
  mkdir -p "$output_dir"

  decode-assoc-helper "$output_dir" \
    --em-executable "$RAPPOR_SRC/analysis/tensorflow/fast_em.sh" "$@"
}

# Same as decode-assoc-tensorflow, but allows up to 1000 EM iterations.
decode-assoc-tensorflow-converge() {
  decode-assoc-tensorflow --max-em-iters 1000
}

# Entry point: with no arguments print the help text, otherwise treat the
# arguments as a function name followed by its arguments.
if [[ $# -eq 0 ]]; then
  usage
else
  "$@"
fi