#!/bin/bash usage() { echo " Simple smoke test for the decode-dist tool. This will fail if your machine doesn't have the right R libraries. Usage: ./test.sh Example: ./test.sh decode-assoc-R-smoke # test pure R implementation ./test.sh decode-assoc-cpp-smoke # test with analysis/cpp/fast_em.cc ./test.sh decode-assoc-cpp-converge # run for longer with C++ ./test.sh decode-assoc-tensorflow " } set -o nounset set -o pipefail set -o errexit readonly THIS_DIR=$(dirname $0) readonly RAPPOR_SRC=$(cd $THIS_DIR/.. && pwd) readonly EM_CPP_EXECUTABLE=$RAPPOR_SRC/analysis/cpp/_tmp/fast_em source $RAPPOR_SRC/util.sh readonly ASSOC_TESTDATA_DIR=_tmp/decode-assoc-test readonly DIST_TESTDATA_DIR=_tmp/decode-dist-test # Clear the R cache for the map files. clear-cached-files() { local dir=$1 find $dir -name '*.rda' | xargs --no-run-if-empty -- rm --verbose } write-dist-testdata() { local input_dir=$DIST_TESTDATA_DIR/input mkdir -p $input_dir clear-cached-files $DIST_TESTDATA_DIR # Right now, we copy a case from regtest.sh. (./demo.sh quick-python creates # just this case) local case_dir=$RAPPOR_SRC/_tmp/python/demo3 cp --verbose $case_dir/1/case_counts.csv $input_dir/counts.csv cp --verbose $case_dir/case_map.csv $input_dir/map.csv cp --verbose $case_dir/case_params.csv $input_dir/params.csv } decode-dist() { write-dist-testdata local output_dir=$DIST_TESTDATA_DIR local input_dir=$DIST_TESTDATA_DIR/input # Uses the ./demo.sh regtest files time $RAPPOR_SRC/bin/decode-dist \ --counts $input_dir/counts.csv \ --map $input_dir/map.csv \ --params $input_dir/params.csv \ --output-dir $output_dir echo head $output_dir/results.csv echo cat $output_dir/metrics.json } write-assoc-testdata() { # 'build' has intermediate build files, 'input' is the final input to the # decode-assoc tool. 
# NOTE(review): the line breaks in this part of the file appear to have been
# collapsed, and the here-document bodies that should feed the `cat >FILE`
# commands in write-assoc-testdata look to be missing (for example
# `cat >$build_dir/true_values.csv < $input_dir/reports.csv`). Presumably they
# must be restored from the original script before this can run -- TODO confirm.
local build_dir=$ASSOC_TESTDATA_DIR/build local input_dir=$ASSOC_TESTDATA_DIR/input mkdir -p $build_dir $input_dir clear-cached-files $ASSOC_TESTDATA_DIR cat >$build_dir/true_values.csv < $input_dir/reports.csv # Output two bad rows: each row is missing one of the columns. cat >$build_dir/bad_rows.txt < $input_dir/bad_rows.csv < $input_dir/reports_bad_rows.csv # Define a string variable and a boolean variable. cat >$input_dir/rappor-vars.csv <$input_dir/m_params.csv <$build_dir/domain_candidates.csv < $input_dir/domain_map.csv banner "Wrote testdata in $input_dir (intermediate files in $build_dir)" } # Helper function to run decode-assoc with testdata. decode-assoc-helper() { write-assoc-testdata local output_dir=$1 shift local build_dir=$ASSOC_TESTDATA_DIR/build local input_dir=$ASSOC_TESTDATA_DIR/input time $RAPPOR_SRC/bin/decode-assoc \ --metric-name m \ --schema $input_dir/rappor-vars.csv \ --reports $input_dir/reports.csv \ --params-dir $input_dir \ --var1 domain \ --var2 flag..HTTPS \ --map1 $input_dir/domain_map.csv \ --create-bool-map \ --max-em-iters 10 \ --num-cores 2 \ --output-dir $output_dir \ --tmp-dir $output_dir \ "$@" head $output_dir/assoc-* # Print true values for comparison echo echo "$build_dir/true_values.csv:" cat "$build_dir/true_values.csv" } # Quick smoke test for R version. decode-assoc-R-smoke() { local output_dir=_tmp/R mkdir -p $output_dir decode-assoc-helper $output_dir } # Test what happens when there are bad rows. 
# Run decode-assoc twice against report files that contain malformed rows,
# passing --remove-bad-rows each time: first with valid reports plus bad rows,
# then with a file holding only the bad rows.
# Globals:   ASSOC_TESTDATA_DIR (read)
# Arguments: "$@" - extra flags forwarded to decode-assoc-helper
decode-assoc-bad-rows() {
  local output_dir=_tmp/bad
  # write-assoc-testdata (run by decode-assoc-helper) writes bad_rows.csv and
  # reports_bad_rows.csv under $ASSOC_TESTDATA_DIR/input, so read them from
  # there rather than from the top of _tmp/.
  local input_dir=$ASSOC_TESTDATA_DIR/input
  mkdir -p "$output_dir"

  # Later flags override earlier ones

  # Reports + bad rows
  decode-assoc-helper "$output_dir" \
    --reports "$input_dir/reports_bad_rows.csv" \
    --remove-bad-rows \
    "$@"

  # ONLY bad rows
  decode-assoc-helper "$output_dir" \
    --reports "$input_dir/bad_rows.csv" \
    --remove-bad-rows \
    "$@"
}

# Build the C++ EM executable by running `./run.sh build-fast-em` inside
# $RAPPOR_SRC/analysis/cpp.
# Globals:   RAPPOR_SRC (read)
# Returns:   status of the cd / build command
build-em-executable() {
  # A subshell keeps the caller's working directory untouched even when the
  # build fails part-way through.
  (
    cd "$RAPPOR_SRC/analysis/cpp" >/dev/null &&
      ./run.sh build-fast-em
  )
}

# Smoke test for decode-assoc using the C++ EM executable.
# Globals:   EM_CPP_EXECUTABLE (read)
# Arguments: "$@" - extra flags forwarded to decode-assoc-helper
decode-assoc-cpp-smoke() {
  local output_dir=_tmp/cpp
  mkdir -p "$output_dir"

  build-em-executable

  decode-assoc-helper "$output_dir" \
    --em-executable "$EM_CPP_EXECUTABLE" "$@"
}

decode-assoc-cpp-converge() {
  # With the data we have, this converges and exits before 1000 iterations.
  decode-assoc-cpp-smoke --max-em-iters 1000
}

# Run decode-assoc with analysis/tensorflow/fast_em.sh as the EM executable.
# Globals:   RAPPOR_SRC (read)
# Arguments: "$@" - extra flags forwarded to decode-assoc-helper
decode-assoc-tensorflow() {
  local output_dir=_tmp/tensorflow
  mkdir -p "$output_dir"

  decode-assoc-helper "$output_dir" \
    --em-executable "$RAPPOR_SRC/analysis/tensorflow/fast_em.sh" "$@"
}

# Same as decode-assoc-tensorflow, but with the iteration cap raised to 1000.
decode-assoc-tensorflow-converge() {
  decode-assoc-tensorflow --max-em-iters 1000
}

# Dispatch: with no arguments print usage; otherwise treat the first argument
# as a function name and the rest as its arguments.
if [[ $# -eq 0 ]]; then
  usage
else
  "$@"
fi