#!/bin/bash
#
# Usage:
#   ./dist.sh <function name>

set -o nounset
set -o pipefail
set -o errexit

readonly THIS_DIR=$(dirname $0)
readonly RAPPOR_SRC=$(cd $THIS_DIR/.. && pwd)

source $RAPPOR_SRC/util.sh  # log, banner
source $RAPPOR_SRC/pipeline/tools-lib.sh
source $RAPPOR_SRC/pipeline/alarm-lib.sh

readonly DECODE_DIST=${DEP_DECODE_DIST:-$RAPPOR_SRC/bin/decode-dist}

readonly NUM_ARGS=7  # used for xargs

decode-dist-one() {
  # Job constants
  local rappor_src=$1
  local timeout_secs=$2
  local min_reports=$3
  shift 3  # job constants don't vary per task and are not part of the spec

  # 7 spec variables
  local num_reports=$1  # unused; only for filtering
  local metric_name=$2
  local date=$3
  local counts=$4
  local params=$5
  local map=$6
  local results_dir=$7

  local task_dir=$results_dir/$metric_name/$date
  mkdir --verbose -p $task_dir

  local log_file=$task_dir/log.txt
  local status_file=$task_dir/STATUS.txt

  # Record the spec so we know params, counts, etc.
  echo "$@" > $task_dir/spec.txt

  if test $num_reports -lt $min_reports; then
    local msg="SKIPPED because $num_reports reports is less than $min_reports"
    # Duplicate the message in both the status and log files
    echo "$msg" > $status_file
    echo "$msg" > $log_file
    return
  fi

  # Run it with a timeout, and record the status in the task dir.
  { time \
      alarm-status $status_file $timeout_secs \
        $DECODE_DIST \
          --counts $counts \
          --params $params \
          --map $map \
          --output-dir $task_dir \
          --adjust-counts-hack
  } >$log_file 2>&1

  # TODO: Don't pass --adjust-counts-hack unless the user asks for it.
}

# Print the number of processes to use.
# NOTE: This is copied from google/rappor regtest.sh.
# It doesn't take into account the fact that we are memory-bound;
# 128 GiB / 4 GiB per process would also suggest about 32 processes.
num-processes() {
  local processors=$(grep -c ^processor /proc/cpuinfo || echo 4)
  if test $processors -gt 1; then  # leave one CPU for the OS
    processors=$(expr $processors - 1)
  fi
  echo $processors
}

#readonly DEFAULT_MAX_PROCS=6   # for andychu2.hot, to avoid locking up UI
#readonly DEFAULT_MAX_PROCS=16  # for rappor-ac.hot, to avoid thrashing
readonly DEFAULT_MAX_PROCS=$(num-processes)

#readonly DEFAULT_MAX_TASKS=12
readonly DEFAULT_MAX_TASKS=10000  # more than the max

# NOTE: Since we have 125 GB of RAM, and processes can take up to 12 GiB each,
# only use a parallelism of 10, even though we have 31 cores.

readonly DEFAULT_MIN_REPORTS=5000


decode-dist-many() {
  local job_dir=$1
  local spec_list=$2
  local timeout_secs=${3:-1200}  # default timeout: 20 minutes
  local max_procs=${4:-$DEFAULT_MAX_PROCS}
  local rappor_src=${5:-$RAPPOR_SRC}
  local min_reports=${6:-$DEFAULT_MIN_REPORTS}

  local interval_secs=5
  local pid_dir="$job_dir/pids"
  local sys_mem="$job_dir/system-mem.csv"
  mkdir --verbose -p $pid_dir

  # Each line of $spec_list supplies the $NUM_ARGS fields read by
  # decode-dist-one; xargs launches one task per line, running up to
  # $max_procs of them in parallel.
  time cat $spec_list \
    | xargs --verbose -n $NUM_ARGS -P $max_procs --no-run-if-empty -- \
      $0 decode-dist-one $rappor_src $timeout_secs $min_reports
}
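
# A minimal sketch of kicking off a parallel run (the paths and values below
# are hypothetical, not prescribed by this script). Each line of the spec
# list carries the 7 fields read by decode-dist-one, in this order:
#
#   num_reports metric_name date counts params map results_dir
#
# For example:
#
#   mkdir -p _tmp/job1
#   cat >_tmp/job1/spec_list.txt <<'EOF'
#   9000 Settings.HomePage 2015-12-01 counts.csv params.csv map.csv _tmp/job1/raw
#   EOF
#   ./dist.sh decode-dist-many _tmp/job1 _tmp/job1/spec_list.txt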

# Combine/summarize results and task metadata from the parallel decode-dist
# processes, and render them as HTML.
combine-and-render-html() {
  local jobs_base_dir=$1
  local job_dir=$2

  banner "Combining dist task status"
  TOOLS-cook combine-dist-task-status $jobs_base_dir $job_dir

  banner "Combining dist results"
  TOOLS-cook combine-dist-results $jobs_base_dir $job_dir

  banner "Splitting out status per metric, and writing overview"
  TOOLS-cook dist-metric-status $job_dir

  # The task-status.csv file should have a JOB ID.
  banner "Building overview.html and per-metric HTML"
  TOOLS-gen-ui build-html1 $job_dir

  banner "Building individual results.html (for ONE day)"
  TOOLS-gen-ui results-html $job_dir
}

"$@"
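
# A typical end-to-end sequence might look like the following (a sketch; the
# directory layout is an assumption, since this script doesn't prescribe one):
#
#   ./dist.sh decode-dist-many _tmp/jobs/job1 _tmp/jobs/job1/spec_list.txt
#   ./dist.sh combine-and-render-html _tmp/jobs _tmp/jobs/job1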