#!/bin/bash
#
# Runs the decode-dist tool over many tasks in parallel (via xargs) and
# combines/renders the per-task results as HTML.
#
# Origin: /aosp_15_r20/external/rappor/pipeline/dist.sh
#         (revision 2abb31345f6c95944768b5222a9a5ed3fc68cc00)
#
# Usage:
#   ./dist.sh <function name> [args...]

set -o nounset
set -o pipefail
set -o errexit

# Assign first and mark readonly afterwards: 'readonly X=$(cmd)' always
# succeeds, which would hide a failing 'cd' from errexit.
THIS_DIR=$(dirname -- "$0")
readonly THIS_DIR
RAPPOR_SRC=$(cd -- "$THIS_DIR/.." && pwd)
readonly RAPPOR_SRC

# Paths are quoted so a checkout under a directory with spaces still works.
source "$RAPPOR_SRC/util.sh"  # log, banner
source "$RAPPOR_SRC/pipeline/tools-lib.sh"
source "$RAPPOR_SRC/pipeline/alarm-lib.sh"

# Path of the decode-dist executable; DEP_DECODE_DIST overrides the in-tree
# default.
readonly DECODE_DIST=${DEP_DECODE_DIST:-$RAPPOR_SRC/bin/decode-dist}

# Number of spec fields per task; used as 'xargs -n' in decode-dist-many.
readonly NUM_ARGS=7

# Run decode-dist for ONE task, i.e. one (metric, date) pair.
#
# Intended to be invoked by decode-dist-many through xargs, which appends the
# 7 spec fields after the 3 job constants.
#
# Args:
#   $1      rappor_src   job constant; accepted for interface compatibility
#                        but not read by this function
#   $2      timeout_secs job constant; handed to alarm-status
#   $3      min_reports  job constant; tasks with fewer reports are skipped
#   $4-$10  the task spec:
#           num_reports metric_name date counts params map results_dir
#
# Files written under $results_dir/$metric_name/$date:
#   spec.txt    the 7 spec fields, space-separated
#   log.txt     stdout/stderr and timing of the run (or the SKIPPED message)
#   STATUS.txt  the SKIPPED message, or whatever alarm-status records there
#
# Globals read: DECODE_DIST (treated as a single executable path).
decode-dist-one() {
  # Job constants.  $1 (rappor_src) is deliberately not bound to a local: it
  # is unused here.
  local timeout_secs=$2
  local min_reports=$3
  shift 3  # job constants do not vary per task and are not part of the spec

  # 7 spec variables
  local num_reports=$1  # only used to filter out tasks with too few reports
  local metric_name=$2
  local date=$3
  local counts=$4
  local params=$5
  local map=$6
  local results_dir=$7

  local task_dir=$results_dir/$metric_name/$date
  mkdir --verbose -p "$task_dir"

  local log_file=$task_dir/log.txt
  local status_file=$task_dir/STATUS.txt

  # Record the spec so we know params, counts, etc.
  echo "$@" > "$task_dir/spec.txt"

  if test "$num_reports" -lt "$min_reports"; then
    local msg="SKIPPED because $num_reports reports is less than $min_reports"
    # Duplicate this message
    echo "$msg" > "$status_file"
    echo "$msg" > "$log_file"
    return
  fi

  # Run it with a timeout, and record status in the task dir.  All output,
  # including the 'time' report, goes to the task's log file.
  { time \
      alarm-status "$status_file" "$timeout_secs" \
        "$DECODE_DIST" \
          --counts "$counts" \
          --params "$params" \
          --map "$map" \
          --output-dir "$task_dir" \
          --adjust-counts-hack
  } >"$log_file" 2>&1

  # TODO: Don't pass --adjust-counts-hack unless the user asks for it.
}

# Print the number of processes to use.
# NOTE: This is copied from google/rappor regtest.sh.
# It also doesn't take into account the fact that we are memory-bound.
#
# 128 GiB / 4GiB would also imply about 32 processes though.
#
# Args:
#   $1  (optional) cpuinfo-style file to count '^processor' lines in;
#       defaults to /proc/cpuinfo.
#
# Output: one positive integer on stdout.  When the processor count cannot be
# determined (file unreadable, or no 'processor' lines), 4 is assumed.  One
# CPU is left for the OS whenever more than one is available.
num-processes() {
  local cpuinfo=${1:-/proc/cpuinfo}

  # grep -c prints "0" AND exits non-zero when nothing matches, so the exit
  # status alone cannot drive the fallback; validate the captured text.
  local processors
  processors=$(grep -c '^processor' "$cpuinfo") || true
  if ! [[ "$processors" =~ ^[1-9][0-9]*$ ]]; then
    processors=4
  fi

  if (( processors > 1 )); then  # leave one CPU for the OS
    processors=$(( processors - 1 ))
  fi
  echo "$processors"
}

#readonly DEFAULT_MAX_PROCS=6  # for andychu2.hot, to avoid locking up UI
#readonly DEFAULT_MAX_PROCS=16  # for rappor-ac.hot, to avoid thrashing
# Assigned separately from 'readonly' so that a failure of num-processes is
# not masked by the always-successful 'readonly' builtin.
DEFAULT_MAX_PROCS=$(num-processes)
readonly DEFAULT_MAX_PROCS

#readonly DEFAULT_MAX_TASKS=12
readonly DEFAULT_MAX_TASKS=10000  # more than the max

# NOTE: Since we have 125 GB RAM, and processes can take up to 12 gigs of RAM,
# only use parallelism of 10, even though we have 31 cores.
# NOTE(review): the active default above is the CPU-based count, not 10 --
# confirm whether callers are expected to pass max_procs explicitly.

# Tasks with fewer reports than this are skipped (default for min_reports).
readonly DEFAULT_MIN_REPORTS=5000


# Run decode-dist-one over every task in a spec list, in parallel.
#
# Args:
#   $1  job_dir       job directory; a 'pids' subdirectory is created in it
#   $2  spec_list     file of whitespace-separated task specs, NUM_ARGS
#                     fields per task
#   $3  timeout_secs  per-task timeout (default 1200)
#   $4  max_procs     xargs parallelism (default $DEFAULT_MAX_PROCS)
#   $5  rappor_src    forwarded to decode-dist-one (default $RAPPOR_SRC)
#   $6  min_reports   forwarded to decode-dist-one
#                     (default $DEFAULT_MIN_REPORTS)
#
# Each task is run by re-invoking this script ($0) with the
# 'decode-dist-one' function name, the three job constants, and the task's
# spec fields appended by xargs.
decode-dist-many() {
  local job_dir=$1
  local spec_list=$2
  local timeout_secs=${3:-1200}  # default timeout
  local max_procs=${4:-$DEFAULT_MAX_PROCS}
  local rappor_src=${5:-$RAPPOR_SRC}
  local min_reports=${6:-$DEFAULT_MIN_REPORTS}

  local pid_dir="$job_dir/pids"
  mkdir --verbose -p "$pid_dir"

  # Feed the spec list on stdin directly; 'time' covers the whole fan-out.
  time xargs --verbose -n "$NUM_ARGS" -P "$max_procs" --no-run-if-empty -- \
    "$0" decode-dist-one "$rappor_src" "$timeout_secs" "$min_reports" \
    < "$spec_list"
}

# Combine/summarize results and task metadata from the parallel decode-dist
# processes.  Render them as HTML.
#
# Args:
#   $1  jobs_base_dir  passed to the two 'combine-*' TOOLS-cook steps
#   $2  job_dir        passed to every step
#
# Relies on banner, TOOLS-cook and TOOLS-gen-ui being defined by the sourced
# libraries.  Directory arguments are quoted so paths containing whitespace
# reach each tool as a single argument.
combine-and-render-html() {
  local jobs_base_dir=$1
  local job_dir=$2

  banner "Combining dist task status"
  TOOLS-cook combine-dist-task-status "$jobs_base_dir" "$job_dir"

  banner "Combining dist results"
  TOOLS-cook combine-dist-results "$jobs_base_dir" "$job_dir"

  banner "Splitting out status per metric, and writing overview"
  TOOLS-cook dist-metric-status "$job_dir"

  # The task-status.csv file should have a JOB ID.
  banner "Building overview.html and per-metric HTML"
  TOOLS-gen-ui build-html1 "$job_dir"

  banner "Building individual results.html (for ONE day)"
  TOOLS-gen-ui results-html "$job_dir"
}

# Dispatch: run the function named by the first argument, passing the
# remaining arguments to it.  With no arguments this is a no-op.
"$@"