xref: /aosp_15_r20/external/rappor/pipeline/cook.sh (revision 2abb31345f6c95944768b5222a9a5ed3fc68cc00)
#!/bin/bash
#
# Take the raw data from the analysis and massage it into various formats
# suitable for display.
#
# Usage:
#   ./cook.sh <function name>

set -o nounset
set -o pipefail
set -o errexit

# Directory containing this script, and the directory one level above it.
# Assignment is kept separate from 'readonly' so that a failing dirname/cd is
# not hidden behind readonly's own (always zero) exit status.
THIS_DIR=$(dirname -- "$0")
readonly THIS_DIR
RAPPOR_SRC=$(cd -- "$THIS_DIR/.." && pwd)
readonly RAPPOR_SRC

# NOTE(review): presumably tools-lib.sh defines the TOOLS-* commands used by
# the functions in this file -- confirm.
source "$RAPPOR_SRC/pipeline/tools-lib.sh"
17*2abb3134SXin Li
18*2abb3134SXin Li
# Print the path of every file named STATUS.txt under a directory tree.
#
# Arguments:
#   $1 - directory to search recursively
# Outputs:
#   one path per line on stdout
status-files() {
  local dir=$1
  # Quoted so that a directory name containing whitespace or glob characters
  # reaches find as a single argument.
  find "$dir" -name STATUS.txt
}
23*2abb3134SXin Li
# Print the path of every file named results.csv under a directory tree.
#
# Arguments:
#   $1 - directory to search recursively
# Outputs:
#   one path per line on stdout
results-files() {
  local dir=$1
  # Quoted so that a directory name containing whitespace or glob characters
  # reaches find as a single argument.
  find "$dir" -name results.csv
}
28*2abb3134SXin Li
# Print a histogram of the first field of every STATUS.txt line found by
# status-files, most frequent value first.
#
# Arguments:
#   "$@" - forwarded unchanged to status-files
# Outputs:
#   'uniq -c' formatted "<count> <status>" lines on stdout
count-results() {
  # first field of each line is one of {OK, TIMEOUT, FAIL, SKIPPED}
  #
  # The newline-separated paths are turned into NUL-terminated records so that
  # xargs does not split them on spaces or trip over quote characters.
  status-files "$@" \
    | tr '\n' '\0' \
    | xargs -0 cat -- \
    | cut -d ' ' -f 1 \
    | sort | uniq -c | sort -n -r
}
36*2abb3134SXin Li
#
# For dist cron job
#
40*2abb3134SXin Li
# Combine status of tasks over multiple jobs.  Each row is a task (decode-dist
# invocation).  This has the number of reports.
#
# Arguments:
#   $1 - directory searched for STATUS.txt files (default: ~/rappor/cron)
#   $2 - job directory that receives task-status.csv
#        (default: ~/rappor/cron/2015-05-22__05-58-01)
# Outputs:
#   writes $2/task-status.csv and prints "Wrote <path>" on stdout
combine-dist-task-status() {
  # The defaults are left unquoted on purpose: the leading ~ must still undergo
  # tilde expansion, and assignments are not word-split.
  local base_dir=${1:-~/rappor/cron}
  local job_dir=${2:-~/rappor/cron/2015-05-22__05-58-01}

  local out=$job_dir/task-status.csv

  # Ignore memory for now.
  time status-files "$base_dir" | TOOLS-combine-status dist > "$out"
  echo "Wrote $out"
}
53*2abb3134SXin Li
# Create a single dist.csv time series for a GIVEN metric.
#
# Arguments:
#   $1 - base directory whose direct subdirectories are jobs
#   $2 - job directory; output goes to $2/cooked/<metric>/dist.csv
#   $3 - metric name (a subdirectory name under each job's 'raw' directory)
combine-dist-results-one() {
  local base_dir=$1
  local job_dir=$2
  local metric_name=$3
  #echo FOO $base_dir $metric_name

  local out_dir=$job_dir/cooked/$metric_name
  mkdir -p "$out_dir"

  # Glob to capture this specific metric name over ALL job IDs.  Only the '*'
  # is left unquoted, so it is the sole part subject to globbing; the variable
  # parts stay intact even if they contain spaces.
  find "$base_dir"/*/raw/"$metric_name" -name STATUS.txt \
    | TOOLS-combine-results dist 5 \
    > "$out_dir/dist.csv"
}
69*2abb3134SXin Li
# Creates a dist.csv file for EACH metric.  TODO: Rename one/many
#
# Re-invokes this script ($0) once per distinct metric name with the
# arguments: combine-dist-results-one <base_dir> <job_dir> <metric>.
#
# Arguments:
#   $1 - base directory whose direct subdirectories are jobs
#        (default: ~/rappor/cron)
#   $2 - job directory passed through to combine-dist-results-one
#        (default: ~/rappor/cron/2015-05-22__05-58-01)
combine-dist-results() {
  # Defaults stay unquoted so the leading ~ is still tilde-expanded.
  local base_dir=${1:-~/rappor/cron}
  local job_dir=${2:-~/rappor/cron/2015-05-22__05-58-01}

  # Direct subdirs of 'raw' are metrics.  Just print filename.
  # xargs splits on newlines only (-d '\n'), so a metric name containing
  # spaces or quotes is handed over as one argument.
  find "$base_dir"/*/raw -mindepth 1 -maxdepth 1 -type d -a -printf '%f\n' \
    | sort -u \
    | xargs --verbose -n1 -d '\n' -- \
      "$0" combine-dist-results-one "$base_dir" "$job_dir"
}
81*2abb3134SXin Li
# Take the task-status.csv file, which has row key (metric, date).  Writes
# num_reports.csv and status.csv per metric, and a single overview.csv for all
# metrics.
#
# Arguments:
#   $1 - job directory containing task-status.csv (default: _tmp/results-10);
#        $1/cooked is passed to TOOLS-metric-status as the output directory
dist-metric-status() {
  local job_dir=${1:-_tmp/results-10}
  local out_dir=$job_dir/cooked

  # Quoted so that each path is passed as exactly one argument.
  TOOLS-metric-status dist "$job_dir/task-status.csv" "$out_dir"
}
91*2abb3134SXin Li
#
# For association analysis cron job
#
95*2abb3134SXin Li
# Combine every assoc-status.txt found under a base directory into a single
# assoc-task-status.csv.
#
# Arguments:
#   $1 - directory searched recursively for assoc-status.txt
#        (default: ~/rappor/chrome-assoc-smoke)
#   $2 - job directory that receives assoc-task-status.csv
#        (default: $1/smoke1)
# Outputs:
#   writes $2/assoc-task-status.csv and prints "Wrote <path>" on stdout
combine-assoc-task-status() {
  # Default stays unquoted so the leading ~ is still tilde-expanded.
  local base_dir=${1:-~/rappor/chrome-assoc-smoke}
  local job_dir=${2:-$base_dir/smoke1}

  local out=$job_dir/assoc-task-status.csv

  time find "$base_dir" -name assoc-status.txt \
    | TOOLS-combine-status assoc \
    > "$out"

  echo "Wrote $out"
}
108*2abb3134SXin Li
# Create a single assoc.csv time series for a GIVEN (var1, var2) pair.
#
# Arguments:
#   $1 - base directory whose direct subdirectories are jobs
#   $2 - job directory; output goes to
#        $2/cooked/<rel path>/assoc-results-series.csv
#   $3 - path of the pair relative to each job's 'raw' directory
combine-assoc-results-one() {
  local base_dir=$1
  local job_dir=$2
  local metric_pair_rel_path=$3

  local out_dir=$job_dir/cooked/$metric_pair_rel_path
  mkdir -p "$out_dir"

  # Glob to capture this specific metric name over ALL job IDs.  Only the '*'
  # is left unquoted, so it is the sole part subject to globbing.
  find "$base_dir"/*/raw/"$metric_pair_rel_path" -name assoc-status.txt \
    | TOOLS-combine-results assoc 5 \
    > "$out_dir/assoc-results-series.csv"
}
123*2abb3134SXin Li
# Runs combine-assoc-results-one for EACH (metric, variable pair) directory.
# TODO: Rename one/many
#
# Re-invokes this script ($0) once per distinct "<metric>/<pair>" path with the
# arguments: combine-assoc-results-one <base_dir> <job_dir> <metric>/<pair>.
#
# Arguments:
#   $1 - base directory whose direct subdirectories are jobs
#        (default: ~/rappor/chrome-assoc-smoke)
#   $2 - job directory passed through to combine-assoc-results-one
#        (default: $1/smoke3)
combine-assoc-results() {
  # Default stays unquoted so the leading ~ is still tilde-expanded.
  local base_dir=${1:-~/rappor/chrome-assoc-smoke}
  local job_dir=${2:-$base_dir/smoke3}

  # Direct subdirs of 'raw' are metrics, and subdirs of that are variable
  # pairs.  Print "$metric_name/$pair_name".
  # xargs splits on newlines only (-d '\n'), so a path containing spaces or
  # quotes is handed over as one argument.
  find "$base_dir"/*/raw -mindepth 2 -maxdepth 2 -type d -a -printf '%P\n' \
    | sort -u \
    | xargs --verbose -n1 -d '\n' -- \
      "$0" combine-assoc-results-one "$base_dir" "$job_dir"
}
136*2abb3134SXin Li
# Take the assoc-task-status.csv file, which has row key (metric, date).  Writes
# num_reports.csv and status.csv per metric, and a single overview.csv for all
# metrics.
#
# Arguments:
#   $1 - job directory containing assoc-task-status.csv
#        (default: ~/rappor/chrome-assoc-smoke/smoke3); $1/cooked is passed to
#        TOOLS-metric-status as the output directory
assoc-metric-status() {
  # Default stays unquoted so the leading ~ is still tilde-expanded.
  local job_dir=${1:-~/rappor/chrome-assoc-smoke/smoke3}
  local out_dir=$job_dir/cooked

  # Quoted so that each path is passed as exactly one argument.
  TOOLS-metric-status assoc "$job_dir/assoc-task-status.csv" "$out_dir"
}
146*2abb3134SXin Li
# Dispatch: run the command line as "<function name> [args...]", so that
# ./cook.sh <function name> invokes the matching function defined above.
"$@"
148