xref: /aosp_15_r20/external/crosvm/tools/contrib/vcpu_blocker_analyzer/src/main.rs (revision bb4ee6a4ae7042d18b07a98463b9c8b875e44b39)
1 // Copyright 2024 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 use std::collections::HashMap;
6 use std::fs::File;
7 use std::io;
8 use std::io::stdout;
9 use std::io::BufRead;
10 use std::io::BufReader;
11 use std::io::Write;
12 
13 use anyhow::Context;
14 use anyhow::Result;
15 use argh::FromArgs;
16 use env_logger::Env;
17 use parse::parse_event;
18 use parse::parse_sched_switch;
19 use parse::parse_sched_waking;
20 use parse::parse_task_rename;
21 use parse::parse_vcpu_id;
22 use parse::Event;
23 
24 mod parse;
25 
26 const VCPU_PROC_PREFIX: &str = "crosvm_vcpu";
27 
28 #[derive(Debug, FromArgs)]
29 /// Bottleneck analysis of virtio device processes.
30 struct Args {
31     /// path to the input trace-cmd report output
32     #[argh(option, short = 'i')]
33     input: String,
34 
35     /// log level (default: INFO)
36     #[argh(option, short = 'l', default = "String::from(\"INFO\")")]
37     log_level: String,
38 
39     /// show the result in the tast JSON format
40     #[argh(switch, short = 't')]
41     tast_json: bool,
42 
43     /// minimum duration to show a process (default: 0.2s)
44     #[argh(option, short = 'm', default = "0.2")]
45     minimum_duration: f64,
46 }
47 
48 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
49 pub enum ProcState {
50     Unknown,
51     Running,
52     Sleep,
53     Runnable,
54     Preempted,
55     Dead,
56     Other,
57 }
58 
59 impl std::fmt::Display for ProcState {
fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result60     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
61         match self {
62             ProcState::Unknown => write!(f, "unknown"),
63             ProcState::Running => write!(f, "running"),
64             ProcState::Sleep => write!(f, "sleep"),
65             ProcState::Runnable => write!(f, "runnable"),
66             ProcState::Preempted => write!(f, "preempted"),
67             ProcState::Dead => write!(f, "dead"),
68             ProcState::Other => write!(f, "other"),
69         }
70     }
71 }
72 
73 struct VCPUState {
74     state: ProcState,
75     timestamp: f64,
76     last_preemptor: i32,
77 }
78 
79 impl VCPUState {
new() -> Self80     fn new() -> Self {
81         VCPUState {
82             state: ProcState::Unknown,
83             timestamp: 0.0,
84             last_preemptor: -1,
85         }
86     }
87 
set_state(&mut self, new_state: ProcState, time_stamp: f64) -> (ProcState, f64)88     fn set_state(&mut self, new_state: ProcState, time_stamp: f64) -> (ProcState, f64) {
89         let duration = if self.timestamp != 0.0 {
90             time_stamp - self.timestamp
91         } else {
92             0.0
93         };
94         let prev_state = self.state;
95         self.state = new_state;
96         self.timestamp = time_stamp;
97         (prev_state, duration)
98     }
99 }
100 
main() -> Result<()>101 fn main() -> Result<()> {
102     let args: Args = argh::from_env();
103 
104     env_logger::Builder::from_env(Env::default().default_filter_or(&args.log_level)).init();
105 
106     let file = File::open(args.input)?;
107     let reader = BufReader::new(file);
108 
109     let (vcpu_state_durations, block_duration, preempted_duration, proc_names) =
110         calculate_durations(reader)?;
111 
112     let metrics = make_metrics(
113         &vcpu_state_durations,
114         &block_duration,
115         &preempted_duration,
116         &proc_names,
117         args.minimum_duration,
118     );
119 
120     if args.tast_json {
121         print_tast_json(&metrics)?;
122     } else {
123         print_text(&metrics);
124     }
125 
126     Ok(())
127 }
128 
calculate_durations<T: io::Read>( mut reader: BufReader<T>, ) -> Result<( Vec<HashMap<ProcState, f64>>, HashMap<i32, f64>, HashMap<i32, f64>, HashMap<i32, String>, )>129 fn calculate_durations<T: io::Read>(
130     mut reader: BufReader<T>,
131 ) -> Result<(
132     Vec<HashMap<ProcState, f64>>,
133     HashMap<i32, f64>,
134     HashMap<i32, f64>,
135     HashMap<i32, String>,
136 )> {
137     // Initialization
138     let mut vcpu_state_durations = Vec::<HashMap<ProcState, f64>>::new();
139     let mut block_duration = HashMap::<i32, f64>::new();
140     let mut preempted_duration = HashMap::<i32, f64>::new();
141     let mut proc_names = HashMap::<i32, String>::new();
142     let mut vcpu_states = Vec::<VCPUState>::new();
143 
144     // Read the first line to get the number of CPUs
145     let mut line = String::new();
146     reader.read_line(&mut line)?;
147     let num_cpus = line
148         .trim()
149         .strip_prefix("cpus=")
150         .and_then(|n| n.parse::<i32>().ok())
151         .context("Failed to parse number of CPUs")?;
152 
153     // Initialize data structures for VCPUs
154     for _ in 0..num_cpus {
155         vcpu_state_durations.push(HashMap::new());
156         vcpu_states.push(VCPUState::new());
157     }
158 
159     log::info!("Start processing.");
160 
161     // Process lines from the trace-cmd file
162     let mut line_number = 0;
163     for line_result in reader.lines() {
164         let line = line_result?;
165 
166         if !line.contains(VCPU_PROC_PREFIX) && !line.contains("task_rename") {
167             continue; // Skip irrelevant lines
168         }
169 
170         let event = parse_event(&line).with_context(|| {
171             format!("Failed to parse event (line {}): {}", line_number + 1, line)
172         })?;
173         update_durations(
174             &event,
175             &mut vcpu_states,
176             &mut vcpu_state_durations,
177             &mut block_duration,
178             &mut preempted_duration,
179             &mut proc_names,
180         )
181         .with_context(|| {
182             format!(
183                 "Failed to process event (line {}): {}",
184                 line_number + 1,
185                 line
186             )
187         })?;
188 
189         line_number += 1;
190     }
191     log::info!("Read {line_number} lines.");
192 
193     Ok((
194         vcpu_state_durations,
195         block_duration,
196         preempted_duration,
197         proc_names,
198     ))
199 }
200 
update_durations( event: &Event, vcpu_states: &mut [VCPUState], vcpu_state_durations: &mut [HashMap<ProcState, f64>], block_duration: &mut HashMap<i32, f64>, preempted_duration: &mut HashMap<i32, f64>, proc_names: &mut HashMap<i32, String>, ) -> Result<()>201 fn update_durations(
202     event: &Event,
203     vcpu_states: &mut [VCPUState],
204     vcpu_state_durations: &mut [HashMap<ProcState, f64>],
205     block_duration: &mut HashMap<i32, f64>,
206     preempted_duration: &mut HashMap<i32, f64>,
207     proc_names: &mut HashMap<i32, String>,
208 ) -> Result<()> {
209     match event.name.as_str() {
210         // Update the VCPU process state duration and the VCPU-blocking time of a process which
211         // waked up a VCPU process.
212         "sched_waking" => {
213             let sched_waking = parse_sched_waking(&event.details)?;
214             if !sched_waking.waked_proc_name.starts_with(VCPU_PROC_PREFIX) {
215                 // skip non-VCPU processes
216                 return Ok(());
217             }
218 
219             // Ensure a valid VCPU ID
220             let vcpu_id = parse_vcpu_id(&sched_waking.waked_proc_name)?;
221 
222             if vcpu_states[vcpu_id].state != ProcState::Unknown {
223                 *block_duration.entry(event.pid).or_default() +=
224                     event.time - vcpu_states[vcpu_id].timestamp;
225             }
226 
227             let (prev_state, dur) = vcpu_states[vcpu_id].set_state(ProcState::Runnable, event.time);
228             *vcpu_state_durations[vcpu_id].entry(prev_state).or_default() += dur;
229 
230             update_proc_name_if_missing(
231                 &sched_waking.waked_proc_name,
232                 sched_waking.waked_pid,
233                 proc_names,
234             );
235         }
236         // Update the VCPU process state duration and the VCPU-preemption time of a process if it
237         // preempted a VCPU process.
238         "sched_switch" => {
239             let sched_switch = parse_sched_switch(&event.details)?;
240             if sched_switch.prev_proc_name.starts_with(VCPU_PROC_PREFIX) {
241                 let vcpu_id = parse_vcpu_id(&sched_switch.prev_proc_name)?;
242                 if sched_switch.prev_proc_state == ProcState::Preempted {
243                     vcpu_states[vcpu_id].last_preemptor = sched_switch.new_pid;
244                 }
245                 let (prev_state, dur) =
246                     vcpu_states[vcpu_id].set_state(sched_switch.prev_proc_state, event.time);
247                 *vcpu_state_durations[vcpu_id].entry(prev_state).or_default() += dur;
248 
249                 update_proc_name_if_missing(
250                     &sched_switch.new_proc_name,
251                     sched_switch.new_pid,
252                     proc_names,
253                 );
254             }
255             if sched_switch.new_proc_name.starts_with(VCPU_PROC_PREFIX) {
256                 let vcpu_id = parse_vcpu_id(&sched_switch.new_proc_name)?;
257                 let (prev_state, dur) =
258                     vcpu_states[vcpu_id].set_state(ProcState::Running, event.time);
259                 *vcpu_state_durations[vcpu_id].entry(prev_state).or_default() += dur;
260 
261                 if prev_state == ProcState::Preempted {
262                     *preempted_duration
263                         .entry(vcpu_states[vcpu_id].last_preemptor)
264                         .or_default() += dur;
265                     vcpu_states[vcpu_id].last_preemptor = -1;
266                 }
267 
268                 update_proc_name_if_missing(
269                     &sched_switch.prev_proc_name,
270                     sched_switch.prev_pid,
271                     proc_names,
272                 );
273             }
274         }
275         "task_rename" => {
276             let comm = parse_task_rename(&event.details)?;
277             proc_names.insert(event.pid, comm);
278         }
279         _ => {}
280     }
281 
282     Ok(())
283 }
284 
285 // Update the process name only when it is missing. Callers which should not
286 // update the process name if we already know the name of pid calls this
287 // function. For example, process names which appear in events will not reflect
288 // task rename and might keep old names.
update_proc_name_if_missing(proc_name: &str, pid: i32, proc_names: &mut HashMap<i32, String>)289 fn update_proc_name_if_missing(proc_name: &str, pid: i32, proc_names: &mut HashMap<i32, String>) {
290     if pid == 0 {
291         // Special handling for "<idle>"
292         proc_names.insert(pid, "<idle>".to_string());
293     } else {
294         proc_names
295             .entry(pid)
296             .or_insert_with(|| proc_name.to_string());
297     }
298 }
299 
300 #[derive(Debug)]
301 struct Metric {
302     name: String,
303     /// The value of the metric. Currently the unit is always seconds.
304     value: f64,
305     /// The ratio of the value if it has any total value.
306     ratio: Option<Ratio>,
307 }
308 
309 #[derive(Debug)]
310 struct Ratio {
311     /// Unit: percent
312     value: f64,
313     /// Description of the total value of the ratio.
314     total_value_text: String,
315 }
316 
make_metrics( vcpu_state_durations: &[HashMap<ProcState, f64>], block_duration: &HashMap<i32, f64>, preempted_duration: &HashMap<i32, f64>, proc_names: &HashMap<i32, String>, minimum_duration: f64, ) -> Vec<Metric>317 fn make_metrics(
318     vcpu_state_durations: &[HashMap<ProcState, f64>],
319     block_duration: &HashMap<i32, f64>,
320     preempted_duration: &HashMap<i32, f64>,
321     proc_names: &HashMap<i32, String>,
322     minimum_duration: f64,
323 ) -> Vec<Metric> {
324     let mut metrics = Vec::new();
325 
326     // VCPU state metrics
327     let total_vcpu_proc_duration: HashMap<ProcState, f64> =
328         vcpu_state_durations
329             .iter()
330             .fold(HashMap::new(), |mut acc, durations| {
331                 for (state, dur) in durations.iter() {
332                     *acc.entry(*state).or_default() += dur;
333                 }
334                 acc
335             });
336 
337     let vcpu_times: Vec<f64> = vcpu_state_durations
338         .iter()
339         .map(|durations| durations.values().sum())
340         .collect();
341 
342     let total_vcpu_time: f64 = vcpu_times.iter().sum(); // Sum of durations across all vCPUs
343 
344     let proc_states_to_report: &[ProcState] = &[
345         ProcState::Running,
346         ProcState::Sleep,
347         ProcState::Runnable,
348         ProcState::Preempted,
349     ];
350     let proc_states_to_ignore: &[ProcState] =
351         &[ProcState::Dead, ProcState::Other, ProcState::Unknown];
352 
353     for (cpu, durations) in vcpu_state_durations.iter().enumerate() {
354         for state in proc_states_to_report {
355             metrics.push(Metric {
356                 name: format!("vcpu{}_{}", cpu, state),
357                 value: durations.get(state).copied().unwrap_or(0.0),
358                 ratio: Some(Ratio {
359                     value: durations.get(state).copied().unwrap_or(0.0) / vcpu_times[cpu] * 100.0,
360                     total_value_text: format!("vcpu{cpu}_time"),
361                 }),
362             });
363         }
364         for state in proc_states_to_ignore {
365             if *durations.get(state).unwrap_or(&minimum_duration) > minimum_duration {
366                 log::warn!(
367                     "{:?} duration {} > {}",
368                     state,
369                     durations.get(state).unwrap(),
370                     minimum_duration
371                 );
372             }
373         }
374     }
375 
376     // Total VCPU metrics
377     for state in proc_states_to_report {
378         // Safety: TODO
379         metrics.push(Metric {
380             name: format!("total_vcpu_{}", state),
381             value: total_vcpu_proc_duration.get(state).copied().unwrap_or(0.0),
382             ratio: Some(Ratio {
383                 value: total_vcpu_proc_duration.get(state).copied().unwrap_or(0.0)
384                     / total_vcpu_time
385                     * 100.0,
386                 total_value_text: "total_vcpu_time".to_string(),
387             }),
388         });
389     }
390     metrics.push(Metric {
391         name: "total_vcpu_time".to_string(),
392         value: total_vcpu_time,
393         ratio: None,
394     });
395 
396     // Preempted and Blocked metrics
397     metrics.extend(make_sorted_duration_metrics(
398         preempted_duration,
399         proc_names,
400         total_vcpu_time,
401         minimum_duration,
402         "preempted",
403     ));
404     metrics.extend(make_sorted_duration_metrics(
405         block_duration,
406         proc_names,
407         total_vcpu_time,
408         minimum_duration,
409         "blocked",
410     ));
411 
412     metrics
413 }
414 
make_sorted_duration_metrics( durations_by_pid: &HashMap<i32, f64>, names: &HashMap<i32, String>, total_time: f64, filter_minimum: f64, metric_prefix: &str, ) -> Vec<Metric>415 fn make_sorted_duration_metrics(
416     durations_by_pid: &HashMap<i32, f64>,
417     names: &HashMap<i32, String>,
418     total_time: f64,
419     filter_minimum: f64,
420     metric_prefix: &str,
421 ) -> Vec<Metric> {
422     let mut durations_by_name: HashMap<String, f64> = HashMap::new();
423     for (pid, dur) in durations_by_pid.iter() {
424         let proc_name = names
425             .get(pid)
426             .unwrap_or(&format!("NoProcName({})", pid))
427             .clone();
428         *durations_by_name.entry(proc_name).or_default() += dur;
429     }
430 
431     let mut names_sorted: Vec<&String> = durations_by_name.keys().collect();
432     names_sorted.sort_by(|a, b| {
433         durations_by_name[b.as_str()]
434             .partial_cmp(&durations_by_name[a.as_str()])
435             .unwrap()
436     });
437 
438     let mut metrics = Vec::new();
439     for n in names_sorted {
440         if durations_by_name[n] < filter_minimum {
441             break; // Stop if we reach durations below the threshold
442         }
443         metrics.push(Metric {
444             name: format!("{}_{}", metric_prefix, n),
445             value: durations_by_name[n],
446             ratio: Some(Ratio {
447                 value: durations_by_name[n] / total_time * 100.0,
448                 total_value_text: "total_vcpu_time".to_string(),
449             }),
450         });
451     }
452 
453     metrics
454 }
455 
print_tast_json(metrics: &[Metric]) -> Result<()>456 fn print_tast_json(metrics: &[Metric]) -> Result<()> {
457     println!("{{");
458 
459     stdout()
460         .write_all(
461             metrics
462                 .iter()
463                 .map(build_tast_metric)
464                 .collect::<Vec<_>>()
465                 .join(",")
466                 .as_bytes(),
467         )
468         .with_context(|| "Failed to write to stdout")?;
469 
470     println!("\n}}");
471     Ok(())
472 }
473 
build_tast_metric(metric: &Metric) -> String474 fn build_tast_metric(metric: &Metric) -> String {
475     // Convert a Metric to a TAST metric json string
476     let name = &metric.name;
477     let value = metric.value;
478     let mut json = format!(
479         r#"
480         "{name}": {{
481             "summary": {{
482                 "units": "sec",
483                 "improvement_direction": "down",
484                 "type": "scalar",
485                 "value": {value}
486             }}
487         }}"#
488     );
489     // Append ratio metric if present
490     if let Some(ratio) = &metric.ratio {
491         let name = format!("{}_ratio", metric.name);
492         let value = ratio.value;
493         json.push_str(&format!(
494             r#",
495         "{name}": {{
496             "summary": {{
497                 "units": "percent",
498                 "improvement_direction": "down",
499                 "type": "scalar",
500                 "value": {value}
501             }}
502         }}"#
503         ));
504     }
505     json
506 }
507 
print_text(metrics: &[Metric])508 fn print_text(metrics: &[Metric]) {
509     for metric in metrics {
510         print!("{}\t{:.4} sec", metric.name, metric.value);
511         if let Some(ratio) = &metric.ratio {
512             print!(
513                 "\t(ratio: {:.3}% of {})",
514                 ratio.value, ratio.total_value_text
515             );
516         }
517         println!();
518     }
519 }
520 
521 #[cfg(test)]
522 mod tests {
523     use rstest::*;
524 
525     use super::*;
526 
527     #[rstest]
528     #[case(
529         r#"cpus=1
530             <idle>-0 [000] 10.00: sched_waking: comm=crosvm_vcpu0 pid=2 prio=120 target_cpu=000
531             other-17 [000] 20.00: sched_stat_runtime: comm=other pid=17 runtime=1 [ns] vruntime=1 [ns]
532             <idle>-0 [000] 30.00: sched_switch: swapper/0:0 [120] R ==> crosvm_vcpu0:2 [120]
533         "#,
534         vec![(ProcState::Unknown, 0.0), (ProcState::Runnable, 20.0)].into_iter().collect()
535     )]
test_calculate_stats( #[case] test_data: &str, #[case] expected_vcpu_dur: HashMap<ProcState, f64>, )536     fn test_calculate_stats(
537         #[case] test_data: &str,
538         #[case] expected_vcpu_dur: HashMap<ProcState, f64>,
539     ) {
540         let reader = BufReader::new(test_data.as_bytes());
541         let (v_cpudur, _, _, _) = calculate_durations(reader).unwrap();
542         assert_eq!(v_cpudur[0], expected_vcpu_dur);
543     }
544 }
545