xref: /aosp_15_r20/external/crosvm/devices/src/tsc/calibrate.rs (revision bb4ee6a4ae7042d18b07a98463b9c8b875e44b39)
1 // Copyright 2022 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 use std::collections::HashSet;
6 use std::iter::FromIterator;
7 use std::time::Duration;
8 use std::time::Instant;
9 
10 use anyhow::anyhow;
11 use anyhow::Context;
12 use anyhow::Result;
13 use base::set_cpu_affinity;
14 use base::warn;
15 use remain::sorted;
16 use thiserror::Error;
17 
18 use super::grouping::*;
19 use super::rdtsc_safe;
20 
// Number of `TscMoment`s gathered per sampling pass.
const TSC_CALIBRATION_SAMPLES: usize = 10;
// How long the frequency calibration sleeps between its start and end sampling passes.
const TSC_CALIBRATION_DURATION: Duration = Duration::from_millis(100);
// Discard samples that are more than 3 standard deviations away from the median.
const TSC_CALIBRATION_STANDARD_DEVIATION_LIMIT: f64 = 3.0;
// We consider the TSCs of two cores to be in sync if they are within 2 microseconds of each
// other. An optimal context switch takes about 1-3 microseconds.
const TSC_OFFSET_GROUPING_THRESHOLD: Duration = Duration::from_micros(2);
28 
/// Errors reported by the TSC calibration and offset measurement routines.
#[sorted]
#[derive(Error, Debug)]
pub enum TscCalibrationError {
    /// Received `err` when setting the cpu affinity of the current thread to `core`.
    #[error("failed to set thread cpu affinity to core {core}: {err}")]
    SetCpuAffinityError { core: usize, err: base::Error },
}
36 
37 /// Get the standard deviation of a `Vec<T>`.
standard_deviation<T: num_traits::ToPrimitive + num_traits::Num + Copy>(items: &[T]) -> f6438 pub fn standard_deviation<T: num_traits::ToPrimitive + num_traits::Num + Copy>(items: &[T]) -> f64 {
39     let sum: T = items.iter().fold(T::zero(), |acc: T, elem| acc + *elem);
40     let count = items.len();
41 
42     let mean: f64 = sum.to_f64().unwrap_or(0.0) / count as f64;
43 
44     let variance = items
45         .iter()
46         .map(|x| {
47             let diff = mean - (x.to_f64().unwrap_or(0.0));
48             diff * diff
49         })
50         .sum::<f64>();
51     (variance / count as f64).sqrt()
52 }
53 
sort_and_get_bounds(items: &mut [i128], stdev_limit: f64) -> (f64, f64)54 fn sort_and_get_bounds(items: &mut [i128], stdev_limit: f64) -> (f64, f64) {
55     items.sort_unstable();
56     let median = items[items.len() / 2];
57 
58     let standard_deviation = standard_deviation(items);
59     let lower_bound = median as f64 - stdev_limit * standard_deviation;
60     let upper_bound = median as f64 + stdev_limit * standard_deviation;
61     (lower_bound, upper_bound)
62 }
63 
/// Represents the host monotonic time and the TSC value at a single moment in time.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
struct TscMoment {
    time: Instant,
    tsc: u64,
}

impl TscMoment {
    /// Capture the current host time, immediately followed by the TSC value read via `rdtsc`.
    fn now(rdtsc: fn() -> u64) -> Self {
        TscMoment {
            time: Instant::now(),
            tsc: rdtsc(),
        }
    }

    /// Return the two moments as `(earlier, later)` according to their host time.
    fn in_time_order<'a>(
        first: &'a TscMoment,
        second: &'a TscMoment,
    ) -> (&'a TscMoment, &'a TscMoment) {
        if first.time > second.time {
            (second, first)
        } else {
            (first, second)
        }
    }

    /// Measure the tsc frequency, in ticks per second, using two `TscMoment`s.
    ///
    /// The moments may be passed in either order. Returns 0 when both moments carry the same
    /// host time, since no frequency can be derived from a zero-length interval (and dividing
    /// by it would panic).
    fn measure_tsc_frequency(first: &TscMoment, second: &TscMoment) -> i128 {
        let (earlier, later) = Self::in_time_order(first, second);

        // A `Duration` in nanoseconds always fits in an i128.
        let elapsed_nanos = (later.time - earlier.time).as_nanos() as i128;
        if elapsed_nanos == 0 {
            return 0;
        }

        let tsc_delta = later.tsc as i128 - earlier.tsc as i128;
        tsc_delta * 1_000_000_000i128 / elapsed_nanos
    }

    /// Measure the tsc offset, in TSC ticks, using two `TscMoment`s and the TSC frequency.
    ///
    /// The result is the TSC difference between the later and the earlier moment, minus the
    /// number of ticks that `tsc_frequency` predicts for the host time elapsed between them.
    /// The moments may be passed in either order.
    fn measure_tsc_offset(first: &TscMoment, second: &TscMoment, tsc_frequency: u64) -> i128 {
        let (earlier, later) = Self::in_time_order(first, second);

        let tsc_delta = later.tsc as i128 - earlier.tsc as i128;
        let expected_ticks =
            (later.time - earlier.time).as_nanos() * tsc_frequency as u128 / 1_000_000_000u128;
        tsc_delta - expected_ticks as i128
    }
}
109 
110 #[derive(Default, Debug, Clone)]
111 pub struct TscState {
112     pub frequency: u64,
113     pub offsets: Vec<(usize, i128)>,
114     pub core_grouping: CoreGrouping,
115 }
116 
117 impl TscState {
new( tsc_frequency: u64, offsets: Vec<(usize, i128)>, in_sync_threshold: Duration, ) -> Result<Self>118     pub(crate) fn new(
119         tsc_frequency: u64,
120         offsets: Vec<(usize, i128)>,
121         in_sync_threshold: Duration,
122     ) -> Result<Self> {
123         let core_grouping = group_core_offsets(&offsets, in_sync_threshold, tsc_frequency)
124             .context("Failed to group cores by their TSC offsets")?;
125         Ok(TscState {
126             frequency: tsc_frequency,
127             offsets,
128             core_grouping,
129         })
130     }
131 }
132 
133 /// Calibrate the TSC frequency of `core`.
134 ///
135 /// This function first pins itself to `core`, generates `num_samples` start `TscMoment`s, sleeps
136 /// for `calibration_duration`, and then generates `num_samples` end `TscMoment`s. For each pair
137 /// of start and end moments, a TSC frequency value is calculated. Any frequencies that are
138 /// outside of `stddev_limit` standard deviations from the median offset are discarded, because
139 /// they may represent an interrupt that occurred while a TscMoment was generated. The remaining
140 /// non-discarded frequencies are then averaged. The function returns the TSC frequency average, as
141 /// well as a Vec of `TscMoment`s, which are all of the end moments that were associated with at
142 /// least one non-discarded frequency.
143 ///
144 /// # Arguments
145 /// * `core` - Core that this function should run on.
146 /// * `rdtsc` - Function for reading the TSC value, usually just runs RDTSC instruction.
147 /// * `num_samples` - Number of start and end `TscMoment`s to generate.
148 /// * `calibration_duration` - How long to sleep in between gathering start and end moments.
149 /// * `stdev_limit` - Number of standard deviations outside of which frequencies are discarded.
calibrate_tsc_frequency( rdtsc: fn() -> u64, core: usize, num_samples: usize, calibration_duration: Duration, stdev_limit: f64, ) -> std::result::Result<(i128, Vec<TscMoment>), TscCalibrationError>150 fn calibrate_tsc_frequency(
151     rdtsc: fn() -> u64,
152     core: usize,
153     num_samples: usize,
154     calibration_duration: Duration,
155     stdev_limit: f64,
156 ) -> std::result::Result<(i128, Vec<TscMoment>), TscCalibrationError> {
157     set_cpu_affinity(vec![core])
158         .map_err(|e| TscCalibrationError::SetCpuAffinityError { core, err: e })?;
159 
160     let starts: Vec<TscMoment> = (0..num_samples).map(|_| TscMoment::now(rdtsc)).collect();
161 
162     std::thread::sleep(calibration_duration);
163 
164     let ends: Vec<TscMoment> = (0..num_samples).map(|_| TscMoment::now(rdtsc)).collect();
165 
166     let mut freqs = Vec::with_capacity(num_samples * num_samples);
167     for start in &starts {
168         for end in &ends {
169             freqs.push(TscMoment::measure_tsc_frequency(start, end))
170         }
171     }
172 
173     let (lower_bound, upper_bound) = sort_and_get_bounds(&mut freqs, stdev_limit);
174 
175     let mut good_samples: Vec<i128> = Vec::with_capacity(num_samples * num_samples);
176     let mut good_end_moments: HashSet<TscMoment> = HashSet::new();
177     for i in 0..num_samples {
178         for j in 0..num_samples {
179             let freq = freqs[i * num_samples + j];
180 
181             if lower_bound < (freq as f64) && (freq as f64) < upper_bound {
182                 good_end_moments.insert(ends[j]);
183                 good_samples.push(freq);
184             }
185         }
186     }
187 
188     Ok((
189         good_samples.iter().sum::<i128>() / good_samples.len() as i128,
190         Vec::from_iter(good_end_moments),
191     ))
192 }
193 
194 /// Measure the TSC offset for `core` from core 0 where `reference_moments` were gathered.
195 ///
196 /// This function first pins itself to `core`, then generates `num_samples` `TscMoment`s for this
197 /// core, and then measures the TSC offset between those moments and all `reference_moments`. Any
198 /// moments that are outside of `stddev_limit` standard deviations from the median offset are
199 /// discarded, because they may represent an interrupt that occurred while a TscMoment was
200 /// generated. The remaining offsets are averaged and returned as nanoseconds.
201 ///
202 /// # Arguments
203 /// * `core` - Core that this function should run on.
204 /// * `rdtsc` - Function for reading the TSC value, usually just runs RDTSC instruction.
205 /// * `tsc_frequency` - TSC frequency measured from core 0.
206 /// * `reference_moments` - `TscMoment`s gathered from core 0.
207 /// * `num_samples` - Number of `TscMoment`s to generate on this thread for measuring the offset.
208 /// * `stdev_limit` - Number of standard deviations outside of which offsets are discarded.
measure_tsc_offset( core: usize, rdtsc: fn() -> u64, tsc_frequency: u64, reference_moments: Vec<TscMoment>, num_samples: usize, stdev_limit: f64, ) -> std::result::Result<i128, TscCalibrationError>209 fn measure_tsc_offset(
210     core: usize,
211     rdtsc: fn() -> u64,
212     tsc_frequency: u64,
213     reference_moments: Vec<TscMoment>,
214     num_samples: usize,
215     stdev_limit: f64,
216 ) -> std::result::Result<i128, TscCalibrationError> {
217     set_cpu_affinity(vec![core])
218         .map_err(|e| TscCalibrationError::SetCpuAffinityError { core, err: e })?;
219 
220     let mut diffs: Vec<i128> = Vec::with_capacity(num_samples);
221 
222     for _ in 0..num_samples {
223         let now = TscMoment::now(rdtsc);
224         for reference_moment in &reference_moments {
225             diffs.push(TscMoment::measure_tsc_offset(
226                 reference_moment,
227                 &now,
228                 tsc_frequency,
229             ));
230         }
231     }
232 
233     let (lower_bound, upper_bound) = sort_and_get_bounds(&mut diffs, stdev_limit);
234 
235     let mut good_samples: Vec<i128> = Vec::with_capacity(num_samples);
236     for diff in &diffs {
237         if lower_bound < (*diff as f64) && (*diff as f64) < upper_bound {
238             good_samples.push(*diff);
239         }
240     }
241 
242     let average_diff = good_samples.iter().sum::<i128>() / good_samples.len() as i128;
243 
244     // Convert the diff to nanoseconds using the tsc_frequency
245     Ok(average_diff * 1_000_000_000 / tsc_frequency as i128)
246 }
247 
248 /// Calibrate the TSC state.
249 ///
250 /// This function first runs a TSC frequency calibration thread for 100ms, which is pinned to
251 /// core0. The TSC calibration thread returns both the calibrated frequency, as well as a Vec of
252 /// TscMoment objects which were validated to be accurate (meaning it's unlikely an interrupt
253 /// occurred between moment's `time` and `tsc` values). This function then runs a tsc offset
254 /// measurement thread for each core, which takes the TSC frequency and the Vec of TscMoments and
255 /// measures whether or not the TSC values for that core are offset from core 0, and by how much.
256 /// The frequency and the per-core offsets are returned as a TscState.
calibrate_tsc_state() -> Result<TscState>257 pub fn calibrate_tsc_state() -> Result<TscState> {
258     calibrate_tsc_state_inner(
259         rdtsc_safe,
260         (0..base::number_of_logical_cores().context("Failed to get number of logical cores")?)
261             .collect(),
262     )
263 }
264 
265 /// Actually calibrate the TSC state.
266 ///
267 /// This function takes a customizable version of rdtsc and a specific set of cores to calibrate,
268 /// which is helpful for testing calibration logic and error handling.
269 ///
270 /// # Arguments
271 ///
272 /// * `rdtsc` - Function for reading the TSC value, usually just runs RDTSC instruction.
273 /// * `cores` - Cores to measure the TSC offset of.
calibrate_tsc_state_inner(rdtsc: fn() -> u64, cores: Vec<usize>) -> Result<TscState>274 fn calibrate_tsc_state_inner(rdtsc: fn() -> u64, cores: Vec<usize>) -> Result<TscState> {
275     // For loops can't return values unfortunately
276     let mut calibration_contents: Option<(u64, Vec<TscMoment>)> = None;
277     for core in &cores {
278         // Copy the value of core to a moveable variable now.
279         let moved_core = *core;
280         let handle = std::thread::Builder::new()
281             .name(format!("tsc_calibration_core_{}", core).to_string())
282             .spawn(move || {
283                 calibrate_tsc_frequency(
284                     rdtsc,
285                     moved_core,
286                     TSC_CALIBRATION_SAMPLES,
287                     TSC_CALIBRATION_DURATION,
288                     TSC_CALIBRATION_STANDARD_DEVIATION_LIMIT,
289                 )
290             })
291             .map_err(|e| {
292                 anyhow!(
293                     "TSC frequency calibration thread for core {} failed: {:?}",
294                     core,
295                     e
296                 )
297             })?;
298 
299         match handle.join() {
300             Ok(calibrate_result) => match calibrate_result {
301                 Ok((freq, reference_moments)) => {
302                     if freq <= 0 {
303                         warn!(
304                             "TSC calibration on core {} resulted in TSC frequency of {} Hz, \
305                     trying on another core.",
306                             core, freq
307                         );
308                         continue;
309                     };
310                     calibration_contents = Some((freq as u64, reference_moments));
311                     break;
312                 }
313 
314                 Err(TscCalibrationError::SetCpuAffinityError { core, err }) => {
315                     // There are several legitimate reasons why it might not be possible for crosvm
316                     // to run on some cores:
317                     //  1. Some cores may be offline.
318                     //  2. On Windows, the process affinity mask may not contain all cores.
319                     //
320                     // We thus just warn in this situation.
321                     warn!(
322                         "Failed to set thread affinity to {} during tsc frequency calibration due \
323                             to {}. This core is probably offline.",
324                         core, err
325                     );
326                 }
327             },
328             // thread failed
329             Err(e) => {
330                 return Err(anyhow!(
331                     "TSC frequency calibration thread for core {} failed: {:?}",
332                     core,
333                     e
334                 ));
335             }
336         };
337     }
338 
339     let (freq, reference_moments) =
340         calibration_contents.ok_or(anyhow!("Failed to calibrate TSC frequency on all cores"))?;
341 
342     let mut offsets: Vec<(usize, i128)> = Vec::with_capacity(cores.len());
343     for core in cores {
344         let thread_reference_moments = reference_moments.clone();
345         let handle = std::thread::Builder::new()
346             .name(format!("measure_tsc_offset_core_{}", core).to_string())
347             .spawn(move || {
348                 measure_tsc_offset(
349                     core,
350                     rdtsc,
351                     freq,
352                     thread_reference_moments,
353                     TSC_CALIBRATION_SAMPLES,
354                     TSC_CALIBRATION_STANDARD_DEVIATION_LIMIT,
355                 )
356             })
357             .map_err(|e| {
358                 anyhow!(
359                     "TSC offset measurement thread for core {} failed: {:?}",
360                     core,
361                     e
362                 )
363             })?;
364         let offset = match handle.join() {
365             // thread succeeded
366             Ok(measurement_result) => match measurement_result {
367                 Ok(offset) => Some(offset),
368                 Err(TscCalibrationError::SetCpuAffinityError { core, err }) => {
369                     // There are several legitimate reasons why it might not be possible for crosvm
370                     // to run on some cores:
371                     //  1. Some cores may be offline.
372                     //  2. On Windows, the process affinity mask may not contain all cores.
373                     //
374                     // We thus just warn in this situation.
375                     warn!(
376                         "Failed to set thread affinity to {} during tsc offset measurement due \
377                         to {}. This core is probably offline.",
378                         core, err
379                     );
380                     None
381                 }
382             },
383             // thread failed
384             Err(e) => {
385                 return Err(anyhow!(
386                     "TSC offset measurement thread for core {} failed: {:?}",
387                     core,
388                     e
389                 ));
390             }
391         };
392 
393         if let Some(offset) = offset {
394             offsets.push((core, offset));
395         }
396     }
397 
398     TscState::new(freq, offsets, TSC_OFFSET_GROUPING_THRESHOLD)
399 }
400 
#[cfg(test)]
mod tests {
    use std::arch::x86_64::__rdtscp;
    use std::arch::x86_64::_rdtsc;

    use super::*;

    /// Tolerance, in nanoseconds, when comparing a measured offset to the expected one.
    const ACCEPTABLE_OFFSET_MEASUREMENT_ERROR: i128 = 2_000i128;

    /// Number of logical cores on the host running the test.
    fn host_core_count() -> usize {
        base::number_of_logical_cores().expect("number of logical cores should not fail")
    }

    /// Assert that the offset measured for `core` is within the acceptable error of
    /// `expected_offset_ns`.
    ///
    /// `offsets` holds `(core, offset)` pairs and omits cores that could not be pinned to, so
    /// the entry is looked up by core id rather than by position. Cores without a measurement
    /// are skipped.
    fn assert_offset_near(offsets: &[(usize, i128)], core: usize, expected_offset_ns: i128) {
        if let Some((_, offset)) = offsets.iter().find(|(measured_core, _)| *measured_core == core)
        {
            assert!(*offset < expected_offset_ns + ACCEPTABLE_OFFSET_MEASUREMENT_ERROR);
            assert!(*offset > expected_offset_ns - ACCEPTABLE_OFFSET_MEASUREMENT_ERROR);
        }
    }

    #[test]
    fn test_handle_offline_core() {
        // This test imitates what would happen if a core is offline, and set_cpu_affinity fails.
        // The calibration should not fail, and the extra core should not appear in the list of
        // offsets.

        let num_cores = host_core_count();

        let too_many_cores = num_cores + 2;
        let host_state = calibrate_tsc_state_inner(rdtsc_safe, (0..too_many_cores).collect())
            .expect("calibrate tsc state should not fail");

        // First assert that the number of offsets measured is at most num_cores (it might be
        // less if the current host has some offline cores).
        assert!(host_state.offsets.len() <= num_cores);

        for (core, _) in host_state.offsets {
            // Assert that all offsets that we have are for cores 0..num_cores.
            assert!(core < num_cores);
        }
    }

    #[test]
    fn test_frequency_higher_than_u32() {
        // This test is making sure that we're not truncating our TSC frequencies in the case that
        // they are greater than u32::MAX.

        let num_cores = host_core_count();

        let host_state = calibrate_tsc_state_inner(rdtsc_safe, (0..num_cores).collect())
            .expect("failed to calibrate host freq");

        // We use a static multiplier of 1000 here because the function has to be static (fn).
        // 1000 should work for tsc frequency > 4.2MHz, which should apply to basically any
        // processor. This if statement checks and bails early if that's not the case.
        if host_state.frequency * 1000 < (u32::MAX as u64) {
            return;
        }

        fn rdtsc_frequency_higher_than_u32() -> u64 {
            // SAFETY: trivially safe
            unsafe { _rdtsc() }.wrapping_mul(1000)
        }

        let state =
            calibrate_tsc_state_inner(rdtsc_frequency_higher_than_u32, (0..num_cores).collect())
                .unwrap();

        let expected_freq = host_state.frequency * 1000;
        let margin_of_error = expected_freq / 100;
        assert!(state.frequency < expected_freq + margin_of_error);
        assert!(state.frequency > expected_freq - margin_of_error);
    }

    #[test]
    #[ignore]
    fn test_offset_identification_core_0() {
        fn rdtsc_with_core_0_offset_by_100_000() -> u64 {
            let mut id = 0u32;
            // SAFETY: trivially safe
            let mut value = unsafe { __rdtscp(&mut id as *mut u32) };
            if id == 0 {
                value += 100_000;
            }

            value
        }

        // This test only works if the host has >=2 logical cores.
        let num_cores = host_core_count();
        if num_cores < 2 {
            return;
        }

        let state = calibrate_tsc_state_inner(
            rdtsc_with_core_0_offset_by_100_000,
            (0..num_cores).collect(),
        )
        .unwrap();

        for core in 0..num_cores {
            // Core 0 is ahead by 100_000 ticks, so every other core appears to be behind it.
            let expected_offset_ns = if core > 0 {
                -100_000i128 * 1_000_000_000i128 / state.frequency as i128
            } else {
                0i128
            };
            assert_offset_near(&state.offsets, core, expected_offset_ns);
        }
    }

    #[test]
    #[ignore]
    fn test_offset_identification_core_1() {
        fn rdtsc_with_core_1_offset_by_100_000() -> u64 {
            let mut id = 0u32;
            // SAFETY: trivially safe
            let mut value = unsafe { __rdtscp(&mut id as *mut u32) };
            if id == 1 {
                value += 100_000;
            }

            value
        }

        // This test only works if the host has >=2 logical cores.
        let num_cores = host_core_count();
        if num_cores < 2 {
            return;
        }

        let state = calibrate_tsc_state_inner(
            rdtsc_with_core_1_offset_by_100_000,
            (0..num_cores).collect(),
        )
        .unwrap();

        for core in 0..num_cores {
            // Only core 1 is ahead by 100_000 ticks; all other cores should be in sync.
            let expected_offset_ns = if core == 1 {
                100_000i128 * 1_000_000_000i128 / state.frequency as i128
            } else {
                0i128
            };
            assert_offset_near(&state.offsets, core, expected_offset_ns);
        }
    }
}
556