1 // Copyright 2022 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 use std::collections::HashSet;
6 use std::iter::FromIterator;
7 use std::time::Duration;
8 use std::time::Instant;
9
10 use anyhow::anyhow;
11 use anyhow::Context;
12 use anyhow::Result;
13 use base::set_cpu_affinity;
14 use base::warn;
15 use remain::sorted;
16 use thiserror::Error;
17
18 use super::grouping::*;
19 use super::rdtsc_safe;
20
// Number of `TscMoment`s gathered per sampling pass. Used both for frequency calibration (that
// many start moments and that many end moments) and for per-core offset measurement.
const TSC_CALIBRATION_SAMPLES: usize = 10;
// How long the frequency calibration thread sleeps between gathering its start moments and its
// end moments.
const TSC_CALIBRATION_DURATION: Duration = Duration::from_millis(100);
// Remove data that is more than 3 standard deviations away from the median.
const TSC_CALIBRATION_STANDARD_DEVIATION_LIMIT: f64 = 3.0;
// We consider the TSCs of two cores to be in sync if they are within 2 microseconds of each
// other. An optimal context switch takes about 1-3 microseconds.
const TSC_OFFSET_GROUPING_THRESHOLD: Duration = Duration::from_micros(2);
28
/// Errors reported by the per-core calibration and offset measurement routines in this file.
#[sorted]
#[derive(Error, Debug)]
pub enum TscCalibrationError {
    /// Received `err` when setting the cpu affinity of the current thread to `core`.
    #[error("failed to set thread cpu affinity to core {core}: {err}")]
    SetCpuAffinityError { core: usize, err: base::Error },
}
36
37 /// Get the standard deviation of a `Vec<T>`.
standard_deviation<T: num_traits::ToPrimitive + num_traits::Num + Copy>(items: &[T]) -> f6438 pub fn standard_deviation<T: num_traits::ToPrimitive + num_traits::Num + Copy>(items: &[T]) -> f64 {
39 let sum: T = items.iter().fold(T::zero(), |acc: T, elem| acc + *elem);
40 let count = items.len();
41
42 let mean: f64 = sum.to_f64().unwrap_or(0.0) / count as f64;
43
44 let variance = items
45 .iter()
46 .map(|x| {
47 let diff = mean - (x.to_f64().unwrap_or(0.0));
48 diff * diff
49 })
50 .sum::<f64>();
51 (variance / count as f64).sqrt()
52 }
53
sort_and_get_bounds(items: &mut [i128], stdev_limit: f64) -> (f64, f64)54 fn sort_and_get_bounds(items: &mut [i128], stdev_limit: f64) -> (f64, f64) {
55 items.sort_unstable();
56 let median = items[items.len() / 2];
57
58 let standard_deviation = standard_deviation(items);
59 let lower_bound = median as f64 - stdev_limit * standard_deviation;
60 let upper_bound = median as f64 + stdev_limit * standard_deviation;
61 (lower_bound, upper_bound)
62 }
63
/// Represents the host monotonic time and the TSC value at a single moment in time.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
struct TscMoment {
    /// Host monotonic time, captured just before the TSC was read.
    time: Instant,
    /// TSC value returned by the `rdtsc` function handed to [`TscMoment::now`].
    tsc: u64,
}

impl TscMoment {
    /// Capture the current host time and then read the TSC through `rdtsc`.
    fn now(rdtsc: fn() -> u64) -> Self {
        TscMoment {
            time: Instant::now(),
            tsc: rdtsc(),
        }
    }

    /// Return the two moments as `(earlier, later)` according to their host time.
    ///
    /// The argument order is kept when both moments carry the same host time.
    fn chronological<'a>(
        first: &'a TscMoment,
        second: &'a TscMoment,
    ) -> (&'a TscMoment, &'a TscMoment) {
        if first.time > second.time {
            (second, first)
        } else {
            (first, second)
        }
    }

    /// Measure the tsc frequency, in ticks per second, using two `TscMoment`s.
    ///
    /// The moments may be passed in either order. Returns 0 when no host time elapsed between
    /// them, since no frequency can be derived from a zero-length interval (dividing by the
    /// elapsed time would otherwise panic). The result is negative if the TSC went backwards
    /// between the two moments.
    fn measure_tsc_frequency(first: &TscMoment, second: &TscMoment) -> i128 {
        let (earlier, later) = Self::chronological(first, second);

        let elapsed_nanos = (later.time - earlier.time).as_nanos() as i128;
        if elapsed_nanos == 0 {
            return 0;
        }

        let tsc_delta = later.tsc as i128 - earlier.tsc as i128;

        tsc_delta * 1_000_000_000i128 / elapsed_nanos
    }

    /// Measure the tsc offset, in TSC ticks, using two `TscMoment`s and the TSC frequency.
    ///
    /// The moments may be passed in either order. The result is the observed TSC delta between
    /// the earlier and the later moment minus the number of ticks that `tsc_frequency` predicts
    /// for the elapsed host time, so it is 0 when the two readings are perfectly consistent.
    fn measure_tsc_offset(first: &TscMoment, second: &TscMoment, tsc_frequency: u64) -> i128 {
        let (earlier, later) = Self::chronological(first, second);

        let tsc_delta = later.tsc as i128 - earlier.tsc as i128;
        let expected_ticks =
            (later.time - earlier.time).as_nanos() * tsc_frequency as u128 / 1_000_000_000u128;

        tsc_delta - expected_ticks as i128
    }
}
109
/// Result of a TSC calibration run: the measured frequency plus per-core offset information.
#[derive(Default, Debug, Clone)]
pub struct TscState {
    /// Calibrated TSC frequency, in Hz (ticks per second).
    pub frequency: u64,
    /// `(core, offset)` pairs, one for each core whose offset could be measured. The offset is
    /// expressed in nanoseconds.
    pub offsets: Vec<(usize, i128)>,
    /// Grouping of the cores computed from `offsets` by `group_core_offsets`.
    pub core_grouping: CoreGrouping,
}
116
117 impl TscState {
new( tsc_frequency: u64, offsets: Vec<(usize, i128)>, in_sync_threshold: Duration, ) -> Result<Self>118 pub(crate) fn new(
119 tsc_frequency: u64,
120 offsets: Vec<(usize, i128)>,
121 in_sync_threshold: Duration,
122 ) -> Result<Self> {
123 let core_grouping = group_core_offsets(&offsets, in_sync_threshold, tsc_frequency)
124 .context("Failed to group cores by their TSC offsets")?;
125 Ok(TscState {
126 frequency: tsc_frequency,
127 offsets,
128 core_grouping,
129 })
130 }
131 }
132
133 /// Calibrate the TSC frequency of `core`.
134 ///
135 /// This function first pins itself to `core`, generates `num_samples` start `TscMoment`s, sleeps
136 /// for `calibration_duration`, and then generates `num_samples` end `TscMoment`s. For each pair
137 /// of start and end moments, a TSC frequency value is calculated. Any frequencies that are
138 /// outside of `stddev_limit` standard deviations from the median offset are discarded, because
139 /// they may represent an interrupt that occurred while a TscMoment was generated. The remaining
140 /// non-discarded frequencies are then averaged. The function returns the TSC frequency average, as
141 /// well as a Vec of `TscMoment`s, which are all of the end moments that were associated with at
142 /// least one non-discarded frequency.
143 ///
144 /// # Arguments
145 /// * `core` - Core that this function should run on.
146 /// * `rdtsc` - Function for reading the TSC value, usually just runs RDTSC instruction.
147 /// * `num_samples` - Number of start and end `TscMoment`s to generate.
148 /// * `calibration_duration` - How long to sleep in between gathering start and end moments.
149 /// * `stdev_limit` - Number of standard deviations outside of which frequencies are discarded.
calibrate_tsc_frequency( rdtsc: fn() -> u64, core: usize, num_samples: usize, calibration_duration: Duration, stdev_limit: f64, ) -> std::result::Result<(i128, Vec<TscMoment>), TscCalibrationError>150 fn calibrate_tsc_frequency(
151 rdtsc: fn() -> u64,
152 core: usize,
153 num_samples: usize,
154 calibration_duration: Duration,
155 stdev_limit: f64,
156 ) -> std::result::Result<(i128, Vec<TscMoment>), TscCalibrationError> {
157 set_cpu_affinity(vec![core])
158 .map_err(|e| TscCalibrationError::SetCpuAffinityError { core, err: e })?;
159
160 let starts: Vec<TscMoment> = (0..num_samples).map(|_| TscMoment::now(rdtsc)).collect();
161
162 std::thread::sleep(calibration_duration);
163
164 let ends: Vec<TscMoment> = (0..num_samples).map(|_| TscMoment::now(rdtsc)).collect();
165
166 let mut freqs = Vec::with_capacity(num_samples * num_samples);
167 for start in &starts {
168 for end in &ends {
169 freqs.push(TscMoment::measure_tsc_frequency(start, end))
170 }
171 }
172
173 let (lower_bound, upper_bound) = sort_and_get_bounds(&mut freqs, stdev_limit);
174
175 let mut good_samples: Vec<i128> = Vec::with_capacity(num_samples * num_samples);
176 let mut good_end_moments: HashSet<TscMoment> = HashSet::new();
177 for i in 0..num_samples {
178 for j in 0..num_samples {
179 let freq = freqs[i * num_samples + j];
180
181 if lower_bound < (freq as f64) && (freq as f64) < upper_bound {
182 good_end_moments.insert(ends[j]);
183 good_samples.push(freq);
184 }
185 }
186 }
187
188 Ok((
189 good_samples.iter().sum::<i128>() / good_samples.len() as i128,
190 Vec::from_iter(good_end_moments),
191 ))
192 }
193
194 /// Measure the TSC offset for `core` from core 0 where `reference_moments` were gathered.
195 ///
196 /// This function first pins itself to `core`, then generates `num_samples` `TscMoment`s for this
197 /// core, and then measures the TSC offset between those moments and all `reference_moments`. Any
198 /// moments that are outside of `stddev_limit` standard deviations from the median offset are
199 /// discarded, because they may represent an interrupt that occurred while a TscMoment was
200 /// generated. The remaining offsets are averaged and returned as nanoseconds.
201 ///
202 /// # Arguments
203 /// * `core` - Core that this function should run on.
204 /// * `rdtsc` - Function for reading the TSC value, usually just runs RDTSC instruction.
205 /// * `tsc_frequency` - TSC frequency measured from core 0.
206 /// * `reference_moments` - `TscMoment`s gathered from core 0.
207 /// * `num_samples` - Number of `TscMoment`s to generate on this thread for measuring the offset.
208 /// * `stdev_limit` - Number of standard deviations outside of which offsets are discarded.
measure_tsc_offset( core: usize, rdtsc: fn() -> u64, tsc_frequency: u64, reference_moments: Vec<TscMoment>, num_samples: usize, stdev_limit: f64, ) -> std::result::Result<i128, TscCalibrationError>209 fn measure_tsc_offset(
210 core: usize,
211 rdtsc: fn() -> u64,
212 tsc_frequency: u64,
213 reference_moments: Vec<TscMoment>,
214 num_samples: usize,
215 stdev_limit: f64,
216 ) -> std::result::Result<i128, TscCalibrationError> {
217 set_cpu_affinity(vec![core])
218 .map_err(|e| TscCalibrationError::SetCpuAffinityError { core, err: e })?;
219
220 let mut diffs: Vec<i128> = Vec::with_capacity(num_samples);
221
222 for _ in 0..num_samples {
223 let now = TscMoment::now(rdtsc);
224 for reference_moment in &reference_moments {
225 diffs.push(TscMoment::measure_tsc_offset(
226 reference_moment,
227 &now,
228 tsc_frequency,
229 ));
230 }
231 }
232
233 let (lower_bound, upper_bound) = sort_and_get_bounds(&mut diffs, stdev_limit);
234
235 let mut good_samples: Vec<i128> = Vec::with_capacity(num_samples);
236 for diff in &diffs {
237 if lower_bound < (*diff as f64) && (*diff as f64) < upper_bound {
238 good_samples.push(*diff);
239 }
240 }
241
242 let average_diff = good_samples.iter().sum::<i128>() / good_samples.len() as i128;
243
244 // Convert the diff to nanoseconds using the tsc_frequency
245 Ok(average_diff * 1_000_000_000 / tsc_frequency as i128)
246 }
247
248 /// Calibrate the TSC state.
249 ///
250 /// This function first runs a TSC frequency calibration thread for 100ms, which is pinned to
251 /// core0. The TSC calibration thread returns both the calibrated frequency, as well as a Vec of
252 /// TscMoment objects which were validated to be accurate (meaning it's unlikely an interrupt
253 /// occurred between moment's `time` and `tsc` values). This function then runs a tsc offset
254 /// measurement thread for each core, which takes the TSC frequency and the Vec of TscMoments and
255 /// measures whether or not the TSC values for that core are offset from core 0, and by how much.
256 /// The frequency and the per-core offsets are returned as a TscState.
calibrate_tsc_state() -> Result<TscState>257 pub fn calibrate_tsc_state() -> Result<TscState> {
258 calibrate_tsc_state_inner(
259 rdtsc_safe,
260 (0..base::number_of_logical_cores().context("Failed to get number of logical cores")?)
261 .collect(),
262 )
263 }
264
265 /// Actually calibrate the TSC state.
266 ///
267 /// This function takes a customizable version of rdtsc and a specific set of cores to calibrate,
268 /// which is helpful for testing calibration logic and error handling.
269 ///
270 /// # Arguments
271 ///
272 /// * `rdtsc` - Function for reading the TSC value, usually just runs RDTSC instruction.
273 /// * `cores` - Cores to measure the TSC offset of.
calibrate_tsc_state_inner(rdtsc: fn() -> u64, cores: Vec<usize>) -> Result<TscState>274 fn calibrate_tsc_state_inner(rdtsc: fn() -> u64, cores: Vec<usize>) -> Result<TscState> {
275 // For loops can't return values unfortunately
276 let mut calibration_contents: Option<(u64, Vec<TscMoment>)> = None;
277 for core in &cores {
278 // Copy the value of core to a moveable variable now.
279 let moved_core = *core;
280 let handle = std::thread::Builder::new()
281 .name(format!("tsc_calibration_core_{}", core).to_string())
282 .spawn(move || {
283 calibrate_tsc_frequency(
284 rdtsc,
285 moved_core,
286 TSC_CALIBRATION_SAMPLES,
287 TSC_CALIBRATION_DURATION,
288 TSC_CALIBRATION_STANDARD_DEVIATION_LIMIT,
289 )
290 })
291 .map_err(|e| {
292 anyhow!(
293 "TSC frequency calibration thread for core {} failed: {:?}",
294 core,
295 e
296 )
297 })?;
298
299 match handle.join() {
300 Ok(calibrate_result) => match calibrate_result {
301 Ok((freq, reference_moments)) => {
302 if freq <= 0 {
303 warn!(
304 "TSC calibration on core {} resulted in TSC frequency of {} Hz, \
305 trying on another core.",
306 core, freq
307 );
308 continue;
309 };
310 calibration_contents = Some((freq as u64, reference_moments));
311 break;
312 }
313
314 Err(TscCalibrationError::SetCpuAffinityError { core, err }) => {
315 // There are several legitimate reasons why it might not be possible for crosvm
316 // to run on some cores:
317 // 1. Some cores may be offline.
318 // 2. On Windows, the process affinity mask may not contain all cores.
319 //
320 // We thus just warn in this situation.
321 warn!(
322 "Failed to set thread affinity to {} during tsc frequency calibration due \
323 to {}. This core is probably offline.",
324 core, err
325 );
326 }
327 },
328 // thread failed
329 Err(e) => {
330 return Err(anyhow!(
331 "TSC frequency calibration thread for core {} failed: {:?}",
332 core,
333 e
334 ));
335 }
336 };
337 }
338
339 let (freq, reference_moments) =
340 calibration_contents.ok_or(anyhow!("Failed to calibrate TSC frequency on all cores"))?;
341
342 let mut offsets: Vec<(usize, i128)> = Vec::with_capacity(cores.len());
343 for core in cores {
344 let thread_reference_moments = reference_moments.clone();
345 let handle = std::thread::Builder::new()
346 .name(format!("measure_tsc_offset_core_{}", core).to_string())
347 .spawn(move || {
348 measure_tsc_offset(
349 core,
350 rdtsc,
351 freq,
352 thread_reference_moments,
353 TSC_CALIBRATION_SAMPLES,
354 TSC_CALIBRATION_STANDARD_DEVIATION_LIMIT,
355 )
356 })
357 .map_err(|e| {
358 anyhow!(
359 "TSC offset measurement thread for core {} failed: {:?}",
360 core,
361 e
362 )
363 })?;
364 let offset = match handle.join() {
365 // thread succeeded
366 Ok(measurement_result) => match measurement_result {
367 Ok(offset) => Some(offset),
368 Err(TscCalibrationError::SetCpuAffinityError { core, err }) => {
369 // There are several legitimate reasons why it might not be possible for crosvm
370 // to run on some cores:
371 // 1. Some cores may be offline.
372 // 2. On Windows, the process affinity mask may not contain all cores.
373 //
374 // We thus just warn in this situation.
375 warn!(
376 "Failed to set thread affinity to {} during tsc offset measurement due \
377 to {}. This core is probably offline.",
378 core, err
379 );
380 None
381 }
382 },
383 // thread failed
384 Err(e) => {
385 return Err(anyhow!(
386 "TSC offset measurement thread for core {} failed: {:?}",
387 core,
388 e
389 ));
390 }
391 };
392
393 if let Some(offset) = offset {
394 offsets.push((core, offset));
395 }
396 }
397
398 TscState::new(freq, offsets, TSC_OFFSET_GROUPING_THRESHOLD)
399 }
400
/// Tests for the calibration logic. They run against the real host TSC (the intrinsics imported
/// below are x86_64-only) and pin threads to host cores.
#[cfg(test)]
mod tests {
    use std::arch::x86_64::__rdtscp;
    use std::arch::x86_64::_rdtsc;

    use super::*;

    // Tolerance applied to measured offsets, which are expressed in nanoseconds.
    const ACCEPTABLE_OFFSET_MEASUREMENT_ERROR: i128 = 2_000i128;

    /// Cores that cannot be pinned to must be skipped rather than failing the calibration.
    #[test]
    fn test_handle_offline_core() {
        // This test imitates what would happen if a core is offline, and set_cpu_affinity fails.
        // The calibration should not fail, and the extra core should not appear in the list of
        // offsets.

        let num_cores =
            base::number_of_logical_cores().expect("number of logical cores should not fail");

        // Ask for two more cores than the host reports, so the last two ids do not exist.
        let too_may_cores = num_cores + 2;
        let host_state = calibrate_tsc_state_inner(rdtsc_safe, (0..too_may_cores).collect())
            .expect("calibrate tsc state should not fail");

        // First assert that the number of offsets measured is at most num_cores (it might be
        // less if the current host has some offline cores).
        assert!(host_state.offsets.len() <= num_cores);

        for (core, _) in host_state.offsets {
            // Assert that all offsets that we have are for cores 0..num_cores.
            assert!(core < num_cores);
        }
    }

    /// Frequencies above `u32::MAX` must survive calibration without being truncated.
    #[test]
    fn test_frequency_higher_than_u32() {
        // This test is making sure that we're not truncating our TSC frequencies in the case that
        // they are greater than u32::MAX.

        // Calibrate with the real TSC first to learn the host frequency.
        let host_state = calibrate_tsc_state_inner(
            rdtsc_safe,
            (0..base::number_of_logical_cores().expect("number of logical cores should not fail"))
                .collect(),
        )
        .expect("failed to calibrate host freq");

        // We use a static multiplier of 1000 here because the function has to be static (fn).
        // 1000 should work for tsc frequency > 4.2MHz, which should apply to basically any
        // processor. This if statement checks and bails early if that's not the case.
        if host_state.frequency * 1000 < (u32::MAX as u64) {
            return;
        }

        // Fake TSC that ticks 1000 times faster than the real one.
        fn rdtsc_frequency_higher_than_u32() -> u64 {
            // SAFETY: trivially safe
            unsafe { _rdtsc() }.wrapping_mul(1000)
        }

        let state = calibrate_tsc_state_inner(
            rdtsc_frequency_higher_than_u32,
            (0..base::number_of_logical_cores().expect("number of logical cores should not fail"))
                .collect(),
        )
        .unwrap();

        // The scaled frequency must be within 1% of 1000x the host frequency.
        let expected_freq = host_state.frequency * 1000;
        let margin_of_error = expected_freq / 100;
        assert!(state.frequency < expected_freq + margin_of_error);
        assert!(state.frequency > expected_freq - margin_of_error);
    }

    /// An artificial offset on core 0 must show up as an opposite offset on every other core.
    ///
    /// Ignored by default.
    #[test]
    #[ignore]
    fn test_offset_identification_core_0() {
        // Fake TSC that reads 100_000 ticks ahead whenever RDTSCP reports id 0.
        fn rdtsc_with_core_0_offset_by_100_000() -> u64 {
            let mut id = 0u32;
            // SAFETY: `id` is a live local, so the pointer passed to `__rdtscp` is valid for
            // writes for the duration of the call.
            let mut value = unsafe { __rdtscp(&mut id as *mut u32) };
            if id == 0 {
                value += 100_000;
            }

            value
        }

        // This test only works if the host has >=2 logical cores.
        let num_cores =
            base::number_of_logical_cores().expect("Failed to get number of logical cores");
        if num_cores < 2 {
            return;
        }

        let state = calibrate_tsc_state_inner(
            rdtsc_with_core_0_offset_by_100_000,
            (0..base::number_of_logical_cores().expect("number of logical cores should not fail"))
                .collect(),
        )
        .unwrap();

        // NOTE(review): the expectations below assume core 0 served as the reference core, and
        // `state.offsets[core]` assumes an offset was recorded for every core (the position in
        // `offsets` only equals the core id when no core was skipped) -- confirm on hosts with
        // offline cores.
        for core in 0..num_cores {
            // 100_000 ticks converted to nanoseconds at the calibrated frequency.
            let expected_offset_ns = if core > 0 {
                -100_000i128 * 1_000_000_000i128 / state.frequency as i128
            } else {
                0i128
            };
            assert!(
                state.offsets[core].1 < expected_offset_ns + ACCEPTABLE_OFFSET_MEASUREMENT_ERROR
            );
            assert!(
                state.offsets[core].1 > expected_offset_ns - ACCEPTABLE_OFFSET_MEASUREMENT_ERROR
            );
        }
    }

    /// An artificial offset on core 1 must be reported for core 1 and for no other core.
    ///
    /// Ignored by default.
    #[test]
    #[ignore]
    fn test_offset_identification_core_1() {
        // Fake TSC that reads 100_000 ticks ahead whenever RDTSCP reports id 1.
        fn rdtsc_with_core_1_offset_by_100_000() -> u64 {
            let mut id = 0u32;
            // SAFETY: `id` is a live local, so the pointer passed to `__rdtscp` is valid for
            // writes for the duration of the call.
            let mut value = unsafe { __rdtscp(&mut id as *mut u32) };
            if id == 1 {
                value += 100_000;
            }

            value
        }

        // This test only works if the host has >=2 logical cores.
        let num_cores =
            base::number_of_logical_cores().expect("Failed to get number of logical cores");
        if num_cores < 2 {
            return;
        }

        let state = calibrate_tsc_state_inner(
            rdtsc_with_core_1_offset_by_100_000,
            (0..base::number_of_logical_cores().expect("number of logical cores should not fail"))
                .collect(),
        )
        .unwrap();

        // NOTE(review): `state.offsets[core]` assumes an offset was recorded for every core (the
        // position in `offsets` only equals the core id when no core was skipped) -- confirm on
        // hosts with offline cores.
        for core in 0..num_cores {
            // 100_000 ticks converted to nanoseconds at the calibrated frequency.
            let expected_offset_ns = if core == 1 {
                100_000i128 * 1_000_000_000i128 / state.frequency as i128
            } else {
                0i128
            };
            assert!(
                state.offsets[core].1 < expected_offset_ns + ACCEPTABLE_OFFSET_MEASUREMENT_ERROR
            );
            assert!(
                state.offsets[core].1 > expected_offset_ns - ACCEPTABLE_OFFSET_MEASUREMENT_ERROR
            );
        }
    }
}
556