1 // Copyright 2022 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 //! Handles operations using platform Time Stamp Counter (TSC).
6
7 // TODO(b/213149158): Remove after uses are added.
8 #![allow(dead_code)]
9
10 use std::arch::x86_64::_rdtsc;
11
12 use anyhow::anyhow;
13 use anyhow::Result;
14 use base::debug;
15 use base::error;
16 use once_cell::sync::Lazy;
17
18 mod calibrate;
19 mod cpuid;
20 mod grouping;
21
22 pub use calibrate::*;
23 pub use cpuid::*;
24
/// Reads the current value of the host Time Stamp Counter.
///
/// This is a safe wrapper around the `_rdtsc` intrinsic, which lets the counter read be handed
/// around as an ordinary `fn() -> u64`.
fn rdtsc_safe() -> u64 {
    // SAFETY:
    // `_rdtsc` takes no arguments, so there is no input that could be invalid.
    unsafe { _rdtsc() }
}
30
31 // Singleton for getting the state of the host TSCs, to avoid calibrating multiple times.
32 static TSC_STATE: Lazy<Option<TscState>> = Lazy::new(|| match calibrate_tsc_state() {
33 Ok(tsc_state) => {
34 debug!("Using calibrated tsc frequency: {} Hz", tsc_state.frequency);
35 for (core, offset) in tsc_state.offsets.iter().enumerate() {
36 debug!("Core {} has tsc offset of {:?} ns", core, offset);
37 }
38 Some(tsc_state)
39 }
40 Err(e) => {
41 error!("Failed to calibrate tsc state: {:#}", e);
42 None
43 }
44 });
45
46 /// Returns the frequency of the host TSC. Calibration only happens once.
tsc_frequency() -> Result<u64>47 pub fn tsc_frequency() -> Result<u64> {
48 let state = TSC_STATE
49 .as_ref()
50 .ok_or(anyhow!("TSC calibration failed"))?;
51 Ok(state.frequency)
52 }
53
54 /// Returns the state of the host TSCs. Calibration only happens once.
tsc_state() -> Result<TscState>55 pub fn tsc_state() -> Result<TscState> {
56 Ok(TSC_STATE
57 .as_ref()
58 .ok_or(anyhow!("TSC calibration failed"))?
59 .clone())
60 }
61
/// Per-vcpu settings that compensate for host TSCs which are not in sync with each other.
///
/// Both vectors are indexed by vcpu id.
#[derive(Default, Debug)]
pub struct TscSyncMitigations {
    /// Vec of per-vcpu affinities to apply to each vcpu thread. If None, no affinity should be
    /// applied.
    pub affinities: Vec<Option<Vec<usize>>>,
    /// Vec of TSC offsets to set on each vcpu. If None, no offset should be applied.
    pub offsets: Vec<Option<u64>>,
}

impl TscSyncMitigations {
    /// Creates mitigations for `num_vcpus` vcpus with no affinity and no offset set on any of
    /// them.
    fn new(num_vcpus: usize) -> Self {
        TscSyncMitigations {
            affinities: vec![None; num_vcpus],
            offsets: vec![None; num_vcpus],
        }
    }

    /// Returns the set of host cores that vcpu `cpu_id` should be pinned to.
    ///
    /// Returns `None` when no affinity should be applied, which includes the case where
    /// `cpu_id` is not a vcpu these mitigations were computed for.
    pub fn get_vcpu_affinity(&self, cpu_id: usize) -> Option<Vec<usize>> {
        self.affinities.get(cpu_id).cloned().flatten()
    }

    /// Returns the TSC offset that should be set on vcpu `cpu_id`.
    ///
    /// Returns `None` when no offset should be applied, which includes the case where `cpu_id`
    /// is not a vcpu these mitigations were computed for.
    pub fn get_vcpu_tsc_offset(&self, cpu_id: usize) -> Option<u64> {
        self.offsets.get(cpu_id).copied().flatten()
    }
}
87
88 /// Given the state of the host TSCs in `tsc_state`, and the number of vcpus that are intended to
89 /// be run, return a set of affinities and TSC offsets to apply to those vcpus.
get_tsc_sync_mitigations(tsc_state: &TscState, num_vcpus: usize) -> TscSyncMitigations90 pub fn get_tsc_sync_mitigations(tsc_state: &TscState, num_vcpus: usize) -> TscSyncMitigations {
91 tsc_sync_mitigations_inner(tsc_state, num_vcpus, rdtsc_safe)
92 }
93
tsc_sync_mitigations_inner( tsc_state: &TscState, num_vcpus: usize, rdtsc: fn() -> u64, ) -> TscSyncMitigations94 fn tsc_sync_mitigations_inner(
95 tsc_state: &TscState,
96 num_vcpus: usize,
97 rdtsc: fn() -> u64,
98 ) -> TscSyncMitigations {
99 let mut mitigations = TscSyncMitigations::new(num_vcpus);
100 // If there's only one core grouping that means all the TSCs are in sync and no mitigations are
101 // needed.
102 if tsc_state.core_grouping.size() == 1 {
103 return mitigations;
104 }
105
106 let largest_group = tsc_state.core_grouping.largest_group();
107 let num_cores = tsc_state.offsets.len();
108
109 // If the largest core group is larger than the number of vcpus, just pin all vcpus to that core
110 // group, and no need to set offsets.
111 if largest_group.cores.len() >= num_vcpus {
112 let affinity: Vec<usize> = largest_group.cores.iter().map(|core| core.core).collect();
113 for i in 0..num_vcpus {
114 mitigations.affinities[i] = Some(affinity.clone());
115 }
116 } else {
117 // Otherwise, we pin each vcpu to a core and set it's offset to compensate.
118 let host_tsc_now = rdtsc();
119
120 for i in 0..num_vcpus {
121 // This handles the case where num_vcpus > num_cores, even though we try to avoid that
122 // in practice.
123 let pinned_core = i % num_cores;
124
125 mitigations.affinities[i] = Some(vec![pinned_core]);
126 // The guest TSC value is calculated like so:
127 // host_tsc + tsc_offset = guest_tsc
128 // If we assume that each host core has it's own error (core_offset), then it's more
129 // like this:
130 // host_tsc + core_offset + tsc_offset = guest_tsc
131 // We want guest_tsc to be 0 at boot, so the formula is this:
132 // host_tsc + core_offset + tsc_offset = 0
133 // and then you subtract host_tsc and core_offset from both sides and you get:
134 // tsc_offset = 0 - host_tsc - core_offset
135 mitigations.offsets[i] = Some(
136 0u64.wrapping_sub(host_tsc_now)
137 // Note: wrapping_add and casting tsc_state from an i64 to a u64 should be the
138 // same as using the future wrapping_add_signed function, which is only in
139 // nightly. This should be switched to using wrapping_add_signed once that is
140 // in stable.
141 .wrapping_add(tsc_state.offsets[pinned_core].1.wrapping_neg() as i64 as u64),
142 );
143 }
144 }
145
146 mitigations
147 }
148
#[cfg(test)]
mod tests {
    use std::time::Duration;

    use super::*;
    use crate::tsc::grouping::CoreGroup;
    use crate::tsc::grouping::CoreGrouping;
    use crate::tsc::grouping::CoreOffset;

    /// Stand-in for the host TSC read that always reports `u64::MAX`, so that `0 - host_tsc`
    /// wraps to exactly 1 in the expected offsets below.
    fn fake_rdtsc() -> u64 {
        u64::MAX
    }

    /// With every core in its own group, each vcpu is pinned to one core and gets an offset.
    #[test]
    fn test_sync_mitigation_set_offsets() {
        // frequency of 1GHz means 20 nanos is 20 ticks
        let state = TscState::new(
            1_000_000_000,
            vec![(0, 0), (1, 1000), (2, -1000), (3, 2000)],
            Duration::from_nanos(20),
        )
        .expect("TscState::new should not fail for this test");

        // Builds a group that contains a single core.
        let solo = |core, offset| CoreGroup {
            cores: vec![CoreOffset { core, offset }],
        };
        let expected_grouping =
            CoreGrouping::new(vec![solo(2, -1000), solo(0, 0), solo(1, 1000), solo(3, 2000)])
                .expect("CoreGrouping::new should not fail here");
        assert_eq!(state.core_grouping, expected_grouping);

        let mitigations = tsc_sync_mitigations_inner(&state, 4, fake_rdtsc);

        // core offsets are:
        //  - core 0: has an offset of 0, so TSC offset = 0 - u64::MAX - 0 = 1
        //  - core 1: has an offset of 1000, so TSC offset = 0 - u64::MAX - 1000 = -999
        //  - core 2: has an offset of -1000, so TSC offset = 0 - u64::MAX + 1000 = 1001
        //  - core 3: has an offset of 2000, so TSC offset = 0 - u64::MAX - 2000 = -1999
        let expected = [1, 1u64.wrapping_sub(1000), 1001u64, 1u64.wrapping_sub(2000)];

        for (vcpu, &want) in expected.iter().enumerate() {
            let offset = mitigations
                .get_vcpu_tsc_offset(vcpu)
                .unwrap_or_else(|| panic!("core {} should have an offset of {}", vcpu, want));
            assert_eq!(offset, want);

            let affinity = mitigations
                .get_vcpu_affinity(vcpu)
                .unwrap_or_else(|| panic!("core {} should have an affinity of [{}]", vcpu, vcpu));
            assert_eq!(affinity, vec![vcpu]);
        }
    }

    /// When one in-sync group can hold every vcpu, all vcpus share that group's cores and no
    /// offsets are set.
    #[test]
    fn test_sync_mitigation_large_group() {
        // 8 cores, and cores 1,3,5,7 are in-sync at offset -1000
        let offsets = vec![
            (0, 0),
            (1, -1000),
            (2, 1000),
            (3, -1000),
            (4, 2000),
            (5, -1000),
            (6, 3000),
            (7, -1000),
        ];
        // frequency of 1GHz means 20 nanos is 20 ticks
        let state = TscState::new(1_000_000_000, offsets, Duration::from_nanos(20))
            .expect("TscState::new should not fail for this test");

        let core_at = |core, offset| CoreOffset { core, offset };
        // Builds a group that contains a single core.
        let solo = |core, offset| CoreGroup {
            cores: vec![core_at(core, offset)],
        };
        let expected_grouping = CoreGrouping::new(vec![
            CoreGroup {
                cores: vec![
                    core_at(1, -1000),
                    core_at(3, -1000),
                    core_at(5, -1000),
                    core_at(7, -1000),
                ],
            },
            solo(0, 0),
            solo(2, 1000),
            solo(4, 2000),
            solo(6, 3000),
        ])
        .expect("CoreGrouping::new should not fail here");
        assert_eq!(state.core_grouping, expected_grouping);

        let num_vcpus = 4;
        let mitigations = tsc_sync_mitigations_inner(&state, num_vcpus, fake_rdtsc);

        let expected_affinity = vec![1, 3, 5, 7];
        for vcpu in 0..num_vcpus {
            let affinity = mitigations.get_vcpu_affinity(vcpu).unwrap_or_else(|| {
                panic!(
                    "core {} should have an affinity of {:?}",
                    vcpu, expected_affinity
                )
            });
            assert_eq!(affinity, expected_affinity);
            assert_eq!(mitigations.get_vcpu_tsc_offset(vcpu), None);
        }
    }

    /// With more vcpus than host cores, vcpus wrap around the cores.
    #[test]
    fn more_vcpus_than_cores() {
        // 4 cores, two can be grouped but it doesn't matter because we'll have more vcpus than
        // the largest group.
        let offsets = vec![(0, 0), (1, 0), (2, 1000), (3, 2000)];
        // frequency of 1GHz means 20 nanos is 20 ticks
        let state = TscState::new(1_000_000_000, offsets, Duration::from_nanos(20))
            .expect("TscState::new should not fail for this test");

        let core_at = |core, offset| CoreOffset { core, offset };
        let expected_grouping = CoreGrouping::new(vec![
            CoreGroup {
                cores: vec![core_at(0, 0), core_at(1, 0)],
            },
            CoreGroup {
                cores: vec![core_at(2, 1000)],
            },
            CoreGroup {
                cores: vec![core_at(3, 2000)],
            },
        ])
        .expect("CoreGrouping::new should not fail here");
        assert_eq!(state.core_grouping, expected_grouping);

        // 8 vcpus, more than we have cores
        let num_vcpus = 8;
        let mitigations = tsc_sync_mitigations_inner(&state, num_vcpus, fake_rdtsc);
        let expected_offsets = [1, 1, 1u64.wrapping_sub(1000), 1u64.wrapping_sub(2000)];

        for vcpu in 0..num_vcpus {
            // expected affinity and offset are both keyed on the vcpu modulo 4
            let core = vcpu % 4;

            let affinity = mitigations.get_vcpu_affinity(vcpu).unwrap_or_else(|| {
                panic!("core {} should have an affinity of {:?}", vcpu, core)
            });
            assert_eq!(affinity, vec![core]);

            let offset = mitigations.get_vcpu_tsc_offset(vcpu).unwrap_or_else(|| {
                panic!(
                    "core {} should have an offset of {:?}",
                    vcpu, expected_offsets[core]
                )
            });
            assert_eq!(offset, expected_offsets[core]);
        }
    }
}
372