// Copyright 2019 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

use std::convert::TryInto;

use anyhow::Context;
use base::error;
use base::info;
use base::AsRawDescriptor;
use base::Error as SysError;
use base::Event;
use base::RawDescriptor;
use base::Tube;
use base::TubeError;
use bit_field::*;
use remain::sorted;
use serde::Deserialize;
use serde::Serialize;
use thiserror::Error;
use vm_control::VmIrqRequest;
use vm_control::VmIrqResponse;
use zerocopy::AsBytes;
use zerocopy::FromBytes;
use zerocopy::FromZeroes;

use crate::pci::pci_configuration::PciCapConfig;
use crate::pci::pci_configuration::PciCapConfigWriteResult;
use crate::pci::PciCapability;
use crate::pci::PciCapabilityID;

// Upper bound on the number of vectors a single device may expose.
const MAX_MSIX_VECTORS_PER_DEVICE: u16 = 2048;
// Size in bytes of one MSI-X table entry; table offsets are split into
// (entry index, offset within entry) by dividing / taking the remainder by it.
pub const MSIX_TABLE_ENTRIES_MODULO: u64 = 16;
// Size in bytes of one Pending Bit Array entry (a u64).
pub const MSIX_PBA_ENTRIES_MODULO: u64 = 8;
// Number of pending bits stored in each PBA entry.
pub const BITS_PER_PBA_ENTRY: usize = 64;
// Function Mask bit (bit 14) of the Message Control word.
const FUNCTION_MASK_BIT: u16 = 0x4000;
// MSI-X Enable bit (bit 15) of the Message Control word.
const MSIX_ENABLE_BIT: u16 = 0x8000;
// Per-vector Mask bit (bit 0) of a table entry's Vector Control dword.
const MSIX_TABLE_ENTRY_MASK_BIT: u32 = 0x1;

/// One entry of the MSI-X table: message address (low/high), message data
/// and vector control, each stored as a 32-bit value.
#[derive(Serialize, Deserialize, Clone, Default)]
struct MsixTableEntry {
    msg_addr_lo: u32,
    msg_addr_hi: u32,
    msg_data: u32,
    vector_ctl: u32,
}

impl MsixTableEntry {
    /// Returns true when the per-vector Mask bit of this entry is set.
    fn masked(&self) -> bool {
        self.vector_ctl & MSIX_TABLE_ENTRY_MASK_BIT == MSIX_TABLE_ENTRY_MASK_BIT
    }
}

/// An event used to signal an interrupt together with the GSI it was
/// allocated at.
struct IrqfdGsi {
    irqfd: Event,
    gsi: u32,
}

/// Wrapper over MSI-X Capability Structure and MSI-X Tables
pub struct MsixConfig {
    // Emulated MSI-X table, one entry per vector.
    table_entries: Vec<MsixTableEntry>,
    // Pending Bit Array, 64 vectors per element.
    pba_entries: Vec<u64>,
    // Per-vector irqfd/GSI pair; `None` until the vector has been allocated.
    irq_vec: Vec<Option<IrqfdGsi>>,
    // State of the Function Mask bit in Message Control.
    masked: bool,
    // State of the MSI-X Enable bit in Message Control.
    enabled: bool,
    // Tube used to send `VmIrqRequest`s and receive `VmIrqResponse`s.
    msi_device_socket: Tube,
    // Number of vectors this configuration was created with.
    msix_num: u16,
    // Passed as `device_id` in MSI allocation requests.
    pci_id: u32,
    // Passed as `device_name` in MSI allocation requests and used in logs.
    device_name: String,
}

/// Serializable form of [`MsixConfig`]; holds everything except the tube
/// and the irqfd events.
#[derive(Serialize, Deserialize)]
struct MsixConfigSnapshot {
    table_entries: Vec<MsixTableEntry>,
    pba_entries: Vec<u64>,
    /// Just like MsixConfig::irq_vec, but only the GSI.
    irq_gsi_vec: Vec<Option<u32>>,
    masked: bool,
    enabled: bool,
    msix_num: u16,
    pci_id: u32,
    device_name: String,
}

/// Errors produced while allocating, routing, releasing or restoring MSI-X
/// vectors.
#[sorted]
#[derive(Error, Debug)]
pub enum MsixError {
    #[error("AddMsiRoute failed: {0}")]
    AddMsiRoute(SysError),
    #[error("failed to receive AddMsiRoute response: {0}")]
    AddMsiRouteRecv(TubeError),
    #[error("failed to send AddMsiRoute request: {0}")]
    AddMsiRouteSend(TubeError),
    #[error("AllocateOneMsi failed: {0}")]
    AllocateOneMsi(SysError),
    #[error("failed to receive AllocateOneMsi response: {0}")]
    AllocateOneMsiRecv(TubeError),
    #[error("failed to send AllocateOneMsi request: {0}")]
    AllocateOneMsiSend(TubeError),
    #[error("failed to deserialize snapshot: {0}")]
    DeserializationFailed(serde_json::Error),
    #[error("invalid vector length in snapshot: {0}")]
    InvalidVectorLength(std::num::TryFromIntError),
    #[error("ReleaseOneIrq failed: {0}")]
    ReleaseOneIrq(base::Error),
    #[error("failed to receive ReleaseOneIrq response: {0}")]
    ReleaseOneIrqRecv(TubeError),
    #[error("failed to send ReleaseOneIrq request: {0}")]
    ReleaseOneIrqSend(TubeError),
}

type MsixResult<T> = std::result::Result<T, MsixError>;

/// Outcome of a write to the MSI-X capability or table.
#[derive(Copy, Clone)]
pub enum MsixStatus {
    /// The Function Mask bit was toggled.
    Changed,
    /// The mask state of the table entry with the given index was toggled.
    EntryChanged(usize),
    /// The write did not change any mask state.
    NothingToDo,
}

impl PciCapConfigWriteResult for MsixStatus {}

impl MsixConfig {
    /// Create a configuration with `msix_vectors` vectors.
    ///
    /// Every table entry starts with its per-vector Mask bit set, the PBA is
    /// zeroed, no irqfd is allocated, and MSI-X starts disabled with the
    /// Function Mask clear.
    ///
    /// # Panics
    /// Panics if `msix_vectors` exceeds `MAX_MSIX_VECTORS_PER_DEVICE`.
    pub fn new(msix_vectors: u16, vm_socket: Tube, pci_id: u32, device_name: String) -> Self {
        assert!(msix_vectors <= MAX_MSIX_VECTORS_PER_DEVICE);

        let mut table_entries: Vec<MsixTableEntry> = Vec::new();
        table_entries.resize_with(msix_vectors as usize, Default::default);
        table_entries
            .iter_mut()
            .for_each(|entry| entry.vector_ctl |= MSIX_TABLE_ENTRY_MASK_BIT);
        let mut pba_entries: Vec<u64> = Vec::new();
        // Round up so that every vector has a pending bit.
        let num_pba_entries: usize =
            ((msix_vectors as usize) + BITS_PER_PBA_ENTRY - 1) / BITS_PER_PBA_ENTRY;
        pba_entries.resize_with(num_pba_entries, Default::default);

        let mut irq_vec = Vec::new();
        irq_vec.resize_with(msix_vectors.into(), || None::<IrqfdGsi>);

        MsixConfig {
            table_entries,
            pba_entries,
            irq_vec,
            masked: false,
            enabled: false,
            msi_device_socket: vm_socket,
            msix_num: msix_vectors,
            pci_id,
            device_name,
        }
    }

    /// Get the number of MSI-X vectors in this configuration.
    pub fn num_vectors(&self) -> u16 {
        self.msix_num
    }

    /// Check whether the Function Mask bit in Message Control word is set or not.
    /// If 1, all of the vectors associated with the function are masked,
    /// regardless of their per-vector Mask bit states.
    /// If 0, each vector's Mask bit determines whether the vector is masked or not.
    pub fn masked(&self) -> bool {
        self.masked
    }

    /// Check whether the per-vector Mask bit in the Vector Control word of
    /// the MSI-X table entry at `index` is set or not.
    /// If true, the vector is masked.
    /// If false, the vector is unmasked.
    /// An `index` beyond the end of the table is reported as masked.
    pub fn table_masked(&self, index: usize) -> bool {
        if index >= self.table_entries.len() {
            true
        } else {
            self.table_entries[index].masked()
        }
    }

    /// Check whether the MSI-X Enable bit in Message Control word is set or not.
    /// If 1, the function is permitted to use MSI-X to request service.
    pub fn enabled(&self) -> bool {
        self.enabled
    }

    /// Read the MSI-X Capability Structure.
    /// The top 2 bits in Message Control word are emulated and all other
    /// bits are read only.
read_msix_capability(&self, data: u32) -> u32187 pub fn read_msix_capability(&self, data: u32) -> u32 { 188 let mut msg_ctl = (data >> 16) as u16; 189 msg_ctl &= !(MSIX_ENABLE_BIT | FUNCTION_MASK_BIT); 190 191 if self.enabled { 192 msg_ctl |= MSIX_ENABLE_BIT; 193 } 194 if self.masked { 195 msg_ctl |= FUNCTION_MASK_BIT; 196 } 197 (msg_ctl as u32) << 16 | (data & u16::MAX as u32) 198 } 199 200 /// Write to the MSI-X Capability Structure. 201 /// Only the top 2 bits in Message Control Word are writable. write_msix_capability(&mut self, offset: u64, data: &[u8]) -> MsixStatus202 pub fn write_msix_capability(&mut self, offset: u64, data: &[u8]) -> MsixStatus { 203 if offset == 2 && data.len() == 2 { 204 let reg = u16::from_le_bytes([data[0], data[1]]); 205 let old_masked = self.masked; 206 let old_enabled = self.enabled; 207 208 self.masked = (reg & FUNCTION_MASK_BIT) == FUNCTION_MASK_BIT; 209 self.enabled = (reg & MSIX_ENABLE_BIT) == MSIX_ENABLE_BIT; 210 211 if !old_enabled && self.enabled { 212 if let Err(e) = self.msix_enable_all() { 213 error!("failed to enable MSI-X: {}", e); 214 self.enabled = false; 215 } 216 } 217 218 // If the Function Mask bit was set, and has just been cleared, it's 219 // important to go through the entire PBA to check if there was any 220 // pending MSI-X message to inject, given that the vector is not 221 // masked. 222 if old_masked && !self.masked { 223 for (index, entry) in self.table_entries.clone().iter().enumerate() { 224 if !entry.masked() && self.get_pba_bit(index as u16) == 1 { 225 self.inject_msix_and_clear_pba(index); 226 } 227 } 228 return MsixStatus::Changed; 229 } else if !old_masked && self.masked { 230 return MsixStatus::Changed; 231 } 232 } else { 233 error!( 234 "invalid write to MSI-X Capability Structure offset {:x}", 235 offset 236 ); 237 } 238 MsixStatus::NothingToDo 239 } 240 241 /// Create a snapshot of the current MsixConfig struct for use in 242 /// snapshotting. 
snapshot(&mut self) -> anyhow::Result<serde_json::Value>243 pub fn snapshot(&mut self) -> anyhow::Result<serde_json::Value> { 244 serde_json::to_value(MsixConfigSnapshot { 245 table_entries: self.table_entries.clone(), 246 pba_entries: self.pba_entries.clone(), 247 masked: self.masked, 248 enabled: self.enabled, 249 msix_num: self.msix_num, 250 pci_id: self.pci_id, 251 device_name: self.device_name.clone(), 252 irq_gsi_vec: self 253 .irq_vec 254 .iter() 255 .map(|irq_opt| irq_opt.as_ref().map(|irq| irq.gsi)) 256 .collect(), 257 }) 258 .context("failed to serialize MsixConfigSnapshot") 259 } 260 261 /// Restore a MsixConfig struct based on a snapshot. In short, this will 262 /// restore all data exposed via MMIO, and recreate all MSI-X vectors (they 263 /// will be re-wired to the irq chip). restore(&mut self, snapshot: serde_json::Value) -> MsixResult<()>264 pub fn restore(&mut self, snapshot: serde_json::Value) -> MsixResult<()> { 265 let snapshot: MsixConfigSnapshot = 266 serde_json::from_value(snapshot).map_err(MsixError::DeserializationFailed)?; 267 268 self.table_entries = snapshot.table_entries; 269 self.pba_entries = snapshot.pba_entries; 270 self.masked = snapshot.masked; 271 self.enabled = snapshot.enabled; 272 self.msix_num = snapshot.msix_num; 273 self.pci_id = snapshot.pci_id; 274 self.device_name = snapshot.device_name; 275 276 self.msix_release_all()?; 277 self.irq_vec 278 .resize_with(snapshot.irq_gsi_vec.len(), || None::<IrqfdGsi>); 279 for (vector, gsi) in snapshot.irq_gsi_vec.iter().enumerate() { 280 if let Some(gsi_num) = gsi { 281 self.msix_restore_one(vector, *gsi_num)?; 282 } else { 283 info!( 284 "skipping restore of vector {} for device {}", 285 vector, self.device_name 286 ); 287 } 288 } 289 Ok(()) 290 } 291 292 /// Restore the specified MSI-X vector. 
293 /// 294 /// Note: we skip the checks from [MsixConfig::msix_enable_one] because for 295 /// an interrupt to be present in [MsixConfigSnapshot::irq_gsi_vec], it must 296 /// have passed those checks. msix_restore_one(&mut self, index: usize, gsi: u32) -> MsixResult<()>297 fn msix_restore_one(&mut self, index: usize, gsi: u32) -> MsixResult<()> { 298 let irqfd = Event::new().map_err(MsixError::AllocateOneMsi)?; 299 let request = VmIrqRequest::AllocateOneMsiAtGsi { 300 irqfd, 301 gsi, 302 device_id: self.pci_id, 303 queue_id: index, 304 device_name: self.device_name.clone(), 305 }; 306 self.msi_device_socket 307 .send(&request) 308 .map_err(MsixError::AllocateOneMsiSend)?; 309 if let VmIrqResponse::Err(e) = self 310 .msi_device_socket 311 .recv() 312 .map_err(MsixError::AllocateOneMsiRecv)? 313 { 314 return Err(MsixError::AllocateOneMsi(e)); 315 }; 316 317 self.irq_vec[index] = Some(IrqfdGsi { 318 irqfd: match request { 319 VmIrqRequest::AllocateOneMsiAtGsi { irqfd, .. } => irqfd, 320 _ => unreachable!(), 321 }, 322 gsi, 323 }); 324 self.add_msi_route(index as u16, gsi)?; 325 Ok(()) 326 } 327 328 /// On warm restore, there could already be MSIs registered. We need to 329 /// release them in case the routing has changed (e.g. different 330 /// data <-> GSI). msix_release_all(&mut self) -> MsixResult<()>331 fn msix_release_all(&mut self) -> MsixResult<()> { 332 for irqfd_gsi in self.irq_vec.drain(..).flatten() { 333 let request = VmIrqRequest::ReleaseOneIrq { 334 gsi: irqfd_gsi.gsi, 335 irqfd: irqfd_gsi.irqfd, 336 }; 337 338 self.msi_device_socket 339 .send(&request) 340 .map_err(MsixError::ReleaseOneIrqSend)?; 341 if let VmIrqResponse::Err(e) = self 342 .msi_device_socket 343 .recv() 344 .map_err(MsixError::ReleaseOneIrqRecv)? 
345 { 346 return Err(MsixError::ReleaseOneIrq(e)); 347 } 348 } 349 Ok(()) 350 } 351 add_msi_route(&mut self, index: u16, gsi: u32) -> MsixResult<()>352 fn add_msi_route(&mut self, index: u16, gsi: u32) -> MsixResult<()> { 353 let mut data: [u8; 8] = [0, 0, 0, 0, 0, 0, 0, 0]; 354 self.read_msix_table((index * 16).into(), data.as_mut()); 355 let msi_address: u64 = u64::from_le_bytes(data); 356 let mut data: [u8; 4] = [0, 0, 0, 0]; 357 self.read_msix_table((index * 16 + 8).into(), data.as_mut()); 358 let msi_data: u32 = u32::from_le_bytes(data); 359 360 if msi_address == 0 { 361 return Ok(()); 362 } 363 364 self.msi_device_socket 365 .send(&VmIrqRequest::AddMsiRoute { 366 gsi, 367 msi_address, 368 msi_data, 369 }) 370 .map_err(MsixError::AddMsiRouteSend)?; 371 if let VmIrqResponse::Err(e) = self 372 .msi_device_socket 373 .recv() 374 .map_err(MsixError::AddMsiRouteRecv)? 375 { 376 return Err(MsixError::AddMsiRoute(e)); 377 } 378 Ok(()) 379 } 380 381 // Enable MSI-X msix_enable_all(&mut self) -> MsixResult<()>382 fn msix_enable_all(&mut self) -> MsixResult<()> { 383 for index in 0..self.irq_vec.len() { 384 self.msix_enable_one(index)?; 385 } 386 Ok(()) 387 } 388 389 // Use a new MSI-X vector 390 // Create a new eventfd and bind them to a new msi msix_enable_one(&mut self, index: usize) -> MsixResult<()>391 fn msix_enable_one(&mut self, index: usize) -> MsixResult<()> { 392 if self.irq_vec[index].is_some() 393 || !self.enabled() 394 || self.masked() 395 || self.table_masked(index) 396 { 397 return Ok(()); 398 } 399 let irqfd = Event::new().map_err(MsixError::AllocateOneMsi)?; 400 let request = VmIrqRequest::AllocateOneMsi { 401 irqfd, 402 device_id: self.pci_id, 403 queue_id: index, 404 device_name: self.device_name.clone(), 405 }; 406 self.msi_device_socket 407 .send(&request) 408 .map_err(MsixError::AllocateOneMsiSend)?; 409 let irq_num: u32 = match self 410 .msi_device_socket 411 .recv() 412 .map_err(MsixError::AllocateOneMsiRecv)? 
413 { 414 VmIrqResponse::AllocateOneMsi { gsi } => gsi, 415 VmIrqResponse::Err(e) => return Err(MsixError::AllocateOneMsi(e)), 416 _ => unreachable!(), 417 }; 418 self.irq_vec[index] = Some(IrqfdGsi { 419 irqfd: match request { 420 VmIrqRequest::AllocateOneMsi { irqfd, .. } => irqfd, 421 _ => unreachable!(), 422 }, 423 gsi: irq_num, 424 }); 425 426 self.add_msi_route(index as u16, irq_num)?; 427 Ok(()) 428 } 429 430 /// Read MSI-X table 431 /// # Arguments 432 /// * 'offset' - the offset within the MSI-X Table 433 /// * 'data' - used to store the read results 434 /// 435 /// For all accesses to MSI-X Table and MSI-X PBA fields, software must use aligned full 436 /// DWORD or aligned full QWORD transactions; otherwise, the result is undefined. 437 /// 438 /// location: DWORD3 DWORD2 DWORD1 DWORD0 439 /// entry 0: Vector Control Msg Data Msg Upper Addr Msg Addr 440 /// entry 1: Vector Control Msg Data Msg Upper Addr Msg Addr 441 /// entry 2: Vector Control Msg Data Msg Upper Addr Msg Addr 442 /// ... 
read_msix_table(&self, offset: u64, data: &mut [u8])443 pub fn read_msix_table(&self, offset: u64, data: &mut [u8]) { 444 let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize; 445 let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO; 446 447 match data.len() { 448 4 => { 449 let value = match modulo_offset { 450 0x0 => self.table_entries[index].msg_addr_lo, 451 0x4 => self.table_entries[index].msg_addr_hi, 452 0x8 => self.table_entries[index].msg_data, 453 0xc => self.table_entries[index].vector_ctl, 454 _ => { 455 error!("invalid offset"); 456 0 457 } 458 }; 459 460 data.copy_from_slice(&value.to_le_bytes()); 461 } 462 8 => { 463 let value = match modulo_offset { 464 0x0 => { 465 (u64::from(self.table_entries[index].msg_addr_hi) << 32) 466 | u64::from(self.table_entries[index].msg_addr_lo) 467 } 468 0x8 => { 469 (u64::from(self.table_entries[index].vector_ctl) << 32) 470 | u64::from(self.table_entries[index].msg_data) 471 } 472 _ => { 473 error!("invalid offset"); 474 0 475 } 476 }; 477 478 data.copy_from_slice(&value.to_le_bytes()); 479 } 480 _ => error!("invalid data length"), 481 }; 482 } 483 484 /// Write to MSI-X table 485 /// 486 /// Message Address: the contents of this field specifies the address 487 /// for the memory write transaction; different MSI-X vectors have 488 /// different Message Address values 489 /// Message Data: the contents of this field specifies the data driven 490 /// on AD\[31::00\] during the memory write transaction's data phase. 491 /// Vector Control: only bit 0 (Mask Bit) is not reserved: when this bit 492 /// is set, the function is prohibited from sending a message using 493 /// this MSI-X Table entry. 
write_msix_table(&mut self, offset: u64, data: &[u8]) -> MsixStatus494 pub fn write_msix_table(&mut self, offset: u64, data: &[u8]) -> MsixStatus { 495 let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize; 496 let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO; 497 498 // Store the value of the entry before modification 499 let old_entry = self.table_entries[index].clone(); 500 501 match data.len() { 502 4 => { 503 let value = u32::from_le_bytes(data.try_into().unwrap()); 504 match modulo_offset { 505 0x0 => self.table_entries[index].msg_addr_lo = value, 506 0x4 => self.table_entries[index].msg_addr_hi = value, 507 0x8 => self.table_entries[index].msg_data = value, 508 0xc => self.table_entries[index].vector_ctl = value, 509 _ => error!("invalid offset"), 510 }; 511 } 512 8 => { 513 let value = u64::from_le_bytes(data.try_into().unwrap()); 514 match modulo_offset { 515 0x0 => { 516 self.table_entries[index].msg_addr_lo = (value & 0xffff_ffffu64) as u32; 517 self.table_entries[index].msg_addr_hi = (value >> 32) as u32; 518 } 519 0x8 => { 520 self.table_entries[index].msg_data = (value & 0xffff_ffffu64) as u32; 521 self.table_entries[index].vector_ctl = (value >> 32) as u32; 522 } 523 _ => error!("invalid offset"), 524 }; 525 } 526 _ => error!("invalid data length"), 527 }; 528 529 let new_entry = self.table_entries[index].clone(); 530 531 // This MSI-X vector is enabled for the first time. 
532 if self.enabled() 533 && !self.masked() 534 && self.irq_vec[index].is_none() 535 && old_entry.masked() 536 && !new_entry.masked() 537 { 538 if let Err(e) = self.msix_enable_one(index) { 539 error!("failed to enable MSI-X vector {}: {}", index, e); 540 self.table_entries[index].vector_ctl |= MSIX_TABLE_ENTRY_MASK_BIT; 541 } 542 return MsixStatus::EntryChanged(index); 543 } 544 545 if self.enabled() 546 && (old_entry.msg_addr_lo != new_entry.msg_addr_lo 547 || old_entry.msg_addr_hi != new_entry.msg_addr_hi 548 || old_entry.msg_data != new_entry.msg_data) 549 { 550 if let Some(irqfd_gsi) = &self.irq_vec[index] { 551 let irq_num = irqfd_gsi.gsi; 552 if let Err(e) = self.add_msi_route(index as u16, irq_num) { 553 error!("add_msi_route failed: {}", e); 554 } 555 } 556 } 557 558 // After the MSI-X table entry has been updated, it is necessary to 559 // check if the vector control masking bit has changed. In case the 560 // bit has been flipped from 1 to 0, we need to inject a MSI message 561 // if the corresponding pending bit from the PBA is set. Once the MSI 562 // has been injected, the pending bit in the PBA needs to be cleared. 563 // All of this is valid only if MSI-X has not been masked for the whole 564 // device. 565 566 // Check if bit has been flipped 567 if !self.masked() { 568 if old_entry.masked() && !self.table_entries[index].masked() { 569 if self.get_pba_bit(index as u16) == 1 { 570 self.inject_msix_and_clear_pba(index); 571 } 572 return MsixStatus::EntryChanged(index); 573 } else if !old_entry.masked() && self.table_entries[index].masked() { 574 return MsixStatus::EntryChanged(index); 575 } 576 } 577 MsixStatus::NothingToDo 578 } 579 580 /// Read PBA Entries 581 /// # Arguments 582 /// * 'offset' - the offset within the PBA entries 583 /// * 'data' - used to store the read results 584 /// 585 /// Pending Bits\[63::00\]: For each Pending Bit that is set, the function 586 /// has a pending message for the associated MSI-X Table entry. 
read_pba_entries(&self, offset: u64, data: &mut [u8])587 pub fn read_pba_entries(&self, offset: u64, data: &mut [u8]) { 588 let index: usize = (offset / MSIX_PBA_ENTRIES_MODULO) as usize; 589 let modulo_offset = offset % MSIX_PBA_ENTRIES_MODULO; 590 591 match data.len() { 592 4 => { 593 let value: u32 = match modulo_offset { 594 0x0 => (self.pba_entries[index] & 0xffff_ffffu64) as u32, 595 0x4 => (self.pba_entries[index] >> 32) as u32, 596 _ => { 597 error!("invalid offset"); 598 0 599 } 600 }; 601 602 data.copy_from_slice(&value.to_le_bytes()); 603 } 604 8 => { 605 let value: u64 = match modulo_offset { 606 0x0 => self.pba_entries[index], 607 _ => { 608 error!("invalid offset"); 609 0 610 } 611 }; 612 613 data.copy_from_slice(&value.to_le_bytes()); 614 } 615 _ => error!("invalid data length"), 616 } 617 } 618 619 /// Write to PBA Entries 620 /// 621 /// Software should never write, and should only read Pending Bits. 622 /// If software writes to Pending Bits, the result is undefined. write_pba_entries(&mut self, _offset: u64, _data: &[u8])623 pub fn write_pba_entries(&mut self, _offset: u64, _data: &[u8]) { 624 error!("Pending Bit Array is read only"); 625 } 626 set_pba_bit(&mut self, vector: u16, set: bool)627 fn set_pba_bit(&mut self, vector: u16, set: bool) { 628 assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE); 629 630 let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY; 631 let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY; 632 let mut mask: u64 = (1 << shift) as u64; 633 634 if set { 635 self.pba_entries[index] |= mask; 636 } else { 637 mask = !mask; 638 self.pba_entries[index] &= mask; 639 } 640 } 641 get_pba_bit(&self, vector: u16) -> u8642 fn get_pba_bit(&self, vector: u16) -> u8 { 643 assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE); 644 645 let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY; 646 let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY; 647 648 ((self.pba_entries[index] >> shift) & 0x0000_0001u64) as u8 649 } 650 
inject_msix_and_clear_pba(&mut self, vector: usize)651 fn inject_msix_and_clear_pba(&mut self, vector: usize) { 652 if let Some(irq) = &self.irq_vec[vector] { 653 irq.irqfd.signal().unwrap(); 654 } 655 656 // Clear the bit from PBA 657 self.set_pba_bit(vector as u16, false); 658 } 659 660 /// Inject virtual interrupt to the guest 661 /// 662 /// # Arguments 663 /// * 'vector' - the index to the MSI-X Table entry 664 /// 665 /// PCI Spec 3.0 6.8.3.5: while a vector is masked, the function is 666 /// prohibited from sending the associated message, and the function 667 /// must set the associated Pending bit whenever the function would 668 /// otherwise send the message. When software unmasks a vector whose 669 /// associated Pending bit is set, the function must schedule sending 670 /// the associated message, and clear the Pending bit as soon as the 671 /// message has been sent. 672 /// 673 /// If the vector is unmasked, writing to irqfd which wakes up KVM to 674 /// inject virtual interrupt to the guest. 
pub fn trigger(&mut self, vector: u16) {
    let index = usize::from(vector);

    // A vector outside the table has no mask state and no pending bit, so
    // log and drop the request instead of panicking on the table lookup.
    let entry_masked = match self.table_entries.get(index) {
        Some(entry) => entry.masked(),
        None => {
            error!("trigger for out-of-range MSI-X vector {}", vector);
            return;
        }
    };

    if entry_masked || self.masked() {
        // Masked: remember the interrupt in the PBA instead of sending it.
        self.set_pba_bit(vector, true);
    } else if let Some(Some(irq)) = self.irq_vec.get(index) {
        irq.irqfd.signal().unwrap();
    }
}

/// Return the raw descriptor of the MSI device socket
pub fn get_msi_socket(&self) -> RawDescriptor {
    self.msi_device_socket.as_raw_descriptor()
}

/// Return irqfd of MSI-X Table entry
///
/// # Arguments
/// * 'vector' - the index to the MSI-X table entry
///
/// Returns `None` when `vector` is out of range or has no irqfd allocated.
pub fn get_irqfd(&self, vector: usize) -> Option<&Event> {
    self.irq_vec.get(vector)?.as_ref().map(|irq| &irq.irqfd)
}

/// Release every allocated vector, last to first, leaving `irq_vec` empty.
///
/// This is best effort: send failures and responses are ignored.
pub fn destroy(&mut self) {
    while let Some(irq) = self.irq_vec.pop() {
        if let Some(irq) = irq {
            let request = VmIrqRequest::ReleaseOneIrq {
                gsi: irq.gsi,
                irqfd: irq.irqfd,
            };
            if self.msi_device_socket.send(&request).is_err() {
                continue;
            }
            let _ = self.msi_device_socket.recv::<VmIrqResponse>();
        }
    }
}
} // impl MsixConfig

// Only the top two bits of the first capability dword (MSI-X Enable and
// Function Mask in Message Control) are supplied by `read_reg`.
const MSIX_CONFIG_READ_MASK: [u32; 3] = [0xc000_0000, 0, 0];

impl PciCapConfig for MsixConfig {
    fn read_mask(&self) -> &'static [u32] {
        &MSIX_CONFIG_READ_MASK
    }

    fn read_reg(&self, reg_idx: usize) -> u32 {
        if reg_idx == 0 {
            self.read_msix_capability(0)
        } else {
            0
        }
    }

    fn write_reg(
        &mut self,
        reg_idx: usize,
        offset: u64,
        data: &[u8],
    ) -> Option<Box<dyn PciCapConfigWriteResult>> {
        // Only register 0 holds the writable Message Control word.
        let status = if reg_idx == 0 {
            self.write_msix_capability(offset, data)
        } else {
            MsixStatus::NothingToDo
        };
        Some(Box::new(status))
    }
}

impl AsRawDescriptor for MsixConfig {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.msi_device_socket.as_raw_descriptor()
    }
}

/// Message Control Register
//   10-0:  MSI-X Table size
//   13-11: Reserved
//   14:    Mask. Mask all MSI-X when set.
//   15:    Enable. Enable all MSI-X when set.
// See <https://wiki.osdev.org/PCI#Enabling_MSI-X> for the details.
#[bitfield]
#[derive(Copy, Clone, Default, AsBytes, FromZeroes, FromBytes)]
pub struct MsixCtrl {
    // Field widths follow the layout above: 11 + 3 + 1 + 1 = 16 bits, which
    // keeps `mask` at bit 14 and `enable` at bit 15.
    table_size: B11,
    reserved: B3,
    mask: B1,
    enable: B1,
}

#[allow(dead_code)]
#[repr(C)]
#[derive(Clone, Copy, Default, AsBytes, FromZeroes, FromBytes)]
/// MSI-X Capability Structure
pub struct MsixCap {
    // To make add_capability() happy
    _cap_vndr: u8,
    _cap_next: u8,
    // Message Control Register
    msg_ctl: MsixCtrl,
    // Table. Contains the offset and the BAR indicator (BIR)
    //   2-0:  Table BAR indicator (BIR). Can be 0 to 5.
    //   31-3: Table offset in the BAR pointed by the BIR.
    table: u32,
    // Pending Bit Array. Contains the offset and the BAR indicator (BIR)
    //   2-0:  PBA BAR indicator (BIR). Can be 0 to 5.
    //   31-3: PBA offset in the BAR pointed by the BIR.
    pba: u32,
}

impl PciCapability for MsixCap {
    fn bytes(&self) -> &[u8] {
        self.as_bytes()
    }

    fn id(&self) -> PciCapabilityID {
        PciCapabilityID::Msix
    }

    fn writable_bits(&self) -> Vec<u32> {
        // Only msg_ctl[15:14] is writable. Message Control is the upper half
        // of the first dword, so those are dword bits 31:30.
        vec![0xc000_0000, 0, 0]
    }
}

impl MsixCap {
    /// Build the capability for a table of `table_size` vectors.
    ///
    /// `table_pci_bar`/`table_off` and `pba_pci_bar`/`pba_off` give the BAR
    /// index and offset of the table and of the PBA; the low three bits of
    /// each offset are dropped and replaced by the low three bits of the
    /// BAR index.
    ///
    /// # Panics
    /// Panics if `table_size` is zero or exceeds
    /// `MAX_MSIX_VECTORS_PER_DEVICE`.
    pub fn new(
        table_pci_bar: u8,
        table_size: u16,
        table_off: u32,
        pba_pci_bar: u8,
        pba_off: u32,
    ) -> Self {
        // Same upper bound as MsixConfig::new; zero is rejected because the
        // N - 1 encoding below cannot represent an empty table.
        assert!(table_size >= 1 && table_size <= MAX_MSIX_VECTORS_PER_DEVICE);

        // Set the table size and enable MSI-X.
        let mut msg_ctl = MsixCtrl::new();
        msg_ctl.set_enable(1);
        // Table Size is N - 1 encoded.
        msg_ctl.set_table_size(table_size - 1);

        MsixCap {
            _cap_vndr: 0,
            _cap_next: 0,
            msg_ctl,
            table: (table_off & 0xffff_fff8u32) | u32::from(table_pci_bar & 0x7u8),
            pba: (pba_off & 0xffff_fff8u32) | u32::from(pba_pci_bar & 0x7u8),
        }
    }
}

#[cfg(test)]
mod tests {

    use std::thread;

    use super::*;

    #[track_caller]
    fn recv_allocate_msi(t: &Tube) -> u32 {
        match t.recv::<VmIrqRequest>().unwrap() {
            VmIrqRequest::AllocateOneMsiAtGsi { gsi, .. } => gsi,
            msg => panic!("unexpected irqchip message: {:?}", msg),
        }
    }

    struct MsiRouteDetails {
        gsi: u32,
        msi_address: u64,
        msi_data: u32,
    }

    #[track_caller]
    fn recv_add_msi_route(t: &Tube) -> MsiRouteDetails {
        match t.recv::<VmIrqRequest>().unwrap() {
            VmIrqRequest::AddMsiRoute {
                gsi,
                msi_address,
                msi_data,
            } => MsiRouteDetails {
                gsi,
                msi_address,
                msi_data,
            },
            msg => panic!("unexpected irqchip message: {:?}", msg),
        }
    }

    #[track_caller]
    fn recv_release_one_irq(t: &Tube) -> u32 {
        match t.recv::<VmIrqRequest>().unwrap() {
            VmIrqRequest::ReleaseOneIrq { gsi, irqfd: _ } => gsi,
            msg => panic!("unexpected irqchip message: {:?}", msg),
        }
    }

    #[track_caller]
    fn send_ok(t: &Tube) {
        t.send(&VmIrqResponse::Ok).unwrap();
    }

    /// Tests a cold restore where there are no existing vectors at the time
    /// restore is called.
    #[test]
    fn verify_msix_restore_cold_smoke() {
        let (irqchip_tube, msix_config_tube) = Tube::pair().unwrap();
        let (_unused, unused_config_tube) = Tube::pair().unwrap();

        let mut cfg = MsixConfig::new(2, unused_config_tube, 0, "test_device".to_owned());

        // Set up two MSI-X vectors (0 and 1).
        // Data is 0xdVEC_NUM. Address is 0xaVEC_NUM.
        cfg.table_entries[0].msg_data = 0xd0;
        cfg.table_entries[0].msg_addr_lo = 0xa0;
        cfg.table_entries[0].msg_addr_hi = 0;
        cfg.table_entries[1].msg_data = 0xd1;
        cfg.table_entries[1].msg_addr_lo = 0xa1;
        cfg.table_entries[1].msg_addr_hi = 0;

        // Pretend that these vectors were hooked up to GSIs 10 & 20,
        // respectively.
        cfg.irq_vec = vec![
            Some(IrqfdGsi {
                gsi: 10,
                irqfd: Event::new().unwrap(),
            }),
            Some(IrqfdGsi {
                gsi: 20,
                irqfd: Event::new().unwrap(),
            }),
        ];

        // Take a snapshot of MsixConfig.
        let snapshot = cfg.snapshot().unwrap();

        // Create a fake irqchip to respond to our requests
        let irqchip_fake = thread::spawn(move || {
            assert_eq!(recv_allocate_msi(&irqchip_tube), 10);
            send_ok(&irqchip_tube);
            let route_one = recv_add_msi_route(&irqchip_tube);
            assert_eq!(route_one.gsi, 10);
            assert_eq!(route_one.msi_address, 0xa0);
            assert_eq!(route_one.msi_data, 0xd0);
            send_ok(&irqchip_tube);

            assert_eq!(recv_allocate_msi(&irqchip_tube), 20);
            send_ok(&irqchip_tube);
            let route_two = recv_add_msi_route(&irqchip_tube);
            assert_eq!(route_two.gsi, 20);
            assert_eq!(route_two.msi_address, 0xa1);
            assert_eq!(route_two.msi_data, 0xd1);
            send_ok(&irqchip_tube);
            irqchip_tube
        });

        let mut restored_cfg = MsixConfig::new(10, msix_config_tube, 10, "some_device".to_owned());
        restored_cfg.restore(snapshot).unwrap();
        irqchip_fake.join().unwrap();

        assert_eq!(restored_cfg.pci_id, 0);
        assert_eq!(restored_cfg.device_name, "test_device");
    }

    /// Tests a warm restore where there are existing vectors at the time
    /// restore is called. These vectors need to be released first.
    #[test]
    fn verify_msix_restore_warm_smoke() {
        let (irqchip_tube, msix_config_tube) = Tube::pair().unwrap();

        let mut cfg = MsixConfig::new(2, msix_config_tube, 0, "test_device".to_owned());

        // Set up two MSI-X vectors (0 and 1).
        // Data is 0xdVEC_NUM. Address is 0xaVEC_NUM.
        cfg.table_entries[0].msg_data = 0xd0;
        cfg.table_entries[0].msg_addr_lo = 0xa0;
        cfg.table_entries[0].msg_addr_hi = 0;
        cfg.table_entries[1].msg_data = 0xd1;
        cfg.table_entries[1].msg_addr_lo = 0xa1;
        cfg.table_entries[1].msg_addr_hi = 0;

        // Pretend that these vectors were hooked up to GSIs 10 & 20,
        // respectively.
        cfg.irq_vec = vec![
            Some(IrqfdGsi {
                gsi: 10,
                irqfd: Event::new().unwrap(),
            }),
            Some(IrqfdGsi {
                gsi: 20,
                irqfd: Event::new().unwrap(),
            }),
        ];

        // Take a snapshot of MsixConfig.
        let snapshot = cfg.snapshot().unwrap();

        // Create a fake irqchip to respond to our requests
        let irqchip_fake = thread::spawn(move || {
            // First, we free the existing vectors / GSIs.
            assert_eq!(recv_release_one_irq(&irqchip_tube), 10);
            send_ok(&irqchip_tube);
            assert_eq!(recv_release_one_irq(&irqchip_tube), 20);
            send_ok(&irqchip_tube);

            // Now we re-allocate them.
            assert_eq!(recv_allocate_msi(&irqchip_tube), 10);
            send_ok(&irqchip_tube);
            let route_one = recv_add_msi_route(&irqchip_tube);
            assert_eq!(route_one.gsi, 10);
            assert_eq!(route_one.msi_address, 0xa0);
            assert_eq!(route_one.msi_data, 0xd0);
            send_ok(&irqchip_tube);

            assert_eq!(recv_allocate_msi(&irqchip_tube), 20);
            send_ok(&irqchip_tube);
            let route_two = recv_add_msi_route(&irqchip_tube);
            assert_eq!(route_two.gsi, 20);
            assert_eq!(route_two.msi_address, 0xa1);
            assert_eq!(route_two.msi_data, 0xd1);
            send_ok(&irqchip_tube);
            irqchip_tube
        });

        cfg.restore(snapshot).unwrap();
        irqchip_fake.join().unwrap();

        assert_eq!(cfg.pci_id, 0);
        assert_eq!(cfg.device_name, "test_device");
    }

    /// The capability must encode the largest allowed table (N - 1 in the
    /// 11-bit table size field) and advertise msg_ctl[15:14] as writable.
    #[test]
    fn verify_msix_cap_encoding() {
        let cap = MsixCap::new(0, MAX_MSIX_VECTORS_PER_DEVICE, 0, 0, 0);
        assert_eq!(cap.msg_ctl.get_table_size(), MAX_MSIX_VECTORS_PER_DEVICE - 1);
        assert_eq!(cap.msg_ctl.get_enable(), 1);
        assert_eq!(cap.writable_bits(), vec![0xc000_0000, 0, 0]);
    }
}