1 // Copyright 2017 The ChromiumOS Authors 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 use std::collections::BTreeMap; 6 use std::fs::OpenOptions; 7 use std::os::unix::prelude::OpenOptionsExt; 8 9 use anyhow::anyhow; 10 use anyhow::Context; 11 use base::error; 12 use base::open_file_or_duplicate; 13 use base::warn; 14 use base::AsRawDescriptor; 15 use base::Event; 16 use base::RawDescriptor; 17 use base::WorkerThread; 18 use data_model::Le64; 19 use serde::Deserialize; 20 use serde::Serialize; 21 use vhost::Vhost; 22 use vhost::Vsock as VhostVsockHandle; 23 use vm_memory::GuestMemory; 24 use zerocopy::AsBytes; 25 26 use super::worker::VringBase; 27 use super::worker::Worker; 28 use super::Error; 29 use super::Result; 30 use crate::virtio::copy_config; 31 use crate::virtio::device_constants::vsock::NUM_QUEUES; 32 use crate::virtio::vsock::VsockConfig; 33 use crate::virtio::DeviceType; 34 use crate::virtio::Interrupt; 35 use crate::virtio::Queue; 36 use crate::virtio::VirtioDevice; 37 38 const QUEUE_SIZE: u16 = 256; 39 const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE; NUM_QUEUES]; 40 41 pub struct Vsock { 42 worker_thread: Option<WorkerThread<Worker<VhostVsockHandle>>>, 43 vhost_handle: Option<VhostVsockHandle>, 44 cid: u64, 45 interrupts: Option<Vec<Event>>, 46 avail_features: u64, 47 acked_features: u64, 48 // vrings_base states: 49 // None - device was just created or is running. 50 // Some - device was put to sleep after running or was restored. 51 vrings_base: Option<Vec<VringBase>>, 52 // Some iff the device is active and awake. 53 event_queue: Option<Queue>, 54 // If true, we should send a TRANSPORT_RESET event to the guest at the next opportunity. 55 needs_transport_reset: bool, 56 } 57 58 #[derive(Serialize, Deserialize)] 59 struct VsockSnapshot { 60 cid: u64, 61 avail_features: u64, 62 acked_features: u64, 63 vrings_base: Vec<VringBase>, 64 } 65 66 impl Vsock { 67 /// Create a new virtio-vsock device with the given VM cid. new(base_features: u64, vsock_config: &VsockConfig) -> anyhow::Result<Vsock>68 pub fn new(base_features: u64, vsock_config: &VsockConfig) -> anyhow::Result<Vsock> { 69 let device_file = open_file_or_duplicate( 70 &vsock_config.vhost_device, 71 OpenOptions::new() 72 .read(true) 73 .write(true) 74 .custom_flags(libc::O_CLOEXEC | libc::O_NONBLOCK), 75 ) 76 .with_context(|| { 77 format!( 78 "failed to open virtual socket device {}", 79 vsock_config.vhost_device.display(), 80 ) 81 })?; 82 83 let handle = VhostVsockHandle::new(device_file); 84 85 let avail_features = base_features; 86 87 let mut interrupts = Vec::new(); 88 for _ in 0..NUM_QUEUES { 89 interrupts.push(Event::new().map_err(Error::VhostIrqCreate)?); 90 } 91 92 Ok(Vsock { 93 worker_thread: None, 94 vhost_handle: Some(handle), 95 cid: vsock_config.cid, 96 interrupts: Some(interrupts), 97 avail_features, 98 acked_features: 0, 99 vrings_base: None, 100 event_queue: None, 101 needs_transport_reset: false, 102 }) 103 } 104 new_for_testing(cid: u64, features: u64) -> Vsock105 pub fn new_for_testing(cid: u64, features: u64) -> Vsock { 106 Vsock { 107 worker_thread: None, 108 vhost_handle: None, 109 cid, 110 interrupts: None, 111 avail_features: features, 112 acked_features: 0, 113 vrings_base: None, 114 event_queue: None, 115 needs_transport_reset: false, 116 } 117 } 118 acked_features(&self) -> u64119 pub fn acked_features(&self) -> u64 { 120 self.acked_features 121 } 122 } 123 124 impl VirtioDevice for Vsock { keep_rds(&self) -> Vec<RawDescriptor>125 fn keep_rds(&self) -> Vec<RawDescriptor> { 126 let mut keep_rds = Vec::new(); 127 128 if let Some(handle) = &self.vhost_handle { 129 keep_rds.push(handle.as_raw_descriptor()); 130 } 131 132 if let Some(interrupt) = &self.interrupts { 133 for vhost_int in interrupt.iter() { 134 keep_rds.push(vhost_int.as_raw_descriptor()); 135 } 136 } 137 138 keep_rds 139 } 140 device_type(&self) -> DeviceType141 fn device_type(&self) -> DeviceType { 142 DeviceType::Vsock 143 } 144 queue_max_sizes(&self) -> &[u16]145 fn queue_max_sizes(&self) -> &[u16] { 146 QUEUE_SIZES 147 } 148 features(&self) -> u64149 fn features(&self) -> u64 { 150 self.avail_features 151 } 152 read_config(&self, offset: u64, data: &mut [u8])153 fn read_config(&self, offset: u64, data: &mut [u8]) { 154 let cid = Le64::from(self.cid); 155 copy_config(data, 0, cid.as_bytes(), offset); 156 } 157 ack_features(&mut self, value: u64)158 fn ack_features(&mut self, value: u64) { 159 let mut v = value; 160 161 // Check if the guest is ACK'ing a feature that we didn't claim to have. 162 let unrequested_features = v & !self.avail_features; 163 if unrequested_features != 0 { 164 warn!("vsock: virtio-vsock got unknown feature ack: {:x}", v); 165 166 // Don't count these features as acked. 167 v &= !unrequested_features; 168 } 169 self.acked_features |= v; 170 } 171 activate( &mut self, mem: GuestMemory, interrupt: Interrupt, mut queues: BTreeMap<usize, Queue>, ) -> anyhow::Result<()>172 fn activate( 173 &mut self, 174 mem: GuestMemory, 175 interrupt: Interrupt, 176 mut queues: BTreeMap<usize, Queue>, 177 ) -> anyhow::Result<()> { 178 if queues.len() != NUM_QUEUES { 179 return Err(anyhow!( 180 "net: expected {} queues, got {}", 181 NUM_QUEUES, 182 queues.len() 183 )); 184 } 185 186 let vhost_handle = self.vhost_handle.take().context("missing vhost_handle")?; 187 let interrupts = self.interrupts.take().context("missing interrupts")?; 188 let acked_features = self.acked_features; 189 let cid = self.cid; 190 191 // The third vq is an event-only vq that is not handled by the vhost 192 // subsystem (but still needs to exist). Split it off here. 193 let mut event_queue = queues.remove(&2).unwrap(); 194 // Send TRANSPORT_RESET event if needed. 195 if self.needs_transport_reset { 196 self.needs_transport_reset = false; 197 198 // We assume the event queue is non-empty. This should be OK for existing use cases 199 // because we expect the guest vsock driver to be initialized at the time of snapshot 200 // and this is only the event we ever write to the queue. 201 // 202 // If that assumption becomes invalid, we could integrate this logic into the worker 203 // thread's event loop so that it can wait for space in the queue. 204 let mut avail_desc = event_queue 205 .pop() 206 .expect("event queue is empty, can't send transport reset event"); 207 let transport_reset = virtio_sys::virtio_vsock::virtio_vsock_event{ 208 id: virtio_sys::virtio_vsock::virtio_vsock_event_id_VIRTIO_VSOCK_EVENT_TRANSPORT_RESET.into(), 209 }; 210 avail_desc 211 .writer 212 .write_obj(transport_reset) 213 .expect("failed to write transport reset event"); 214 let len = avail_desc.writer.bytes_written() as u32; 215 event_queue.add_used(avail_desc, len); 216 event_queue.trigger_interrupt(); 217 } 218 self.event_queue = Some(event_queue); 219 220 let mut worker = Worker::new( 221 queues, 222 vhost_handle, 223 interrupts, 224 interrupt, 225 acked_features, 226 None, 227 ); 228 let activate_vqs = |handle: &VhostVsockHandle| -> Result<()> { 229 handle.set_cid(cid).map_err(Error::VhostVsockSetCid)?; 230 handle.start().map_err(Error::VhostVsockStart)?; 231 Ok(()) 232 }; 233 worker 234 .init(mem, QUEUE_SIZES, activate_vqs, self.vrings_base.take()) 235 .context("vsock worker init exited with error")?; 236 237 self.worker_thread = Some(WorkerThread::start("vhost_vsock", move |kill_evt| { 238 let cleanup_vqs = |_handle: &VhostVsockHandle| -> Result<()> { Ok(()) }; 239 let result = worker.run(cleanup_vqs, kill_evt); 240 if let Err(e) = result { 241 error!("vsock worker thread exited with error: {:?}", e); 242 } 243 worker 244 })); 245 246 Ok(()) 247 } 248 reset(&mut self) -> anyhow::Result<()>249 fn reset(&mut self) -> anyhow::Result<()> { 250 if let Some(worker_thread) = self.worker_thread.take() { 251 let worker = worker_thread.stop(); 252 worker 253 .vhost_handle 254 .stop() 255 .context("failed to stop vrings")?; 256 // Call get_vring_base to stop the queues. 257 for (pos, _) in worker.queues.iter() { 258 worker 259 .vhost_handle 260 .get_vring_base(*pos) 261 .context("get_vring_base failed")?; 262 } 263 264 self.vhost_handle = Some(worker.vhost_handle); 265 self.interrupts = Some(worker.vhost_interrupt); 266 } 267 self.acked_features = 0; 268 self.vrings_base = None; 269 self.event_queue = None; 270 self.needs_transport_reset = false; 271 Ok(()) 272 } 273 on_device_sandboxed(&mut self)274 fn on_device_sandboxed(&mut self) { 275 // ignore the error but to log the error. We don't need to do 276 // anything here because when activate, the other vhost set up 277 // will be failed to stop the activate thread. 278 if let Some(vhost_handle) = &self.vhost_handle { 279 match vhost_handle.set_owner() { 280 Ok(_) => {} 281 Err(e) => error!("{}: failed to set owner: {:?}", self.debug_label(), e), 282 } 283 } 284 } 285 virtio_sleep(&mut self) -> anyhow::Result<Option<BTreeMap<usize, Queue>>>286 fn virtio_sleep(&mut self) -> anyhow::Result<Option<BTreeMap<usize, Queue>>> { 287 if let Some(worker_thread) = self.worker_thread.take() { 288 let worker = worker_thread.stop(); 289 self.interrupts = Some(worker.vhost_interrupt); 290 worker 291 .vhost_handle 292 .stop() 293 .context("failed to stop vrings")?; 294 let mut queues: BTreeMap<usize, Queue> = worker.queues; 295 let mut vrings_base = Vec::new(); 296 for (pos, _) in queues.iter() { 297 let vring_base = VringBase { 298 index: *pos, 299 base: worker.vhost_handle.get_vring_base(*pos)?, 300 }; 301 vrings_base.push(vring_base); 302 } 303 self.vrings_base = Some(vrings_base); 304 self.vhost_handle = Some(worker.vhost_handle); 305 queues.insert( 306 2, 307 self.event_queue.take().expect("Vsock event queue missing"), 308 ); 309 return Ok(Some(BTreeMap::from_iter(queues))); 310 } 311 Ok(None) 312 } 313 virtio_wake( &mut self, device_state: Option<(GuestMemory, Interrupt, BTreeMap<usize, Queue>)>, ) -> anyhow::Result<()>314 fn virtio_wake( 315 &mut self, 316 device_state: Option<(GuestMemory, Interrupt, BTreeMap<usize, Queue>)>, 317 ) -> anyhow::Result<()> { 318 match device_state { 319 None => Ok(()), 320 Some((mem, interrupt, queues)) => { 321 // TODO: activate is just what we want at the moment, but we should probably move 322 // it into a "start workers" function to make it obvious that it isn't strictly 323 // used for activate events. 324 self.activate(mem, interrupt, queues)?; 325 Ok(()) 326 } 327 } 328 } 329 virtio_snapshot(&mut self) -> anyhow::Result<serde_json::Value>330 fn virtio_snapshot(&mut self) -> anyhow::Result<serde_json::Value> { 331 let vrings_base = self.vrings_base.clone().unwrap_or_default(); 332 serde_json::to_value(VsockSnapshot { 333 // `cid` and `avail_features` are snapshot as a safeguard. Upon restore, validate 334 // cid and avail_features in the current vsock match the previously snapshot vsock. 335 cid: self.cid, 336 avail_features: self.avail_features, 337 acked_features: self.acked_features, 338 vrings_base, 339 }) 340 .context("failed to snapshot virtio console") 341 } 342 virtio_restore(&mut self, data: serde_json::Value) -> anyhow::Result<()>343 fn virtio_restore(&mut self, data: serde_json::Value) -> anyhow::Result<()> { 344 let deser: VsockSnapshot = 345 serde_json::from_value(data).context("failed to deserialize virtio vsock")?; 346 anyhow::ensure!( 347 self.cid == deser.cid, 348 "Virtio vsock incorrect cid for restore:\n Expected: {}, Actual: {}", 349 self.cid, 350 deser.cid, 351 ); 352 anyhow::ensure!( 353 self.avail_features == deser.avail_features, 354 "Virtio vsock incorrect avail features for restore:\n Expected: {}, Actual: {}", 355 self.avail_features, 356 deser.avail_features, 357 ); 358 self.acked_features = deser.acked_features; 359 self.vrings_base = Some(deser.vrings_base); 360 // Send the TRANSPORT_RESET on next wake so that the guest knows that its existing vsock 361 // connections are broken. 362 self.needs_transport_reset = true; 363 Ok(()) 364 } 365 } 366 367 #[cfg(test)] 368 mod tests { 369 use std::convert::TryInto; 370 371 use super::*; 372 373 #[test] ack_features()374 fn ack_features() { 375 let cid = 5; 376 let features: u64 = (1 << 20) | (1 << 49) | (1 << 2) | (1 << 19); 377 let mut acked_features: u64 = 0; 378 let mut unavailable_features: u64 = 0; 379 380 let mut vsock = Vsock::new_for_testing(cid, features); 381 assert_eq!(acked_features, vsock.acked_features()); 382 383 acked_features |= 1 << 2; 384 vsock.ack_features(acked_features); 385 assert_eq!(acked_features, vsock.acked_features()); 386 387 acked_features |= 1 << 49; 388 vsock.ack_features(acked_features); 389 assert_eq!(acked_features, vsock.acked_features()); 390 391 acked_features |= 1 << 60; 392 unavailable_features |= 1 << 60; 393 vsock.ack_features(acked_features); 394 assert_eq!( 395 acked_features & !unavailable_features, 396 vsock.acked_features() 397 ); 398 399 acked_features |= 1 << 1; 400 unavailable_features |= 1 << 1; 401 vsock.ack_features(acked_features); 402 assert_eq!( 403 acked_features & !unavailable_features, 404 vsock.acked_features() 405 ); 406 } 407 408 #[test] read_config()409 fn read_config() { 410 let cid = 0xfca9a559fdcb9756; 411 let vsock = Vsock::new_for_testing(cid, 0); 412 413 let mut buf = [0u8; 8]; 414 vsock.read_config(0, &mut buf); 415 assert_eq!(cid, u64::from_le_bytes(buf)); 416 417 vsock.read_config(0, &mut buf[..4]); 418 assert_eq!( 419 (cid & 0xffffffff) as u32, 420 u32::from_le_bytes(buf[..4].try_into().unwrap()) 421 ); 422 423 vsock.read_config(4, &mut buf[..4]); 424 assert_eq!( 425 (cid >> 32) as u32, 426 u32::from_le_bytes(buf[..4].try_into().unwrap()) 427 ); 428 429 let data: [u8; 8] = [8, 226, 5, 46, 159, 59, 89, 77]; 430 buf.copy_from_slice(&data); 431 432 vsock.read_config(12, &mut buf); 433 assert_eq!(&buf, &data); 434 } 435 436 #[test] features()437 fn features() { 438 let cid = 5; 439 let features: u64 = 0xfc195ae8db88cff9; 440 441 let vsock = Vsock::new_for_testing(cid, features); 442 assert_eq!(features, vsock.features()); 443 } 444 } 445