// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause

use std::{
    collections::{HashMap, HashSet},
    io::{self, Result as IoResult},
    sync::{Arc, Mutex, RwLock},
    u16, u32, u64, u8,
};

use log::warn;
use thiserror::Error as ThisError;
use vhost::vhost_user::message::{VhostUserProtocolFeatures, VhostUserVirtioFeatures};
use vhost_user_backend::{VhostUserBackend, VringRwLock};
use virtio_bindings::bindings::{
    virtio_config::VIRTIO_F_NOTIFY_ON_EMPTY, virtio_config::VIRTIO_F_VERSION_1,
    virtio_ring::VIRTIO_RING_F_EVENT_IDX,
};
use vm_memory::{ByteValued, GuestMemoryAtomic, GuestMemoryMmap, Le64};
use vmm_sys_util::{
    epoll::EventSet,
    eventfd::{EventFd, EFD_NONBLOCK},
};

use crate::thread_backend::RawPktsQ;
use crate::vhu_vsock_thread::*;

/// Shared lookup table keyed by a `u64` (presumably the guest CID, going by
/// the alias name — confirm against the code that populates it).
///
/// Each entry holds a shared raw-packet queue, a shared set of strings
/// (presumably group names — TODO confirm) and an `EventFd`.
pub(crate) type CidMap =
    HashMap<u64, (Arc<RwLock<RawPktsQ>>, Arc<RwLock<HashSet<String>>>, EventFd)>;

/// Number of virtqueues reported by the backend (see the rx/tx/event queue
/// event constants below).
const NUM_QUEUES: usize = 3;
/// Maximum queue size reported by the backend.
const QUEUE_SIZE: usize = 256;

// New descriptors are pending on the rx queue.
const RX_QUEUE_EVENT: u16 = 0;
// New descriptors are pending on the tx queue.
const TX_QUEUE_EVENT: u16 = 1;
// New descriptors are pending on the event queue.
const EVT_QUEUE_EVENT: u16 = 2;

/// Notification coming from the backend.
/// Event range [0...num_queues] is reserved for queues and exit event.
/// So NUM_QUEUES + 1 is used.
pub(crate) const BACKEND_EVENT: u16 = (NUM_QUEUES + 1) as u16;

/// Notification coming from the sibling VM.
pub(crate) const SIBLING_VM_EVENT: u16 = BACKEND_EVENT + 1;

/// CID of the host
pub(crate) const VSOCK_HOST_CID: u64 = 2;

/// Connection oriented packet
pub(crate) const VSOCK_TYPE_STREAM: u16 = 1;

// Vsock packet operation IDs.
//
// Kept as a plain `//` comment: a `///` section header followed by a blank
// line is still attached by rustdoc to the next item, which would prepend
// this heading to the documentation of `VSOCK_OP_REQUEST`.

/// Connection request
pub(crate) const VSOCK_OP_REQUEST: u16 = 1;
/// Connection response
pub(crate) const VSOCK_OP_RESPONSE: u16 = 2;
/// Connection reset
pub(crate) const VSOCK_OP_RST: u16 = 3;
/// Shutdown connection
pub(crate) const VSOCK_OP_SHUTDOWN: u16 = 4;
/// Data read/write
pub(crate) const VSOCK_OP_RW: u16 = 5;
/// Flow control credit update
pub(crate) const VSOCK_OP_CREDIT_UPDATE: u16 = 6;
/// Flow control credit request
pub(crate) const VSOCK_OP_CREDIT_REQUEST: u16 = 7;

// Vsock packet flags.
//
// Plain `//` comment for the same reason as the operation-ID heading above.

/// VSOCK_OP_SHUTDOWN: Packet sender will receive no more data
pub(crate) const VSOCK_FLAGS_SHUTDOWN_RCV: u32 = 1;
/// VSOCK_OP_SHUTDOWN: Packet sender will send no more data
pub(crate) const VSOCK_FLAGS_SHUTDOWN_SEND: u32 = 2;

// Queue mask to select vrings.
79 const QUEUE_MASK: u64 = 0b11; 80 81 pub(crate) type Result<T> = std::result::Result<T, Error>; 82 83 /// Custom error types 84 #[derive(Debug, ThisError)] 85 pub(crate) enum Error { 86 #[error("Failed to handle event other than EPOLLIN event")] 87 HandleEventNotEpollIn, 88 #[error("Failed to handle unknown event")] 89 HandleUnknownEvent, 90 #[error("Failed to accept new local socket connection")] 91 UnixAccept(std::io::Error), 92 #[error("Failed to bind a unix stream")] 93 UnixBind(std::io::Error), 94 #[error("Failed to create an epoll fd")] 95 EpollFdCreate(std::io::Error), 96 #[error("Failed to add to epoll")] 97 EpollAdd(std::io::Error), 98 #[error("Failed to modify evset associated with epoll")] 99 EpollModify(std::io::Error), 100 #[error("Failed to read from unix stream")] 101 UnixRead(std::io::Error), 102 #[error("Failed to convert byte array to string")] 103 ConvertFromUtf8(std::str::Utf8Error), 104 #[error("Invalid vsock connection request from host")] 105 InvalidPortRequest, 106 #[error("Unable to convert string to integer")] 107 ParseInteger(std::num::ParseIntError), 108 #[error("Error reading stream port")] 109 ReadStreamPort(Box<Error>), 110 #[error("Failed to de-register fd from epoll")] 111 EpollRemove(std::io::Error), 112 #[error("No memory configured")] 113 NoMemoryConfigured, 114 #[error("Unable to iterate queue")] 115 IterateQueue, 116 #[error("No rx request available")] 117 NoRequestRx, 118 #[error("Packet missing data buffer")] 119 PktBufMissing, 120 #[error("Failed to connect to unix socket")] 121 UnixConnect(std::io::Error), 122 #[error("Unable to write to unix stream")] 123 UnixWrite, 124 #[error("Unable to push data to local tx buffer")] 125 LocalTxBufFull, 126 #[error("Unable to flush data from local tx buffer")] 127 LocalTxBufFlush(std::io::Error), 128 #[error("No free local port available for new host inititated connection")] 129 NoFreeLocalPort, 130 #[error("Backend rx queue is empty")] 131 EmptyBackendRxQ, 132 #[error("Failed to 
create an EventFd")] 133 EventFdCreate(std::io::Error), 134 #[error("Raw vsock packets queue is empty")] 135 EmptyRawPktsQueue, 136 #[error("CID already in use by another vsock device")] 137 CidAlreadyInUse, 138 } 139 140 impl std::convert::From<Error> for std::io::Error { from(e: Error) -> Self141 fn from(e: Error) -> Self { 142 std::io::Error::new(io::ErrorKind::Other, e) 143 } 144 } 145 146 #[derive(Debug, Clone)] 147 /// This structure is the public API through which an external program 148 /// is allowed to configure the backend. 149 pub(crate) struct VsockConfig { 150 guest_cid: u64, 151 socket: String, 152 uds_path: String, 153 tx_buffer_size: u32, 154 groups: Vec<String>, 155 } 156 157 impl VsockConfig { 158 /// Create a new instance of the VsockConfig struct, containing the 159 /// parameters to be fed into the vsock-backend server. new( guest_cid: u64, socket: String, uds_path: String, tx_buffer_size: u32, groups: Vec<String>, ) -> Self160 pub fn new( 161 guest_cid: u64, 162 socket: String, 163 uds_path: String, 164 tx_buffer_size: u32, 165 groups: Vec<String>, 166 ) -> Self { 167 Self { 168 guest_cid, 169 socket, 170 uds_path, 171 tx_buffer_size, 172 groups, 173 } 174 } 175 176 /// Return the guest's current CID. get_guest_cid(&self) -> u64177 pub fn get_guest_cid(&self) -> u64 { 178 self.guest_cid 179 } 180 181 /// Return the path of the unix domain socket which is listening to 182 /// requests from the host side application. get_uds_path(&self) -> String183 pub fn get_uds_path(&self) -> String { 184 String::from(&self.uds_path) 185 } 186 187 /// Return the path of the unix domain socket which is listening to 188 /// requests from the guest. 
get_socket_path(&self) -> String189 pub fn get_socket_path(&self) -> String { 190 String::from(&self.socket) 191 } 192 get_tx_buffer_size(&self) -> u32193 pub fn get_tx_buffer_size(&self) -> u32 { 194 self.tx_buffer_size 195 } 196 get_groups(&self) -> Vec<String>197 pub fn get_groups(&self) -> Vec<String> { 198 self.groups.clone() 199 } 200 } 201 202 /// A local port and peer port pair used to retrieve 203 /// the corresponding connection. 204 #[derive(Hash, PartialEq, Eq, Debug, Clone)] 205 pub(crate) struct ConnMapKey { 206 local_port: u32, 207 peer_port: u32, 208 } 209 210 impl ConnMapKey { new(local_port: u32, peer_port: u32) -> Self211 pub fn new(local_port: u32, peer_port: u32) -> Self { 212 Self { 213 local_port, 214 peer_port, 215 } 216 } 217 } 218 219 /// Virtio Vsock Configuration 220 #[derive(Copy, Clone, Debug, Default, PartialEq)] 221 #[repr(C)] 222 struct VirtioVsockConfig { 223 pub guest_cid: Le64, 224 } 225 226 // SAFETY: The layout of the structure is fixed and can be initialized by 227 // reading its content from byte array. 
228 unsafe impl ByteValued for VirtioVsockConfig {} 229 230 pub(crate) struct VhostUserVsockBackend { 231 config: VirtioVsockConfig, 232 pub threads: Vec<Mutex<VhostUserVsockThread>>, 233 queues_per_thread: Vec<u64>, 234 pub exit_event: EventFd, 235 } 236 237 impl VhostUserVsockBackend { new(config: VsockConfig, cid_map: Arc<RwLock<CidMap>>) -> Result<Self>238 pub fn new(config: VsockConfig, cid_map: Arc<RwLock<CidMap>>) -> Result<Self> { 239 let thread = Mutex::new(VhostUserVsockThread::new( 240 config.get_uds_path(), 241 config.get_guest_cid(), 242 config.get_tx_buffer_size(), 243 config.get_groups(), 244 cid_map, 245 )?); 246 let queues_per_thread = vec![QUEUE_MASK]; 247 248 Ok(Self { 249 config: VirtioVsockConfig { 250 guest_cid: From::from(config.get_guest_cid()), 251 }, 252 threads: vec![thread], 253 queues_per_thread, 254 exit_event: EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?, 255 }) 256 } 257 } 258 259 impl VhostUserBackend<VringRwLock, ()> for VhostUserVsockBackend { num_queues(&self) -> usize260 fn num_queues(&self) -> usize { 261 NUM_QUEUES 262 } 263 max_queue_size(&self) -> usize264 fn max_queue_size(&self) -> usize { 265 QUEUE_SIZE 266 } 267 features(&self) -> u64268 fn features(&self) -> u64 { 269 1 << VIRTIO_F_VERSION_1 270 | 1 << VIRTIO_F_NOTIFY_ON_EMPTY 271 | 1 << VIRTIO_RING_F_EVENT_IDX 272 | VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits() 273 } 274 protocol_features(&self) -> VhostUserProtocolFeatures275 fn protocol_features(&self) -> VhostUserProtocolFeatures { 276 VhostUserProtocolFeatures::CONFIG 277 } 278 set_event_idx(&self, enabled: bool)279 fn set_event_idx(&self, enabled: bool) { 280 for thread in self.threads.iter() { 281 thread.lock().unwrap().event_idx = enabled; 282 } 283 } 284 update_memory(&self, atomic_mem: GuestMemoryAtomic<GuestMemoryMmap>) -> IoResult<()>285 fn update_memory(&self, atomic_mem: GuestMemoryAtomic<GuestMemoryMmap>) -> IoResult<()> { 286 for thread in self.threads.iter() { 287 
thread.lock().unwrap().mem = Some(atomic_mem.clone()); 288 } 289 Ok(()) 290 } 291 handle_event( &self, device_event: u16, evset: EventSet, vrings: &[VringRwLock], thread_id: usize, ) -> IoResult<bool>292 fn handle_event( 293 &self, 294 device_event: u16, 295 evset: EventSet, 296 vrings: &[VringRwLock], 297 thread_id: usize, 298 ) -> IoResult<bool> { 299 let vring_rx = &vrings[0]; 300 let vring_tx = &vrings[1]; 301 302 if evset != EventSet::IN { 303 return Err(Error::HandleEventNotEpollIn.into()); 304 } 305 306 let mut thread = self.threads[thread_id].lock().unwrap(); 307 let evt_idx = thread.event_idx; 308 309 match device_event { 310 RX_QUEUE_EVENT => {} 311 TX_QUEUE_EVENT => { 312 thread.process_tx(vring_tx, evt_idx)?; 313 } 314 EVT_QUEUE_EVENT => { 315 warn!("Received an unexpected EVT_QUEUE_EVENT"); 316 } 317 BACKEND_EVENT => { 318 thread.process_backend_evt(evset); 319 if let Err(e) = thread.process_tx(vring_tx, evt_idx) { 320 match e { 321 Error::NoMemoryConfigured => { 322 warn!("Received a backend event before vring initialization") 323 } 324 _ => return Err(e.into()), 325 } 326 } 327 } 328 SIBLING_VM_EVENT => { 329 let _ = thread.sibling_event_fd.read(); 330 thread.process_raw_pkts(vring_rx, evt_idx)?; 331 return Ok(false); 332 } 333 _ => { 334 return Err(Error::HandleUnknownEvent.into()); 335 } 336 } 337 338 if device_event != EVT_QUEUE_EVENT { 339 thread.process_rx(vring_rx, evt_idx)?; 340 } 341 342 Ok(false) 343 } 344 get_config(&self, offset: u32, size: u32) -> Vec<u8>345 fn get_config(&self, offset: u32, size: u32) -> Vec<u8> { 346 let offset = offset as usize; 347 let size = size as usize; 348 349 let buf = self.config.as_slice(); 350 351 if offset + size > buf.len() { 352 return Vec::new(); 353 } 354 355 buf[offset..offset + size].to_vec() 356 } 357 queues_per_thread(&self) -> Vec<u64>358 fn queues_per_thread(&self) -> Vec<u64> { 359 self.queues_per_thread.clone() 360 } 361 exit_event(&self, _thread_index: usize) -> Option<EventFd>362 fn 
exit_event(&self, _thread_index: usize) -> Option<EventFd> { 363 self.exit_event.try_clone().ok() 364 } 365 } 366 367 #[cfg(test)] 368 mod tests { 369 use super::*; 370 use std::convert::TryInto; 371 use tempfile::tempdir; 372 use vhost_user_backend::VringT; 373 use vm_memory::GuestAddress; 374 375 const CONN_TX_BUF_SIZE: u32 = 64 * 1024; 376 377 #[test] test_vsock_backend()378 fn test_vsock_backend() { 379 const CID: u64 = 3; 380 381 let groups_list: Vec<String> = vec![String::from("default")]; 382 383 let test_dir = tempdir().expect("Could not create a temp test directory."); 384 385 let vhost_socket_path = test_dir 386 .path() 387 .join("test_vsock_backend.socket") 388 .display() 389 .to_string(); 390 let vsock_socket_path = test_dir 391 .path() 392 .join("test_vsock_backend.vsock") 393 .display() 394 .to_string(); 395 396 let config = VsockConfig::new( 397 CID, 398 vhost_socket_path.to_string(), 399 vsock_socket_path.to_string(), 400 CONN_TX_BUF_SIZE, 401 groups_list, 402 ); 403 404 let cid_map: Arc<RwLock<CidMap>> = Arc::new(RwLock::new(HashMap::new())); 405 406 let backend = VhostUserVsockBackend::new(config, cid_map); 407 408 assert!(backend.is_ok()); 409 let backend = backend.unwrap(); 410 411 assert_eq!(backend.num_queues(), NUM_QUEUES); 412 assert_eq!(backend.max_queue_size(), QUEUE_SIZE); 413 assert_ne!(backend.features(), 0); 414 assert!(!backend.protocol_features().is_empty()); 415 backend.set_event_idx(false); 416 417 let mem = GuestMemoryAtomic::new( 418 GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(), 419 ); 420 let vrings = [ 421 VringRwLock::new(mem.clone(), 0x1000).unwrap(), 422 VringRwLock::new(mem.clone(), 0x2000).unwrap(), 423 ]; 424 vrings[0].set_queue_info(0x100, 0x200, 0x300).unwrap(); 425 vrings[0].set_queue_ready(true); 426 vrings[1].set_queue_info(0x1100, 0x1200, 0x1300).unwrap(); 427 vrings[1].set_queue_ready(true); 428 429 assert!(backend.update_memory(mem).is_ok()); 430 431 let queues_per_thread = 
backend.queues_per_thread(); 432 assert_eq!(queues_per_thread.len(), 1); 433 assert_eq!(queues_per_thread[0], 0b11); 434 435 let config = backend.get_config(0, 8); 436 assert_eq!(config.len(), 8); 437 let cid = u64::from_le_bytes(config.try_into().unwrap()); 438 assert_eq!(cid, CID); 439 440 let exit = backend.exit_event(0); 441 assert!(exit.is_some()); 442 exit.unwrap().write(1).unwrap(); 443 444 let ret = backend.handle_event(RX_QUEUE_EVENT, EventSet::IN, &vrings, 0); 445 assert!(ret.is_ok()); 446 assert!(!ret.unwrap()); 447 448 let ret = backend.handle_event(TX_QUEUE_EVENT, EventSet::IN, &vrings, 0); 449 assert!(ret.is_ok()); 450 assert!(!ret.unwrap()); 451 452 let ret = backend.handle_event(EVT_QUEUE_EVENT, EventSet::IN, &vrings, 0); 453 assert!(ret.is_ok()); 454 assert!(!ret.unwrap()); 455 456 let ret = backend.handle_event(BACKEND_EVENT, EventSet::IN, &vrings, 0); 457 assert!(ret.is_ok()); 458 assert!(!ret.unwrap()); 459 460 // cleanup 461 let _ = std::fs::remove_file(vhost_socket_path); 462 let _ = std::fs::remove_file(vsock_socket_path); 463 464 test_dir.close().unwrap(); 465 } 466 467 #[test] test_vsock_backend_failures()468 fn test_vsock_backend_failures() { 469 const CID: u64 = 3; 470 471 let groups: Vec<String> = vec![String::from("default")]; 472 473 let test_dir = tempdir().expect("Could not create a temp test directory."); 474 475 let vhost_socket_path = test_dir 476 .path() 477 .join("test_vsock_backend_failures.socket") 478 .display() 479 .to_string(); 480 let vsock_socket_path = test_dir 481 .path() 482 .join("test_vsock_backend_failures.vsock") 483 .display() 484 .to_string(); 485 486 let config = VsockConfig::new( 487 CID, 488 "/sys/not_allowed.socket".to_string(), 489 "/sys/not_allowed.vsock".to_string(), 490 CONN_TX_BUF_SIZE, 491 groups.clone(), 492 ); 493 494 let cid_map: Arc<RwLock<CidMap>> = Arc::new(RwLock::new(HashMap::new())); 495 496 let backend = VhostUserVsockBackend::new(config, cid_map.clone()); 497 assert!(backend.is_err()); 498 
499 let config = VsockConfig::new( 500 CID, 501 vhost_socket_path.to_string(), 502 vsock_socket_path.to_string(), 503 CONN_TX_BUF_SIZE, 504 groups, 505 ); 506 507 let backend = VhostUserVsockBackend::new(config, cid_map).unwrap(); 508 let mem = GuestMemoryAtomic::new( 509 GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(), 510 ); 511 let vrings = [ 512 VringRwLock::new(mem.clone(), 0x1000).unwrap(), 513 VringRwLock::new(mem.clone(), 0x2000).unwrap(), 514 ]; 515 516 backend.update_memory(mem).unwrap(); 517 518 // reading out of the config space, expecting empty config 519 let config = backend.get_config(2, 8); 520 assert_eq!(config.len(), 0); 521 522 assert_eq!( 523 backend 524 .handle_event(RX_QUEUE_EVENT, EventSet::OUT, &vrings, 0) 525 .unwrap_err() 526 .to_string(), 527 Error::HandleEventNotEpollIn.to_string() 528 ); 529 assert_eq!( 530 backend 531 .handle_event(SIBLING_VM_EVENT + 1, EventSet::IN, &vrings, 0) 532 .unwrap_err() 533 .to_string(), 534 Error::HandleUnknownEvent.to_string() 535 ); 536 537 // cleanup 538 let _ = std::fs::remove_file(vhost_socket_path); 539 let _ = std::fs::remove_file(vsock_socket_path); 540 541 test_dir.close().unwrap(); 542 } 543 } 544