1 // SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause
2 
3 use std::{
4     collections::{HashMap, HashSet},
5     io::{self, Result as IoResult},
6     sync::{Arc, Mutex, RwLock},
7     u16, u32, u64, u8,
8 };
9 
10 use log::warn;
11 use thiserror::Error as ThisError;
12 use vhost::vhost_user::message::{VhostUserProtocolFeatures, VhostUserVirtioFeatures};
13 use vhost_user_backend::{VhostUserBackend, VringRwLock};
14 use virtio_bindings::bindings::{
15     virtio_config::VIRTIO_F_NOTIFY_ON_EMPTY, virtio_config::VIRTIO_F_VERSION_1,
16     virtio_ring::VIRTIO_RING_F_EVENT_IDX,
17 };
18 use vm_memory::{ByteValued, GuestMemoryAtomic, GuestMemoryMmap, Le64};
19 use vmm_sys_util::{
20     epoll::EventSet,
21     eventfd::{EventFd, EFD_NONBLOCK},
22 };
23 
24 use crate::thread_backend::RawPktsQ;
25 use crate::vhu_vsock_thread::*;
26 
/// Map shared between vsock backends, presumably keyed by guest CID
/// (TODO confirm against the code that inserts entries).
///
/// Each value is a tuple of:
/// - a shared, lock-protected [`RawPktsQ`],
/// - a shared, lock-protected set of strings (presumably the group names the
///   device belongs to -- verify against the thread implementation),
/// - an [`EventFd`] (presumably used to notify the backend owning the entry).
pub(crate) type CidMap =
    HashMap<u64, (Arc<RwLock<RawPktsQ>>, Arc<RwLock<HashSet<String>>>, EventFd)>;
29 
// Number of virtqueues reported by `num_queues` (rx, tx and event).
const NUM_QUEUES: usize = 3;
// Queue size reported by `max_queue_size`.
const QUEUE_SIZE: usize = 256;

// New descriptors are pending on the rx queue.
const RX_QUEUE_EVENT: u16 = 0;
// New descriptors are pending on the tx queue.
const TX_QUEUE_EVENT: u16 = 1;
// New descriptors are pending on the event queue.
const EVT_QUEUE_EVENT: u16 = 2;
39 
/// Notification coming from the backend.
///
/// Event ids `0..=NUM_QUEUES` are reserved for the queues and the exit
/// event, so the first id available for custom events is `NUM_QUEUES + 1`.
pub(crate) const BACKEND_EVENT: u16 = (NUM_QUEUES + 1) as u16;

/// Notification coming from the sibling VM.
///
/// Uses the event id immediately following [`BACKEND_EVENT`].
pub(crate) const SIBLING_VM_EVENT: u16 = BACKEND_EVENT + 1;
47 
/// Context ID (CID) of the host.
pub(crate) const VSOCK_HOST_CID: u64 = 2;

/// Packet type for connection-oriented (stream) sockets.
pub(crate) const VSOCK_TYPE_STREAM: u16 = 1;
53 
// Vsock packet operation IDs.
//
// NOTE: this header is deliberately a plain comment; as a `///` doc comment
// it would be merged into the documentation of the first constant below.

/// Connection request
pub(crate) const VSOCK_OP_REQUEST: u16 = 1;
/// Connection response
pub(crate) const VSOCK_OP_RESPONSE: u16 = 2;
/// Connection reset
pub(crate) const VSOCK_OP_RST: u16 = 3;
/// Shutdown connection
pub(crate) const VSOCK_OP_SHUTDOWN: u16 = 4;
/// Data read/write
pub(crate) const VSOCK_OP_RW: u16 = 5;
/// Flow control credit update
pub(crate) const VSOCK_OP_CREDIT_UPDATE: u16 = 6;
/// Flow control credit request
pub(crate) const VSOCK_OP_CREDIT_REQUEST: u16 = 7;
70 
// Vsock packet flags.
//
// NOTE: this header is deliberately a plain comment; as a `///` doc comment
// it would be merged into the documentation of the first constant below.

/// VSOCK_OP_SHUTDOWN: Packet sender will receive no more data
pub(crate) const VSOCK_FLAGS_SHUTDOWN_RCV: u32 = 1;
/// VSOCK_OP_SHUTDOWN: Packet sender will send no more data
pub(crate) const VSOCK_FLAGS_SHUTDOWN_SEND: u32 = 2;
77 
// Queue mask to select vrings, returned (one entry per thread) by
// `queues_per_thread`. Bits 0 and 1 are set, which matches the rx and tx
// queue indices used above; bit 2 (the event queue) is not set.
const QUEUE_MASK: u64 = 0b11;

/// Result type used throughout the crate, with [`Error`] as the error type.
pub(crate) type Result<T> = std::result::Result<T, Error>;
82 
83 /// Custom error types
84 #[derive(Debug, ThisError)]
85 pub(crate) enum Error {
86     #[error("Failed to handle event other than EPOLLIN event")]
87     HandleEventNotEpollIn,
88     #[error("Failed to handle unknown event")]
89     HandleUnknownEvent,
90     #[error("Failed to accept new local socket connection")]
91     UnixAccept(std::io::Error),
92     #[error("Failed to bind a unix stream")]
93     UnixBind(std::io::Error),
94     #[error("Failed to create an epoll fd")]
95     EpollFdCreate(std::io::Error),
96     #[error("Failed to add to epoll")]
97     EpollAdd(std::io::Error),
98     #[error("Failed to modify evset associated with epoll")]
99     EpollModify(std::io::Error),
100     #[error("Failed to read from unix stream")]
101     UnixRead(std::io::Error),
102     #[error("Failed to convert byte array to string")]
103     ConvertFromUtf8(std::str::Utf8Error),
104     #[error("Invalid vsock connection request from host")]
105     InvalidPortRequest,
106     #[error("Unable to convert string to integer")]
107     ParseInteger(std::num::ParseIntError),
108     #[error("Error reading stream port")]
109     ReadStreamPort(Box<Error>),
110     #[error("Failed to de-register fd from epoll")]
111     EpollRemove(std::io::Error),
112     #[error("No memory configured")]
113     NoMemoryConfigured,
114     #[error("Unable to iterate queue")]
115     IterateQueue,
116     #[error("No rx request available")]
117     NoRequestRx,
118     #[error("Packet missing data buffer")]
119     PktBufMissing,
120     #[error("Failed to connect to unix socket")]
121     UnixConnect(std::io::Error),
122     #[error("Unable to write to unix stream")]
123     UnixWrite,
124     #[error("Unable to push data to local tx buffer")]
125     LocalTxBufFull,
126     #[error("Unable to flush data from local tx buffer")]
127     LocalTxBufFlush(std::io::Error),
128     #[error("No free local port available for new host inititated connection")]
129     NoFreeLocalPort,
130     #[error("Backend rx queue is empty")]
131     EmptyBackendRxQ,
132     #[error("Failed to create an EventFd")]
133     EventFdCreate(std::io::Error),
134     #[error("Raw vsock packets queue is empty")]
135     EmptyRawPktsQueue,
136     #[error("CID already in use by another vsock device")]
137     CidAlreadyInUse,
138 }
139 
140 impl std::convert::From<Error> for std::io::Error {
from(e: Error) -> Self141     fn from(e: Error) -> Self {
142         std::io::Error::new(io::ErrorKind::Other, e)
143     }
144 }
145 
/// This structure is the public API through which an external program
/// is allowed to configure the backend.
#[derive(Clone, Debug)]
pub(crate) struct VsockConfig {
    /// CID assigned to the guest.
    guest_cid: u64,
    /// Path of the unix domain socket listening to requests from the guest.
    socket: String,
    /// Path of the unix domain socket listening to requests from the host
    /// side application.
    uds_path: String,
    /// Tx buffer size (presumably in bytes -- confirm against the consumer).
    tx_buffer_size: u32,
    /// Group names associated with this device.
    groups: Vec<String>,
}

impl VsockConfig {
    /// Build a `VsockConfig` holding the parameters that are handed to the
    /// vsock-backend server. The values are stored as given; no validation
    /// is performed here.
    pub fn new(
        guest_cid: u64,
        socket: String,
        uds_path: String,
        tx_buffer_size: u32,
        groups: Vec<String>,
    ) -> Self {
        Self {
            guest_cid,
            socket,
            uds_path,
            tx_buffer_size,
            groups,
        }
    }

    /// Return the guest's current CID.
    pub fn get_guest_cid(&self) -> u64 {
        self.guest_cid
    }

    /// Return an owned copy of the path of the unix domain socket which is
    /// listening to requests from the host side application.
    pub fn get_uds_path(&self) -> String {
        self.uds_path.clone()
    }

    /// Return an owned copy of the path of the unix domain socket which is
    /// listening to requests from the guest.
    pub fn get_socket_path(&self) -> String {
        self.socket.clone()
    }

    /// Return the configured tx buffer size.
    pub fn get_tx_buffer_size(&self) -> u32 {
        self.tx_buffer_size
    }

    /// Return an owned copy of the configured group names.
    pub fn get_groups(&self) -> Vec<String> {
        self.groups.to_vec()
    }
}
201 
/// Key identifying a connection by its (local port, peer port) pair.
///
/// Two keys are equal, and hash identically, exactly when both ports match,
/// which makes the type usable as a `HashMap`/`HashSet` key.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub(crate) struct ConnMapKey {
    /// Port on the local side of the connection.
    local_port: u32,
    /// Port on the peer side of the connection.
    peer_port: u32,
}

impl ConnMapKey {
    /// Create the key for the connection between `local_port` and `peer_port`.
    pub fn new(local_port: u32, peer_port: u32) -> Self {
        Self {
            local_port,
            peer_port,
        }
    }
}
218 
/// Virtio Vsock Configuration
///
/// This is the structure whose raw bytes are handed out by the backend's
/// `get_config` implementation.
#[derive(Copy, Clone, Debug, Default, PartialEq)]
#[repr(C)]
struct VirtioVsockConfig {
    /// Guest CID, stored as a little-endian 64-bit value.
    pub guest_cid: Le64,
}

// SAFETY: The layout of the structure is fixed (`#[repr(C)]` with a single
// `Le64` field) and can be initialized by reading its content from byte array.
unsafe impl ByteValued for VirtioVsockConfig {}
229 
/// vhost-user vsock device backend.
///
/// Implements `VhostUserBackend` and forwards queue/backend events to its
/// worker threads.
pub(crate) struct VhostUserVsockBackend {
    /// Device configuration space returned by `get_config`.
    config: VirtioVsockConfig,
    /// Worker threads, each behind its own mutex; `new` creates exactly one.
    pub threads: Vec<Mutex<VhostUserVsockThread>>,
    /// One queue bitmask per worker thread, returned by `queues_per_thread`.
    queues_per_thread: Vec<u64>,
    /// Event fd handed out (as a clone) by `exit_event`.
    pub exit_event: EventFd,
}
236 
237 impl VhostUserVsockBackend {
new(config: VsockConfig, cid_map: Arc<RwLock<CidMap>>) -> Result<Self>238     pub fn new(config: VsockConfig, cid_map: Arc<RwLock<CidMap>>) -> Result<Self> {
239         let thread = Mutex::new(VhostUserVsockThread::new(
240             config.get_uds_path(),
241             config.get_guest_cid(),
242             config.get_tx_buffer_size(),
243             config.get_groups(),
244             cid_map,
245         )?);
246         let queues_per_thread = vec![QUEUE_MASK];
247 
248         Ok(Self {
249             config: VirtioVsockConfig {
250                 guest_cid: From::from(config.get_guest_cid()),
251             },
252             threads: vec![thread],
253             queues_per_thread,
254             exit_event: EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?,
255         })
256     }
257 }
258 
259 impl VhostUserBackend<VringRwLock, ()> for VhostUserVsockBackend {
num_queues(&self) -> usize260     fn num_queues(&self) -> usize {
261         NUM_QUEUES
262     }
263 
max_queue_size(&self) -> usize264     fn max_queue_size(&self) -> usize {
265         QUEUE_SIZE
266     }
267 
features(&self) -> u64268     fn features(&self) -> u64 {
269         1 << VIRTIO_F_VERSION_1
270             | 1 << VIRTIO_F_NOTIFY_ON_EMPTY
271             | 1 << VIRTIO_RING_F_EVENT_IDX
272             | VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits()
273     }
274 
protocol_features(&self) -> VhostUserProtocolFeatures275     fn protocol_features(&self) -> VhostUserProtocolFeatures {
276         VhostUserProtocolFeatures::CONFIG
277     }
278 
set_event_idx(&self, enabled: bool)279     fn set_event_idx(&self, enabled: bool) {
280         for thread in self.threads.iter() {
281             thread.lock().unwrap().event_idx = enabled;
282         }
283     }
284 
update_memory(&self, atomic_mem: GuestMemoryAtomic<GuestMemoryMmap>) -> IoResult<()>285     fn update_memory(&self, atomic_mem: GuestMemoryAtomic<GuestMemoryMmap>) -> IoResult<()> {
286         for thread in self.threads.iter() {
287             thread.lock().unwrap().mem = Some(atomic_mem.clone());
288         }
289         Ok(())
290     }
291 
handle_event( &self, device_event: u16, evset: EventSet, vrings: &[VringRwLock], thread_id: usize, ) -> IoResult<bool>292     fn handle_event(
293         &self,
294         device_event: u16,
295         evset: EventSet,
296         vrings: &[VringRwLock],
297         thread_id: usize,
298     ) -> IoResult<bool> {
299         let vring_rx = &vrings[0];
300         let vring_tx = &vrings[1];
301 
302         if evset != EventSet::IN {
303             return Err(Error::HandleEventNotEpollIn.into());
304         }
305 
306         let mut thread = self.threads[thread_id].lock().unwrap();
307         let evt_idx = thread.event_idx;
308 
309         match device_event {
310             RX_QUEUE_EVENT => {}
311             TX_QUEUE_EVENT => {
312                 thread.process_tx(vring_tx, evt_idx)?;
313             }
314             EVT_QUEUE_EVENT => {
315                 warn!("Received an unexpected EVT_QUEUE_EVENT");
316             }
317             BACKEND_EVENT => {
318                 thread.process_backend_evt(evset);
319                 if let Err(e) = thread.process_tx(vring_tx, evt_idx) {
320                     match e {
321                         Error::NoMemoryConfigured => {
322                             warn!("Received a backend event before vring initialization")
323                         }
324                         _ => return Err(e.into()),
325                     }
326                 }
327             }
328             SIBLING_VM_EVENT => {
329                 let _ = thread.sibling_event_fd.read();
330                 thread.process_raw_pkts(vring_rx, evt_idx)?;
331                 return Ok(false);
332             }
333             _ => {
334                 return Err(Error::HandleUnknownEvent.into());
335             }
336         }
337 
338         if device_event != EVT_QUEUE_EVENT {
339             thread.process_rx(vring_rx, evt_idx)?;
340         }
341 
342         Ok(false)
343     }
344 
get_config(&self, offset: u32, size: u32) -> Vec<u8>345     fn get_config(&self, offset: u32, size: u32) -> Vec<u8> {
346         let offset = offset as usize;
347         let size = size as usize;
348 
349         let buf = self.config.as_slice();
350 
351         if offset + size > buf.len() {
352             return Vec::new();
353         }
354 
355         buf[offset..offset + size].to_vec()
356     }
357 
queues_per_thread(&self) -> Vec<u64>358     fn queues_per_thread(&self) -> Vec<u64> {
359         self.queues_per_thread.clone()
360     }
361 
exit_event(&self, _thread_index: usize) -> Option<EventFd>362     fn exit_event(&self, _thread_index: usize) -> Option<EventFd> {
363         self.exit_event.try_clone().ok()
364     }
365 }
366 
// Unit tests for the backend: construction, the `VhostUserBackend` trait
// methods and the error paths of `handle_event` / `get_config`.
#[cfg(test)]
mod tests {
    use super::*;
    use std::convert::TryInto;
    use tempfile::tempdir;
    use vhost_user_backend::VringT;
    use vm_memory::GuestAddress;

    // Tx buffer size passed to every `VsockConfig` built by these tests.
    const CONN_TX_BUF_SIZE: u32 = 64 * 1024;

    // Happy path: build a backend on sockets inside a temp directory and
    // exercise every trait method, including all known event ids.
    #[test]
    fn test_vsock_backend() {
        const CID: u64 = 3;

        let groups_list: Vec<String> = vec![String::from("default")];

        let test_dir = tempdir().expect("Could not create a temp test directory.");

        let vhost_socket_path = test_dir
            .path()
            .join("test_vsock_backend.socket")
            .display()
            .to_string();
        let vsock_socket_path = test_dir
            .path()
            .join("test_vsock_backend.vsock")
            .display()
            .to_string();

        let config = VsockConfig::new(
            CID,
            vhost_socket_path.to_string(),
            vsock_socket_path.to_string(),
            CONN_TX_BUF_SIZE,
            groups_list,
        );

        let cid_map: Arc<RwLock<CidMap>> = Arc::new(RwLock::new(HashMap::new()));

        let backend = VhostUserVsockBackend::new(config, cid_map);

        assert!(backend.is_ok());
        let backend = backend.unwrap();

        assert_eq!(backend.num_queues(), NUM_QUEUES);
        assert_eq!(backend.max_queue_size(), QUEUE_SIZE);
        assert_ne!(backend.features(), 0);
        assert!(!backend.protocol_features().is_empty());
        backend.set_event_idx(false);

        // Two vrings (rx and tx) backed by a single 64 KiB guest memory region.
        let mem = GuestMemoryAtomic::new(
            GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(),
        );
        let vrings = [
            VringRwLock::new(mem.clone(), 0x1000).unwrap(),
            VringRwLock::new(mem.clone(), 0x2000).unwrap(),
        ];
        vrings[0].set_queue_info(0x100, 0x200, 0x300).unwrap();
        vrings[0].set_queue_ready(true);
        vrings[1].set_queue_info(0x1100, 0x1200, 0x1300).unwrap();
        vrings[1].set_queue_ready(true);

        assert!(backend.update_memory(mem).is_ok());

        let queues_per_thread = backend.queues_per_thread();
        assert_eq!(queues_per_thread.len(), 1);
        assert_eq!(queues_per_thread[0], 0b11);

        // The whole config space is the little-endian guest CID.
        let config = backend.get_config(0, 8);
        assert_eq!(config.len(), 8);
        let cid = u64::from_le_bytes(config.try_into().unwrap());
        assert_eq!(cid, CID);

        let exit = backend.exit_event(0);
        assert!(exit.is_some());
        exit.unwrap().write(1).unwrap();

        // Every known event id must be handled and report `Ok(false)`.
        let ret = backend.handle_event(RX_QUEUE_EVENT, EventSet::IN, &vrings, 0);
        assert!(ret.is_ok());
        assert!(!ret.unwrap());

        let ret = backend.handle_event(TX_QUEUE_EVENT, EventSet::IN, &vrings, 0);
        assert!(ret.is_ok());
        assert!(!ret.unwrap());

        let ret = backend.handle_event(EVT_QUEUE_EVENT, EventSet::IN, &vrings, 0);
        assert!(ret.is_ok());
        assert!(!ret.unwrap());

        let ret = backend.handle_event(BACKEND_EVENT, EventSet::IN, &vrings, 0);
        assert!(ret.is_ok());
        assert!(!ret.unwrap());

        // cleanup
        let _ = std::fs::remove_file(vhost_socket_path);
        let _ = std::fs::remove_file(vsock_socket_path);

        test_dir.close().unwrap();
    }

    // Failure paths: backend creation on unusable socket paths, out-of-range
    // config reads, non-EPOLLIN event sets and unknown event ids.
    #[test]
    fn test_vsock_backend_failures() {
        const CID: u64 = 3;

        let groups: Vec<String> = vec![String::from("default")];

        let test_dir = tempdir().expect("Could not create a temp test directory.");

        let vhost_socket_path = test_dir
            .path()
            .join("test_vsock_backend_failures.socket")
            .display()
            .to_string();
        let vsock_socket_path = test_dir
            .path()
            .join("test_vsock_backend_failures.vsock")
            .display()
            .to_string();

        // Socket paths under /sys are expected to be unusable, so creating
        // the backend must fail.
        let config = VsockConfig::new(
            CID,
            "/sys/not_allowed.socket".to_string(),
            "/sys/not_allowed.vsock".to_string(),
            CONN_TX_BUF_SIZE,
            groups.clone(),
        );

        let cid_map: Arc<RwLock<CidMap>> = Arc::new(RwLock::new(HashMap::new()));

        let backend = VhostUserVsockBackend::new(config, cid_map.clone());
        assert!(backend.is_err());

        // A valid configuration, used for the remaining error-path checks.
        let config = VsockConfig::new(
            CID,
            vhost_socket_path.to_string(),
            vsock_socket_path.to_string(),
            CONN_TX_BUF_SIZE,
            groups,
        );

        let backend = VhostUserVsockBackend::new(config, cid_map).unwrap();
        let mem = GuestMemoryAtomic::new(
            GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(),
        );
        let vrings = [
            VringRwLock::new(mem.clone(), 0x1000).unwrap(),
            VringRwLock::new(mem.clone(), 0x2000).unwrap(),
        ];

        backend.update_memory(mem).unwrap();

        // reading out of the config space, expecting empty config
        let config = backend.get_config(2, 8);
        assert_eq!(config.len(), 0);

        // Any event set other than EPOLLIN is rejected.
        assert_eq!(
            backend
                .handle_event(RX_QUEUE_EVENT, EventSet::OUT, &vrings, 0)
                .unwrap_err()
                .to_string(),
            Error::HandleEventNotEpollIn.to_string()
        );
        // An event id past the last known one is rejected.
        assert_eq!(
            backend
                .handle_event(SIBLING_VM_EVENT + 1, EventSet::IN, &vrings, 0)
                .unwrap_err()
                .to_string(),
            Error::HandleUnknownEvent.to_string()
        );

        // cleanup
        let _ = std::fs::remove_file(vhost_socket_path);
        let _ = std::fs::remove_file(vsock_socket_path);

        test_dir.close().unwrap();
    }
}
544