1 // Copyright (C) 2019 Alibaba Cloud Computing. All rights reserved.
2 // SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause
3 //
4 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
5 // Use of this source code is governed by a BSD-style license that can be
6 // found in the LICENSE-BSD-Google file.
7 
8 //! Traits and structs to control Linux in-kernel vhost drivers.
9 //!
//! The initial vhost implementation is part of the Linux kernel and uses an ioctl interface to
//! communicate with userspace applications. This submodule provides ioctl-based interfaces to
//! control the in-kernel net, scsi and vsock vhost drivers.
13 
14 use std::mem;
15 use std::os::unix::io::{AsRawFd, RawFd};
16 
17 use libc::{c_void, ssize_t, write};
18 
19 use vm_memory::{Address, GuestAddress, GuestAddressSpace, GuestMemory, GuestUsize};
20 use vmm_sys_util::eventfd::EventFd;
21 use vmm_sys_util::ioctl::{ioctl, ioctl_with_mut_ref, ioctl_with_ptr, ioctl_with_ref};
22 
23 use super::{
24     Error, Result, VhostBackend, VhostIotlbBackend, VhostIotlbMsg, VhostIotlbMsgParser,
25     VhostUserDirtyLogRegion, VhostUserMemoryRegionInfo, VringConfigData, VHOST_MAX_MEMORY_REGIONS,
26 };
27 
28 pub mod vhost_binding;
29 use self::vhost_binding::*;
30 
31 #[cfg(feature = "vhost-net")]
32 pub mod net;
33 #[cfg(feature = "vhost-vdpa")]
34 pub mod vdpa;
35 #[cfg(feature = "vhost-vsock")]
36 pub mod vsock;
37 
38 #[inline]
ioctl_result<T>(rc: i32, res: T) -> Result<T>39 fn ioctl_result<T>(rc: i32, res: T) -> Result<T> {
40     if rc < 0 {
41         Err(Error::IoctlError(std::io::Error::last_os_error()))
42     } else {
43         Ok(res)
44     }
45 }
46 
47 #[inline]
io_result<T>(rc: isize, res: T) -> Result<T>48 fn io_result<T>(rc: isize, res: T) -> Result<T> {
49     if rc < 0 {
50         Err(Error::IOError(std::io::Error::last_os_error()))
51     } else {
52         Ok(res)
53     }
54 }
55 
56 /// Represent an in-kernel vhost device backend.
57 pub trait VhostKernBackend: AsRawFd {
58     /// Associated type to access guest memory.
59     type AS: GuestAddressSpace;
60 
61     /// Get the object to access the guest's memory.
mem(&self) -> &Self::AS62     fn mem(&self) -> &Self::AS;
63 
64     /// Check whether the ring configuration is valid.
is_valid(&self, config_data: &VringConfigData) -> bool65     fn is_valid(&self, config_data: &VringConfigData) -> bool {
66         let queue_size = config_data.queue_size;
67         if queue_size > config_data.queue_max_size
68             || queue_size == 0
69             || (queue_size & (queue_size - 1)) != 0
70         {
71             return false;
72         }
73 
74         let m = self.mem().memory();
75         let desc_table_size = 16 * u64::from(queue_size) as GuestUsize;
76         let avail_ring_size = 6 + 2 * u64::from(queue_size) as GuestUsize;
77         let used_ring_size = 6 + 8 * u64::from(queue_size) as GuestUsize;
78         if GuestAddress(config_data.desc_table_addr)
79             .checked_add(desc_table_size)
80             .map_or(true, |v| !m.address_in_range(v))
81         {
82             return false;
83         }
84         if GuestAddress(config_data.avail_ring_addr)
85             .checked_add(avail_ring_size)
86             .map_or(true, |v| !m.address_in_range(v))
87         {
88             return false;
89         }
90         if GuestAddress(config_data.used_ring_addr)
91             .checked_add(used_ring_size)
92             .map_or(true, |v| !m.address_in_range(v))
93         {
94             return false;
95         }
96 
97         config_data.is_log_addr_valid()
98     }
99 }
100 
101 impl<T: VhostKernBackend> VhostBackend for T {
102     /// Get a bitmask of supported virtio/vhost features.
get_features(&self) -> Result<u64>103     fn get_features(&self) -> Result<u64> {
104         let mut avail_features: u64 = 0;
105         // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
106         let ret = unsafe { ioctl_with_mut_ref(self, VHOST_GET_FEATURES(), &mut avail_features) };
107         ioctl_result(ret, avail_features)
108     }
109 
110     /// Inform the vhost subsystem which features to enable. This should be a subset of
111     /// supported features from VHOST_GET_FEATURES.
112     ///
113     /// # Arguments
114     /// * `features` - Bitmask of features to set.
set_features(&self, features: u64) -> Result<()>115     fn set_features(&self, features: u64) -> Result<()> {
116         // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
117         let ret = unsafe { ioctl_with_ref(self, VHOST_SET_FEATURES(), &features) };
118         ioctl_result(ret, ())
119     }
120 
121     /// Set the current process as the owner of this file descriptor.
122     /// This must be run before any other vhost ioctls.
set_owner(&self) -> Result<()>123     fn set_owner(&self) -> Result<()> {
124         // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
125         let ret = unsafe { ioctl(self, VHOST_SET_OWNER()) };
126         ioctl_result(ret, ())
127     }
128 
reset_owner(&self) -> Result<()>129     fn reset_owner(&self) -> Result<()> {
130         // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
131         let ret = unsafe { ioctl(self, VHOST_RESET_OWNER()) };
132         ioctl_result(ret, ())
133     }
134 
135     /// Set the guest memory mappings for vhost to use.
set_mem_table(&self, regions: &[VhostUserMemoryRegionInfo]) -> Result<()>136     fn set_mem_table(&self, regions: &[VhostUserMemoryRegionInfo]) -> Result<()> {
137         if regions.is_empty() || regions.len() > VHOST_MAX_MEMORY_REGIONS {
138             return Err(Error::InvalidGuestMemory);
139         }
140 
141         let mut vhost_memory = VhostMemory::new(regions.len() as u16);
142         for (index, region) in regions.iter().enumerate() {
143             vhost_memory.set_region(
144                 index as u32,
145                 &vhost_memory_region {
146                     guest_phys_addr: region.guest_phys_addr,
147                     memory_size: region.memory_size,
148                     userspace_addr: region.userspace_addr,
149                     flags_padding: 0u64,
150                 },
151             )?;
152         }
153 
154         // SAFETY: This ioctl is called with a pointer that is valid for the lifetime
155         // of this function. The kernel will make its own copy of the memory
156         // tables. As always, check the return value.
157         let ret = unsafe { ioctl_with_ptr(self, VHOST_SET_MEM_TABLE(), vhost_memory.as_ptr()) };
158         ioctl_result(ret, ())
159     }
160 
161     /// Set base address for page modification logging.
162     ///
163     /// # Arguments
164     /// * `base` - Base address for page modification logging.
set_log_base(&self, base: u64, region: Option<VhostUserDirtyLogRegion>) -> Result<()>165     fn set_log_base(&self, base: u64, region: Option<VhostUserDirtyLogRegion>) -> Result<()> {
166         if region.is_some() {
167             return Err(Error::LogAddress);
168         }
169 
170         // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
171         let ret = unsafe { ioctl_with_ref(self, VHOST_SET_LOG_BASE(), &base) };
172         ioctl_result(ret, ())
173     }
174 
175     /// Specify an eventfd file descriptor to signal on log write.
set_log_fd(&self, fd: RawFd) -> Result<()>176     fn set_log_fd(&self, fd: RawFd) -> Result<()> {
177         let val: i32 = fd;
178         // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
179         let ret = unsafe { ioctl_with_ref(self, VHOST_SET_LOG_FD(), &val) };
180         ioctl_result(ret, ())
181     }
182 
183     /// Set the number of descriptors in the vring.
184     ///
185     /// # Arguments
186     /// * `queue_index` - Index of the queue to set descriptor count for.
187     /// * `num` - Number of descriptors in the queue.
set_vring_num(&self, queue_index: usize, num: u16) -> Result<()>188     fn set_vring_num(&self, queue_index: usize, num: u16) -> Result<()> {
189         let vring_state = vhost_vring_state {
190             index: queue_index as u32,
191             num: u32::from(num),
192         };
193 
194         // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
195         let ret = unsafe { ioctl_with_ref(self, VHOST_SET_VRING_NUM(), &vring_state) };
196         ioctl_result(ret, ())
197     }
198 
199     /// Set the addresses for a given vring.
200     ///
201     /// # Arguments
202     /// * `queue_index` - Index of the queue to set addresses for.
203     /// * `config_data` - Vring config data, addresses of desc_table, avail_ring
204     ///     and used_ring are in the guest address space.
set_vring_addr(&self, queue_index: usize, config_data: &VringConfigData) -> Result<()>205     fn set_vring_addr(&self, queue_index: usize, config_data: &VringConfigData) -> Result<()> {
206         if !self.is_valid(config_data) {
207             return Err(Error::InvalidQueue);
208         }
209 
210         // The addresses are converted into the host address space.
211         let vring_addr = config_data.to_vhost_vring_addr(queue_index, self.mem())?;
212 
213         // SAFETY: This ioctl is called on a valid vhost fd and has its
214         // return value checked.
215         let ret = unsafe { ioctl_with_ref(self, VHOST_SET_VRING_ADDR(), &vring_addr) };
216         ioctl_result(ret, ())
217     }
218 
219     /// Set the first index to look for available descriptors.
220     ///
221     /// # Arguments
222     /// * `queue_index` - Index of the queue to modify.
223     /// * `num` - Index where available descriptors start.
set_vring_base(&self, queue_index: usize, base: u16) -> Result<()>224     fn set_vring_base(&self, queue_index: usize, base: u16) -> Result<()> {
225         let vring_state = vhost_vring_state {
226             index: queue_index as u32,
227             num: u32::from(base),
228         };
229 
230         // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
231         let ret = unsafe { ioctl_with_ref(self, VHOST_SET_VRING_BASE(), &vring_state) };
232         ioctl_result(ret, ())
233     }
234 
235     /// Get a bitmask of supported virtio/vhost features.
get_vring_base(&self, queue_index: usize) -> Result<u32>236     fn get_vring_base(&self, queue_index: usize) -> Result<u32> {
237         let vring_state = vhost_vring_state {
238             index: queue_index as u32,
239             num: 0,
240         };
241         // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
242         let ret = unsafe { ioctl_with_ref(self, VHOST_GET_VRING_BASE(), &vring_state) };
243         ioctl_result(ret, vring_state.num)
244     }
245 
246     /// Set the eventfd to trigger when buffers have been used by the host.
247     ///
248     /// # Arguments
249     /// * `queue_index` - Index of the queue to modify.
250     /// * `fd` - EventFd to trigger.
set_vring_call(&self, queue_index: usize, fd: &EventFd) -> Result<()>251     fn set_vring_call(&self, queue_index: usize, fd: &EventFd) -> Result<()> {
252         let vring_file = vhost_vring_file {
253             index: queue_index as u32,
254             fd: fd.as_raw_fd(),
255         };
256 
257         // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
258         let ret = unsafe { ioctl_with_ref(self, VHOST_SET_VRING_CALL(), &vring_file) };
259         ioctl_result(ret, ())
260     }
261 
262     /// Set the eventfd that will be signaled by the guest when buffers are
263     /// available for the host to process.
264     ///
265     /// # Arguments
266     /// * `queue_index` - Index of the queue to modify.
267     /// * `fd` - EventFd that will be signaled from guest.
set_vring_kick(&self, queue_index: usize, fd: &EventFd) -> Result<()>268     fn set_vring_kick(&self, queue_index: usize, fd: &EventFd) -> Result<()> {
269         let vring_file = vhost_vring_file {
270             index: queue_index as u32,
271             fd: fd.as_raw_fd(),
272         };
273 
274         // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
275         let ret = unsafe { ioctl_with_ref(self, VHOST_SET_VRING_KICK(), &vring_file) };
276         ioctl_result(ret, ())
277     }
278 
279     /// Set the eventfd to signal an error from the vhost backend.
280     ///
281     /// # Arguments
282     /// * `queue_index` - Index of the queue to modify.
283     /// * `fd` - EventFd that will be signaled from the backend.
set_vring_err(&self, queue_index: usize, fd: &EventFd) -> Result<()>284     fn set_vring_err(&self, queue_index: usize, fd: &EventFd) -> Result<()> {
285         let vring_file = vhost_vring_file {
286             index: queue_index as u32,
287             fd: fd.as_raw_fd(),
288         };
289 
290         // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
291         let ret = unsafe { ioctl_with_ref(self, VHOST_SET_VRING_ERR(), &vring_file) };
292         ioctl_result(ret, ())
293     }
294 }
295 
296 /// Interface to handle in-kernel backend features.
297 pub trait VhostKernFeatures: Sized + AsRawFd {
298     /// Get features acked with the vhost backend.
get_backend_features_acked(&self) -> u64299     fn get_backend_features_acked(&self) -> u64;
300 
301     /// Set features acked with the vhost backend.
set_backend_features_acked(&mut self, features: u64)302     fn set_backend_features_acked(&mut self, features: u64);
303 
304     /// Get a bitmask of supported vhost backend features.
get_backend_features(&self) -> Result<u64>305     fn get_backend_features(&self) -> Result<u64> {
306         let mut avail_features: u64 = 0;
307 
308         let ret =
309             // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
310             unsafe { ioctl_with_mut_ref(self, VHOST_GET_BACKEND_FEATURES(), &mut avail_features) };
311         ioctl_result(ret, avail_features)
312     }
313 
314     /// Inform the vhost subsystem which backend features to enable. This should
315     /// be a subset of supported features from VHOST_GET_BACKEND_FEATURES.
316     ///
317     /// # Arguments
318     /// * `features` - Bitmask of features to set.
set_backend_features(&mut self, features: u64) -> Result<()>319     fn set_backend_features(&mut self, features: u64) -> Result<()> {
320         // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
321         let ret = unsafe { ioctl_with_ref(self, VHOST_SET_BACKEND_FEATURES(), &features) };
322 
323         if ret >= 0 {
324             self.set_backend_features_acked(features);
325         }
326 
327         ioctl_result(ret, ())
328     }
329 }
330 
331 /// Handle IOTLB messeges for in-kernel vhost device backend.
332 impl<I: VhostKernBackend + VhostKernFeatures> VhostIotlbBackend for I {
333     /// Send an IOTLB message to the in-kernel vhost backend.
334     ///
335     /// # Arguments
336     /// * `msg` - IOTLB message to send.
send_iotlb_msg(&self, msg: &VhostIotlbMsg) -> Result<()>337     fn send_iotlb_msg(&self, msg: &VhostIotlbMsg) -> Result<()> {
338         let ret: ssize_t;
339 
340         if self.get_backend_features_acked() & (1 << VHOST_BACKEND_F_IOTLB_MSG_V2) != 0 {
341             let mut msg_v2 = vhost_msg_v2 {
342                 type_: VHOST_IOTLB_MSG_V2,
343                 ..Default::default()
344             };
345 
346             msg_v2.__bindgen_anon_1.iotlb.iova = msg.iova;
347             msg_v2.__bindgen_anon_1.iotlb.size = msg.size;
348             msg_v2.__bindgen_anon_1.iotlb.uaddr = msg.userspace_addr;
349             msg_v2.__bindgen_anon_1.iotlb.perm = msg.perm as u8;
350             msg_v2.__bindgen_anon_1.iotlb.type_ = msg.msg_type as u8;
351 
352             // SAFETY: This is safe because we are using a valid vhost fd, and
353             // a valid pointer and size to the vhost_msg_v2 structure.
354             ret = unsafe {
355                 write(
356                     self.as_raw_fd(),
357                     &msg_v2 as *const vhost_msg_v2 as *const c_void,
358                     mem::size_of::<vhost_msg_v2>(),
359                 )
360             };
361         } else {
362             let mut msg_v1 = vhost_msg {
363                 type_: VHOST_IOTLB_MSG,
364                 ..Default::default()
365             };
366 
367             msg_v1.__bindgen_anon_1.iotlb.iova = msg.iova;
368             msg_v1.__bindgen_anon_1.iotlb.size = msg.size;
369             msg_v1.__bindgen_anon_1.iotlb.uaddr = msg.userspace_addr;
370             msg_v1.__bindgen_anon_1.iotlb.perm = msg.perm as u8;
371             msg_v1.__bindgen_anon_1.iotlb.type_ = msg.msg_type as u8;
372 
373             // SAFETY: This is safe because we are using a valid vhost fd, and
374             // a valid pointer and size to the vhost_msg structure.
375             ret = unsafe {
376                 write(
377                     self.as_raw_fd(),
378                     &msg_v1 as *const vhost_msg as *const c_void,
379                     mem::size_of::<vhost_msg>(),
380                 )
381             };
382         }
383 
384         io_result(ret, ())
385     }
386 }
387 
388 impl VhostIotlbMsgParser for vhost_msg {
parse(&self, msg: &mut VhostIotlbMsg) -> Result<()>389     fn parse(&self, msg: &mut VhostIotlbMsg) -> Result<()> {
390         if self.type_ != VHOST_IOTLB_MSG {
391             return Err(Error::InvalidIotlbMsg);
392         }
393 
394         // SAFETY: We trust the kernel to return a structure with the union
395         // fields properly initialized. We are sure it is a vhost_msg, because
396         // we checked that `self.type_` is VHOST_IOTLB_MSG.
397         unsafe {
398             if self.__bindgen_anon_1.iotlb.type_ == 0 {
399                 return Err(Error::InvalidIotlbMsg);
400             }
401 
402             msg.iova = self.__bindgen_anon_1.iotlb.iova;
403             msg.size = self.__bindgen_anon_1.iotlb.size;
404             msg.userspace_addr = self.__bindgen_anon_1.iotlb.uaddr;
405             msg.perm = mem::transmute(self.__bindgen_anon_1.iotlb.perm);
406             msg.msg_type = mem::transmute(self.__bindgen_anon_1.iotlb.type_);
407         }
408 
409         Ok(())
410     }
411 }
412 
413 impl VhostIotlbMsgParser for vhost_msg_v2 {
parse(&self, msg: &mut VhostIotlbMsg) -> Result<()>414     fn parse(&self, msg: &mut VhostIotlbMsg) -> Result<()> {
415         if self.type_ != VHOST_IOTLB_MSG_V2 {
416             return Err(Error::InvalidIotlbMsg);
417         }
418 
419         // SAFETY: We trust the kernel to return a structure with the union
420         // fields properly initialized. We are sure it is a vhost_msg_v2, because
421         // we checked that `self.type_` is VHOST_IOTLB_MSG_V2.
422         unsafe {
423             if self.__bindgen_anon_1.iotlb.type_ == 0 {
424                 return Err(Error::InvalidIotlbMsg);
425             }
426 
427             msg.iova = self.__bindgen_anon_1.iotlb.iova;
428             msg.size = self.__bindgen_anon_1.iotlb.size;
429             msg.userspace_addr = self.__bindgen_anon_1.iotlb.uaddr;
430             msg.perm = mem::transmute(self.__bindgen_anon_1.iotlb.perm);
431             msg.msg_type = mem::transmute(self.__bindgen_anon_1.iotlb.type_);
432         }
433 
434         Ok(())
435     }
436 }
437 
438 impl VringConfigData {
439     /// Convert the config (guest address space) into vhost_vring_addr
440     /// (host address space).
to_vhost_vring_addr<AS: GuestAddressSpace>( &self, queue_index: usize, mem: &AS, ) -> Result<vhost_vring_addr>441     pub fn to_vhost_vring_addr<AS: GuestAddressSpace>(
442         &self,
443         queue_index: usize,
444         mem: &AS,
445     ) -> Result<vhost_vring_addr> {
446         let desc_addr = mem
447             .memory()
448             .get_host_address(GuestAddress(self.desc_table_addr))
449             .map_err(|_| Error::DescriptorTableAddress)?;
450         let avail_addr = mem
451             .memory()
452             .get_host_address(GuestAddress(self.avail_ring_addr))
453             .map_err(|_| Error::AvailAddress)?;
454         let used_addr = mem
455             .memory()
456             .get_host_address(GuestAddress(self.used_ring_addr))
457             .map_err(|_| Error::UsedAddress)?;
458         Ok(vhost_vring_addr {
459             index: queue_index as u32,
460             flags: self.flags,
461             desc_user_addr: desc_addr as u64,
462             used_user_addr: used_addr as u64,
463             avail_user_addr: avail_addr as u64,
464             log_guest_addr: self.get_log_addr(),
465         })
466     }
467 }
468