1 // Copyright (C) 2019 Alibaba Cloud Computing. All rights reserved.
2 // SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause
3 //
4 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
5 // Use of this source code is governed by a BSD-style license that can be
6 // found in the LICENSE-BSD-Google file.
7
8 //! Traits and structs to control Linux in-kernel vhost drivers.
9 //!
//! The initial vhost implementation is part of the Linux kernel and uses an ioctl interface to
//! communicate with userspace applications. This submodule provides ioctl-based interfaces to
//! control the in-kernel net, scsi, and vsock vhost drivers.
13
14 use std::mem;
15 use std::os::unix::io::{AsRawFd, RawFd};
16
17 use libc::{c_void, ssize_t, write};
18
19 use vm_memory::{Address, GuestAddress, GuestAddressSpace, GuestMemory, GuestUsize};
20 use vmm_sys_util::eventfd::EventFd;
21 use vmm_sys_util::ioctl::{ioctl, ioctl_with_mut_ref, ioctl_with_ptr, ioctl_with_ref};
22
23 use super::{
24 Error, Result, VhostBackend, VhostIotlbBackend, VhostIotlbMsg, VhostIotlbMsgParser,
25 VhostUserDirtyLogRegion, VhostUserMemoryRegionInfo, VringConfigData, VHOST_MAX_MEMORY_REGIONS,
26 };
27
28 pub mod vhost_binding;
29 use self::vhost_binding::*;
30
31 #[cfg(feature = "vhost-net")]
32 pub mod net;
33 #[cfg(feature = "vhost-vdpa")]
34 pub mod vdpa;
35 #[cfg(feature = "vhost-vsock")]
36 pub mod vsock;
37
38 #[inline]
ioctl_result<T>(rc: i32, res: T) -> Result<T>39 fn ioctl_result<T>(rc: i32, res: T) -> Result<T> {
40 if rc < 0 {
41 Err(Error::IoctlError(std::io::Error::last_os_error()))
42 } else {
43 Ok(res)
44 }
45 }
46
47 #[inline]
io_result<T>(rc: isize, res: T) -> Result<T>48 fn io_result<T>(rc: isize, res: T) -> Result<T> {
49 if rc < 0 {
50 Err(Error::IOError(std::io::Error::last_os_error()))
51 } else {
52 Ok(res)
53 }
54 }
55
56 /// Represent an in-kernel vhost device backend.
57 pub trait VhostKernBackend: AsRawFd {
58 /// Associated type to access guest memory.
59 type AS: GuestAddressSpace;
60
61 /// Get the object to access the guest's memory.
mem(&self) -> &Self::AS62 fn mem(&self) -> &Self::AS;
63
64 /// Check whether the ring configuration is valid.
is_valid(&self, config_data: &VringConfigData) -> bool65 fn is_valid(&self, config_data: &VringConfigData) -> bool {
66 let queue_size = config_data.queue_size;
67 if queue_size > config_data.queue_max_size
68 || queue_size == 0
69 || (queue_size & (queue_size - 1)) != 0
70 {
71 return false;
72 }
73
74 let m = self.mem().memory();
75 let desc_table_size = 16 * u64::from(queue_size) as GuestUsize;
76 let avail_ring_size = 6 + 2 * u64::from(queue_size) as GuestUsize;
77 let used_ring_size = 6 + 8 * u64::from(queue_size) as GuestUsize;
78 if GuestAddress(config_data.desc_table_addr)
79 .checked_add(desc_table_size)
80 .map_or(true, |v| !m.address_in_range(v))
81 {
82 return false;
83 }
84 if GuestAddress(config_data.avail_ring_addr)
85 .checked_add(avail_ring_size)
86 .map_or(true, |v| !m.address_in_range(v))
87 {
88 return false;
89 }
90 if GuestAddress(config_data.used_ring_addr)
91 .checked_add(used_ring_size)
92 .map_or(true, |v| !m.address_in_range(v))
93 {
94 return false;
95 }
96
97 config_data.is_log_addr_valid()
98 }
99 }
100
101 impl<T: VhostKernBackend> VhostBackend for T {
102 /// Get a bitmask of supported virtio/vhost features.
get_features(&self) -> Result<u64>103 fn get_features(&self) -> Result<u64> {
104 let mut avail_features: u64 = 0;
105 // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
106 let ret = unsafe { ioctl_with_mut_ref(self, VHOST_GET_FEATURES(), &mut avail_features) };
107 ioctl_result(ret, avail_features)
108 }
109
110 /// Inform the vhost subsystem which features to enable. This should be a subset of
111 /// supported features from VHOST_GET_FEATURES.
112 ///
113 /// # Arguments
114 /// * `features` - Bitmask of features to set.
set_features(&self, features: u64) -> Result<()>115 fn set_features(&self, features: u64) -> Result<()> {
116 // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
117 let ret = unsafe { ioctl_with_ref(self, VHOST_SET_FEATURES(), &features) };
118 ioctl_result(ret, ())
119 }
120
121 /// Set the current process as the owner of this file descriptor.
122 /// This must be run before any other vhost ioctls.
set_owner(&self) -> Result<()>123 fn set_owner(&self) -> Result<()> {
124 // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
125 let ret = unsafe { ioctl(self, VHOST_SET_OWNER()) };
126 ioctl_result(ret, ())
127 }
128
reset_owner(&self) -> Result<()>129 fn reset_owner(&self) -> Result<()> {
130 // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
131 let ret = unsafe { ioctl(self, VHOST_RESET_OWNER()) };
132 ioctl_result(ret, ())
133 }
134
135 /// Set the guest memory mappings for vhost to use.
set_mem_table(&self, regions: &[VhostUserMemoryRegionInfo]) -> Result<()>136 fn set_mem_table(&self, regions: &[VhostUserMemoryRegionInfo]) -> Result<()> {
137 if regions.is_empty() || regions.len() > VHOST_MAX_MEMORY_REGIONS {
138 return Err(Error::InvalidGuestMemory);
139 }
140
141 let mut vhost_memory = VhostMemory::new(regions.len() as u16);
142 for (index, region) in regions.iter().enumerate() {
143 vhost_memory.set_region(
144 index as u32,
145 &vhost_memory_region {
146 guest_phys_addr: region.guest_phys_addr,
147 memory_size: region.memory_size,
148 userspace_addr: region.userspace_addr,
149 flags_padding: 0u64,
150 },
151 )?;
152 }
153
154 // SAFETY: This ioctl is called with a pointer that is valid for the lifetime
155 // of this function. The kernel will make its own copy of the memory
156 // tables. As always, check the return value.
157 let ret = unsafe { ioctl_with_ptr(self, VHOST_SET_MEM_TABLE(), vhost_memory.as_ptr()) };
158 ioctl_result(ret, ())
159 }
160
161 /// Set base address for page modification logging.
162 ///
163 /// # Arguments
164 /// * `base` - Base address for page modification logging.
set_log_base(&self, base: u64, region: Option<VhostUserDirtyLogRegion>) -> Result<()>165 fn set_log_base(&self, base: u64, region: Option<VhostUserDirtyLogRegion>) -> Result<()> {
166 if region.is_some() {
167 return Err(Error::LogAddress);
168 }
169
170 // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
171 let ret = unsafe { ioctl_with_ref(self, VHOST_SET_LOG_BASE(), &base) };
172 ioctl_result(ret, ())
173 }
174
175 /// Specify an eventfd file descriptor to signal on log write.
set_log_fd(&self, fd: RawFd) -> Result<()>176 fn set_log_fd(&self, fd: RawFd) -> Result<()> {
177 let val: i32 = fd;
178 // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
179 let ret = unsafe { ioctl_with_ref(self, VHOST_SET_LOG_FD(), &val) };
180 ioctl_result(ret, ())
181 }
182
183 /// Set the number of descriptors in the vring.
184 ///
185 /// # Arguments
186 /// * `queue_index` - Index of the queue to set descriptor count for.
187 /// * `num` - Number of descriptors in the queue.
set_vring_num(&self, queue_index: usize, num: u16) -> Result<()>188 fn set_vring_num(&self, queue_index: usize, num: u16) -> Result<()> {
189 let vring_state = vhost_vring_state {
190 index: queue_index as u32,
191 num: u32::from(num),
192 };
193
194 // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
195 let ret = unsafe { ioctl_with_ref(self, VHOST_SET_VRING_NUM(), &vring_state) };
196 ioctl_result(ret, ())
197 }
198
199 /// Set the addresses for a given vring.
200 ///
201 /// # Arguments
202 /// * `queue_index` - Index of the queue to set addresses for.
203 /// * `config_data` - Vring config data, addresses of desc_table, avail_ring
204 /// and used_ring are in the guest address space.
set_vring_addr(&self, queue_index: usize, config_data: &VringConfigData) -> Result<()>205 fn set_vring_addr(&self, queue_index: usize, config_data: &VringConfigData) -> Result<()> {
206 if !self.is_valid(config_data) {
207 return Err(Error::InvalidQueue);
208 }
209
210 // The addresses are converted into the host address space.
211 let vring_addr = config_data.to_vhost_vring_addr(queue_index, self.mem())?;
212
213 // SAFETY: This ioctl is called on a valid vhost fd and has its
214 // return value checked.
215 let ret = unsafe { ioctl_with_ref(self, VHOST_SET_VRING_ADDR(), &vring_addr) };
216 ioctl_result(ret, ())
217 }
218
219 /// Set the first index to look for available descriptors.
220 ///
221 /// # Arguments
222 /// * `queue_index` - Index of the queue to modify.
223 /// * `num` - Index where available descriptors start.
set_vring_base(&self, queue_index: usize, base: u16) -> Result<()>224 fn set_vring_base(&self, queue_index: usize, base: u16) -> Result<()> {
225 let vring_state = vhost_vring_state {
226 index: queue_index as u32,
227 num: u32::from(base),
228 };
229
230 // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
231 let ret = unsafe { ioctl_with_ref(self, VHOST_SET_VRING_BASE(), &vring_state) };
232 ioctl_result(ret, ())
233 }
234
235 /// Get a bitmask of supported virtio/vhost features.
get_vring_base(&self, queue_index: usize) -> Result<u32>236 fn get_vring_base(&self, queue_index: usize) -> Result<u32> {
237 let vring_state = vhost_vring_state {
238 index: queue_index as u32,
239 num: 0,
240 };
241 // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
242 let ret = unsafe { ioctl_with_ref(self, VHOST_GET_VRING_BASE(), &vring_state) };
243 ioctl_result(ret, vring_state.num)
244 }
245
246 /// Set the eventfd to trigger when buffers have been used by the host.
247 ///
248 /// # Arguments
249 /// * `queue_index` - Index of the queue to modify.
250 /// * `fd` - EventFd to trigger.
set_vring_call(&self, queue_index: usize, fd: &EventFd) -> Result<()>251 fn set_vring_call(&self, queue_index: usize, fd: &EventFd) -> Result<()> {
252 let vring_file = vhost_vring_file {
253 index: queue_index as u32,
254 fd: fd.as_raw_fd(),
255 };
256
257 // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
258 let ret = unsafe { ioctl_with_ref(self, VHOST_SET_VRING_CALL(), &vring_file) };
259 ioctl_result(ret, ())
260 }
261
262 /// Set the eventfd that will be signaled by the guest when buffers are
263 /// available for the host to process.
264 ///
265 /// # Arguments
266 /// * `queue_index` - Index of the queue to modify.
267 /// * `fd` - EventFd that will be signaled from guest.
set_vring_kick(&self, queue_index: usize, fd: &EventFd) -> Result<()>268 fn set_vring_kick(&self, queue_index: usize, fd: &EventFd) -> Result<()> {
269 let vring_file = vhost_vring_file {
270 index: queue_index as u32,
271 fd: fd.as_raw_fd(),
272 };
273
274 // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
275 let ret = unsafe { ioctl_with_ref(self, VHOST_SET_VRING_KICK(), &vring_file) };
276 ioctl_result(ret, ())
277 }
278
279 /// Set the eventfd to signal an error from the vhost backend.
280 ///
281 /// # Arguments
282 /// * `queue_index` - Index of the queue to modify.
283 /// * `fd` - EventFd that will be signaled from the backend.
set_vring_err(&self, queue_index: usize, fd: &EventFd) -> Result<()>284 fn set_vring_err(&self, queue_index: usize, fd: &EventFd) -> Result<()> {
285 let vring_file = vhost_vring_file {
286 index: queue_index as u32,
287 fd: fd.as_raw_fd(),
288 };
289
290 // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
291 let ret = unsafe { ioctl_with_ref(self, VHOST_SET_VRING_ERR(), &vring_file) };
292 ioctl_result(ret, ())
293 }
294 }
295
296 /// Interface to handle in-kernel backend features.
297 pub trait VhostKernFeatures: Sized + AsRawFd {
298 /// Get features acked with the vhost backend.
get_backend_features_acked(&self) -> u64299 fn get_backend_features_acked(&self) -> u64;
300
301 /// Set features acked with the vhost backend.
set_backend_features_acked(&mut self, features: u64)302 fn set_backend_features_acked(&mut self, features: u64);
303
304 /// Get a bitmask of supported vhost backend features.
get_backend_features(&self) -> Result<u64>305 fn get_backend_features(&self) -> Result<u64> {
306 let mut avail_features: u64 = 0;
307
308 let ret =
309 // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
310 unsafe { ioctl_with_mut_ref(self, VHOST_GET_BACKEND_FEATURES(), &mut avail_features) };
311 ioctl_result(ret, avail_features)
312 }
313
314 /// Inform the vhost subsystem which backend features to enable. This should
315 /// be a subset of supported features from VHOST_GET_BACKEND_FEATURES.
316 ///
317 /// # Arguments
318 /// * `features` - Bitmask of features to set.
set_backend_features(&mut self, features: u64) -> Result<()>319 fn set_backend_features(&mut self, features: u64) -> Result<()> {
320 // SAFETY: This ioctl is called on a valid vhost fd and has its return value checked.
321 let ret = unsafe { ioctl_with_ref(self, VHOST_SET_BACKEND_FEATURES(), &features) };
322
323 if ret >= 0 {
324 self.set_backend_features_acked(features);
325 }
326
327 ioctl_result(ret, ())
328 }
329 }
330
331 /// Handle IOTLB messeges for in-kernel vhost device backend.
332 impl<I: VhostKernBackend + VhostKernFeatures> VhostIotlbBackend for I {
333 /// Send an IOTLB message to the in-kernel vhost backend.
334 ///
335 /// # Arguments
336 /// * `msg` - IOTLB message to send.
send_iotlb_msg(&self, msg: &VhostIotlbMsg) -> Result<()>337 fn send_iotlb_msg(&self, msg: &VhostIotlbMsg) -> Result<()> {
338 let ret: ssize_t;
339
340 if self.get_backend_features_acked() & (1 << VHOST_BACKEND_F_IOTLB_MSG_V2) != 0 {
341 let mut msg_v2 = vhost_msg_v2 {
342 type_: VHOST_IOTLB_MSG_V2,
343 ..Default::default()
344 };
345
346 msg_v2.__bindgen_anon_1.iotlb.iova = msg.iova;
347 msg_v2.__bindgen_anon_1.iotlb.size = msg.size;
348 msg_v2.__bindgen_anon_1.iotlb.uaddr = msg.userspace_addr;
349 msg_v2.__bindgen_anon_1.iotlb.perm = msg.perm as u8;
350 msg_v2.__bindgen_anon_1.iotlb.type_ = msg.msg_type as u8;
351
352 // SAFETY: This is safe because we are using a valid vhost fd, and
353 // a valid pointer and size to the vhost_msg_v2 structure.
354 ret = unsafe {
355 write(
356 self.as_raw_fd(),
357 &msg_v2 as *const vhost_msg_v2 as *const c_void,
358 mem::size_of::<vhost_msg_v2>(),
359 )
360 };
361 } else {
362 let mut msg_v1 = vhost_msg {
363 type_: VHOST_IOTLB_MSG,
364 ..Default::default()
365 };
366
367 msg_v1.__bindgen_anon_1.iotlb.iova = msg.iova;
368 msg_v1.__bindgen_anon_1.iotlb.size = msg.size;
369 msg_v1.__bindgen_anon_1.iotlb.uaddr = msg.userspace_addr;
370 msg_v1.__bindgen_anon_1.iotlb.perm = msg.perm as u8;
371 msg_v1.__bindgen_anon_1.iotlb.type_ = msg.msg_type as u8;
372
373 // SAFETY: This is safe because we are using a valid vhost fd, and
374 // a valid pointer and size to the vhost_msg structure.
375 ret = unsafe {
376 write(
377 self.as_raw_fd(),
378 &msg_v1 as *const vhost_msg as *const c_void,
379 mem::size_of::<vhost_msg>(),
380 )
381 };
382 }
383
384 io_result(ret, ())
385 }
386 }
387
388 impl VhostIotlbMsgParser for vhost_msg {
parse(&self, msg: &mut VhostIotlbMsg) -> Result<()>389 fn parse(&self, msg: &mut VhostIotlbMsg) -> Result<()> {
390 if self.type_ != VHOST_IOTLB_MSG {
391 return Err(Error::InvalidIotlbMsg);
392 }
393
394 // SAFETY: We trust the kernel to return a structure with the union
395 // fields properly initialized. We are sure it is a vhost_msg, because
396 // we checked that `self.type_` is VHOST_IOTLB_MSG.
397 unsafe {
398 if self.__bindgen_anon_1.iotlb.type_ == 0 {
399 return Err(Error::InvalidIotlbMsg);
400 }
401
402 msg.iova = self.__bindgen_anon_1.iotlb.iova;
403 msg.size = self.__bindgen_anon_1.iotlb.size;
404 msg.userspace_addr = self.__bindgen_anon_1.iotlb.uaddr;
405 msg.perm = mem::transmute(self.__bindgen_anon_1.iotlb.perm);
406 msg.msg_type = mem::transmute(self.__bindgen_anon_1.iotlb.type_);
407 }
408
409 Ok(())
410 }
411 }
412
413 impl VhostIotlbMsgParser for vhost_msg_v2 {
parse(&self, msg: &mut VhostIotlbMsg) -> Result<()>414 fn parse(&self, msg: &mut VhostIotlbMsg) -> Result<()> {
415 if self.type_ != VHOST_IOTLB_MSG_V2 {
416 return Err(Error::InvalidIotlbMsg);
417 }
418
419 // SAFETY: We trust the kernel to return a structure with the union
420 // fields properly initialized. We are sure it is a vhost_msg_v2, because
421 // we checked that `self.type_` is VHOST_IOTLB_MSG_V2.
422 unsafe {
423 if self.__bindgen_anon_1.iotlb.type_ == 0 {
424 return Err(Error::InvalidIotlbMsg);
425 }
426
427 msg.iova = self.__bindgen_anon_1.iotlb.iova;
428 msg.size = self.__bindgen_anon_1.iotlb.size;
429 msg.userspace_addr = self.__bindgen_anon_1.iotlb.uaddr;
430 msg.perm = mem::transmute(self.__bindgen_anon_1.iotlb.perm);
431 msg.msg_type = mem::transmute(self.__bindgen_anon_1.iotlb.type_);
432 }
433
434 Ok(())
435 }
436 }
437
438 impl VringConfigData {
439 /// Convert the config (guest address space) into vhost_vring_addr
440 /// (host address space).
to_vhost_vring_addr<AS: GuestAddressSpace>( &self, queue_index: usize, mem: &AS, ) -> Result<vhost_vring_addr>441 pub fn to_vhost_vring_addr<AS: GuestAddressSpace>(
442 &self,
443 queue_index: usize,
444 mem: &AS,
445 ) -> Result<vhost_vring_addr> {
446 let desc_addr = mem
447 .memory()
448 .get_host_address(GuestAddress(self.desc_table_addr))
449 .map_err(|_| Error::DescriptorTableAddress)?;
450 let avail_addr = mem
451 .memory()
452 .get_host_address(GuestAddress(self.avail_ring_addr))
453 .map_err(|_| Error::AvailAddress)?;
454 let used_addr = mem
455 .memory()
456 .get_host_address(GuestAddress(self.used_ring_addr))
457 .map_err(|_| Error::UsedAddress)?;
458 Ok(vhost_vring_addr {
459 index: queue_index as u32,
460 flags: self.flags,
461 desc_user_addr: desc_addr as u64,
462 used_user_addr: used_addr as u64,
463 avail_user_addr: avail_addr as u64,
464 log_guest_addr: self.get_log_addr(),
465 })
466 }
467 }
468