1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
3  */
4 #include <linux/iommu.h>
5 #include <linux/iommufd.h>
6 #include <linux/slab.h>
7 #include <uapi/linux/iommufd.h>
8 
9 #include "../iommu-priv.h"
10 #include "io_pagetable.h"
11 #include "iommufd_private.h"
12 
13 static bool allow_unsafe_interrupts;
14 module_param(allow_unsafe_interrupts, bool, S_IRUGO | S_IWUSR);
15 MODULE_PARM_DESC(
16 	allow_unsafe_interrupts,
17 	"Allow IOMMUFD to bind to devices even if the platform cannot isolate "
18 	"the MSI interrupt window. Enabling this is a security weakness.");
19 
20 static void iommufd_group_release(struct kref *kref)
21 {
22 	struct iommufd_group *igroup =
23 		container_of(kref, struct iommufd_group, ref);
24 
25 	WARN_ON(igroup->hwpt || !list_empty(&igroup->device_list));
26 
27 	xa_cmpxchg(&igroup->ictx->groups, iommu_group_id(igroup->group), igroup,
28 		   NULL, GFP_KERNEL);
29 	iommu_group_put(igroup->group);
30 	mutex_destroy(&igroup->lock);
31 	kfree(igroup);
32 }
33 
34 static void iommufd_put_group(struct iommufd_group *group)
35 {
36 	kref_put(&group->ref, iommufd_group_release);
37 }
38 
39 static bool iommufd_group_try_get(struct iommufd_group *igroup,
40 				  struct iommu_group *group)
41 {
42 	if (!igroup)
43 		return false;
44 	/*
45 	 * Group IDs cannot be re-used until the group is put back, which does
46 	 * not happen while we can still get an igroup pointer under the xa_lock.
47 	 */
48 	if (WARN_ON(igroup->group != group))
49 		return false;
50 	return kref_get_unless_zero(&igroup->ref);
51 }
52 
53 /*
54  * iommufd needs to store some more data for each iommu_group, so we keep a
55  * parallel xarray indexed by iommu_group ID to hold it instead of putting it
56  * in the core structure. To keep things simple, the iommufd_group memory is
57  * unique within the iommufd_ctx, which makes it easy to check that there are
58  * no memory leaks.
59  */
60 static struct iommufd_group *iommufd_get_group(struct iommufd_ctx *ictx,
61 					       struct device *dev)
62 {
63 	struct iommufd_group *new_igroup;
64 	struct iommufd_group *cur_igroup;
65 	struct iommufd_group *igroup;
66 	struct iommu_group *group;
67 	unsigned int id;
68 
69 	group = iommu_group_get(dev);
70 	if (!group)
71 		return ERR_PTR(-ENODEV);
72 
73 	id = iommu_group_id(group);
74 
75 	xa_lock(&ictx->groups);
76 	igroup = xa_load(&ictx->groups, id);
77 	if (iommufd_group_try_get(igroup, group)) {
78 		xa_unlock(&ictx->groups);
79 		iommu_group_put(group);
80 		return igroup;
81 	}
82 	xa_unlock(&ictx->groups);
83 
84 	new_igroup = kzalloc(sizeof(*new_igroup), GFP_KERNEL);
85 	if (!new_igroup) {
86 		iommu_group_put(group);
87 		return ERR_PTR(-ENOMEM);
88 	}
89 
90 	kref_init(&new_igroup->ref);
91 	mutex_init(&new_igroup->lock);
92 	INIT_LIST_HEAD(&new_igroup->device_list);
93 	new_igroup->sw_msi_start = PHYS_ADDR_MAX;
94 	/* group reference moves into new_igroup */
95 	new_igroup->group = group;
96 
97 	/*
98 	 * The ictx is not additionally refcounted here because all objects using
99 	 * an igroup must put it before their destroy completes.
100 	 */
101 	new_igroup->ictx = ictx;
102 
103 	/*
104 	 * We dropped the lock so igroup is invalid. NULL is a safe and likely
105 	 * value to assume for the xa_cmpxchg algorithm.
106 	 */
107 	cur_igroup = NULL;
108 	xa_lock(&ictx->groups);
109 	while (true) {
110 		igroup = __xa_cmpxchg(&ictx->groups, id, cur_igroup, new_igroup,
111 				      GFP_KERNEL);
112 		if (xa_is_err(igroup)) {
113 			xa_unlock(&ictx->groups);
114 			iommufd_put_group(new_igroup);
115 			return ERR_PTR(xa_err(igroup));
116 		}
117 
118 		/* new_igroup was successfully installed */
119 		if (cur_igroup == igroup) {
120 			xa_unlock(&ictx->groups);
121 			return new_igroup;
122 		}
123 
124 		/* Check again if the current group is any good */
125 		if (iommufd_group_try_get(igroup, group)) {
126 			xa_unlock(&ictx->groups);
127 			iommufd_put_group(new_igroup);
128 			return igroup;
129 		}
130 		cur_igroup = igroup;
131 	}
132 }
133 
134 void iommufd_device_destroy(struct iommufd_object *obj)
135 {
136 	struct iommufd_device *idev =
137 		container_of(obj, struct iommufd_device, obj);
138 
139 	iommu_device_release_dma_owner(idev->dev);
140 	iommufd_put_group(idev->igroup);
141 	if (!iommufd_selftest_is_mock_dev(idev->dev))
142 		iommufd_ctx_put(idev->ictx);
143 }
144 
145 /**
146  * iommufd_device_bind - Bind a physical device to an iommu fd
147  * @ictx: iommufd file descriptor
148  * @dev: Pointer to a physical device struct
149  * @id: Output ID number to return to userspace for this device
150  *
151  * A successful bind establishes ownership over the device and returns a
152  * struct iommufd_device pointer; otherwise an error pointer is returned.
153  *
154  * A driver using this API must set driver_managed_dma and must not touch
155  * the device until this routine succeeds and establishes ownership.
156  *
157  * Binding a PCI device places the entire RID under iommufd control.
158  *
159  * The caller must undo this with iommufd_device_unbind()
160  */
161 struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx,
162 					   struct device *dev, u32 *id)
163 {
164 	struct iommufd_device *idev;
165 	struct iommufd_group *igroup;
166 	int rc;
167 
168 	/*
169 	 * iommufd always sets IOMMU_CACHE because we offer no way for userspace
170 	 * to restore cache coherency.
171 	 */
172 	if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY))
173 		return ERR_PTR(-EINVAL);
174 
175 	igroup = iommufd_get_group(ictx, dev);
176 	if (IS_ERR(igroup))
177 		return ERR_CAST(igroup);
178 
179 	/*
180 	 * For historical compatibility with VFIO, the insecure interrupt path is
181 	 * allowed if the module parameter is set. Secure/Isolated means that a
182 	 * MemWr operation from the device (e.g. a simple DMA) cannot trigger an
183 	 * interrupt outside this iommufd context.
184 	 */
185 	if (!iommufd_selftest_is_mock_dev(dev) &&
186 	    !iommu_group_has_isolated_msi(igroup->group)) {
187 		if (!allow_unsafe_interrupts) {
188 			rc = -EPERM;
189 			goto out_group_put;
190 		}
191 
192 		dev_warn(
193 			dev,
194 			"MSI interrupts are not secure, they cannot be isolated by the platform. "
195 			"Check that platform features like interrupt remapping are enabled. "
196 			"Use the \"allow_unsafe_interrupts\" module parameter to override\n");
197 	}
198 
199 	rc = iommu_device_claim_dma_owner(dev, ictx);
200 	if (rc)
201 		goto out_group_put;
202 
203 	idev = iommufd_object_alloc(ictx, idev, IOMMUFD_OBJ_DEVICE);
204 	if (IS_ERR(idev)) {
205 		rc = PTR_ERR(idev);
206 		goto out_release_owner;
207 	}
208 	idev->ictx = ictx;
209 	if (!iommufd_selftest_is_mock_dev(dev))
210 		iommufd_ctx_get(ictx);
211 	idev->dev = dev;
212 	idev->enforce_cache_coherency =
213 		device_iommu_capable(dev, IOMMU_CAP_ENFORCE_CACHE_COHERENCY);
214 	/* The calling driver is a user until iommufd_device_unbind() */
215 	refcount_inc(&idev->obj.users);
216 	/* igroup refcount moves into iommufd_device */
217 	idev->igroup = igroup;
218 	mutex_init(&idev->iopf_lock);
219 
220 	/*
221 	 * If the caller fails after this success it must call
222 	 * iommufd_device_unbind() which is safe since we hold this refcount.
223 	 * This also means the device is a leaf in the graph and no other object
224 	 * can take a reference on it.
225 	 */
226 	iommufd_object_finalize(ictx, &idev->obj);
227 	*id = idev->obj.id;
228 	return idev;
229 
230 out_release_owner:
231 	iommu_device_release_dma_owner(dev);
232 out_group_put:
233 	iommufd_put_group(igroup);
234 	return ERR_PTR(rc);
235 }
236 EXPORT_SYMBOL_NS_GPL(iommufd_device_bind, "IOMMUFD");
237 
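/*
 * Usage sketch for iommufd_device_bind(): the shape of a probe-time call from
 * a hypothetical consumer driver (one that sets driver_managed_dma). The
 * example_*() name and out-parameters are assumptions for illustration.
 */
static int example_bind(struct iommufd_ctx *ictx, struct device *dev,
			struct iommufd_device **out_idev, u32 *out_dev_id)
{
	struct iommufd_device *idev;

	/* Takes DMA ownership of the whole group/RID for this ictx */
	idev = iommufd_device_bind(ictx, dev, out_dev_id);
	if (IS_ERR(idev))
		return PTR_ERR(idev);

	/* *out_dev_id is reported to userspace; undo with iommufd_device_unbind() */
	*out_idev = idev;
	return 0;
}
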
238 /**
239  * iommufd_ctx_has_group - True if any device within the group is bound
240  *                         to the ictx
241  * @ictx: iommufd file descriptor
242  * @group: Pointer to a physical iommu_group struct
243  *
244  * True if any device within the group has been bound to this ictx, e.g. via
245  * iommufd_device_bind(), therefore implying ictx ownership of the group.
246  */
247 bool iommufd_ctx_has_group(struct iommufd_ctx *ictx, struct iommu_group *group)
248 {
249 	struct iommufd_object *obj;
250 	unsigned long index;
251 
252 	if (!ictx || !group)
253 		return false;
254 
255 	xa_lock(&ictx->objects);
256 	xa_for_each(&ictx->objects, index, obj) {
257 		if (obj->type == IOMMUFD_OBJ_DEVICE &&
258 		    container_of(obj, struct iommufd_device, obj)
259 				    ->igroup->group == group) {
260 			xa_unlock(&ictx->objects);
261 			return true;
262 		}
263 	}
264 	xa_unlock(&ictx->objects);
265 	return false;
266 }
267 EXPORT_SYMBOL_NS_GPL(iommufd_ctx_has_group, "IOMMUFD");
268 
269 /**
270  * iommufd_device_unbind - Undo iommufd_device_bind()
271  * @idev: Device returned by iommufd_device_bind()
272  *
273  * Release the device from iommufd control. DMA ownership returns back to
274  * unowned, with DMA controlled by the DMA API. This invalidates the
275  * iommufd_device pointer; other APIs that consume it must not be called
276  * concurrently.
277  */
278 void iommufd_device_unbind(struct iommufd_device *idev)
279 {
280 	iommufd_object_destroy_user(idev->ictx, &idev->obj);
281 }
282 EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, "IOMMUFD");
283 
284 struct iommufd_ctx *iommufd_device_to_ictx(struct iommufd_device *idev)
285 {
286 	return idev->ictx;
287 }
288 EXPORT_SYMBOL_NS_GPL(iommufd_device_to_ictx, "IOMMUFD");
289 
290 u32 iommufd_device_to_id(struct iommufd_device *idev)
291 {
292 	return idev->obj.id;
293 }
294 EXPORT_SYMBOL_NS_GPL(iommufd_device_to_id, "IOMMUFD");
295 
296 static int iommufd_group_setup_msi(struct iommufd_group *igroup,
297 				   struct iommufd_hwpt_paging *hwpt_paging)
298 {
299 	phys_addr_t sw_msi_start = igroup->sw_msi_start;
300 	int rc;
301 
302 	/*
303 	 * If the IOMMU driver gives an IOMMU_RESV_SW_MSI then it is asking us to
304 	 * call iommu_get_msi_cookie() on its behalf. This is necessary to set up
305 	 * the MSI window so iommu_dma_prepare_msi() can install pages into our
306 	 * domain after request_irq(). If it is not done, interrupts will not
307 	 * work on this domain.
308 	 *
309 	 * FIXME: This is conceptually broken for iommufd since we want to allow
310 	 * userspace to change the domains, e.g. switch from an identity IOAS to a
311 	 * DMA IOAS. There is currently no way to create an MSI window that
312 	 * matches what the IRQ layer actually expects in a newly created
313 	 * domain.
314 	 */
315 	if (sw_msi_start != PHYS_ADDR_MAX && !hwpt_paging->msi_cookie) {
316 		rc = iommu_get_msi_cookie(hwpt_paging->common.domain,
317 					  sw_msi_start);
318 		if (rc)
319 			return rc;
320 
321 		/*
322 		 * iommu_get_msi_cookie() can only be called once per domain;
323 		 * it returns -EBUSY on later calls.
324 		 */
325 		hwpt_paging->msi_cookie = true;
326 	}
327 	return 0;
328 }
329 
330 static int
331 iommufd_device_attach_reserved_iova(struct iommufd_device *idev,
332 				    struct iommufd_hwpt_paging *hwpt_paging)
333 {
334 	int rc;
335 
336 	lockdep_assert_held(&idev->igroup->lock);
337 
338 	rc = iopt_table_enforce_dev_resv_regions(&hwpt_paging->ioas->iopt,
339 						 idev->dev,
340 						 &idev->igroup->sw_msi_start);
341 	if (rc)
342 		return rc;
343 
344 	if (list_empty(&idev->igroup->device_list)) {
345 		rc = iommufd_group_setup_msi(idev->igroup, hwpt_paging);
346 		if (rc) {
347 			iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt,
348 						  idev->dev);
349 			return rc;
350 		}
351 	}
352 	return 0;
353 }
354 
355 /* The device attach/detach/replace helpers for attach_handle */
356 
357 /* Check if idev is attached to igroup->hwpt */
358 static bool iommufd_device_is_attached(struct iommufd_device *idev)
359 {
360 	struct iommufd_device *cur;
361 
362 	list_for_each_entry(cur, &idev->igroup->device_list, group_item)
363 		if (cur == idev)
364 			return true;
365 	return false;
366 }
367 
368 static int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt,
369 				      struct iommufd_device *idev)
370 {
371 	struct iommufd_attach_handle *handle;
372 	int rc;
373 
374 	lockdep_assert_held(&idev->igroup->lock);
375 
376 	handle = kzalloc(sizeof(*handle), GFP_KERNEL);
377 	if (!handle)
378 		return -ENOMEM;
379 
380 	if (hwpt->fault) {
381 		rc = iommufd_fault_iopf_enable(idev);
382 		if (rc)
383 			goto out_free_handle;
384 	}
385 
386 	handle->idev = idev;
387 	rc = iommu_attach_group_handle(hwpt->domain, idev->igroup->group,
388 				       &handle->handle);
389 	if (rc)
390 		goto out_disable_iopf;
391 
392 	return 0;
393 
394 out_disable_iopf:
395 	if (hwpt->fault)
396 		iommufd_fault_iopf_disable(idev);
397 out_free_handle:
398 	kfree(handle);
399 	return rc;
400 }
401 
402 static struct iommufd_attach_handle *
403 iommufd_device_get_attach_handle(struct iommufd_device *idev)
404 {
405 	struct iommu_attach_handle *handle;
406 
407 	lockdep_assert_held(&idev->igroup->lock);
408 
409 	handle =
410 		iommu_attach_handle_get(idev->igroup->group, IOMMU_NO_PASID, 0);
411 	if (IS_ERR(handle))
412 		return NULL;
413 	return to_iommufd_handle(handle);
414 }
415 
416 static void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt,
417 				       struct iommufd_device *idev)
418 {
419 	struct iommufd_attach_handle *handle;
420 
421 	handle = iommufd_device_get_attach_handle(idev);
422 	iommu_detach_group_handle(hwpt->domain, idev->igroup->group);
423 	if (hwpt->fault) {
424 		iommufd_auto_response_faults(hwpt, handle);
425 		iommufd_fault_iopf_disable(idev);
426 	}
427 	kfree(handle);
428 }
429 
430 static int iommufd_hwpt_replace_device(struct iommufd_device *idev,
431 				       struct iommufd_hw_pagetable *hwpt,
432 				       struct iommufd_hw_pagetable *old)
433 {
434 	struct iommufd_attach_handle *handle, *old_handle =
435 		iommufd_device_get_attach_handle(idev);
436 	int rc;
437 
438 	handle = kzalloc(sizeof(*handle), GFP_KERNEL);
439 	if (!handle)
440 		return -ENOMEM;
441 
442 	if (hwpt->fault && !old->fault) {
443 		rc = iommufd_fault_iopf_enable(idev);
444 		if (rc)
445 			goto out_free_handle;
446 	}
447 
448 	handle->idev = idev;
449 	rc = iommu_replace_group_handle(idev->igroup->group, hwpt->domain,
450 					&handle->handle);
451 	if (rc)
452 		goto out_disable_iopf;
453 
454 	if (old->fault) {
455 		iommufd_auto_response_faults(hwpt, old_handle);
456 		if (!hwpt->fault)
457 			iommufd_fault_iopf_disable(idev);
458 	}
459 	kfree(old_handle);
460 
461 	return 0;
462 
463 out_disable_iopf:
464 	if (hwpt->fault && !old->fault)
465 		iommufd_fault_iopf_disable(idev);
466 out_free_handle:
467 	kfree(handle);
468 	return rc;
469 }
470 
471 int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
472 				struct iommufd_device *idev)
473 {
474 	struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt);
475 	int rc;
476 
477 	mutex_lock(&idev->igroup->lock);
478 
479 	if (idev->igroup->hwpt != NULL && idev->igroup->hwpt != hwpt) {
480 		rc = -EINVAL;
481 		goto err_unlock;
482 	}
483 
484 	if (hwpt_paging) {
485 		rc = iommufd_device_attach_reserved_iova(idev, hwpt_paging);
486 		if (rc)
487 			goto err_unlock;
488 	}
489 
490 	/*
491 	 * Only attach to the group once for the first device that is in the
492 	 * group. All the other devices will follow this attachment. The user
493 	 * should attach every device individually to the hwpt as the per-device
494 	 * reserved regions are only updated during individual device
495 	 * attachment.
496 	 */
497 	if (list_empty(&idev->igroup->device_list)) {
498 		rc = iommufd_hwpt_attach_device(hwpt, idev);
499 		if (rc)
500 			goto err_unresv;
501 		idev->igroup->hwpt = hwpt;
502 	}
503 	refcount_inc(&hwpt->obj.users);
504 	list_add_tail(&idev->group_item, &idev->igroup->device_list);
505 	mutex_unlock(&idev->igroup->lock);
506 	return 0;
507 err_unresv:
508 	if (hwpt_paging)
509 		iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev);
510 err_unlock:
511 	mutex_unlock(&idev->igroup->lock);
512 	return rc;
513 }
514 
515 struct iommufd_hw_pagetable *
516 iommufd_hw_pagetable_detach(struct iommufd_device *idev)
517 {
518 	struct iommufd_hw_pagetable *hwpt = idev->igroup->hwpt;
519 	struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt);
520 
521 	mutex_lock(&idev->igroup->lock);
522 	list_del(&idev->group_item);
523 	if (list_empty(&idev->igroup->device_list)) {
524 		iommufd_hwpt_detach_device(hwpt, idev);
525 		idev->igroup->hwpt = NULL;
526 	}
527 	if (hwpt_paging)
528 		iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev);
529 	mutex_unlock(&idev->igroup->lock);
530 
531 	/* Caller must destroy hwpt */
532 	return hwpt;
533 }
534 
535 static struct iommufd_hw_pagetable *
536 iommufd_device_do_attach(struct iommufd_device *idev,
537 			 struct iommufd_hw_pagetable *hwpt)
538 {
539 	int rc;
540 
541 	rc = iommufd_hw_pagetable_attach(hwpt, idev);
542 	if (rc)
543 		return ERR_PTR(rc);
544 	return NULL;
545 }
546 
547 static void
548 iommufd_group_remove_reserved_iova(struct iommufd_group *igroup,
549 				   struct iommufd_hwpt_paging *hwpt_paging)
550 {
551 	struct iommufd_device *cur;
552 
553 	lockdep_assert_held(&igroup->lock);
554 
555 	list_for_each_entry(cur, &igroup->device_list, group_item)
556 		iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, cur->dev);
557 }
558 
559 static int
560 iommufd_group_do_replace_reserved_iova(struct iommufd_group *igroup,
561 				       struct iommufd_hwpt_paging *hwpt_paging)
562 {
563 	struct iommufd_hwpt_paging *old_hwpt_paging;
564 	struct iommufd_device *cur;
565 	int rc;
566 
567 	lockdep_assert_held(&igroup->lock);
568 
569 	old_hwpt_paging = find_hwpt_paging(igroup->hwpt);
570 	if (!old_hwpt_paging || hwpt_paging->ioas != old_hwpt_paging->ioas) {
571 		list_for_each_entry(cur, &igroup->device_list, group_item) {
572 			rc = iopt_table_enforce_dev_resv_regions(
573 				&hwpt_paging->ioas->iopt, cur->dev, NULL);
574 			if (rc)
575 				goto err_unresv;
576 		}
577 	}
578 
579 	rc = iommufd_group_setup_msi(igroup, hwpt_paging);
580 	if (rc)
581 		goto err_unresv;
582 	return 0;
583 
584 err_unresv:
585 	iommufd_group_remove_reserved_iova(igroup, hwpt_paging);
586 	return rc;
587 }
588 
589 static struct iommufd_hw_pagetable *
590 iommufd_device_do_replace(struct iommufd_device *idev,
591 			  struct iommufd_hw_pagetable *hwpt)
592 {
593 	struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt);
594 	struct iommufd_hwpt_paging *old_hwpt_paging;
595 	struct iommufd_group *igroup = idev->igroup;
596 	struct iommufd_hw_pagetable *old_hwpt;
597 	unsigned int num_devices;
598 	int rc;
599 
600 	mutex_lock(&idev->igroup->lock);
601 
602 	if (igroup->hwpt == NULL) {
603 		rc = -EINVAL;
604 		goto err_unlock;
605 	}
606 
607 	if (!iommufd_device_is_attached(idev)) {
608 		rc = -EINVAL;
609 		goto err_unlock;
610 	}
611 
612 	if (hwpt == igroup->hwpt) {
613 		mutex_unlock(&idev->igroup->lock);
614 		return NULL;
615 	}
616 
617 	old_hwpt = igroup->hwpt;
618 	if (hwpt_paging) {
619 		rc = iommufd_group_do_replace_reserved_iova(igroup, hwpt_paging);
620 		if (rc)
621 			goto err_unlock;
622 	}
623 
624 	rc = iommufd_hwpt_replace_device(idev, hwpt, old_hwpt);
625 	if (rc)
626 		goto err_unresv;
627 
628 	old_hwpt_paging = find_hwpt_paging(old_hwpt);
629 	if (old_hwpt_paging &&
630 	    (!hwpt_paging || hwpt_paging->ioas != old_hwpt_paging->ioas))
631 		iommufd_group_remove_reserved_iova(igroup, old_hwpt_paging);
632 
633 	igroup->hwpt = hwpt;
634 
635 	num_devices = list_count_nodes(&igroup->device_list);
636 	/*
637 	 * Move the refcounts held by the device_list to the new hwpt. Retain a
638 	 * refcount for this thread as the caller will free it.
639 	 */
640 	refcount_add(num_devices, &hwpt->obj.users);
641 	if (num_devices > 1)
642 		WARN_ON(refcount_sub_and_test(num_devices - 1,
643 					      &old_hwpt->obj.users));
644 	mutex_unlock(&idev->igroup->lock);
645 
646 	/* Caller must destroy old_hwpt */
647 	return old_hwpt;
648 err_unresv:
649 	if (hwpt_paging)
650 		iommufd_group_remove_reserved_iova(igroup, hwpt_paging);
651 err_unlock:
652 	mutex_unlock(&idev->igroup->lock);
653 	return ERR_PTR(rc);
654 }
655 
656 typedef struct iommufd_hw_pagetable *(*attach_fn)(
657 	struct iommufd_device *idev, struct iommufd_hw_pagetable *hwpt);
658 
659 /*
660  * When automatically managing the domains we search for a compatible domain in
661  * the iopt and, if one is found, use it; otherwise we create a new domain.
662  * Automatic domain selection will never pick a manually created domain.
663  */
664 static struct iommufd_hw_pagetable *
665 iommufd_device_auto_get_domain(struct iommufd_device *idev,
666 			       struct iommufd_ioas *ioas, u32 *pt_id,
667 			       attach_fn do_attach)
668 {
669 	/*
670 	 * iommufd_hw_pagetable_attach() is called by
671 	 * iommufd_hw_pagetable_alloc() in immediate attachment mode, same as
672 	 * iommufd_device_do_attach(). So if we are in this mode then we prefer
673 	 * to use the immediate_attach path as it supports drivers that can't
674 	 * directly allocate a domain.
675 	 */
676 	bool immediate_attach = do_attach == iommufd_device_do_attach;
677 	struct iommufd_hw_pagetable *destroy_hwpt;
678 	struct iommufd_hwpt_paging *hwpt_paging;
679 	struct iommufd_hw_pagetable *hwpt;
680 
681 	/*
682 	 * There is no differentiation when domains are allocated, so any domain
683 	 * that is willing to attach to the device is interchangeable with any
684 	 * other.
685 	 */
686 	mutex_lock(&ioas->mutex);
687 	list_for_each_entry(hwpt_paging, &ioas->hwpt_list, hwpt_item) {
688 		if (!hwpt_paging->auto_domain)
689 			continue;
690 
691 		hwpt = &hwpt_paging->common;
692 		if (!iommufd_lock_obj(&hwpt->obj))
693 			continue;
694 		destroy_hwpt = (*do_attach)(idev, hwpt);
695 		if (IS_ERR(destroy_hwpt)) {
696 			iommufd_put_object(idev->ictx, &hwpt->obj);
697 			/*
698 			 * -EINVAL means the domain is incompatible with the
699 			 * device. Other error codes should propagate to
700 			 * userspace as failure. Success means the domain is
701 			 * attached.
702 			 */
703 			if (PTR_ERR(destroy_hwpt) == -EINVAL)
704 				continue;
705 			goto out_unlock;
706 		}
707 		*pt_id = hwpt->obj.id;
708 		iommufd_put_object(idev->ictx, &hwpt->obj);
709 		goto out_unlock;
710 	}
711 
712 	hwpt_paging = iommufd_hwpt_paging_alloc(idev->ictx, ioas, idev, 0,
713 						immediate_attach, NULL);
714 	if (IS_ERR(hwpt_paging)) {
715 		destroy_hwpt = ERR_CAST(hwpt_paging);
716 		goto out_unlock;
717 	}
718 	hwpt = &hwpt_paging->common;
719 
720 	if (!immediate_attach) {
721 		destroy_hwpt = (*do_attach)(idev, hwpt);
722 		if (IS_ERR(destroy_hwpt))
723 			goto out_abort;
724 	} else {
725 		destroy_hwpt = NULL;
726 	}
727 
728 	hwpt_paging->auto_domain = true;
729 	*pt_id = hwpt->obj.id;
730 
731 	iommufd_object_finalize(idev->ictx, &hwpt->obj);
732 	mutex_unlock(&ioas->mutex);
733 	return destroy_hwpt;
734 
735 out_abort:
736 	iommufd_object_abort_and_destroy(idev->ictx, &hwpt->obj);
737 out_unlock:
738 	mutex_unlock(&ioas->mutex);
739 	return destroy_hwpt;
740 }
741 
742 static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id,
743 				    attach_fn do_attach)
744 {
745 	struct iommufd_hw_pagetable *destroy_hwpt;
746 	struct iommufd_object *pt_obj;
747 
748 	pt_obj = iommufd_get_object(idev->ictx, *pt_id, IOMMUFD_OBJ_ANY);
749 	if (IS_ERR(pt_obj))
750 		return PTR_ERR(pt_obj);
751 
752 	switch (pt_obj->type) {
753 	case IOMMUFD_OBJ_HWPT_NESTED:
754 	case IOMMUFD_OBJ_HWPT_PAGING: {
755 		struct iommufd_hw_pagetable *hwpt =
756 			container_of(pt_obj, struct iommufd_hw_pagetable, obj);
757 
758 		destroy_hwpt = (*do_attach)(idev, hwpt);
759 		if (IS_ERR(destroy_hwpt))
760 			goto out_put_pt_obj;
761 		break;
762 	}
763 	case IOMMUFD_OBJ_IOAS: {
764 		struct iommufd_ioas *ioas =
765 			container_of(pt_obj, struct iommufd_ioas, obj);
766 
767 		destroy_hwpt = iommufd_device_auto_get_domain(idev, ioas, pt_id,
768 							      do_attach);
769 		if (IS_ERR(destroy_hwpt))
770 			goto out_put_pt_obj;
771 		break;
772 	}
773 	default:
774 		destroy_hwpt = ERR_PTR(-EINVAL);
775 		goto out_put_pt_obj;
776 	}
777 	iommufd_put_object(idev->ictx, pt_obj);
778 
779 	/* This destruction has to be after we unlock everything */
780 	if (destroy_hwpt)
781 		iommufd_hw_pagetable_put(idev->ictx, destroy_hwpt);
782 	return 0;
783 
784 out_put_pt_obj:
785 	iommufd_put_object(idev->ictx, pt_obj);
786 	return PTR_ERR(destroy_hwpt);
787 }
788 
789 /**
790  * iommufd_device_attach - Connect a device to an iommu_domain
791  * @idev: device to attach
792  * @pt_id: Input an IOMMUFD_OBJ_IOAS or IOMMUFD_OBJ_HWPT_PAGING
793  *         Output the IOMMUFD_OBJ_HWPT_PAGING ID
794  *
795  * This connects the device to an iommu_domain, either automatically or manually
796  * selected. Once this completes, the device can do DMA.
797  *
798  * The caller should return the resulting pt_id back to userspace.
799  * This function is undone by calling iommufd_device_detach().
800  */
801 int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id)
802 {
803 	int rc;
804 
805 	rc = iommufd_device_change_pt(idev, pt_id, &iommufd_device_do_attach);
806 	if (rc)
807 		return rc;
808 
809 	/*
810 	 * Pairs with iommufd_device_detach() - catches caller bugs attempting
811 	 * to destroy a device with an attachment.
812 	 */
813 	refcount_inc(&idev->obj.users);
814 	return 0;
815 }
816 EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, "IOMMUFD");
817 
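/*
 * Usage sketch for iommufd_device_attach()/iommufd_device_detach(): pt_id
 * carries an IOAS (or HWPT) ID in and the backing IOMMUFD_OBJ_HWPT_PAGING ID
 * out. The example_*() names are assumptions for illustration; idev comes
 * from a prior iommufd_device_bind().
 */
static int example_attach_ioas(struct iommufd_device *idev, u32 ioas_id,
			       u32 *out_hwpt_id)
{
	u32 pt_id = ioas_id;
	int rc;

	rc = iommufd_device_attach(idev, &pt_id);
	if (rc)
		return rc;

	/* pt_id now holds the HWPT ID to report back to userspace */
	*out_hwpt_id = pt_id;
	return 0;
}

static void example_teardown(struct iommufd_device *idev)
{
	/* Detach must precede unbind; attach holds a reference on idev */
	iommufd_device_detach(idev);
	iommufd_device_unbind(idev);
}
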
818 /**
819  * iommufd_device_replace - Change the device's iommu_domain
820  * @idev: device to change
821  * @pt_id: Input an IOMMUFD_OBJ_IOAS or IOMMUFD_OBJ_HWPT_PAGING
822  *         Output the IOMMUFD_OBJ_HWPT_PAGING ID
823  *
824  * This is the same as::
825  *
826  *   iommufd_device_detach();
827  *   iommufd_device_attach();
828  *
829  * If it fails then no change is made to the attachment. The iommu driver may
830  * implement this so there is no disruption in translation. This can only be
831  * called if iommufd_device_attach() has already succeeded.
832  */
833 int iommufd_device_replace(struct iommufd_device *idev, u32 *pt_id)
834 {
835 	return iommufd_device_change_pt(idev, pt_id,
836 					&iommufd_device_do_replace);
837 }
838 EXPORT_SYMBOL_NS_GPL(iommufd_device_replace, "IOMMUFD");
839 
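/*
 * Usage sketch for iommufd_device_replace(): switch an already attached
 * device to a different IOAS or HWPT in one step, without detaching first.
 * The example_*() name is an assumption for illustration.
 */
static int example_replace_pt(struct iommufd_device *idev, u32 new_pt_id,
			      u32 *out_hwpt_id)
{
	u32 pt_id = new_pt_id;
	int rc;

	/* Only valid once iommufd_device_attach() has already succeeded */
	rc = iommufd_device_replace(idev, &pt_id);
	if (rc)
		return rc;	/* on failure the old attachment is unchanged */

	*out_hwpt_id = pt_id;
	return 0;
}
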
840 /**
841  * iommufd_device_detach - Disconnect a device from an iommu_domain
842  * @idev: device to detach
843  *
844  * Undo iommufd_device_attach(). This disconnects the idev from the previously
845  * attached pt_id. The device returns to blocked DMA translation.
846  */
847 void iommufd_device_detach(struct iommufd_device *idev)
848 {
849 	struct iommufd_hw_pagetable *hwpt;
850 
851 	hwpt = iommufd_hw_pagetable_detach(idev);
852 	iommufd_hw_pagetable_put(idev->ictx, hwpt);
853 	refcount_dec(&idev->obj.users);
854 }
855 EXPORT_SYMBOL_NS_GPL(iommufd_device_detach, "IOMMUFD");
856 
857 /*
858  * On success, this refcount_inc()s a valid new_ioas and refcount_dec()s the
859  * valid cur_ioas (access->ioas). A caller passing in a valid new_ioas should
860  * still call iommufd_put_object() if it did an iommufd_get_object() for it.
861  */
862 static int iommufd_access_change_ioas(struct iommufd_access *access,
863 				      struct iommufd_ioas *new_ioas)
864 {
865 	u32 iopt_access_list_id = access->iopt_access_list_id;
866 	struct iommufd_ioas *cur_ioas = access->ioas;
867 	int rc;
868 
869 	lockdep_assert_held(&access->ioas_lock);
870 
871 	/* We are racing with a concurrent detach, bail */
872 	if (cur_ioas != access->ioas_unpin)
873 		return -EBUSY;
874 
875 	if (cur_ioas == new_ioas)
876 		return 0;
877 
878 	/*
879 	 * Set ioas to NULL to block any further iommufd_access_pin_pages().
880 	 * iommufd_access_unpin_pages() can continue using access->ioas_unpin.
881 	 */
882 	access->ioas = NULL;
883 
884 	if (new_ioas) {
885 		rc = iopt_add_access(&new_ioas->iopt, access);
886 		if (rc) {
887 			access->ioas = cur_ioas;
888 			return rc;
889 		}
890 		refcount_inc(&new_ioas->obj.users);
891 	}
892 
893 	if (cur_ioas) {
894 		if (access->ops->unmap) {
895 			mutex_unlock(&access->ioas_lock);
896 			access->ops->unmap(access->data, 0, ULONG_MAX);
897 			mutex_lock(&access->ioas_lock);
898 		}
899 		iopt_remove_access(&cur_ioas->iopt, access, iopt_access_list_id);
900 		refcount_dec(&cur_ioas->obj.users);
901 	}
902 
903 	access->ioas = new_ioas;
904 	access->ioas_unpin = new_ioas;
905 
906 	return 0;
907 }
908 
909 static int iommufd_access_change_ioas_id(struct iommufd_access *access, u32 id)
910 {
911 	struct iommufd_ioas *ioas = iommufd_get_ioas(access->ictx, id);
912 	int rc;
913 
914 	if (IS_ERR(ioas))
915 		return PTR_ERR(ioas);
916 	rc = iommufd_access_change_ioas(access, ioas);
917 	iommufd_put_object(access->ictx, &ioas->obj);
918 	return rc;
919 }
920 
921 void iommufd_access_destroy_object(struct iommufd_object *obj)
922 {
923 	struct iommufd_access *access =
924 		container_of(obj, struct iommufd_access, obj);
925 
926 	mutex_lock(&access->ioas_lock);
927 	if (access->ioas)
928 		WARN_ON(iommufd_access_change_ioas(access, NULL));
929 	mutex_unlock(&access->ioas_lock);
930 	iommufd_ctx_put(access->ictx);
931 }
932 
933 /**
934  * iommufd_access_create - Create an iommufd_access
935  * @ictx: iommufd file descriptor
936  * @ops: Driver's ops to associate with the access
937  * @data: Opaque data to pass into ops functions
938  * @id: Output ID number to return to userspace for this access
939  *
940  * An iommufd_access allows a driver to read/write to the IOAS without using
941  * DMA. The underlying CPU memory can be accessed using the
942  * iommufd_access_pin_pages() or iommufd_access_rw() functions.
943  *
944  * The provided ops are required to use iommufd_access_pin_pages().
945  */
946 struct iommufd_access *
947 iommufd_access_create(struct iommufd_ctx *ictx,
948 		      const struct iommufd_access_ops *ops, void *data, u32 *id)
949 {
950 	struct iommufd_access *access;
951 
952 	/*
953 	 * There is no uAPI for the access object, but to keep things symmetric
954 	 * use the object infrastructure anyhow.
955 	 */
956 	access = iommufd_object_alloc(ictx, access, IOMMUFD_OBJ_ACCESS);
957 	if (IS_ERR(access))
958 		return access;
959 
960 	access->data = data;
961 	access->ops = ops;
962 
963 	if (ops->needs_pin_pages)
964 		access->iova_alignment = PAGE_SIZE;
965 	else
966 		access->iova_alignment = 1;
967 
968 	/* The calling driver is a user until iommufd_access_destroy() */
969 	refcount_inc(&access->obj.users);
970 	access->ictx = ictx;
971 	iommufd_ctx_get(ictx);
972 	iommufd_object_finalize(ictx, &access->obj);
973 	*id = access->obj.id;
974 	mutex_init(&access->ioas_lock);
975 	return access;
976 }
977 EXPORT_SYMBOL_NS_GPL(iommufd_access_create, "IOMMUFD");
978 
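/*
 * Usage sketch for iommufd_access_create()/iommufd_access_attach(). The
 * example_*() names and the single-range bookkeeping are assumptions for
 * illustration; struct iommufd_access_ops comes from <linux/iommufd.h>. A
 * driver that pins pages must provide an unmap callback that drops its pins
 * for the notified range (see iommufd_access_notify_unmap() below).
 */
struct example_access_data {
	struct iommufd_access *access;
	/* A real driver tracks every range it has pinned; this tracks one */
	unsigned long pinned_iova;
	unsigned long pinned_length;
};

static void example_access_unmap(void *data, unsigned long iova,
				 unsigned long length)
{
	struct example_access_data *priv = data;

	/*
	 * Unpin with the exact iova/length used when pinning. A real driver
	 * would only unpin ranges that intersect [iova, iova + length - 1].
	 */
	if (priv->pinned_length)
		iommufd_access_unpin_pages(priv->access, priv->pinned_iova,
					   priv->pinned_length);
	priv->pinned_length = 0;
}

static const struct iommufd_access_ops example_access_ops = {
	.needs_pin_pages = 1,
	.unmap = example_access_unmap,
};

static int example_access_setup(struct iommufd_ctx *ictx,
				struct example_access_data *priv, u32 ioas_id)
{
	struct iommufd_access *access;
	u32 access_id;
	int rc;

	access = iommufd_access_create(ictx, &example_access_ops, priv,
				       &access_id);
	if (IS_ERR(access))
		return PTR_ERR(access);
	priv->access = access;

	rc = iommufd_access_attach(access, ioas_id);
	if (rc) {
		iommufd_access_destroy(access);
		return rc;
	}
	return 0;
}
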
979 /**
980  * iommufd_access_destroy - Destroy an iommufd_access
981  * @access: The access to destroy
982  *
983  * The caller must stop using the access before destroying it.
984  */
985 void iommufd_access_destroy(struct iommufd_access *access)
986 {
987 	iommufd_object_destroy_user(access->ictx, &access->obj);
988 }
989 EXPORT_SYMBOL_NS_GPL(iommufd_access_destroy, "IOMMUFD");
990 
991 void iommufd_access_detach(struct iommufd_access *access)
992 {
993 	mutex_lock(&access->ioas_lock);
994 	if (WARN_ON(!access->ioas)) {
995 		mutex_unlock(&access->ioas_lock);
996 		return;
997 	}
998 	WARN_ON(iommufd_access_change_ioas(access, NULL));
999 	mutex_unlock(&access->ioas_lock);
1000 }
1001 EXPORT_SYMBOL_NS_GPL(iommufd_access_detach, "IOMMUFD");
1002 
1003 int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id)
1004 {
1005 	int rc;
1006 
1007 	mutex_lock(&access->ioas_lock);
1008 	if (WARN_ON(access->ioas)) {
1009 		mutex_unlock(&access->ioas_lock);
1010 		return -EINVAL;
1011 	}
1012 
1013 	rc = iommufd_access_change_ioas_id(access, ioas_id);
1014 	mutex_unlock(&access->ioas_lock);
1015 	return rc;
1016 }
1017 EXPORT_SYMBOL_NS_GPL(iommufd_access_attach, "IOMMUFD");
1018 
1019 int iommufd_access_replace(struct iommufd_access *access, u32 ioas_id)
1020 {
1021 	int rc;
1022 
1023 	mutex_lock(&access->ioas_lock);
1024 	if (!access->ioas) {
1025 		mutex_unlock(&access->ioas_lock);
1026 		return -ENOENT;
1027 	}
1028 	rc = iommufd_access_change_ioas_id(access, ioas_id);
1029 	mutex_unlock(&access->ioas_lock);
1030 	return rc;
1031 }
1032 EXPORT_SYMBOL_NS_GPL(iommufd_access_replace, "IOMMUFD");
1033 
1034 /**
1035  * iommufd_access_notify_unmap - Notify users of an iopt to stop using it
1036  * @iopt: iopt to work on
1037  * @iova: Starting iova in the iopt
1038  * @length: Number of bytes
1039  *
1040  * After this function returns there should be no users attached to the pages
1041  * linked to this iopt that intersect with iova,length. Anyone that has attached
1042  * a user through iopt_access_pages() needs to detach it through
1043  * iommufd_access_unpin_pages() before this function returns.
1044  *
1045  * iommufd_access_destroy() will wait for any outstanding unmap callback to
1046  * complete. Once iommufd_access_destroy() returns, no unmap ops are running or
1047  * will run in the future. Due to this a driver must not create locking that
1048  * prevents unmap from completing while iommufd_access_destroy() is running.
1049  */
1050 void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
1051 				 unsigned long length)
1052 {
1053 	struct iommufd_ioas *ioas =
1054 		container_of(iopt, struct iommufd_ioas, iopt);
1055 	struct iommufd_access *access;
1056 	unsigned long index;
1057 
1058 	xa_lock(&ioas->iopt.access_list);
1059 	xa_for_each(&ioas->iopt.access_list, index, access) {
1060 		if (!iommufd_lock_obj(&access->obj))
1061 			continue;
1062 		xa_unlock(&ioas->iopt.access_list);
1063 
1064 		access->ops->unmap(access->data, iova, length);
1065 
1066 		iommufd_put_object(access->ictx, &access->obj);
1067 		xa_lock(&ioas->iopt.access_list);
1068 	}
1069 	xa_unlock(&ioas->iopt.access_list);
1070 }
1071 
1072 /**
1073  * iommufd_access_unpin_pages() - Undo iommufd_access_pin_pages()
1074  * @access: IOAS access to act on
1075  * @iova: Starting IOVA
1076  * @length: Number of bytes to access
1077  *
1078  * Release the pinned pages. The caller must stop accessing them before calling
1079  * this. The iova/length must exactly match the one passed to the pin call.
1080  */
1081 void iommufd_access_unpin_pages(struct iommufd_access *access,
1082 				unsigned long iova, unsigned long length)
1083 {
1084 	struct iopt_area_contig_iter iter;
1085 	struct io_pagetable *iopt;
1086 	unsigned long last_iova;
1087 	struct iopt_area *area;
1088 
1089 	if (WARN_ON(!length) ||
1090 	    WARN_ON(check_add_overflow(iova, length - 1, &last_iova)))
1091 		return;
1092 
1093 	mutex_lock(&access->ioas_lock);
1094 	/*
1095 	 * The driver must be doing something wrong if it calls this before an
1096 	 * iommufd_access_attach() or after an iommufd_access_detach().
1097 	 */
1098 	if (WARN_ON(!access->ioas_unpin)) {
1099 		mutex_unlock(&access->ioas_lock);
1100 		return;
1101 	}
1102 	iopt = &access->ioas_unpin->iopt;
1103 
1104 	down_read(&iopt->iova_rwsem);
1105 	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova)
1106 		iopt_area_remove_access(
1107 			area, iopt_area_iova_to_index(area, iter.cur_iova),
1108 			iopt_area_iova_to_index(
1109 				area,
1110 				min(last_iova, iopt_area_last_iova(area))));
1111 	WARN_ON(!iopt_area_contig_done(&iter));
1112 	up_read(&iopt->iova_rwsem);
1113 	mutex_unlock(&access->ioas_lock);
1114 }
1115 EXPORT_SYMBOL_NS_GPL(iommufd_access_unpin_pages, "IOMMUFD");
1116 
1117 static bool iopt_area_contig_is_aligned(struct iopt_area_contig_iter *iter)
1118 {
1119 	if (iopt_area_start_byte(iter->area, iter->cur_iova) % PAGE_SIZE)
1120 		return false;
1121 
1122 	if (!iopt_area_contig_done(iter) &&
1123 	    (iopt_area_start_byte(iter->area, iopt_area_last_iova(iter->area)) %
1124 	     PAGE_SIZE) != (PAGE_SIZE - 1))
1125 		return false;
1126 	return true;
1127 }
1128 
1129 static bool check_area_prot(struct iopt_area *area, unsigned int flags)
1130 {
1131 	if (flags & IOMMUFD_ACCESS_RW_WRITE)
1132 		return area->iommu_prot & IOMMU_WRITE;
1133 	return area->iommu_prot & IOMMU_READ;
1134 }
1135 
1136 /**
1137  * iommufd_access_pin_pages() - Return a list of pages under the iova
1138  * @access: IOAS access to act on
1139  * @iova: Starting IOVA
1140  * @length: Number of bytes to access
1141  * @out_pages: Output page list
1142  * @flags: IOMMUFD_ACCESS_RW_* flags
1143  *
1144  * Pins the pages covering @length bytes starting at @iova and returns the
1145  * struct page * pointers. These can be kmap'd by the caller for CPU access.
1146  *
1147  * The caller must perform iommufd_access_unpin_pages() when done to balance
1148  * this.
1149  *
1150  * This API always requires a page aligned iova. This happens naturally if the
1151  * ioas alignment is >= PAGE_SIZE and the iova is PAGE_SIZE aligned. However
1152  * smaller alignments have corner cases where this API can fail on otherwise
1153  * aligned iova.
1154  */
1155 int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
1156 			     unsigned long length, struct page **out_pages,
1157 			     unsigned int flags)
1158 {
1159 	struct iopt_area_contig_iter iter;
1160 	struct io_pagetable *iopt;
1161 	unsigned long last_iova;
1162 	struct iopt_area *area;
1163 	int rc;
1164 
1165 	/* Driver's ops don't support pin_pages */
1166 	if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
1167 	    WARN_ON(access->iova_alignment != PAGE_SIZE || !access->ops->unmap))
1168 		return -EINVAL;
1169 
1170 	if (!length)
1171 		return -EINVAL;
1172 	if (check_add_overflow(iova, length - 1, &last_iova))
1173 		return -EOVERFLOW;
1174 
1175 	mutex_lock(&access->ioas_lock);
1176 	if (!access->ioas) {
1177 		mutex_unlock(&access->ioas_lock);
1178 		return -ENOENT;
1179 	}
1180 	iopt = &access->ioas->iopt;
1181 
1182 	down_read(&iopt->iova_rwsem);
1183 	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
1184 		unsigned long last = min(last_iova, iopt_area_last_iova(area));
1185 		unsigned long last_index = iopt_area_iova_to_index(area, last);
1186 		unsigned long index =
1187 			iopt_area_iova_to_index(area, iter.cur_iova);
1188 
1189 		if (area->prevent_access ||
1190 		    !iopt_area_contig_is_aligned(&iter)) {
1191 			rc = -EINVAL;
1192 			goto err_remove;
1193 		}
1194 
1195 		if (!check_area_prot(area, flags)) {
1196 			rc = -EPERM;
1197 			goto err_remove;
1198 		}
1199 
1200 		rc = iopt_area_add_access(area, index, last_index, out_pages,
1201 					  flags);
1202 		if (rc)
1203 			goto err_remove;
1204 		out_pages += last_index - index + 1;
1205 	}
1206 	if (!iopt_area_contig_done(&iter)) {
1207 		rc = -ENOENT;
1208 		goto err_remove;
1209 	}
1210 
1211 	up_read(&iopt->iova_rwsem);
1212 	mutex_unlock(&access->ioas_lock);
1213 	return 0;
1214 
1215 err_remove:
1216 	if (iova < iter.cur_iova) {
1217 		last_iova = iter.cur_iova - 1;
1218 		iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova)
1219 			iopt_area_remove_access(
1220 				area,
1221 				iopt_area_iova_to_index(area, iter.cur_iova),
1222 				iopt_area_iova_to_index(
1223 					area, min(last_iova,
1224 						  iopt_area_last_iova(area))));
1225 	}
1226 	up_read(&iopt->iova_rwsem);
1227 	mutex_unlock(&access->ioas_lock);
1228 	return rc;
1229 }
1230 EXPORT_SYMBOL_NS_GPL(iommufd_access_pin_pages, "IOMMUFD");
1231 
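/*
 * Usage sketch for iommufd_access_pin_pages()/iommufd_access_unpin_pages():
 * pin a page-aligned IOVA range, touch it through the CPU, then unpin with
 * the exact same iova/length. The example_*() name is an assumption, and the
 * kmap_local_page() use assumes <linux/highmem.h> is available.
 */
static int example_pin_and_zero(struct iommufd_access *access,
				unsigned long iova, size_t npages)
{
	struct page **pages;
	size_t i;
	int rc;

	pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	rc = iommufd_access_pin_pages(access, iova, npages * PAGE_SIZE, pages,
				      IOMMUFD_ACCESS_RW_WRITE);
	if (rc)
		goto out_free;

	for (i = 0; i != npages; i++) {
		void *va = kmap_local_page(pages[i]);

		memset(va, 0, PAGE_SIZE);
		kunmap_local(va);
	}

	iommufd_access_unpin_pages(access, iova, npages * PAGE_SIZE);
out_free:
	kfree(pages);
	return rc;
}
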
1232 /**
1233  * iommufd_access_rw - Read or write data under the iova
1234  * @access: IOAS access to act on
1235  * @iova: Starting IOVA
1236  * @data: Kernel buffer to copy to/from
1237  * @length: Number of bytes to access
1238  * @flags: IOMMUFD_ACCESS_RW_* flags
1239  *
1240  * Copy kernel data to/from the IOVA range given by @iova/@length. If flags
1241  * indicates IOMMUFD_ACCESS_RW_KTHREAD then a large copy can be optimized
1242  * by changing it into copy_to/from_user().
1243  */
1244 int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
1245 		      void *data, size_t length, unsigned int flags)
1246 {
1247 	struct iopt_area_contig_iter iter;
1248 	struct io_pagetable *iopt;
1249 	struct iopt_area *area;
1250 	unsigned long last_iova;
1251 	int rc = -EINVAL;
1252 
1253 	if (!length)
1254 		return -EINVAL;
1255 	if (check_add_overflow(iova, length - 1, &last_iova))
1256 		return -EOVERFLOW;
1257 
1258 	mutex_lock(&access->ioas_lock);
1259 	if (!access->ioas) {
1260 		mutex_unlock(&access->ioas_lock);
1261 		return -ENOENT;
1262 	}
1263 	iopt = &access->ioas->iopt;
1264 
1265 	down_read(&iopt->iova_rwsem);
1266 	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
1267 		unsigned long last = min(last_iova, iopt_area_last_iova(area));
1268 		unsigned long bytes = (last - iter.cur_iova) + 1;
1269 
1270 		if (area->prevent_access) {
1271 			rc = -EINVAL;
1272 			goto err_out;
1273 		}
1274 
1275 		if (!check_area_prot(area, flags)) {
1276 			rc = -EPERM;
1277 			goto err_out;
1278 		}
1279 
1280 		rc = iopt_pages_rw_access(
1281 			area->pages, iopt_area_start_byte(area, iter.cur_iova),
1282 			data, bytes, flags);
1283 		if (rc)
1284 			goto err_out;
1285 		data += bytes;
1286 	}
1287 	if (!iopt_area_contig_done(&iter))
1288 		rc = -ENOENT;
1289 err_out:
1290 	up_read(&iopt->iova_rwsem);
1291 	mutex_unlock(&access->ioas_lock);
1292 	return rc;
1293 }
1294 EXPORT_SYMBOL_NS_GPL(iommufd_access_rw, "IOMMUFD");
1295 
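/*
 * Usage sketch for iommufd_access_rw(): copy a small descriptor out of the
 * IOAS at a given IOVA without pinning. The example_*() name is an assumption
 * for illustration.
 */
static int example_read_desc(struct iommufd_access *access, unsigned long iova,
			     void *desc, size_t desc_size)
{
	/* flags of 0 requests read access; pass IOMMUFD_ACCESS_RW_WRITE to write */
	return iommufd_access_rw(access, iova, desc, desc_size, 0);
}
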
1296 int iommufd_get_hw_info(struct iommufd_ucmd *ucmd)
1297 {
1298 	struct iommu_hw_info *cmd = ucmd->cmd;
1299 	void __user *user_ptr = u64_to_user_ptr(cmd->data_uptr);
1300 	const struct iommu_ops *ops;
1301 	struct iommufd_device *idev;
1302 	unsigned int data_len;
1303 	unsigned int copy_len;
1304 	void *data;
1305 	int rc;
1306 
1307 	if (cmd->flags || cmd->__reserved)
1308 		return -EOPNOTSUPP;
1309 
1310 	idev = iommufd_get_device(ucmd, cmd->dev_id);
1311 	if (IS_ERR(idev))
1312 		return PTR_ERR(idev);
1313 
1314 	ops = dev_iommu_ops(idev->dev);
1315 	if (ops->hw_info) {
1316 		data = ops->hw_info(idev->dev, &data_len, &cmd->out_data_type);
1317 		if (IS_ERR(data)) {
1318 			rc = PTR_ERR(data);
1319 			goto out_put;
1320 		}
1321 
1322 		 * Drivers that have a hw_info callback should have a unique
1323 		 * drivers that have hw_info callback should have a unique
1324 		 * iommu_hw_info_type.
1325 		 */
1326 		if (WARN_ON_ONCE(cmd->out_data_type ==
1327 				 IOMMU_HW_INFO_TYPE_NONE)) {
1328 			rc = -ENODEV;
1329 			goto out_free;
1330 		}
1331 	} else {
1332 		cmd->out_data_type = IOMMU_HW_INFO_TYPE_NONE;
1333 		data_len = 0;
1334 		data = NULL;
1335 	}
1336 
1337 	copy_len = min(cmd->data_len, data_len);
1338 	if (copy_to_user(user_ptr, data, copy_len)) {
1339 		rc = -EFAULT;
1340 		goto out_free;
1341 	}
1342 
1343 	/*
1344 	 * Zero the trailing bytes if the user buffer is bigger than the
1345 	 * data size the kernel actually has.
1346 	 */
1347 	if (copy_len < cmd->data_len) {
1348 		if (clear_user(user_ptr + copy_len, cmd->data_len - copy_len)) {
1349 			rc = -EFAULT;
1350 			goto out_free;
1351 		}
1352 	}
1353 
1354 	/*
1355 	 * We return the length the kernel supports so userspace may know what
1356 	 * the kernel capability is. It could be larger than the input buffer.
1357 	 */
1358 	cmd->data_len = data_len;
1359 
1360 	cmd->out_capabilities = 0;
1361 	if (device_iommu_capable(idev->dev, IOMMU_CAP_DIRTY_TRACKING))
1362 		cmd->out_capabilities |= IOMMU_HW_CAP_DIRTY_TRACKING;
1363 
1364 	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
1365 out_free:
1366 	kfree(data);
1367 out_put:
1368 	iommufd_put_object(ucmd->ictx, &idev->obj);
1369 	return rc;
1370 }
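
/*
 * Userspace-side sketch of the query handled above, assuming the uAPI layout
 * in <uapi/linux/iommufd.h> (struct iommu_hw_info and IOMMU_GET_HW_INFO); the
 * variable names and the size of the local byte buffer buf are illustrative:
 *
 *	struct iommu_hw_info info = {
 *		.size = sizeof(info),
 *		.dev_id = dev_id,
 *		.data_len = sizeof(buf),
 *		.data_uptr = (uintptr_t)buf,
 *	};
 *
 *	if (ioctl(iommufd, IOMMU_GET_HW_INFO, &info))
 *		return -errno;
 *
 *	info.out_data_type says how to parse buf, info.data_len reports the
 *	full length the kernel supports, and info.out_capabilities carries
 *	bits such as IOMMU_HW_CAP_DIRTY_TRACKING.
 */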
1371