Lines Matching +full:ctx +full:- +full:asid

1 // SPDX-License-Identifier: GPL-2.0
4 * Copyright 2016-2022 HabanaLabs, Ltd.
15 #include <linux/pci-p2pdma.h>
21 /* use small pages for supporting non-pow2 (32M/40M/48M) DRAM phys page sizes */
31 struct asic_fixed_properties *prop = &hdev->asic_prop; in set_alloc_page_size()
38 if (prop->supports_user_set_page_size && args->alloc.page_size) { in set_alloc_page_size()
39 psize = args->alloc.page_size; in set_alloc_page_size()
42 dev_err(hdev->dev, "user page size (%#llx) is not power of 2\n", psize); in set_alloc_page_size()
43 return -EINVAL; in set_alloc_page_size()
46 psize = prop->device_mem_alloc_default_page_size; in set_alloc_page_size()
65 * two chunks - one to return as the result and a remainder to stay in the list.
78 * alloc_device_memory() - allocate device memory.
79 * @ctx: pointer to the context structure.
84 * - Allocate the requested size rounded up to 'dram_page_size' pages.
85 * - Return unique handle for later map/unmap/free.
87 static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args, in alloc_device_memory() argument
90 struct hl_device *hdev = ctx->hdev; in alloc_device_memory()
91 struct hl_vm *vm = &hdev->vm; in alloc_device_memory()
104 num_pgs = DIV_ROUND_UP_ULL(args->alloc.mem_size, page_size); in alloc_device_memory()
108 dev_err(hdev->dev, "Cannot allocate 0 bytes\n"); in alloc_device_memory()
109 return -EINVAL; in alloc_device_memory()
112 contiguous = args->flags & HL_MEM_CONTIGUOUS; in alloc_device_memory()
116 paddr = (uintptr_t) gen_pool_dma_alloc_align(vm->dram_pg_pool, in alloc_device_memory()
119 paddr = gen_pool_alloc(vm->dram_pg_pool, total_size); in alloc_device_memory()
121 dev_err(hdev->dev, in alloc_device_memory()
124 return -ENOMEM; in alloc_device_memory()
130 rc = -ENOMEM; in alloc_device_memory()
134 phys_pg_pack->vm_type = VM_TYPE_PHYS_PACK; in alloc_device_memory()
135 phys_pg_pack->asid = ctx->asid; in alloc_device_memory()
136 phys_pg_pack->npages = num_pgs; in alloc_device_memory()
137 phys_pg_pack->page_size = page_size; in alloc_device_memory()
138 phys_pg_pack->total_size = total_size; in alloc_device_memory()
139 phys_pg_pack->flags = args->flags; in alloc_device_memory()
140 phys_pg_pack->contiguous = contiguous; in alloc_device_memory()
142 phys_pg_pack->pages = kvmalloc_array(num_pgs, sizeof(u64), GFP_KERNEL); in alloc_device_memory()
143 if (ZERO_OR_NULL_PTR(phys_pg_pack->pages)) { in alloc_device_memory()
144 rc = -ENOMEM; in alloc_device_memory()
148 if (phys_pg_pack->contiguous) { in alloc_device_memory()
150 phys_pg_pack->pages[i] = paddr + i * page_size; in alloc_device_memory()
154 phys_pg_pack->pages[i] = in alloc_device_memory()
155 (uintptr_t)gen_pool_dma_alloc_align(vm->dram_pg_pool, in alloc_device_memory()
159 phys_pg_pack->pages[i] = gen_pool_alloc(vm->dram_pg_pool, in alloc_device_memory()
162 if (!phys_pg_pack->pages[i]) { in alloc_device_memory()
163 dev_err(hdev->dev, in alloc_device_memory()
165 rc = -ENOMEM; in alloc_device_memory()
173 spin_lock(&vm->idr_lock); in alloc_device_memory()
174 handle = idr_alloc(&vm->phys_pg_pack_handles, phys_pg_pack, 1, 0, in alloc_device_memory()
176 spin_unlock(&vm->idr_lock); in alloc_device_memory()
179 dev_err(hdev->dev, "Failed to get handle for page\n"); in alloc_device_memory()
180 rc = -EFAULT; in alloc_device_memory()
185 kref_get(&vm->dram_pg_pool_refcount); in alloc_device_memory()
187 phys_pg_pack->handle = handle; in alloc_device_memory()
189 atomic64_add(phys_pg_pack->total_size, &ctx->dram_phys_mem); in alloc_device_memory()
190 atomic64_add(phys_pg_pack->total_size, &hdev->dram_used_mem); in alloc_device_memory()
198 if (!phys_pg_pack->contiguous) in alloc_device_memory()
200 gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[i], in alloc_device_memory()
203 kvfree(phys_pg_pack->pages); in alloc_device_memory()
208 gen_pool_free(vm->dram_pg_pool, paddr, total_size); in alloc_device_memory()
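
alloc_device_memory() above rounds the requested size up to whole DRAM pages before carving them out of the pool and handing back an IDR handle. A minimal userspace sketch (not driver code) of just that rounding arithmetic, with an assumed 32MB DRAM page size and a made-up request size:

#include <stdio.h>
#include <stdint.h>

/* Same arithmetic as the DIV_ROUND_UP_ULL(mem_size, page_size) call above. */
static uint64_t div_round_up(uint64_t n, uint64_t d)
{
	return (n + d - 1) / d;
}

int main(void)
{
	uint64_t page_size = 32ULL * 1024 * 1024;	/* assumed 32MB DRAM page */
	uint64_t mem_size = 100ULL * 1024 * 1024;	/* assumed 100MB request  */
	uint64_t num_pgs = div_round_up(mem_size, page_size);
	uint64_t total_size = num_pgs * page_size;

	printf("pages=%llu total_size=0x%llx\n",
	       (unsigned long long)num_pgs, (unsigned long long)total_size);
	return 0;
}
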
214 * dma_map_host_va() - DMA mapping of the given host virtual address.
221 * - Allocate userptr structure.
222 * - Pin the given host memory using the userptr structure.
223 * - Perform DMA mapping to have the DMA addresses of the pages.
233 rc = -ENOMEM; in dma_map_host_va()
241 userptr->dma_mapped = true; in dma_map_host_va()
242 userptr->dir = DMA_BIDIRECTIONAL; in dma_map_host_va()
243 userptr->vm_type = VM_TYPE_USERPTR; in dma_map_host_va()
247 rc = hl_dma_map_sgtable(hdev, userptr->sgt, DMA_BIDIRECTIONAL); in dma_map_host_va()
249 dev_err(hdev->dev, "failed to map sgt with DMA region\n"); in dma_map_host_va()
265 * dma_unmap_host_va() - DMA unmapping of the given host virtual address.
270 * - Unpins the physical pages.
271 * - Frees the userptr structure.
281 * dram_pg_pool_do_release() - free DRAM pages pool
285 * - Frees the idr structure of physical pages handles.
286 * - Frees the generic pool of DRAM physical pages.
297 idr_destroy(&vm->phys_pg_pack_handles); in dram_pg_pool_do_release()
298 gen_pool_destroy(vm->dram_pg_pool); in dram_pg_pool_do_release()
302 * free_phys_pg_pack() - free physical page pack.
307 * - For DRAM memory only
308 * - iterate over the pack, free each physical block structure by
310 * - Free the hl_vm_phys_pg_pack structure.
315 struct hl_vm *vm = &hdev->vm; in free_phys_pg_pack()
318 if (phys_pg_pack->created_from_userptr) in free_phys_pg_pack()
321 if (phys_pg_pack->contiguous) { in free_phys_pg_pack()
322 gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[0], in free_phys_pg_pack()
323 phys_pg_pack->total_size); in free_phys_pg_pack()
325 for (i = 0; i < phys_pg_pack->npages ; i++) in free_phys_pg_pack()
326 kref_put(&vm->dram_pg_pool_refcount, in free_phys_pg_pack()
329 for (i = 0 ; i < phys_pg_pack->npages ; i++) { in free_phys_pg_pack()
330 gen_pool_free(vm->dram_pg_pool, in free_phys_pg_pack()
331 phys_pg_pack->pages[i], in free_phys_pg_pack()
332 phys_pg_pack->page_size); in free_phys_pg_pack()
333 kref_put(&vm->dram_pg_pool_refcount, in free_phys_pg_pack()
339 kvfree(phys_pg_pack->pages); in free_phys_pg_pack()
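
free_phys_pg_pack() returns each page to the DRAM pool and drops one pool reference per page; dram_pg_pool_do_release() only runs once the last reference is gone. A small userspace sketch (a plain-C stand-in for the kernel's kref, names are illustrative) of that put-with-release-callback pattern:

#include <stdio.h>
#include <stdlib.h>

struct demo_pool {
	int refcount;			/* stand-in for struct kref */
};

static void demo_pool_release(struct demo_pool *p)
{
	printf("last reference dropped, destroying pool\n");
	free(p);
}

/* Mirrors the kref_put() idea: release the object when the count hits zero. */
static void demo_pool_put(struct demo_pool *p)
{
	if (--p->refcount == 0)
		demo_pool_release(p);
}

int main(void)
{
	struct demo_pool *p = malloc(sizeof(*p));
	int npages = 3, i;

	if (!p)
		return 1;
	p->refcount = 1 + npages;	/* the pool itself plus one ref per page */
	for (i = 0; i < npages; i++)
		demo_pool_put(p);	/* freeing each page drops one reference */
	demo_pool_put(p);		/* final put triggers the release        */
	return 0;
}
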
346 * free_device_memory() - free device memory.
347 * @ctx: pointer to the context structure.
351 * - Free the device memory related to the given handle.
353 static int free_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args) in free_device_memory() argument
355 struct hl_device *hdev = ctx->hdev; in free_device_memory()
356 struct hl_vm *vm = &hdev->vm; in free_device_memory()
358 u32 handle = args->free.handle; in free_device_memory()
360 spin_lock(&vm->idr_lock); in free_device_memory()
361 phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle); in free_device_memory()
363 spin_unlock(&vm->idr_lock); in free_device_memory()
364 dev_err(hdev->dev, "free device memory failed, no match for handle %u\n", handle); in free_device_memory()
365 return -EINVAL; in free_device_memory()
368 if (atomic_read(&phys_pg_pack->mapping_cnt) > 0) { in free_device_memory()
369 spin_unlock(&vm->idr_lock); in free_device_memory()
370 dev_err(hdev->dev, "handle %u is mapped, cannot free\n", handle); in free_device_memory()
371 return -EINVAL; in free_device_memory()
377 idr_remove(&vm->phys_pg_pack_handles, handle); in free_device_memory()
378 spin_unlock(&vm->idr_lock); in free_device_memory()
380 atomic64_sub(phys_pg_pack->total_size, &ctx->dram_phys_mem); in free_device_memory()
381 atomic64_sub(phys_pg_pack->total_size, &hdev->dram_used_mem); in free_device_memory()
389 * clear_va_list_locked() - free virtual addresses list.
394 * - Iterate over the list and free each virtual addresses block.
404 list_del(&va_block->node); in clear_va_list_locked()
410 * print_va_list_locked() - print virtual addresses list.
415 * - Iterate over the list and print each virtual addresses block.
425 dev_dbg(hdev->dev, "print va list:\n"); in print_va_list_locked()
428 dev_dbg(hdev->dev, in print_va_list_locked()
430 va_block->start, va_block->end, va_block->size); in print_va_list_locked()
435 * merge_va_blocks_locked() - merge a virtual block if possible.
441 * - Merge the given blocks with the adjacent blocks if their virtual ranges
452 if (&prev->node != va_list && prev->end + 1 == va_block->start) { in merge_va_blocks_locked()
453 prev->end = va_block->end; in merge_va_blocks_locked()
454 prev->size = prev->end - prev->start + 1; in merge_va_blocks_locked()
455 list_del(&va_block->node); in merge_va_blocks_locked()
461 if (&next->node != va_list && va_block->end + 1 == next->start) { in merge_va_blocks_locked()
462 next->start = va_block->start; in merge_va_blocks_locked()
463 next->size = next->end - next->start + 1; in merge_va_blocks_locked()
464 list_del(&va_block->node); in merge_va_blocks_locked()
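
merge_va_blocks_locked() folds a block into its neighbour whenever the two are exactly adjacent (prev->end + 1 == start, or end + 1 == next->start). A userspace sketch (not driver code) of that merge rule on two made-up blocks:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

struct va_block {
	uint64_t start, end, size;	/* end is inclusive */
};

/* Merge next into prev when the two ranges touch with no gap. */
static bool try_merge(struct va_block *prev, const struct va_block *next)
{
	if (prev->end + 1 != next->start)
		return false;
	prev->end = next->end;
	prev->size = prev->end - prev->start + 1;
	return true;
}

int main(void)
{
	struct va_block a = { 0x1000, 0x1fff, 0x1000 };
	struct va_block b = { 0x2000, 0x2fff, 0x1000 };

	if (try_merge(&a, &b))
		printf("merged block: 0x%llx-0x%llx size 0x%llx\n",
		       (unsigned long long)a.start, (unsigned long long)a.end,
		       (unsigned long long)a.size);
	return 0;
}
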
470 * add_va_block_locked() - add a virtual block to the virtual addresses list.
477 * - Add the given block to the virtual blocks list and merge with other blocks
486 u64 size = end - start + 1; in add_va_block_locked()
492 if (hl_mem_area_crosses_range(start, size, va_block->start, in add_va_block_locked()
493 va_block->end)) { in add_va_block_locked()
494 dev_err(hdev->dev, in add_va_block_locked()
496 va_block->start, va_block->end); in add_va_block_locked()
497 return -EINVAL; in add_va_block_locked()
500 if (va_block->end < start) in add_va_block_locked()
506 return -ENOMEM; in add_va_block_locked()
508 va_block->start = start; in add_va_block_locked()
509 va_block->end = end; in add_va_block_locked()
510 va_block->size = size; in add_va_block_locked()
513 list_add(&va_block->node, va_list); in add_va_block_locked()
515 list_add(&va_block->node, &res->node); in add_va_block_locked()
525 * add_va_block() - wrapper for add_va_block_locked.
532 * - Takes the list lock and calls add_va_block_locked.
539 mutex_lock(&va_range->lock); in add_va_block()
540 rc = add_va_block_locked(hdev, &va_range->list, start, end); in add_va_block()
541 mutex_unlock(&va_range->lock); in add_va_block()
547 * is_hint_crossing_range() - check if the hint address crosses the specified reserved range.
560 prop->hints_dram_reserved_va_range.start_addr, in is_hint_crossing_range()
561 prop->hints_dram_reserved_va_range.end_addr); in is_hint_crossing_range()
565 prop->hints_host_reserved_va_range.start_addr, in is_hint_crossing_range()
566 prop->hints_host_reserved_va_range.end_addr); in is_hint_crossing_range()
570 prop->hints_host_hpage_reserved_va_range.start_addr, in is_hint_crossing_range()
571 prop->hints_host_hpage_reserved_va_range.end_addr); in is_hint_crossing_range()
577 * get_va_block() - get a virtual block for the given size and alignment.
588 * - Iterate on the virtual block list to find a suitable virtual block for the
590 * - Reserve the requested block and update the list.
591 * - Return the start address of the virtual block.
600 struct asic_fixed_properties *prop = &hdev->asic_prop; in get_va_block()
603 dram_hint_mask = prop->dram_hints_align_mask; in get_va_block()
605 bool is_align_pow_2 = is_power_of_2(va_range->page_size); in get_va_block()
611 align_mask = ~((u64)va_block_align - 1); in get_va_block()
614 * with non-power-of-2 range we work only with page granularity in get_va_block()
618 size = DIV_ROUND_UP_ULL(size, va_range->page_size) * in get_va_block()
619 va_range->page_size; in get_va_block()
624 if ((is_align_pow_2 && (hint_addr & (va_block_align - 1))) || in get_va_block()
626 do_div(tmp_hint_addr, va_range->page_size))) { in get_va_block()
630 dev_err(hdev->dev, in get_va_block()
631 "Hint address 0x%llx is not page aligned - cannot be respected\n", in get_va_block()
636 dev_dbg(hdev->dev, in get_va_block()
642 mutex_lock(&va_range->lock); in get_va_block()
644 print_va_list_locked(hdev, &va_range->list); in get_va_block()
646 list_for_each_entry(va_block, &va_range->list, node) { in get_va_block()
648 valid_start = va_block->start; in get_va_block()
650 if (is_align_pow_2 && (valid_start & (va_block_align - 1))) { in get_va_block()
653 if (valid_start > va_block->end) in get_va_block()
657 valid_size = va_block->end - valid_start + 1; in get_va_block()
666 if (prop->hints_range_reservation && !hint_addr) in get_va_block()
679 (hint_addr + size) <= va_block->end) { in get_va_block()
688 dev_err(hdev->dev, "no available va block for size %llu\n", in get_va_block()
694 /* Hint address must be respected. If we are here - this means in get_va_block()
697 dev_err(hdev->dev, in get_va_block()
708 if (reserved_valid_start > new_va_block->start) { in get_va_block()
709 prev_start = new_va_block->start; in get_va_block()
710 prev_end = reserved_valid_start - 1; in get_va_block()
712 new_va_block->start = reserved_valid_start; in get_va_block()
713 new_va_block->size = reserved_valid_size; in get_va_block()
718 if (new_va_block->size > size) { in get_va_block()
719 new_va_block->start += size; in get_va_block()
720 new_va_block->size = new_va_block->end - new_va_block->start + 1; in get_va_block()
722 list_del(&new_va_block->node); in get_va_block()
727 rc = add_va_block_locked(hdev, &va_range->list, prev_start, prev_end); in get_va_block()
734 print_va_list_locked(hdev, &va_range->list); in get_va_block()
736 mutex_unlock(&va_range->lock); in get_va_block()
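
get_va_block() aligns a candidate start inside a free block, checks the aligned region still fits, and then splits the block into the reserved part plus a remainder that stays in the list (the "two chunks" behaviour described near the top of the listing; a leading remainder is added back the same way). A userspace sketch of that step, assuming a power-of-2 alignment and made-up numbers:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t block_start = 0x10200, block_end = 0x1ffff;	/* free block */
	uint64_t size = 0x4000, align = 0x1000;			/* request    */
	uint64_t valid_start = block_start;

	if (valid_start & (align - 1))				/* align start up */
		valid_start = (valid_start & ~(align - 1)) + align;

	if (valid_start > block_end || block_end - valid_start + 1 < size) {
		printf("block too small for the request\n");
		return 1;
	}

	printf("reserved 0x%llx-0x%llx\n", (unsigned long long)valid_start,
	       (unsigned long long)(valid_start + size - 1));
	if (valid_start + size - 1 < block_end)			/* trailing chunk */
		printf("remainder 0x%llx-0x%llx stays in the list\n",
		       (unsigned long long)(valid_start + size),
		       (unsigned long long)block_end);
	return 0;
}
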
742 * hl_reserve_va_block() - reserve a virtual block of a given size.
744 * @ctx: current context
751 * - Iterate on the virtual block list to find a suitable virtual block for the
753 * - Reserve the requested block and update the list.
754 * - Return the start address of the virtual block.
756 u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx, in hl_reserve_va_block() argument
759 return get_va_block(hdev, ctx->va_range[type], size, 0, in hl_reserve_va_block()
760 max(alignment, ctx->va_range[type]->page_size), in hl_reserve_va_block()
765 * hl_get_va_range_type() - get va_range type for the given address and size.
766 * @ctx: context to fetch va_range from.
773 static int hl_get_va_range_type(struct hl_ctx *ctx, u64 address, u64 size, in hl_get_va_range_type() argument
780 ctx->va_range[i]->start_addr, in hl_get_va_range_type()
781 ctx->va_range[i]->end_addr)) { in hl_get_va_range_type()
787 return -EINVAL; in hl_get_va_range_type()
791 * hl_unreserve_va_block() - wrapper for add_va_block to unreserve a va block.
793 * @ctx: pointer to the context structure.
798 * - Takes the list lock and calls add_va_block_locked.
800 int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx, in hl_unreserve_va_block() argument
806 rc = hl_get_va_range_type(ctx, start_addr, size, &type); in hl_unreserve_va_block()
808 dev_err(hdev->dev, in hl_unreserve_va_block()
814 rc = add_va_block(hdev, ctx->va_range[type], start_addr, in hl_unreserve_va_block()
815 start_addr + size - 1); in hl_unreserve_va_block()
817 dev_warn(hdev->dev, in hl_unreserve_va_block()
824 * init_phys_pg_pack_from_userptr() - initialize physical page pack from host
826 * @ctx: pointer to the context structure.
835 * - Create a physical page pack from the physical pages related to the given
838 static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx, in init_phys_pg_pack_from_userptr() argument
844 huge_page_size = ctx->hdev->asic_prop.pmmu_huge.page_size; in init_phys_pg_pack_from_userptr()
855 return -ENOMEM; in init_phys_pg_pack_from_userptr()
857 phys_pg_pack->vm_type = userptr->vm_type; in init_phys_pg_pack_from_userptr()
858 phys_pg_pack->created_from_userptr = true; in init_phys_pg_pack_from_userptr()
859 phys_pg_pack->asid = ctx->asid; in init_phys_pg_pack_from_userptr()
860 atomic_set(&phys_pg_pack->mapping_cnt, 1); in init_phys_pg_pack_from_userptr()
871 for_each_sgtable_dma_sg(userptr->sgt, sg, i) { in init_phys_pg_pack_from_userptr()
877 (dma_addr & (huge_page_size - 1))) in init_phys_pg_pack_from_userptr()
886 page_mask = ~(((u64) page_size) - 1); in init_phys_pg_pack_from_userptr()
888 phys_pg_pack->pages = kvmalloc_array(total_npages, sizeof(u64), in init_phys_pg_pack_from_userptr()
890 if (ZERO_OR_NULL_PTR(phys_pg_pack->pages)) { in init_phys_pg_pack_from_userptr()
891 rc = -ENOMEM; in init_phys_pg_pack_from_userptr()
895 phys_pg_pack->npages = total_npages; in init_phys_pg_pack_from_userptr()
896 phys_pg_pack->page_size = page_size; in init_phys_pg_pack_from_userptr()
897 phys_pg_pack->total_size = total_npages * page_size; in init_phys_pg_pack_from_userptr()
900 for_each_sgtable_dma_sg(userptr->sgt, sg, i) { in init_phys_pg_pack_from_userptr()
906 phys_pg_pack->offset = dma_addr & (page_size - 1); in init_phys_pg_pack_from_userptr()
911 phys_pg_pack->pages[j++] = dma_addr; in init_phys_pg_pack_from_userptr()
915 npages -= pgs_in_huge_page; in init_phys_pg_pack_from_userptr()
917 npages--; in init_phys_pg_pack_from_userptr()
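
init_phys_pg_pack_from_userptr() scans the DMA-mapped scatter list once and falls back from the huge page size to the regular one as soon as any segment is misaligned or not a whole multiple of the huge page size; only then does it fill the page array. A userspace sketch of that eligibility check over a made-up segment list (the 2MB huge page size is an assumption):

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

struct seg {
	uint64_t dma_addr, len;
};

int main(void)
{
	const uint64_t huge = 2ULL * 1024 * 1024;	/* assumed 2MB huge page */
	const struct seg segs[] = {
		{ 0x40000000, 4 * 1024 * 1024 },
		{ 0x80200000, 2 * 1024 * 1024 },
		{ 0xc0001000,      64 * 1024 },		/* breaks both rules */
	};
	bool use_huge = true;
	unsigned int i;

	for (i = 0; i < sizeof(segs) / sizeof(segs[0]); i++) {
		if ((segs[i].len % huge) || (segs[i].dma_addr & (huge - 1))) {
			use_huge = false;
			break;
		}
	}
	printf("page size chosen for the pack: %s\n",
	       use_huge ? "huge" : "regular");
	return 0;
}
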
932 * map_phys_pg_pack() - maps the physical page pack.
933 * @ctx: pointer to the context structure.
938 * - Maps each chunk of virtual memory to matching physical chunk.
939 * - Stores number of successful mappings in the given argument.
940 * - Returns 0 on success, error code otherwise.
942 static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, in map_phys_pg_pack() argument
945 struct hl_device *hdev = ctx->hdev; in map_phys_pg_pack()
947 u32 page_size = phys_pg_pack->page_size; in map_phys_pg_pack()
951 for (i = 0 ; i < phys_pg_pack->npages ; i++) { in map_phys_pg_pack()
952 paddr = phys_pg_pack->pages[i]; in map_phys_pg_pack()
954 rc = hl_mmu_map_page(ctx, next_vaddr, paddr, page_size, in map_phys_pg_pack()
955 (i + 1) == phys_pg_pack->npages); in map_phys_pg_pack()
957 dev_err(hdev->dev, in map_phys_pg_pack()
959 rc, phys_pg_pack->handle, phys_pg_pack->npages, in map_phys_pg_pack()
975 if (hl_mmu_unmap_page(ctx, next_vaddr, page_size, in map_phys_pg_pack()
977 dev_warn_ratelimited(hdev->dev, in map_phys_pg_pack()
979 phys_pg_pack->handle, next_vaddr, in map_phys_pg_pack()
980 phys_pg_pack->pages[i], page_size); in map_phys_pg_pack()
992 if (hdev->pldm || (is_host_addr && (i & 0x7FFF) == 0)) in map_phys_pg_pack()
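
map_phys_pg_pack() maps the pack one page at a time and, if any page fails, walks back over the pages that did map and unmaps them before returning the error. A userspace sketch of that unwind pattern with stubbed map/unmap calls (the stubs and the forced failure at page 3 are illustrative):

#include <stdio.h>
#include <stdint.h>

#define NPAGES 5U

/* Stub that pretends the fourth page cannot be mapped. */
static int map_page(uint64_t vaddr, unsigned int i)
{
	return (i == 3) ? -1 : 0;
}

static void unmap_page(uint64_t vaddr)
{
	printf("rollback: unmap 0x%llx\n", (unsigned long long)vaddr);
}

int main(void)
{
	const uint64_t page_size = 0x1000, base = 0x100000;
	uint64_t vaddr = base;
	unsigned int i, mapped = 0;

	for (i = 0; i < NPAGES; i++, vaddr += page_size) {
		if (map_page(vaddr, i)) {
			printf("mapping failed at page %u\n", i);
			break;
		}
		mapped++;
	}

	/* Unwind only the pages that were successfully mapped. */
	if (mapped != NPAGES)
		for (i = 0, vaddr = base; i < mapped; i++, vaddr += page_size)
			unmap_page(vaddr);
	return mapped == NPAGES ? 0 : 1;
}
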
1000 * unmap_phys_pg_pack() - unmaps the physical page pack.
1001 * @ctx: pointer to the context structure.
1005 static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, in unmap_phys_pg_pack() argument
1008 struct hl_device *hdev = ctx->hdev; in unmap_phys_pg_pack()
1014 page_size = phys_pg_pack->page_size; in unmap_phys_pg_pack()
1017 for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) { in unmap_phys_pg_pack()
1018 if (hl_mmu_unmap_page(ctx, next_vaddr, page_size, in unmap_phys_pg_pack()
1019 (i + 1) == phys_pg_pack->npages)) in unmap_phys_pg_pack()
1020 dev_warn_ratelimited(hdev->dev, in unmap_phys_pg_pack()
1031 if (hdev->pldm || (is_host_addr && (i & 0x7FFF) == 0)) in unmap_phys_pg_pack()
1037 * map_device_va() - map the given memory.
1038 * @ctx: pointer to the context structure.
1043 * - If given a physical device memory handle, map to a device virtual block
1045 * - If given a host virtual address and size, find the related physical pages,
1049 static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, u64 *device_addr) in map_device_va() argument
1053 struct hl_device *hdev = ctx->hdev; in map_device_va()
1057 struct hl_vm *vm = &hdev->vm; in map_device_va()
1065 is_userptr = args->flags & HL_MEM_USERPTR; in map_device_va()
1066 do_prefetch = hdev->supports_mmu_prefetch && (args->flags & HL_MEM_PREFETCH); in map_device_va()
1072 u64 addr = args->map_host.host_virt_addr, in map_device_va()
1073 size = args->map_host.mem_size; in map_device_va()
1074 u32 page_size = hdev->asic_prop.pmmu.page_size, in map_device_va()
1075 huge_page_size = hdev->asic_prop.pmmu_huge.page_size; in map_device_va()
1081 rc = init_phys_pg_pack_from_userptr(ctx, userptr, in map_device_va()
1084 dev_err(hdev->dev, in map_device_va()
1091 hint_addr = args->map_host.hint_addr; in map_device_va()
1092 handle = phys_pg_pack->handle; in map_device_va()
1095 if (phys_pg_pack->page_size == page_size) { in map_device_va()
1096 va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST]; in map_device_va()
1102 if (addr & (huge_page_size - 1)) in map_device_va()
1111 va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]; in map_device_va()
1116 handle = lower_32_bits(args->map_device.handle); in map_device_va()
1118 spin_lock(&vm->idr_lock); in map_device_va()
1119 phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle); in map_device_va()
1121 spin_unlock(&vm->idr_lock); in map_device_va()
1122 dev_err(hdev->dev, in map_device_va()
1124 return -EINVAL; in map_device_va()
1128 atomic_inc(&phys_pg_pack->mapping_cnt); in map_device_va()
1130 spin_unlock(&vm->idr_lock); in map_device_va()
1134 hint_addr = args->map_device.hint_addr; in map_device_va()
1137 va_range = ctx->va_range[HL_VA_RANGE_TYPE_DRAM]; in map_device_va()
1139 va_block_align = hdev->asic_prop.dmmu.page_size; in map_device_va()
1146 if (!is_userptr && !(phys_pg_pack->flags & HL_MEM_SHARED) && in map_device_va()
1147 phys_pg_pack->asid != ctx->asid) { in map_device_va()
1148 dev_err(hdev->dev, in map_device_va()
1151 rc = -EPERM; in map_device_va()
1157 rc = -ENOMEM; in map_device_va()
1161 if (hint_addr && phys_pg_pack->offset) { in map_device_va()
1162 if (args->flags & HL_MEM_FORCE_HINT) { in map_device_va()
1164 dev_err(hdev->dev, in map_device_va()
1166 hint_addr, phys_pg_pack->offset); in map_device_va()
1167 rc = -EINVAL; in map_device_va()
1170 dev_dbg(hdev->dev, in map_device_va()
1172 hint_addr, phys_pg_pack->offset); in map_device_va()
1175 ret_vaddr = get_va_block(hdev, va_range, phys_pg_pack->total_size, in map_device_va()
1177 va_range_type, args->flags); in map_device_va()
1179 dev_err(hdev->dev, "no available va block for handle %u\n", in map_device_va()
1181 rc = -ENOMEM; in map_device_va()
1185 mutex_lock(&hdev->mmu_lock); in map_device_va()
1187 rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack); in map_device_va()
1189 dev_err(hdev->dev, "mapping page pack failed (%d) for handle %u\n", in map_device_va()
1191 mutex_unlock(&hdev->mmu_lock); in map_device_va()
1196 ctx->asid, ret_vaddr, phys_pg_pack->total_size); in map_device_va()
1197 mutex_unlock(&hdev->mmu_lock); in map_device_va()
1206 rc = hl_mmu_prefetch_cache_range(ctx, *vm_type, ctx->asid, ret_vaddr, in map_device_va()
1207 phys_pg_pack->total_size); in map_device_va()
1212 ret_vaddr += phys_pg_pack->offset; in map_device_va()
1214 hnode->ptr = vm_type; in map_device_va()
1215 hnode->vaddr = ret_vaddr; in map_device_va()
1216 hnode->handle = is_userptr ? MEM_HANDLE_INVALID : handle; in map_device_va()
1218 mutex_lock(&ctx->mem_hash_lock); in map_device_va()
1219 hash_add(ctx->mem_hash, &hnode->node, ret_vaddr); in map_device_va()
1220 mutex_unlock(&ctx->mem_hash_lock); in map_device_va()
1231 ret_vaddr + phys_pg_pack->total_size - 1)) in map_device_va()
1232 dev_warn(hdev->dev, in map_device_va()
1240 atomic_dec(&phys_pg_pack->mapping_cnt); in map_device_va()
1251 static struct hl_vm_hash_node *get_vm_hash_node_locked(struct hl_ctx *ctx, u64 vaddr) in get_vm_hash_node_locked() argument
1255 hash_for_each_possible(ctx->mem_hash, hnode, node, vaddr) in get_vm_hash_node_locked()
1256 if (vaddr == hnode->vaddr) in get_vm_hash_node_locked()
1263 * unmap_device_va() - unmap the given device virtual address.
1264 * @ctx: pointer to the context structure.
1269 * - unmap the physical pages related to the given virtual address.
1270 * - return the device virtual block to the virtual block list.
1272 static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, in unmap_device_va() argument
1276 u64 vaddr = args->unmap.device_virt_addr; in unmap_device_va()
1278 struct hl_device *hdev = ctx->hdev; in unmap_device_va()
1286 prop = &hdev->asic_prop; in unmap_device_va()
1289 mutex_lock(&ctx->mem_hash_lock); in unmap_device_va()
1290 hnode = get_vm_hash_node_locked(ctx, vaddr); in unmap_device_va()
1292 mutex_unlock(&ctx->mem_hash_lock); in unmap_device_va()
1293 dev_err(hdev->dev, "unmap failed, no mem hnode for vaddr 0x%llx\n", vaddr); in unmap_device_va()
1294 return -EINVAL; in unmap_device_va()
1297 if (hnode->export_cnt) { in unmap_device_va()
1298 mutex_unlock(&ctx->mem_hash_lock); in unmap_device_va()
1299 dev_err(hdev->dev, "failed to unmap %#llx, memory is exported\n", vaddr); in unmap_device_va()
1300 return -EINVAL; in unmap_device_va()
1303 hash_del(&hnode->node); in unmap_device_va()
1304 mutex_unlock(&ctx->mem_hash_lock); in unmap_device_va()
1306 vm_type = hnode->ptr; in unmap_device_va()
1310 userptr = hnode->ptr; in unmap_device_va()
1312 rc = init_phys_pg_pack_from_userptr(ctx, userptr, &phys_pg_pack, in unmap_device_va()
1315 dev_err(hdev->dev, in unmap_device_va()
1321 if (phys_pg_pack->page_size == in unmap_device_va()
1322 hdev->asic_prop.pmmu.page_size) in unmap_device_va()
1323 va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST]; in unmap_device_va()
1325 va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]; in unmap_device_va()
1328 va_range = ctx->va_range[HL_VA_RANGE_TYPE_DRAM]; in unmap_device_va()
1329 phys_pg_pack = hnode->ptr; in unmap_device_va()
1331 dev_warn(hdev->dev, in unmap_device_va()
1334 rc = -EFAULT; in unmap_device_va()
1338 if (atomic_read(&phys_pg_pack->mapping_cnt) == 0) { in unmap_device_va()
1339 dev_err(hdev->dev, "vaddr 0x%llx is not mapped\n", vaddr); in unmap_device_va()
1340 rc = -EINVAL; in unmap_device_va()
1344 if (!is_userptr && !is_power_of_2(phys_pg_pack->page_size)) in unmap_device_va()
1345 vaddr = prop->dram_base_address + in unmap_device_va()
1346 DIV_ROUND_DOWN_ULL(vaddr - prop->dram_base_address, in unmap_device_va()
1347 phys_pg_pack->page_size) * in unmap_device_va()
1348 phys_pg_pack->page_size; in unmap_device_va()
1350 vaddr &= ~(((u64) phys_pg_pack->page_size) - 1); in unmap_device_va()
1352 mutex_lock(&hdev->mmu_lock); in unmap_device_va()
1354 unmap_phys_pg_pack(ctx, vaddr, phys_pg_pack); in unmap_device_va()
1362 rc = hl_mmu_invalidate_cache_range(hdev, true, *vm_type, ctx->asid, vaddr, in unmap_device_va()
1363 phys_pg_pack->total_size); in unmap_device_va()
1365 mutex_unlock(&hdev->mmu_lock); in unmap_device_va()
1377 vaddr + phys_pg_pack->total_size - 1); in unmap_device_va()
1379 dev_warn(hdev->dev, in unmap_device_va()
1387 atomic_dec(&phys_pg_pack->mapping_cnt); in unmap_device_va()
1401 mutex_lock(&ctx->mem_hash_lock); in unmap_device_va()
1402 hash_add(ctx->mem_hash, &hnode->node, vaddr); in unmap_device_va()
1403 mutex_unlock(&ctx->mem_hash_lock); in unmap_device_va()
1417 rc = hdev->asic_funcs->get_hw_block_id(hdev, address, size, &block_id); in map_block()
1430 (struct hl_vm_hw_block_list_node *) vma->vm_private_data; in hw_block_vm_close()
1431 struct hl_ctx *ctx = lnode->ctx; in hw_block_vm_close() local
1434 new_mmap_size = lnode->mapped_size - (vma->vm_end - vma->vm_start); in hw_block_vm_close()
1436 lnode->mapped_size = new_mmap_size; in hw_block_vm_close()
1440 mutex_lock(&ctx->hw_block_list_lock); in hw_block_vm_close()
1441 list_del(&lnode->node); in hw_block_vm_close()
1442 mutex_unlock(&ctx->hw_block_list_lock); in hw_block_vm_close()
1443 hl_ctx_put(ctx); in hw_block_vm_close()
1445 vma->vm_private_data = NULL; in hw_block_vm_close()
1453 * hl_hw_block_mmap() - mmap a hw block to user.
1463 struct hl_device *hdev = hpriv->hdev; in hl_hw_block_mmap()
1464 struct hl_ctx *ctx = hpriv->ctx; in hl_hw_block_mmap() local
1471 block_id = vma->vm_pgoff; in hl_hw_block_mmap()
1472 vma->vm_pgoff = 0; in hl_hw_block_mmap()
1475 block_size = vma->vm_end - vma->vm_start; in hl_hw_block_mmap()
1477 if (!access_ok((void __user *) (uintptr_t) vma->vm_start, block_size)) { in hl_hw_block_mmap()
1478 dev_err(hdev->dev, in hl_hw_block_mmap()
1479 "user pointer is invalid - 0x%lx\n", in hl_hw_block_mmap()
1480 vma->vm_start); in hl_hw_block_mmap()
1482 return -EINVAL; in hl_hw_block_mmap()
1487 return -ENOMEM; in hl_hw_block_mmap()
1489 rc = hdev->asic_funcs->hw_block_mmap(hdev, vma, block_id, block_size); in hl_hw_block_mmap()
1495 hl_ctx_get(ctx); in hl_hw_block_mmap()
1497 lnode->ctx = ctx; in hl_hw_block_mmap()
1498 lnode->vaddr = vma->vm_start; in hl_hw_block_mmap()
1499 lnode->block_size = block_size; in hl_hw_block_mmap()
1500 lnode->mapped_size = lnode->block_size; in hl_hw_block_mmap()
1501 lnode->id = block_id; in hl_hw_block_mmap()
1503 vma->vm_private_data = lnode; in hl_hw_block_mmap()
1504 vma->vm_ops = &hw_block_vm_ops; in hl_hw_block_mmap()
1506 mutex_lock(&ctx->hw_block_list_lock); in hl_hw_block_mmap()
1507 list_add_tail(&lnode->node, &ctx->hw_block_mem_list); in hl_hw_block_mmap()
1508 mutex_unlock(&ctx->hw_block_list_lock); in hl_hw_block_mmap()
1510 vma->vm_pgoff = block_id; in hl_hw_block_mmap()
1540 struct asic_fixed_properties *prop = &hdev->asic_prop; in alloc_sgt_from_device_pages()
1550 dev_err_ratelimited(hdev->dev, in alloc_sgt_from_device_pages()
1553 return ERR_PTR(-EINVAL); in alloc_sgt_from_device_pages()
1558 return ERR_PTR(-ENOMEM); in alloc_sgt_from_device_pages()
1566 offset -= page_size; in alloc_sgt_from_device_pages()
1573 left_size_in_page = page_size - offset; in alloc_sgt_from_device_pages()
1579 left_size_to_export -= size; in alloc_sgt_from_device_pages()
1580 left_size_in_page -= size; in alloc_sgt_from_device_pages()
1581 left_size_in_dma_seg -= size; in alloc_sgt_from_device_pages()
1614 left_size_in_page = page_size - offset; in alloc_sgt_from_device_pages()
1619 bar_address = hdev->dram_pci_bar_start + (device_address - prop->dram_base_address); in alloc_sgt_from_device_pages()
1625 left_size_to_export -= size; in alloc_sgt_from_device_pages()
1626 left_size_in_page -= size; in alloc_sgt_from_device_pages()
1627 left_size_in_dma_seg -= size; in alloc_sgt_from_device_pages()
1668 dev_err(hdev->dev, in alloc_sgt_from_device_pages()
1670 left_size_to_export, sgt->nents); in alloc_sgt_from_device_pages()
1671 rc = -ENOMEM; in alloc_sgt_from_device_pages()
1680 sgt->orig_nents = 0; in alloc_sgt_from_device_pages()
1682 dev_dbg(hdev->dev, "prepared SG table with %u entries for importer %s\n", in alloc_sgt_from_device_pages()
1685 dev_dbg(hdev->dev, in alloc_sgt_from_device_pages()
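
The exporter loop in alloc_sgt_from_device_pages() advances in chunks bounded by three limits at once: what is left in the current device page, in the current DMA segment, and in the overall export. A userspace sketch of that min-of-three advance (the page and segment sizes are made up, and the real loop additionally merges contiguous pages and fills scatterlist entries):

#include <stdio.h>
#include <stdint.h>

static uint64_t min3(uint64_t a, uint64_t b, uint64_t c)
{
	uint64_t m = a < b ? a : b;

	return m < c ? m : c;
}

int main(void)
{
	const uint64_t page_size = 0x200000, max_seg = 0x100000; /* assumptions */
	uint64_t left_to_export = 0x500000;
	uint64_t left_in_page = page_size, left_in_seg = max_seg;

	while (left_to_export) {
		uint64_t size = min3(left_in_page, left_in_seg, left_to_export);

		printf("chunk of 0x%llx bytes\n", (unsigned long long)size);
		left_to_export -= size;
		left_in_page -= size;
		left_in_seg -= size;
		if (!left_in_page)
			left_in_page = page_size;	/* next device page */
		if (!left_in_seg)
			left_in_seg = max_seg;		/* next DMA segment */
	}
	return 0;
}
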
1714 hl_dmabuf = dmabuf->priv; in hl_dmabuf_attach()
1715 hdev = hl_dmabuf->ctx->hdev; in hl_dmabuf_attach()
1717 rc = pci_p2pdma_distance(hdev->pdev, attachment->dev, true); in hl_dmabuf_attach()
1720 attachment->peer2peer = false; in hl_dmabuf_attach()
1728 struct dma_buf *dma_buf = attachment->dmabuf; in hl_map_dmabuf()
1734 hl_dmabuf = dma_buf->priv; in hl_map_dmabuf()
1735 hdev = hl_dmabuf->ctx->hdev; in hl_map_dmabuf()
1737 if (!attachment->peer2peer) { in hl_map_dmabuf()
1738 dev_dbg(hdev->dev, "Failed to map dmabuf because p2p is disabled\n"); in hl_map_dmabuf()
1739 return ERR_PTR(-EPERM); in hl_map_dmabuf()
1742 exported_size = hl_dmabuf->dmabuf->size; in hl_map_dmabuf()
1743 offset = hl_dmabuf->offset; in hl_map_dmabuf()
1744 phys_pg_pack = hl_dmabuf->phys_pg_pack; in hl_map_dmabuf()
1747 pages = phys_pg_pack->pages; in hl_map_dmabuf()
1748 npages = phys_pg_pack->npages; in hl_map_dmabuf()
1749 page_size = phys_pg_pack->page_size; in hl_map_dmabuf()
1751 pages = &hl_dmabuf->device_phys_addr; in hl_map_dmabuf()
1753 page_size = hl_dmabuf->dmabuf->size; in hl_map_dmabuf()
1757 attachment->dev, dir); in hl_map_dmabuf()
1759 dev_err(hdev->dev, "failed (%ld) to initialize sgt for dmabuf\n", PTR_ERR(sgt)); in hl_map_dmabuf()
1771 /* The memory behind the dma-buf has *always* resided on the device itself, i.e. it lives in hl_unmap_dmabuf()
1779 dma_unmap_resource(attachment->dev, sg_dma_address(sg), in hl_unmap_dmabuf()
1784 sgt->orig_nents = sgt->nents; in hl_unmap_dmabuf()
1789 static struct hl_vm_hash_node *memhash_node_export_get(struct hl_ctx *ctx, u64 addr) in memhash_node_export_get() argument
1791 struct hl_device *hdev = ctx->hdev; in memhash_node_export_get()
1795 mutex_lock(&ctx->mem_hash_lock); in memhash_node_export_get()
1796 hnode = get_vm_hash_node_locked(ctx, addr); in memhash_node_export_get()
1798 mutex_unlock(&ctx->mem_hash_lock); in memhash_node_export_get()
1799 dev_dbg(hdev->dev, "map address %#llx not found\n", addr); in memhash_node_export_get()
1800 return ERR_PTR(-EINVAL); in memhash_node_export_get()
1803 if (upper_32_bits(hnode->handle)) { in memhash_node_export_get()
1804 mutex_unlock(&ctx->mem_hash_lock); in memhash_node_export_get()
1805 dev_dbg(hdev->dev, "invalid handle %#llx for map address %#llx\n", in memhash_node_export_get()
1806 hnode->handle, addr); in memhash_node_export_get()
1807 return ERR_PTR(-EINVAL); in memhash_node_export_get()
1814 hnode->export_cnt++; in memhash_node_export_get()
1815 mutex_unlock(&ctx->mem_hash_lock); in memhash_node_export_get()
1820 static void memhash_node_export_put(struct hl_ctx *ctx, struct hl_vm_hash_node *hnode) in memhash_node_export_put() argument
1822 mutex_lock(&ctx->mem_hash_lock); in memhash_node_export_put()
1823 hnode->export_cnt--; in memhash_node_export_put()
1824 mutex_unlock(&ctx->mem_hash_lock); in memhash_node_export_put()
1829 struct hl_dmabuf_priv *hl_dmabuf = dmabuf->priv; in hl_release_dmabuf()
1830 struct hl_ctx *ctx; in hl_release_dmabuf() local
1835 ctx = hl_dmabuf->ctx; in hl_release_dmabuf()
1837 if (hl_dmabuf->memhash_hnode) in hl_release_dmabuf()
1838 memhash_node_export_put(ctx, hl_dmabuf->memhash_hnode); in hl_release_dmabuf()
1840 atomic_dec(&ctx->hdev->dmabuf_export_cnt); in hl_release_dmabuf()
1841 hl_ctx_put(ctx); in hl_release_dmabuf()
1844 fput(ctx->hpriv->file_priv->filp); in hl_release_dmabuf()
1856 static int export_dmabuf(struct hl_ctx *ctx, in export_dmabuf() argument
1861 struct hl_device *hdev = ctx->hdev; in export_dmabuf()
1869 hl_dmabuf->dmabuf = dma_buf_export(&exp_info); in export_dmabuf()
1870 if (IS_ERR(hl_dmabuf->dmabuf)) { in export_dmabuf()
1871 dev_err(hdev->dev, "failed to export dma-buf\n"); in export_dmabuf()
1872 return PTR_ERR(hl_dmabuf->dmabuf); in export_dmabuf()
1875 fd = dma_buf_fd(hl_dmabuf->dmabuf, flags); in export_dmabuf()
1877 dev_err(hdev->dev, "failed to get a file descriptor for a dma-buf, %d\n", fd); in export_dmabuf()
1882 hl_dmabuf->ctx = ctx; in export_dmabuf()
1883 hl_ctx_get(hl_dmabuf->ctx); in export_dmabuf()
1884 atomic_inc(&ctx->hdev->dmabuf_export_cnt); in export_dmabuf()
1886 /* Get compute device file to enforce release order, such that all exported dma-buf will be in export_dmabuf()
1890 get_file(ctx->hpriv->file_priv->filp); in export_dmabuf()
1897 hl_dmabuf->dmabuf->priv = NULL; in export_dmabuf()
1898 dma_buf_put(hl_dmabuf->dmabuf); in export_dmabuf()
1905 dev_dbg(hdev->dev, in validate_export_params_common()
1908 return -EINVAL; in validate_export_params_common()
1912 dev_dbg(hdev->dev, in validate_export_params_common()
1915 return -EINVAL; in validate_export_params_common()
1919 dev_dbg(hdev->dev, in validate_export_params_common()
1922 return -EINVAL; in validate_export_params_common()
1930 struct asic_fixed_properties *prop = &hdev->asic_prop; in validate_export_params_no_mmu()
1938 if (device_addr < prop->dram_user_base_address || in validate_export_params_no_mmu()
1939 (device_addr + size) > prop->dram_end_address || in validate_export_params_no_mmu()
1941 dev_dbg(hdev->dev, in validate_export_params_no_mmu()
1944 return -EINVAL; in validate_export_params_no_mmu()
1947 bar_address = hdev->dram_pci_bar_start + (device_addr - prop->dram_base_address); in validate_export_params_no_mmu()
1949 if ((bar_address + size) > (hdev->dram_pci_bar_start + prop->dram_pci_bar_size) || in validate_export_params_no_mmu()
1951 dev_dbg(hdev->dev, in validate_export_params_no_mmu()
1954 return -EINVAL; in validate_export_params_no_mmu()
1963 struct asic_fixed_properties *prop = &hdev->asic_prop; in validate_export_params()
1971 if ((offset + size) > phys_pg_pack->total_size) { in validate_export_params()
1972 dev_dbg(hdev->dev, "offset %#llx and size %#llx exceed total map size %#llx\n", in validate_export_params()
1973 offset, size, phys_pg_pack->total_size); in validate_export_params()
1974 return -EINVAL; in validate_export_params()
1977 for (i = 0 ; i < phys_pg_pack->npages ; i++) { in validate_export_params()
1978 bar_address = hdev->dram_pci_bar_start + in validate_export_params()
1979 (phys_pg_pack->pages[i] - prop->dram_base_address); in validate_export_params()
1981 if ((bar_address + phys_pg_pack->page_size) > in validate_export_params()
1982 (hdev->dram_pci_bar_start + prop->dram_pci_bar_size) || in validate_export_params()
1983 (bar_address + phys_pg_pack->page_size) < bar_address) { in validate_export_params()
1984 dev_dbg(hdev->dev, in validate_export_params()
1986 phys_pg_pack->pages[i], phys_pg_pack->page_size); in validate_export_params()
1987 return -EINVAL; in validate_export_params()
1998 struct hl_vm *vm = &hdev->vm; in get_phys_pg_pack_from_hash_node()
2000 spin_lock(&vm->idr_lock); in get_phys_pg_pack_from_hash_node()
2001 phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, (u32) hnode->handle); in get_phys_pg_pack_from_hash_node()
2003 spin_unlock(&vm->idr_lock); in get_phys_pg_pack_from_hash_node()
2004 dev_dbg(hdev->dev, "no match for handle 0x%x\n", (u32) hnode->handle); in get_phys_pg_pack_from_hash_node()
2005 return ERR_PTR(-EINVAL); in get_phys_pg_pack_from_hash_node()
2008 spin_unlock(&vm->idr_lock); in get_phys_pg_pack_from_hash_node()
2010 if (phys_pg_pack->vm_type != VM_TYPE_PHYS_PACK) { in get_phys_pg_pack_from_hash_node()
2011 dev_dbg(hdev->dev, "handle 0x%llx does not represent DRAM memory\n", hnode->handle); in get_phys_pg_pack_from_hash_node()
2012 return ERR_PTR(-EINVAL); in get_phys_pg_pack_from_hash_node()
2019 * export_dmabuf_from_addr() - export a dma-buf object for the given memory
2021 * @ctx: pointer to the context structure.
2025 * @flags: DMA-BUF file/FD flags.
2026 * @dmabuf_fd: pointer to result FD that represents the dma-buf object.
2028 * Create and export a dma-buf object for an existing memory allocation inside
2029 * the device memory, and return a FD which is associated with the dma-buf
2032 * Return: 0 on success, non-zero for failure.
2034 static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 addr, u64 size, u64 offset, in export_dmabuf_from_addr() argument
2044 hdev = ctx->hdev; in export_dmabuf_from_addr()
2045 prop = &hdev->asic_prop; in export_dmabuf_from_addr()
2048 if (!prop->dram_supports_virtual_memory && offset) { in export_dmabuf_from_addr()
2049 dev_dbg(hdev->dev, "offset is not allowed in device without virtual memory\n"); in export_dmabuf_from_addr()
2050 return -EINVAL; in export_dmabuf_from_addr()
2055 return -ENOMEM; in export_dmabuf_from_addr()
2057 if (prop->dram_supports_virtual_memory) { in export_dmabuf_from_addr()
2058 hnode = memhash_node_export_get(ctx, addr); in export_dmabuf_from_addr()
2072 hl_dmabuf->phys_pg_pack = phys_pg_pack; in export_dmabuf_from_addr()
2073 hl_dmabuf->memhash_hnode = hnode; in export_dmabuf_from_addr()
2074 hl_dmabuf->offset = offset; in export_dmabuf_from_addr()
2080 hl_dmabuf->device_phys_addr = addr; in export_dmabuf_from_addr()
2083 rc = export_dmabuf(ctx, hl_dmabuf, size, flags, dmabuf_fd); in export_dmabuf_from_addr()
2090 if (prop->dram_supports_virtual_memory) in export_dmabuf_from_addr()
2091 memhash_node_export_put(ctx, hnode); in export_dmabuf_from_addr()
2099 struct hl_ts_buff *ts_buff = buf->private; in ts_buff_release()
2101 vfree(ts_buff->kernel_buff_address); in ts_buff_release()
2102 vfree(ts_buff->user_buff_address); in ts_buff_release()
2108 struct hl_ts_buff *ts_buff = buf->private; in hl_ts_mmap()
2111 return remap_vmalloc_range(vma, ts_buff->user_buff_address, 0); in hl_ts_mmap()
2125 return -ENOMEM; in hl_ts_alloc_buf()
2133 ts_buff->user_buff_address = p; in hl_ts_alloc_buf()
2134 buf->mappable_size = size; in hl_ts_alloc_buf()
2142 ts_buff->kernel_buff_address = p; in hl_ts_alloc_buf()
2143 ts_buff->kernel_buff_size = size; in hl_ts_alloc_buf()
2145 buf->private = ts_buff; in hl_ts_alloc_buf()
2150 vfree(ts_buff->user_buff_address); in hl_ts_alloc_buf()
2153 return -ENOMEM; in hl_ts_alloc_buf()
2165 * allocate_timestamps_buffers() - allocate timestamps buffers
2169 * since we cannot fail during registration due to an out-of-memory situation, so
2180 struct hl_mem_mgr *mmg = &hpriv->mem_mgr; in allocate_timestamps_buffers()
2183 if (args->num_of_elements > TS_MAX_ELEMENTS_NUM) { in allocate_timestamps_buffers()
2184 dev_err(mmg->dev, "Num of elements exceeds Max allowed number (0x%x > 0x%x)\n", in allocate_timestamps_buffers()
2185 args->num_of_elements, TS_MAX_ELEMENTS_NUM); in allocate_timestamps_buffers()
2186 return -EINVAL; in allocate_timestamps_buffers()
2189 buf = hl_mmap_mem_buf_alloc(mmg, &hl_ts_behavior, GFP_KERNEL, &args->num_of_elements); in allocate_timestamps_buffers()
2191 return -ENOMEM; in allocate_timestamps_buffers()
2193 *handle = buf->handle; in allocate_timestamps_buffers()
2200 struct hl_fpriv *hpriv = file_priv->driver_priv; in hl_mem_ioctl()
2203 struct hl_device *hdev = hpriv->hdev; in hl_mem_ioctl()
2204 struct hl_ctx *ctx = hpriv->ctx; in hl_mem_ioctl() local
2207 int rc, dmabuf_fd = -EBADF; in hl_mem_ioctl()
2210 dev_dbg_ratelimited(hdev->dev, in hl_mem_ioctl()
2212 hdev->status[status]); in hl_mem_ioctl()
2213 return -EBUSY; in hl_mem_ioctl()
2216 switch (args->in.op) { in hl_mem_ioctl()
2218 if (args->in.alloc.mem_size == 0) { in hl_mem_ioctl()
2219 dev_err(hdev->dev, in hl_mem_ioctl()
2221 rc = -EINVAL; in hl_mem_ioctl()
2233 if (!hdev->asic_prop.dram_supports_virtual_memory) { in hl_mem_ioctl()
2234 atomic64_add(args->in.alloc.mem_size, in hl_mem_ioctl()
2235 &ctx->dram_phys_mem); in hl_mem_ioctl()
2236 atomic64_add(args->in.alloc.mem_size, in hl_mem_ioctl()
2237 &hdev->dram_used_mem); in hl_mem_ioctl()
2239 dev_dbg(hdev->dev, "DRAM alloc is not supported\n"); in hl_mem_ioctl()
2243 args->out.handle = 0; in hl_mem_ioctl()
2247 rc = alloc_device_memory(ctx, &args->in, &handle); in hl_mem_ioctl()
2250 args->out.handle = (__u64) handle; in hl_mem_ioctl()
2262 if (!hdev->asic_prop.dram_supports_virtual_memory) { in hl_mem_ioctl()
2263 atomic64_sub(args->in.alloc.mem_size, in hl_mem_ioctl()
2264 &ctx->dram_phys_mem); in hl_mem_ioctl()
2265 atomic64_sub(args->in.alloc.mem_size, in hl_mem_ioctl()
2266 &hdev->dram_used_mem); in hl_mem_ioctl()
2268 dev_dbg(hdev->dev, "DRAM alloc is not supported\n"); in hl_mem_ioctl()
2274 rc = free_device_memory(ctx, &args->in); in hl_mem_ioctl()
2278 rc = map_device_va(ctx, &args->in, &device_addr); in hl_mem_ioctl()
2281 args->out.device_virt_addr = device_addr; in hl_mem_ioctl()
2285 rc = unmap_device_va(ctx, &args->in, false); in hl_mem_ioctl()
2289 rc = map_block(hdev, args->in.map_block.block_addr, in hl_mem_ioctl()
2291 args->out.block_handle = block_handle; in hl_mem_ioctl()
2292 args->out.block_size = block_size; in hl_mem_ioctl()
2296 rc = export_dmabuf_from_addr(ctx, in hl_mem_ioctl()
2297 args->in.export_dmabuf_fd.addr, in hl_mem_ioctl()
2298 args->in.export_dmabuf_fd.mem_size, in hl_mem_ioctl()
2299 args->in.export_dmabuf_fd.offset, in hl_mem_ioctl()
2300 args->in.flags, in hl_mem_ioctl()
2303 args->out.fd = dmabuf_fd; in hl_mem_ioctl()
2307 rc = allocate_timestamps_buffers(hpriv, &args->in, &args->out.handle); in hl_mem_ioctl()
2310 dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n"); in hl_mem_ioctl()
2311 rc = -EINVAL; in hl_mem_ioctl()
2326 dev_err(hdev->dev, "user pointer is invalid - 0x%llx\n", addr); in get_user_memory()
2327 return -EFAULT; in get_user_memory()
2330 userptr->pages = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); in get_user_memory()
2331 if (!userptr->pages) in get_user_memory()
2332 return -ENOMEM; in get_user_memory()
2335 userptr->pages); in get_user_memory()
2338 dev_err(hdev->dev, in get_user_memory()
2344 rc = -EFAULT; in get_user_memory()
2347 userptr->npages = npages; in get_user_memory()
2349 rc = sg_alloc_table_from_pages(userptr->sgt, in get_user_memory()
2350 userptr->pages, in get_user_memory()
2353 dev_err(hdev->dev, "failed to create SG table from pages\n"); in get_user_memory()
2360 unpin_user_pages(userptr->pages, npages); in get_user_memory()
2362 kvfree(userptr->pages); in get_user_memory()
2367 * hl_pin_host_memory() - pins a chunk of host memory.
2374 * - Pins the physical pages.
2375 * - Create an SG list from those pages.
2385 dev_err(hdev->dev, "size to pin is invalid - %llu\n", size); in hl_pin_host_memory()
2386 return -EINVAL; in hl_pin_host_memory()
2395 dev_err(hdev->dev, in hl_pin_host_memory()
2398 return -EINVAL; in hl_pin_host_memory()
2401 userptr->pid = current->pid; in hl_pin_host_memory()
2402 userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_KERNEL); in hl_pin_host_memory()
2403 if (!userptr->sgt) in hl_pin_host_memory()
2404 return -ENOMEM; in hl_pin_host_memory()
2409 npages = (end - start) >> PAGE_SHIFT; in hl_pin_host_memory()
2411 userptr->size = size; in hl_pin_host_memory()
2412 userptr->addr = addr; in hl_pin_host_memory()
2413 userptr->dma_mapped = false; in hl_pin_host_memory()
2414 INIT_LIST_HEAD(&userptr->job_node); in hl_pin_host_memory()
2419 dev_err(hdev->dev, in hl_pin_host_memory()
2430 kfree(userptr->sgt); in hl_pin_host_memory()
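
hl_pin_host_memory() pins whole pages, so an unaligned (addr, size) request is first widened to page boundaries and the page count is taken from the widened range, as in the npages = (end - start) >> PAGE_SHIFT line above. A userspace sketch of that arithmetic, assuming 4KB pages and a made-up address:

#include <stdio.h>
#include <stdint.h>

#define DEMO_PAGE_SHIFT 12
#define DEMO_PAGE_SIZE  (1ULL << DEMO_PAGE_SHIFT)
#define DEMO_PAGE_MASK  (~(DEMO_PAGE_SIZE - 1))

int main(void)
{
	uint64_t addr = 0x7f0000001234, size = 0x2100;	/* unaligned request */
	uint64_t start = addr & DEMO_PAGE_MASK;		/* round down        */
	uint64_t end = (addr + size + DEMO_PAGE_SIZE - 1) & DEMO_PAGE_MASK;
	uint64_t npages = (end - start) >> DEMO_PAGE_SHIFT;

	printf("pin %llu pages starting at 0x%llx\n",
	       (unsigned long long)npages, (unsigned long long)start);
	return 0;
}
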
2435 * hl_unpin_host_memory() - unpins a chunk of host memory.
2440 * - Unpins the physical pages related to the host memory
2441 * - Free the SG list
2447 if (userptr->dma_mapped) in hl_unpin_host_memory()
2448 hl_dma_unmap_sgtable(hdev, userptr->sgt, userptr->dir); in hl_unpin_host_memory()
2450 unpin_user_pages_dirty_lock(userptr->pages, userptr->npages, true); in hl_unpin_host_memory()
2451 kvfree(userptr->pages); in hl_unpin_host_memory()
2453 list_del(&userptr->job_node); in hl_unpin_host_memory()
2455 sg_free_table(userptr->sgt); in hl_unpin_host_memory()
2456 kfree(userptr->sgt); in hl_unpin_host_memory()
2460 * hl_userptr_delete_list() - clear userptr list.
2465 * - Iterates over the list and unpins the host memory and frees the userptr
2482 * hl_userptr_is_pinned() - returns whether the given userptr is pinned.
2490 * - Iterates over the list and checks if the given userptr is in it, means is
2498 if ((addr == (*userptr)->addr) && (size == (*userptr)->size)) in hl_userptr_is_pinned()
2506 * va_range_init() - initialize virtual addresses range.
2515 * - Initializes the virtual addresses list of the given range with the given
2525 INIT_LIST_HEAD(&va_range->list); in va_range_init()
2542 end = round_down(end + 1, page_size) - 1; in va_range_init()
2546 dev_err(hdev->dev, "too small vm range for va list\n"); in va_range_init()
2547 return -EFAULT; in va_range_init()
2553 dev_err(hdev->dev, "Failed to init host va list\n"); in va_range_init()
2557 va_range->start_addr = start; in va_range_init()
2558 va_range->end_addr = end; in va_range_init()
2559 va_range->page_size = page_size; in va_range_init()
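
va_range_init() trims the configured range inward so both ends land on page boundaries (start rounded up, end rounded down) and rejects a range that becomes too small after trimming. A userspace sketch of that trimming with assumed numbers:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const uint64_t page_size = 0x1000;		/* assumed 4KB pages */
	uint64_t start = 0x100234, end = 0x10ffff;	/* inclusive range   */

	start = (start + page_size - 1) & ~(page_size - 1);	/* round up   */
	end = ((end + 1) & ~(page_size - 1)) - 1;		/* round down */

	if (start >= end) {
		printf("too small vm range for va list\n");
		return 1;
	}
	printf("usable range 0x%llx-0x%llx\n",
	       (unsigned long long)start, (unsigned long long)end);
	return 0;
}
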
2565 * va_range_fini() - clear a virtual addresses range.
2570 * - Frees the virtual addresses block list and its lock.
2574 mutex_lock(&va_range->lock); in va_range_fini()
2575 clear_va_list_locked(hdev, &va_range->list); in va_range_fini()
2576 mutex_unlock(&va_range->lock); in va_range_fini()
2578 mutex_destroy(&va_range->lock); in va_range_fini()
2583 * vm_ctx_init_with_ranges() - initialize virtual memory for context.
2584 * @ctx: pointer to the habanalabs context structure.
2598 * - MMU for context.
2599 * - Virtual address to area descriptor hashtable.
2600 * - Virtual block list of available virtual memory.
2602 static int vm_ctx_init_with_ranges(struct hl_ctx *ctx, in vm_ctx_init_with_ranges() argument
2613 struct hl_device *hdev = ctx->hdev; in vm_ctx_init_with_ranges()
2617 ctx->va_range[i] = in vm_ctx_init_with_ranges()
2619 if (!ctx->va_range[i]) { in vm_ctx_init_with_ranges()
2620 rc = -ENOMEM; in vm_ctx_init_with_ranges()
2625 rc = hl_mmu_ctx_init(ctx); in vm_ctx_init_with_ranges()
2627 dev_err(hdev->dev, "failed to init context %d\n", ctx->asid); in vm_ctx_init_with_ranges()
2631 mutex_init(&ctx->mem_hash_lock); in vm_ctx_init_with_ranges()
2632 hash_init(ctx->mem_hash); in vm_ctx_init_with_ranges()
2634 mutex_init(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock); in vm_ctx_init_with_ranges()
2636 rc = va_range_init(hdev, ctx->va_range, HL_VA_RANGE_TYPE_HOST, in vm_ctx_init_with_ranges()
2639 dev_err(hdev->dev, "failed to init host vm range\n"); in vm_ctx_init_with_ranges()
2643 if (hdev->pmmu_huge_range) { in vm_ctx_init_with_ranges()
2644 mutex_init(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock); in vm_ctx_init_with_ranges()
2647 ctx->va_range, HL_VA_RANGE_TYPE_HOST_HUGE, in vm_ctx_init_with_ranges()
2651 dev_err(hdev->dev, in vm_ctx_init_with_ranges()
2656 kfree(ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]); in vm_ctx_init_with_ranges()
2657 ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE] = in vm_ctx_init_with_ranges()
2658 ctx->va_range[HL_VA_RANGE_TYPE_HOST]; in vm_ctx_init_with_ranges()
2661 mutex_init(&ctx->va_range[HL_VA_RANGE_TYPE_DRAM]->lock); in vm_ctx_init_with_ranges()
2663 rc = va_range_init(hdev, ctx->va_range, HL_VA_RANGE_TYPE_DRAM, in vm_ctx_init_with_ranges()
2666 dev_err(hdev->dev, "failed to init dram vm range\n"); in vm_ctx_init_with_ranges()
2670 hl_debugfs_add_ctx_mem_hash(hdev, ctx); in vm_ctx_init_with_ranges()
2675 mutex_destroy(&ctx->va_range[HL_VA_RANGE_TYPE_DRAM]->lock); in vm_ctx_init_with_ranges()
2677 if (hdev->pmmu_huge_range) { in vm_ctx_init_with_ranges()
2678 mutex_lock(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock); in vm_ctx_init_with_ranges()
2680 &ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->list); in vm_ctx_init_with_ranges()
2681 mutex_unlock(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock); in vm_ctx_init_with_ranges()
2684 if (hdev->pmmu_huge_range) in vm_ctx_init_with_ranges()
2685 mutex_destroy(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock); in vm_ctx_init_with_ranges()
2686 mutex_lock(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock); in vm_ctx_init_with_ranges()
2687 clear_va_list_locked(hdev, &ctx->va_range[HL_VA_RANGE_TYPE_HOST]->list); in vm_ctx_init_with_ranges()
2688 mutex_unlock(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock); in vm_ctx_init_with_ranges()
2690 mutex_destroy(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock); in vm_ctx_init_with_ranges()
2691 mutex_destroy(&ctx->mem_hash_lock); in vm_ctx_init_with_ranges()
2692 hl_mmu_ctx_fini(ctx); in vm_ctx_init_with_ranges()
2695 kfree(ctx->va_range[i]); in vm_ctx_init_with_ranges()
2700 int hl_vm_ctx_init(struct hl_ctx *ctx) in hl_vm_ctx_init() argument
2702 struct asic_fixed_properties *prop = &ctx->hdev->asic_prop; in hl_vm_ctx_init()
2707 atomic64_set(&ctx->dram_phys_mem, 0); in hl_vm_ctx_init()
2713 if (ctx->hdev->mmu_disable) in hl_vm_ctx_init()
2716 dram_range_start = prop->dmmu.start_addr; in hl_vm_ctx_init()
2717 dram_range_end = prop->dmmu.end_addr - 1; in hl_vm_ctx_init()
2718 dram_page_size = prop->dram_page_size ? in hl_vm_ctx_init()
2719 prop->dram_page_size : prop->dmmu.page_size; in hl_vm_ctx_init()
2720 host_range_start = prop->pmmu.start_addr; in hl_vm_ctx_init()
2721 host_range_end = prop->pmmu.end_addr - 1; in hl_vm_ctx_init()
2722 host_page_size = prop->pmmu.page_size; in hl_vm_ctx_init()
2723 host_huge_range_start = prop->pmmu_huge.start_addr; in hl_vm_ctx_init()
2724 host_huge_range_end = prop->pmmu_huge.end_addr - 1; in hl_vm_ctx_init()
2725 host_huge_page_size = prop->pmmu_huge.page_size; in hl_vm_ctx_init()
2727 return vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end, in hl_vm_ctx_init()
2734 * hl_vm_ctx_fini() - virtual memory teardown of context.
2735 * @ctx: pointer to the habanalabs context structure.
2738 * - Virtual block list of available virtual memory.
2739 * - Virtual address to area descriptor hashtable.
2740 * - MMU for context.
2743 * - Unmaps the existing hashtable nodes if the hashtable is not empty. The
2746 * - Frees any existing physical page list from the idr which relates to the
2747 * current context asid.
2748 * - This function checks the virtual block list for correctness. At this point
2752 void hl_vm_ctx_fini(struct hl_ctx *ctx) in hl_vm_ctx_fini() argument
2755 struct hl_device *hdev = ctx->hdev; in hl_vm_ctx_fini()
2757 struct hl_vm *vm = &hdev->vm; in hl_vm_ctx_fini()
2763 if (hdev->mmu_disable) in hl_vm_ctx_fini()
2766 hl_debugfs_remove_ctx_mem_hash(hdev, ctx); in hl_vm_ctx_fini()
2772 if (!hdev->reset_info.hard_reset_pending && !hash_empty(ctx->mem_hash)) in hl_vm_ctx_fini()
2773 dev_dbg(hdev->dev, in hl_vm_ctx_fini()
2776 hash_for_each_safe(ctx->mem_hash, i, tmp_node, hnode, node) { in hl_vm_ctx_fini()
2777 dev_dbg(hdev->dev, in hl_vm_ctx_fini()
2778 "hl_mem_hash_node of vaddr 0x%llx of asid %d is still alive\n", in hl_vm_ctx_fini()
2779 hnode->vaddr, ctx->asid); in hl_vm_ctx_fini()
2780 args.unmap.device_virt_addr = hnode->vaddr; in hl_vm_ctx_fini()
2781 unmap_device_va(ctx, &args, true); in hl_vm_ctx_fini()
2784 mutex_lock(&hdev->mmu_lock); in hl_vm_ctx_fini()
2790 mutex_unlock(&hdev->mmu_lock); in hl_vm_ctx_fini()
2794 spin_lock(&vm->idr_lock); in hl_vm_ctx_fini()
2795 idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_list, i) in hl_vm_ctx_fini()
2796 if (phys_pg_list->asid == ctx->asid) { in hl_vm_ctx_fini()
2797 dev_dbg(hdev->dev, in hl_vm_ctx_fini()
2798 "page list 0x%px of asid %d is still alive\n", in hl_vm_ctx_fini()
2799 phys_pg_list, ctx->asid); in hl_vm_ctx_fini()
2801 atomic64_sub(phys_pg_list->total_size, &hdev->dram_used_mem); in hl_vm_ctx_fini()
2802 idr_remove(&vm->phys_pg_pack_handles, i); in hl_vm_ctx_fini()
2803 list_add(&phys_pg_list->node, &free_list); in hl_vm_ctx_fini()
2805 spin_unlock(&vm->idr_lock); in hl_vm_ctx_fini()
2810 va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_DRAM]); in hl_vm_ctx_fini()
2811 va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_HOST]); in hl_vm_ctx_fini()
2813 if (hdev->pmmu_huge_range) in hl_vm_ctx_fini()
2814 va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]); in hl_vm_ctx_fini()
2816 mutex_destroy(&ctx->mem_hash_lock); in hl_vm_ctx_fini()
2817 hl_mmu_ctx_fini(ctx); in hl_vm_ctx_fini()
2823 if (ctx->asid != HL_KERNEL_ASID_ID && in hl_vm_ctx_fini()
2824 !hdev->asic_prop.dram_supports_virtual_memory) in hl_vm_ctx_fini()
2825 atomic64_set(&hdev->dram_used_mem, 0); in hl_vm_ctx_fini()
2829 * hl_vm_init() - initialize virtual memory module.
2833 * - MMU module.
2834 * - DRAM physical pages pool of 2MB.
2835 * - Idr for device memory allocation handles.
2839 struct asic_fixed_properties *prop = &hdev->asic_prop; in hl_vm_init()
2840 struct hl_vm *vm = &hdev->vm; in hl_vm_init()
2843 if (is_power_of_2(prop->dram_page_size)) in hl_vm_init()
2844 vm->dram_pg_pool = in hl_vm_init()
2845 gen_pool_create(__ffs(prop->dram_page_size), -1); in hl_vm_init()
2847 vm->dram_pg_pool = in hl_vm_init()
2848 gen_pool_create(__ffs(DRAM_POOL_PAGE_SIZE), -1); in hl_vm_init()
2850 if (!vm->dram_pg_pool) { in hl_vm_init()
2851 dev_err(hdev->dev, "Failed to create dram page pool\n"); in hl_vm_init()
2852 return -ENOMEM; in hl_vm_init()
2855 kref_init(&vm->dram_pg_pool_refcount); in hl_vm_init()
2857 rc = gen_pool_add(vm->dram_pg_pool, prop->dram_user_base_address, in hl_vm_init()
2858 prop->dram_end_address - prop->dram_user_base_address, in hl_vm_init()
2859 -1); in hl_vm_init()
2862 dev_err(hdev->dev, in hl_vm_init()
2867 spin_lock_init(&vm->idr_lock); in hl_vm_init()
2868 idr_init(&vm->phys_pg_pack_handles); in hl_vm_init()
2870 atomic64_set(&hdev->dram_used_mem, 0); in hl_vm_init()
2872 vm->init_done = true; in hl_vm_init()
2877 gen_pool_destroy(vm->dram_pg_pool); in hl_vm_init()
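
hl_vm_init() picks the DRAM pool's minimum allocation granularity from the page size: the gen_pool order is the index of the page size's lowest set bit (the kernel's __ffs()), which only works for power-of-2 sizes, hence the fallback to the small DRAM_POOL_PAGE_SIZE for the non-pow2 sizes mentioned near the top of the listing. A userspace sketch of that choice; the 4KB fallback value is an assumption, not taken from the driver:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

/* Userspace stand-in for the kernel's __ffs(): index of the lowest set bit. */
static unsigned int lowest_set_bit(uint64_t v)
{
	return (unsigned int)__builtin_ctzll(v);
}

static bool is_pow2(uint64_t v)
{
	return v && !(v & (v - 1));
}

int main(void)
{
	uint64_t dram_page_size = 48ULL * 1024 * 1024;	/* non-pow2 (48MB)  */
	uint64_t fallback = 4096;			/* assumed fallback */
	uint64_t granularity = is_pow2(dram_page_size) ? dram_page_size : fallback;

	printf("pool min alloc order %u (granularity 0x%llx)\n",
	       lowest_set_bit(granularity), (unsigned long long)granularity);
	return 0;
}
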
2883 * hl_vm_fini() - virtual memory module teardown.
2887 * - Idr for device memory allocation handles.
2888 * - DRAM physical pages pool of 2MB.
2889 * - MMU module.
2893 struct hl_vm *vm = &hdev->vm; in hl_vm_fini()
2895 if (!vm->init_done) in hl_vm_fini()
2902 if (kref_put(&vm->dram_pg_pool_refcount, dram_pg_pool_do_release) != 1) in hl_vm_fini()
2903 dev_warn(hdev->dev, "dram_pg_pool was not destroyed on %s\n", in hl_vm_fini()
2906 vm->init_done = false; in hl_vm_fini()
2910 * hl_hw_block_mem_init() - HW block memory initialization.
2911 * @ctx: pointer to the habanalabs context structure.
2916 void hl_hw_block_mem_init(struct hl_ctx *ctx) in hl_hw_block_mem_init() argument
2918 mutex_init(&ctx->hw_block_list_lock); in hl_hw_block_mem_init()
2919 INIT_LIST_HEAD(&ctx->hw_block_mem_list); in hl_hw_block_mem_init()
2923 * hl_hw_block_mem_fini() - HW block memory teardown.
2924 * @ctx: pointer to the habanalabs context structure.
2929 void hl_hw_block_mem_fini(struct hl_ctx *ctx) in hl_hw_block_mem_fini() argument
2933 if (!list_empty(&ctx->hw_block_mem_list)) in hl_hw_block_mem_fini()
2934 dev_crit(ctx->hdev->dev, "HW block mem list isn't empty\n"); in hl_hw_block_mem_fini()
2936 list_for_each_entry_safe(lnode, tmp, &ctx->hw_block_mem_list, node) { in hl_hw_block_mem_fini()
2937 list_del(&lnode->node); in hl_hw_block_mem_fini()
2941 mutex_destroy(&ctx->hw_block_list_lock); in hl_hw_block_mem_fini()