main.c - OpenGrok cross reference for /linux-6.14.4/drivers/vfio/pci/nvgrace-gpu/main.c

Lines Matching +full:memory +full:- +full:region
1 // SPDX-License-Identifier: GPL-2.0-only
12  * The device memory usable to the workloads running in the VM is cached
13  * and showcased as a 64b device BAR (comprising of BAR4 and BAR5 region)
15  * Moreover, the VM GPU device driver needs a non-cacheable region to
16  * support the MIG feature. This region is also exposed as a 64b BAR
17  * (comprising of BAR2 and BAR3 region) and represented as resmem.
38  * The state of the two device memory regions - resmem and usemem - is
42 	phys_addr_t memphys;    /* Base physical address of the region */
43 	size_t memlength;       /* Region size */
44 	size_t bar_size;        /* Reported region BAR size */
49 	};                      /* Base virtual address of the region */
54 	/* Cached and usable memory for the VM. */
56 	/* Non cached memory carved out from the end of device memory */
58 	/* Lock to control device memory kernel mapping */
69 	nvdev->resmem.bar_val = 0;  in nvgrace_gpu_init_fake_bar_emu_regs()
70 	nvdev->usemem.bar_val = 0;  in nvgrace_gpu_init_fake_bar_emu_regs()
79 		return &nvdev->usemem;  in nvgrace_gpu_memregion()
81 	if (nvdev->resmem.memlength && index == RESMEM_REGION_INDEX)  in nvgrace_gpu_memregion()
82 		return &nvdev->resmem;  in nvgrace_gpu_memregion()
100 	if (nvdev->usemem.memlength) {  in nvgrace_gpu_open_device()
102 		mutex_init(&nvdev->remap_lock);  in nvgrace_gpu_open_device()
116 	/* Unmap the mapping to the device memory cached region */  in nvgrace_gpu_close_device()
117 	if (nvdev->usemem.memaddr) {  in nvgrace_gpu_close_device()
118 		memunmap(nvdev->usemem.memaddr);  in nvgrace_gpu_close_device()
119 		nvdev->usemem.memaddr = NULL;  in nvgrace_gpu_close_device()
122 	/* Unmap the mapping to the device memory non-cached region */  in nvgrace_gpu_close_device()
123 	if (nvdev->resmem.ioaddr) {  in nvgrace_gpu_close_device()
124 		iounmap(nvdev->resmem.ioaddr);  in nvgrace_gpu_close_device()
125 		nvdev->resmem.ioaddr = NULL;  in nvgrace_gpu_close_device()
128 	mutex_destroy(&nvdev->remap_lock);  in nvgrace_gpu_close_device()
145 	index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);  in nvgrace_gpu_mmap()
152 	 * Request to mmap the BAR. Map to the CPU accessible memory on the  in nvgrace_gpu_mmap()
153 	 * GPU using the memory information gathered from the system ACPI  in nvgrace_gpu_mmap()
156 	pgoff = vma->vm_pgoff &  in nvgrace_gpu_mmap()
157 		((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);  in nvgrace_gpu_mmap()
159 	if (check_sub_overflow(vma->vm_end, vma->vm_start, &req_len) ||  in nvgrace_gpu_mmap()
160 	    check_add_overflow(PHYS_PFN(memregion->memphys), pgoff, &start_pfn) ||  in nvgrace_gpu_mmap()
162 		return -EOVERFLOW;  in nvgrace_gpu_mmap()
166 	 * memory size  in nvgrace_gpu_mmap()
168 	if (end > memregion->memlength)  in nvgrace_gpu_mmap()
169 		return -EINVAL;  in nvgrace_gpu_mmap()
172 	 * The carved out region of the device memory needs the NORMAL_NC  in nvgrace_gpu_mmap()
177 		 * The nvgrace-gpu module has no issues with uncontained  in nvgrace_gpu_mmap()
184 		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);  in nvgrace_gpu_mmap()
188 	 * Perform a PFN map to the memory and back the device BAR by the  in nvgrace_gpu_mmap()
189 	 * GPU memory.  in nvgrace_gpu_mmap()
191 	 * The available GPU memory size may not be power-of-2 aligned. The  in nvgrace_gpu_mmap()
198 	ret = remap_pfn_range(vma, vma->vm_start, start_pfn,  in nvgrace_gpu_mmap()
199 			      req_len, vma->vm_page_prot);  in nvgrace_gpu_mmap()
203 	vma->vm_pgoff = start_pfn;  in nvgrace_gpu_mmap()
224 		return -EFAULT;  in nvgrace_gpu_ioctl_get_region_info()
227 		return -EINVAL;  in nvgrace_gpu_ioctl_get_region_info()
230 	 * Request to determine the BAR region information. Send the  in nvgrace_gpu_ioctl_get_region_info()
231 	 * GPU memory information.  in nvgrace_gpu_ioctl_get_region_info()
241 	 * Setup for sparse mapping for the device memory. Only the  in nvgrace_gpu_ioctl_get_region_info()
242 	 * available device memory on the hardware is shown as a  in nvgrace_gpu_ioctl_get_region_info()
243 	 * mappable region.  in nvgrace_gpu_ioctl_get_region_info()
247 		return -ENOMEM;  in nvgrace_gpu_ioctl_get_region_info()
249 	sparse->nr_areas = 1;  in nvgrace_gpu_ioctl_get_region_info()
250 	sparse->areas[0].offset = 0;  in nvgrace_gpu_ioctl_get_region_info()
251 	sparse->areas[0].size = memregion->memlength;  in nvgrace_gpu_ioctl_get_region_info()
252 	sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;  in nvgrace_gpu_ioctl_get_region_info()
253 	sparse->header.version = 1;  in nvgrace_gpu_ioctl_get_region_info()
255 	ret = vfio_info_add_capability(&caps, &sparse->header, size);  in nvgrace_gpu_ioctl_get_region_info()
262 	 * The region memory size may not be power-of-2 aligned.  in nvgrace_gpu_ioctl_get_region_info()
263 	 * Given that the memory is exposed as a BAR and may not be  in nvgrace_gpu_ioctl_get_region_info()
264 	 * aligned, roundup to the next power-of-2.  in nvgrace_gpu_ioctl_get_region_info()
266 	info.size = memregion->bar_size;  in nvgrace_gpu_ioctl_get_region_info()
282 				return -EFAULT;  in nvgrace_gpu_ioctl_get_region_info()
289 			    -EFAULT : 0;  in nvgrace_gpu_ioctl_get_region_info()
299 		return -ENOTTY;  in nvgrace_gpu_ioctl()
314 	tmp_val &= ~(bar_size - 1);  in nvgrace_gpu_get_read_value()
321  * Both the usable (usemem) and the reserved (resmem) device memory region
360 		val64 = nvgrace_gpu_get_read_value(memregion->bar_size,  in nvgrace_gpu_read_config_emu()
363 						   memregion->bar_val);  in nvgrace_gpu_read_config_emu()
371 			*ppos -= count;  in nvgrace_gpu_read_config_emu()
372 			return -EFAULT;  in nvgrace_gpu_read_config_emu()
402 		if (copy_from_user((void *)&memregion->bar_val + register_offset,  in nvgrace_gpu_write_config_emu()
404 			return -EFAULT;  in nvgrace_gpu_write_config_emu()
413  * Ad hoc map the device memory in the module kernel VA space. Primarily needed
415  * mmaps of the vfio-pci BAR regions and such accesses should be supported using
418  * The usemem region is cacheable memory and hence is memremaped.
419  * The resmem region is non-cached and is mapped using ioremap_wc (NORMAL_NC).
430 		return -EINVAL;  in nvgrace_gpu_map_device_mem()
432 	mutex_lock(&nvdev->remap_lock);  in nvgrace_gpu_map_device_mem()
434 	if (memregion->memaddr)  in nvgrace_gpu_map_device_mem()
438 		memregion->memaddr = memremap(memregion->memphys,  in nvgrace_gpu_map_device_mem()
439 					      memregion->memlength,  in nvgrace_gpu_map_device_mem()
442 		memregion->ioaddr = ioremap_wc(memregion->memphys,  in nvgrace_gpu_map_device_mem()
443 					       memregion->memlength);  in nvgrace_gpu_map_device_mem()
445 	if (!memregion->memaddr)  in nvgrace_gpu_map_device_mem()
446 		ret = -ENOMEM;  in nvgrace_gpu_map_device_mem()
449 	mutex_unlock(&nvdev->remap_lock);  in nvgrace_gpu_map_device_mem()
455  * Read the data from the device memory (mapped either through ioremap
470 	 * Handle read on the BAR regions. Map to the target device memory  in nvgrace_gpu_map_and_read()
479 				 (u8 *)nvdev->usemem.memaddr + offset,  in nvgrace_gpu_map_and_read()
481 			ret = -EFAULT;  in nvgrace_gpu_map_and_read()
485 		 * the device memory is accessed with the memory enable  in nvgrace_gpu_map_and_read()
491 		ret = vfio_pci_core_do_io_rw(&nvdev->core_device, false,  in nvgrace_gpu_map_and_read()
492 					     nvdev->resmem.ioaddr,  in nvgrace_gpu_map_and_read()
501  * Read count bytes from the device memory at an offset. The actual device
502  * memory size (available) may not be a power-of-2. So the driver fakes
503  * the size to a power-of-2 (reported) when exposing to a user space driver.
505  * Reads starting beyond the reported size generate -EINVAL; reads extending
523 	if (offset >= memregion->bar_size)  in nvgrace_gpu_read_mem()
524 		return -EINVAL;  in nvgrace_gpu_read_mem()
527 	count = min(count, memregion->bar_size - (size_t)offset);  in nvgrace_gpu_read_mem()
530 	 * Determine how many bytes to be actually read from the device memory.  in nvgrace_gpu_read_mem()
531 	 * Read request beyond the actual device memory size is filled with ~0,  in nvgrace_gpu_read_mem()
534 	if (offset >= memregion->memlength)  in nvgrace_gpu_read_mem()
537 		mem_count = min(count, memregion->memlength - (size_t)offset);  in nvgrace_gpu_read_mem()
544 	 * Only the device memory present on the hardware is mapped, which may  in nvgrace_gpu_read_mem()
545 	 * not be power-of-2 aligned. A read to an offset beyond the device memory  in nvgrace_gpu_read_mem()
577  * Write the data to the device memory (mapped either through ioremap
597 		if (copy_from_user((u8 *)nvdev->usemem.memaddr + pos,  in nvgrace_gpu_map_and_write()
599 			return -EFAULT;  in nvgrace_gpu_map_and_write()
603 		 * the device memory is accessed with the memory enable  in nvgrace_gpu_map_and_write()
609 		ret = vfio_pci_core_do_io_rw(&nvdev->core_device, false,  in nvgrace_gpu_map_and_write()
610 					     nvdev->resmem.ioaddr,  in nvgrace_gpu_map_and_write()
619  * Write count bytes to the device memory at a given offset. The actual device
620  * memory size (available) may not be a power-of-2. So the driver fakes the
621  * size to a power-of-2 (reported) when exposing to a user space driver.
624  * beyond the reported size generate -EINVAL.
639 	if (offset >= memregion->bar_size)  in nvgrace_gpu_write_mem()
640 		return -EINVAL;  in nvgrace_gpu_write_mem()
643 	count = min(count, memregion->bar_size - (size_t)offset);  in nvgrace_gpu_write_mem()
646 	 * Determine how many bytes to be actually written to the device memory.  in nvgrace_gpu_write_mem()
649 	if (offset >= memregion->memlength)  in nvgrace_gpu_write_mem()
653 	 * Only the device memory present on the hardware is mapped, which may  in nvgrace_gpu_write_mem()
654 	 * not be power-of-2 aligned. Drop access outside the available device  in nvgrace_gpu_write_mem()
655 	 * memory on the hardware.  in nvgrace_gpu_write_mem()
657 	mem_count = min(count, memregion->memlength - (size_t)offset);  in nvgrace_gpu_write_mem()
687 	.name		= "nvgrace-gpu-vfio-pci",
706 	.name		= "nvgrace-gpu-vfio-pci-core",
731 	 * The memory information is present in the system ACPI tables as DSD  in nvgrace_gpu_fetch_memory_property()
732 	 * properties nvidia,gpu-mem-base-pa and nvidia,gpu-mem-size.  in nvgrace_gpu_fetch_memory_property()
734 	ret = device_property_read_u64(&pdev->dev, "nvidia,gpu-mem-base-pa",  in nvgrace_gpu_fetch_memory_property()
740 		return -EOVERFLOW;  in nvgrace_gpu_fetch_memory_property()
742 	ret = device_property_read_u64(&pdev->dev, "nvidia,gpu-mem-size",  in nvgrace_gpu_fetch_memory_property()
748 		return -EOVERFLOW;  in nvgrace_gpu_fetch_memory_property()
752 	 * memory size is returned as 0. Fail in such case.  in nvgrace_gpu_fetch_memory_property()
755 		return -ENOMEM;  in nvgrace_gpu_fetch_memory_property()
769 	 * On Grace Hopper systems, the VM GPU device driver needs a non-cacheable  in nvgrace_gpu_init_nvdev_struct()
770 	 * region to support the MIG feature owing to a hardware bug. Since the  in nvgrace_gpu_init_nvdev_struct()
771 	 * device memory is mapped as NORMAL cached, carve out a region from the end  in nvgrace_gpu_init_nvdev_struct()
772 	 * with a different NORMAL_NC property (called as reserved memory and  in nvgrace_gpu_init_nvdev_struct()
773 	 * represented as resmem). This region then is exposed as a 64b BAR  in nvgrace_gpu_init_nvdev_struct()
774 	 * (region 2 and 3) to the VM, while exposing the rest (termed as usable  in nvgrace_gpu_init_nvdev_struct()
775 	 * memory and represented using usemem) as cacheable 64b BAR (region 4 and 5).  in nvgrace_gpu_init_nvdev_struct()
778 	 * |-------------------------------------------------|  in nvgrace_gpu_init_nvdev_struct()
783 	 * presence of the bug can be determined through nvdev->has_mig_hw_bug.  in nvgrace_gpu_init_nvdev_struct()
785 	 * the GPU device memory and the entire memory is usable and mapped as  in nvgrace_gpu_init_nvdev_struct()
788 	if (nvdev->has_mig_hw_bug)  in nvgrace_gpu_init_nvdev_struct()
791 	nvdev->usemem.memphys = memphys;  in nvgrace_gpu_init_nvdev_struct()
794 	 * The device memory exposed to the VM is added to the kernel by the  in nvgrace_gpu_init_nvdev_struct()
795 	 * VM driver module in chunks of memory block size. Note that only the  in nvgrace_gpu_init_nvdev_struct()
796 	 * usable memory (usemem) is added to the kernel for usage by the VM  in nvgrace_gpu_init_nvdev_struct()
800 			       &nvdev->usemem.memlength)) {  in nvgrace_gpu_init_nvdev_struct()
801 		ret = -EOVERFLOW;  in nvgrace_gpu_init_nvdev_struct()
806 	 * The usemem region is exposed as a 64b BAR composed of region 4 and 5.  in nvgrace_gpu_init_nvdev_struct()
807 	 * Calculate and save the BAR size for the region.  in nvgrace_gpu_init_nvdev_struct()
809 	nvdev->usemem.bar_size = roundup_pow_of_two(nvdev->usemem.memlength);  in nvgrace_gpu_init_nvdev_struct()
813 	 * for splitting the device memory to create RESMEM. The entire  in nvgrace_gpu_init_nvdev_struct()
814 	 * device memory is usable and will be USEMEM. Return here for  in nvgrace_gpu_init_nvdev_struct()
817 	if (!nvdev->has_mig_hw_bug)  in nvgrace_gpu_init_nvdev_struct()
821 	 * When the device memory is split to workaround the MIG bug on  in nvgrace_gpu_init_nvdev_struct()
822 	 * Grace Hopper, the USEMEM part of the device memory has to be  in nvgrace_gpu_init_nvdev_struct()
826 	 * size. Note that the device memory may not be 512M aligned.  in nvgrace_gpu_init_nvdev_struct()
828 	nvdev->usemem.memlength = round_down(nvdev->usemem.memlength,  in nvgrace_gpu_init_nvdev_struct()
830 	if (nvdev->usemem.memlength == 0) {  in nvgrace_gpu_init_nvdev_struct()
831 		ret = -EINVAL;  in nvgrace_gpu_init_nvdev_struct()
835 	if ((check_add_overflow(nvdev->usemem.memphys,  in nvgrace_gpu_init_nvdev_struct()
836 				nvdev->usemem.memlength,  in nvgrace_gpu_init_nvdev_struct()
837 				&nvdev->resmem.memphys)) ||  in nvgrace_gpu_init_nvdev_struct()
838 	    (check_sub_overflow(memlength, nvdev->usemem.memlength,  in nvgrace_gpu_init_nvdev_struct()
839 				&nvdev->resmem.memlength))) {  in nvgrace_gpu_init_nvdev_struct()
840 		ret = -EOVERFLOW;  in nvgrace_gpu_init_nvdev_struct()
845 	 * The resmem region is exposed as a 64b BAR composed of region 2 and 3  in nvgrace_gpu_init_nvdev_struct()
846 	 * for Grace Hopper. Calculate and save the BAR size for the region.  in nvgrace_gpu_init_nvdev_struct()
848 	nvdev->resmem.bar_size = roundup_pow_of_two(nvdev->resmem.memlength);  in nvgrace_gpu_init_nvdev_struct()
875  * been moved out of the UEFI on the Grace-Blackwell systems.
882  * CPU-GPU chip-to-chip (C2C) cache coherent interconnect.
891  * Ensure that the BAR0 region is enabled before accessing the
898 	int ret = -ETIME;  in nvgrace_gpu_wait_device_ready()
910 		ret = -ENOMEM;  in nvgrace_gpu_wait_device_ready()
949 				  &pdev->dev, ops);  in nvgrace_gpu_probe()
953 	dev_set_drvdata(&pdev->dev, &nvdev->core_device);  in nvgrace_gpu_probe()
956 		nvdev->has_mig_hw_bug = nvgrace_gpu_has_mig_hw_bug(pdev);  in nvgrace_gpu_probe()
959 		 * Device memory properties are identified in the host ACPI  in nvgrace_gpu_probe()
968 	ret = vfio_pci_core_register_device(&nvdev->core_device);  in nvgrace_gpu_probe()
975 	vfio_put_device(&nvdev->core_device.vdev);  in nvgrace_gpu_probe()
981 	struct vfio_pci_core_device *core_device = dev_get_drvdata(&pdev->dev);  in nvgrace_gpu_remove()
984 	vfio_put_device(&core_device->vdev);  in nvgrace_gpu_remove()
1015 …E_DESCRIPTION("VFIO NVGRACE GPU PF - User Level driver for NVIDIA devices with CPU coherently acce…