Lines Matching +full:memory +full:- +full:region
1 // SPDX-License-Identifier: GPL-2.0-only
12 * The device memory usable to the workloads running in the VM is cached
13 * and showcased as a 64b device BAR (comprising of BAR4 and BAR5 region)
15 * Moreover, the VM GPU device driver needs a non-cacheable region to
16 * support the MIG feature. This region is also exposed as a 64b BAR
17 * (comprising of BAR2 and BAR3 region) and represented as resmem.
38 * The state of the two device memory regions - resmem and usemem - is
42 phys_addr_t memphys; /* Base physical address of the region */
43 size_t memlength; /* Region size */
44 size_t bar_size; /* Reported region BAR size */
49 }; /* Base virtual address of the region */
54 /* Cached and usable memory for the VM. */
56 /* Non cached memory carved out from the end of device memory */
58 /* Lock to control device memory kernel mapping */
69 nvdev->resmem.bar_val = 0; in nvgrace_gpu_init_fake_bar_emu_regs()
70 nvdev->usemem.bar_val = 0; in nvgrace_gpu_init_fake_bar_emu_regs()
79 return &nvdev->usemem; in nvgrace_gpu_memregion()
81 if (nvdev->resmem.memlength && index == RESMEM_REGION_INDEX) in nvgrace_gpu_memregion()
82 return &nvdev->resmem; in nvgrace_gpu_memregion()
100 if (nvdev->usemem.memlength) { in nvgrace_gpu_open_device()
102 mutex_init(&nvdev->remap_lock); in nvgrace_gpu_open_device()
116 /* Unmap the mapping to the device memory cached region */ in nvgrace_gpu_close_device()
117 if (nvdev->usemem.memaddr) { in nvgrace_gpu_close_device()
118 memunmap(nvdev->usemem.memaddr); in nvgrace_gpu_close_device()
119 nvdev->usemem.memaddr = NULL; in nvgrace_gpu_close_device()
122 /* Unmap the mapping to the device memory non-cached region */ in nvgrace_gpu_close_device()
123 if (nvdev->resmem.ioaddr) { in nvgrace_gpu_close_device()
124 iounmap(nvdev->resmem.ioaddr); in nvgrace_gpu_close_device()
125 nvdev->resmem.ioaddr = NULL; in nvgrace_gpu_close_device()
128 mutex_destroy(&nvdev->remap_lock); in nvgrace_gpu_close_device()
145 index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT); in nvgrace_gpu_mmap()
152 * Request to mmap the BAR. Map to the CPU accessible memory on the in nvgrace_gpu_mmap()
153 * GPU using the memory information gathered from the system ACPI in nvgrace_gpu_mmap()
156 pgoff = vma->vm_pgoff & in nvgrace_gpu_mmap()
157 ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1); in nvgrace_gpu_mmap()
159 if (check_sub_overflow(vma->vm_end, vma->vm_start, &req_len) || in nvgrace_gpu_mmap()
160 check_add_overflow(PHYS_PFN(memregion->memphys), pgoff, &start_pfn) || in nvgrace_gpu_mmap()
162 return -EOVERFLOW; in nvgrace_gpu_mmap()
166 * memory size in nvgrace_gpu_mmap()
168 if (end > memregion->memlength) in nvgrace_gpu_mmap()
169 return -EINVAL; in nvgrace_gpu_mmap()
172 * The carved out region of the device memory needs the NORMAL_NC in nvgrace_gpu_mmap()
177 * The nvgrace-gpu module has no issues with uncontained in nvgrace_gpu_mmap()
184 vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); in nvgrace_gpu_mmap()
188 * Perform a PFN map to the memory and back the device BAR by the in nvgrace_gpu_mmap()
189 * GPU memory. in nvgrace_gpu_mmap()
191 * The available GPU memory size may not be power-of-2 aligned. The in nvgrace_gpu_mmap()
198 ret = remap_pfn_range(vma, vma->vm_start, start_pfn, in nvgrace_gpu_mmap()
199 req_len, vma->vm_page_prot); in nvgrace_gpu_mmap()
203 vma->vm_pgoff = start_pfn; in nvgrace_gpu_mmap()
224 return -EFAULT; in nvgrace_gpu_ioctl_get_region_info()
227 return -EINVAL; in nvgrace_gpu_ioctl_get_region_info()
230 * Request to determine the BAR region information. Send the in nvgrace_gpu_ioctl_get_region_info()
231 * GPU memory information. in nvgrace_gpu_ioctl_get_region_info()
241 * Setup for sparse mapping for the device memory. Only the in nvgrace_gpu_ioctl_get_region_info()
242 * available device memory on the hardware is shown as a in nvgrace_gpu_ioctl_get_region_info()
243 * mappable region. in nvgrace_gpu_ioctl_get_region_info()
247 return -ENOMEM; in nvgrace_gpu_ioctl_get_region_info()
249 sparse->nr_areas = 1; in nvgrace_gpu_ioctl_get_region_info()
250 sparse->areas[0].offset = 0; in nvgrace_gpu_ioctl_get_region_info()
251 sparse->areas[0].size = memregion->memlength; in nvgrace_gpu_ioctl_get_region_info()
252 sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP; in nvgrace_gpu_ioctl_get_region_info()
253 sparse->header.version = 1; in nvgrace_gpu_ioctl_get_region_info()
255 ret = vfio_info_add_capability(&caps, &sparse->header, size); in nvgrace_gpu_ioctl_get_region_info()
262 * The region memory size may not be power-of-2 aligned. in nvgrace_gpu_ioctl_get_region_info()
263 * Given that the memory is exposed as a BAR and may not be in nvgrace_gpu_ioctl_get_region_info()
264 * aligned, roundup to the next power-of-2. in nvgrace_gpu_ioctl_get_region_info()
266 info.size = memregion->bar_size; in nvgrace_gpu_ioctl_get_region_info()
282 return -EFAULT; in nvgrace_gpu_ioctl_get_region_info()
289 -EFAULT : 0; in nvgrace_gpu_ioctl_get_region_info()
299 return -ENOTTY; in nvgrace_gpu_ioctl()
314 tmp_val &= ~(bar_size - 1); in nvgrace_gpu_get_read_value()
321 * Both the usable (usemem) and the reserved (resmem) device memory region
360 val64 = nvgrace_gpu_get_read_value(memregion->bar_size, in nvgrace_gpu_read_config_emu()
363 memregion->bar_val); in nvgrace_gpu_read_config_emu()
371 *ppos -= count; in nvgrace_gpu_read_config_emu()
372 return -EFAULT; in nvgrace_gpu_read_config_emu()
402 if (copy_from_user((void *)&memregion->bar_val + register_offset, in nvgrace_gpu_write_config_emu()
404 return -EFAULT; in nvgrace_gpu_write_config_emu()
413 * Ad hoc map the device memory in the module kernel VA space. Primarily needed
415 * mmaps of the vfio-pci BAR regions and such accesses should be supported using
418 * The usemem region is cacheable memory and hence is memremapped.
419 * The resmem region is non-cached and is mapped using ioremap_wc (NORMAL_NC).
430 return -EINVAL; in nvgrace_gpu_map_device_mem()
432 mutex_lock(&nvdev->remap_lock); in nvgrace_gpu_map_device_mem()
434 if (memregion->memaddr) in nvgrace_gpu_map_device_mem()
438 memregion->memaddr = memremap(memregion->memphys, in nvgrace_gpu_map_device_mem()
439 memregion->memlength, in nvgrace_gpu_map_device_mem()
442 memregion->ioaddr = ioremap_wc(memregion->memphys, in nvgrace_gpu_map_device_mem()
443 memregion->memlength); in nvgrace_gpu_map_device_mem()
445 if (!memregion->memaddr) in nvgrace_gpu_map_device_mem()
446 ret = -ENOMEM; in nvgrace_gpu_map_device_mem()
449 mutex_unlock(&nvdev->remap_lock); in nvgrace_gpu_map_device_mem()
455 * Read the data from the device memory (mapped either through ioremap
470 * Handle read on the BAR regions. Map to the target device memory in nvgrace_gpu_map_and_read()
479 (u8 *)nvdev->usemem.memaddr + offset, in nvgrace_gpu_map_and_read()
481 ret = -EFAULT; in nvgrace_gpu_map_and_read()
485 * the device memory is accessed with the memory enable in nvgrace_gpu_map_and_read()
491 ret = vfio_pci_core_do_io_rw(&nvdev->core_device, false, in nvgrace_gpu_map_and_read()
492 nvdev->resmem.ioaddr, in nvgrace_gpu_map_and_read()
501 * Read count bytes from the device memory at an offset. The actual device
502 * memory size (available) may not be a power-of-2. So the driver fakes
503 * the size to a power-of-2 (reported) when exposing to a user space driver.
505 * Reads starting beyond the reported size generate -EINVAL; reads extending
523 if (offset >= memregion->bar_size) in nvgrace_gpu_read_mem()
524 return -EINVAL; in nvgrace_gpu_read_mem()
527 count = min(count, memregion->bar_size - (size_t)offset); in nvgrace_gpu_read_mem()
530 * Determine how many bytes to be actually read from the device memory. in nvgrace_gpu_read_mem()
531 * Read request beyond the actual device memory size is filled with ~0, in nvgrace_gpu_read_mem()
534 if (offset >= memregion->memlength) in nvgrace_gpu_read_mem()
537 mem_count = min(count, memregion->memlength - (size_t)offset); in nvgrace_gpu_read_mem()
544 * Only the device memory present on the hardware is mapped, which may in nvgrace_gpu_read_mem()
545 * not be power-of-2 aligned. A read to an offset beyond the device memory in nvgrace_gpu_read_mem()
577 * Write the data to the device memory (mapped either through ioremap
597 if (copy_from_user((u8 *)nvdev->usemem.memaddr + pos, in nvgrace_gpu_map_and_write()
599 return -EFAULT; in nvgrace_gpu_map_and_write()
603 * the device memory is accessed with the memory enable in nvgrace_gpu_map_and_write()
609 ret = vfio_pci_core_do_io_rw(&nvdev->core_device, false, in nvgrace_gpu_map_and_write()
610 nvdev->resmem.ioaddr, in nvgrace_gpu_map_and_write()
619 * Write count bytes to the device memory at a given offset. The actual device
620 * memory size (available) may not be a power-of-2. So the driver fakes the
621 * size to a power-of-2 (reported) when exposing to a user space driver.
624 * beyond the reported size generate -EINVAL.
639 if (offset >= memregion->bar_size) in nvgrace_gpu_write_mem()
640 return -EINVAL; in nvgrace_gpu_write_mem()
643 count = min(count, memregion->bar_size - (size_t)offset); in nvgrace_gpu_write_mem()
646 * Determine how many bytes to be actually written to the device memory. in nvgrace_gpu_write_mem()
649 if (offset >= memregion->memlength) in nvgrace_gpu_write_mem()
653 * Only the device memory present on the hardware is mapped, which may in nvgrace_gpu_write_mem()
654 * not be power-of-2 aligned. Drop access outside the available device in nvgrace_gpu_write_mem()
655 * memory on the hardware. in nvgrace_gpu_write_mem()
657 mem_count = min(count, memregion->memlength - (size_t)offset); in nvgrace_gpu_write_mem()
687 .name = "nvgrace-gpu-vfio-pci",
706 .name = "nvgrace-gpu-vfio-pci-core",
731 * The memory information is present in the system ACPI tables as DSD in nvgrace_gpu_fetch_memory_property()
732 * properties nvidia,gpu-mem-base-pa and nvidia,gpu-mem-size. in nvgrace_gpu_fetch_memory_property()
734 ret = device_property_read_u64(&pdev->dev, "nvidia,gpu-mem-base-pa", in nvgrace_gpu_fetch_memory_property()
740 return -EOVERFLOW; in nvgrace_gpu_fetch_memory_property()
742 ret = device_property_read_u64(&pdev->dev, "nvidia,gpu-mem-size", in nvgrace_gpu_fetch_memory_property()
748 return -EOVERFLOW; in nvgrace_gpu_fetch_memory_property()
752 * memory size is returned as 0. Fail in such case. in nvgrace_gpu_fetch_memory_property()
755 return -ENOMEM; in nvgrace_gpu_fetch_memory_property()
769 * On Grace Hopper systems, the VM GPU device driver needs a non-cacheable in nvgrace_gpu_init_nvdev_struct()
770 * region to support the MIG feature owing to a hardware bug. Since the in nvgrace_gpu_init_nvdev_struct()
771 * device memory is mapped as NORMAL cached, carve out a region from the end in nvgrace_gpu_init_nvdev_struct()
772 * with a different NORMAL_NC property (called as reserved memory and in nvgrace_gpu_init_nvdev_struct()
773 * represented as resmem). This region then is exposed as a 64b BAR in nvgrace_gpu_init_nvdev_struct()
774 * (region 2 and 3) to the VM, while exposing the rest (termed as usable in nvgrace_gpu_init_nvdev_struct()
775 * memory and represented using usemem) as cacheable 64b BAR (region 4 and 5). in nvgrace_gpu_init_nvdev_struct()
778 * |-------------------------------------------------| in nvgrace_gpu_init_nvdev_struct()
783 * presence of the bug can be determined through nvdev->has_mig_hw_bug. in nvgrace_gpu_init_nvdev_struct()
785 * the GPU device memory and the entire memory is usable and mapped as in nvgrace_gpu_init_nvdev_struct()
788 if (nvdev->has_mig_hw_bug) in nvgrace_gpu_init_nvdev_struct()
791 nvdev->usemem.memphys = memphys; in nvgrace_gpu_init_nvdev_struct()
794 * The device memory exposed to the VM is added to the kernel by the in nvgrace_gpu_init_nvdev_struct()
795 * VM driver module in chunks of memory block size. Note that only the in nvgrace_gpu_init_nvdev_struct()
796 * usable memory (usemem) is added to the kernel for usage by the VM in nvgrace_gpu_init_nvdev_struct()
800 &nvdev->usemem.memlength)) { in nvgrace_gpu_init_nvdev_struct()
801 ret = -EOVERFLOW; in nvgrace_gpu_init_nvdev_struct()
806 * The usemem region is exposed as a 64b BAR composed of region 4 and 5. in nvgrace_gpu_init_nvdev_struct()
807 * Calculate and save the BAR size for the region. in nvgrace_gpu_init_nvdev_struct()
809 nvdev->usemem.bar_size = roundup_pow_of_two(nvdev->usemem.memlength); in nvgrace_gpu_init_nvdev_struct()
813 * for splitting the device memory to create RESMEM. The entire in nvgrace_gpu_init_nvdev_struct()
814 * device memory is usable and will be USEMEM. Return here for in nvgrace_gpu_init_nvdev_struct()
817 if (!nvdev->has_mig_hw_bug) in nvgrace_gpu_init_nvdev_struct()
821 * When the device memory is split to workaround the MIG bug on in nvgrace_gpu_init_nvdev_struct()
822 * Grace Hopper, the USEMEM part of the device memory has to be in nvgrace_gpu_init_nvdev_struct()
826 * size. Note that the device memory may not be 512M aligned. in nvgrace_gpu_init_nvdev_struct()
828 nvdev->usemem.memlength = round_down(nvdev->usemem.memlength, in nvgrace_gpu_init_nvdev_struct()
830 if (nvdev->usemem.memlength == 0) { in nvgrace_gpu_init_nvdev_struct()
831 ret = -EINVAL; in nvgrace_gpu_init_nvdev_struct()
835 if ((check_add_overflow(nvdev->usemem.memphys, in nvgrace_gpu_init_nvdev_struct()
836 nvdev->usemem.memlength, in nvgrace_gpu_init_nvdev_struct()
837 &nvdev->resmem.memphys)) || in nvgrace_gpu_init_nvdev_struct()
838 (check_sub_overflow(memlength, nvdev->usemem.memlength, in nvgrace_gpu_init_nvdev_struct()
839 &nvdev->resmem.memlength))) { in nvgrace_gpu_init_nvdev_struct()
840 ret = -EOVERFLOW; in nvgrace_gpu_init_nvdev_struct()
845 * The resmem region is exposed as a 64b BAR composed of region 2 and 3 in nvgrace_gpu_init_nvdev_struct()
846 * for Grace Hopper. Calculate and save the BAR size for the region. in nvgrace_gpu_init_nvdev_struct()
848 nvdev->resmem.bar_size = roundup_pow_of_two(nvdev->resmem.memlength); in nvgrace_gpu_init_nvdev_struct()
875 * been moved out of the UEFI on the Grace-Blackwell systems.
882 * CPU-GPU chip-to-chip (C2C) cache coherent interconnect.
891 * Ensure that the BAR0 region is enabled before accessing the
898 int ret = -ETIME; in nvgrace_gpu_wait_device_ready()
910 ret = -ENOMEM; in nvgrace_gpu_wait_device_ready()
949 &pdev->dev, ops); in nvgrace_gpu_probe()
953 dev_set_drvdata(&pdev->dev, &nvdev->core_device); in nvgrace_gpu_probe()
956 nvdev->has_mig_hw_bug = nvgrace_gpu_has_mig_hw_bug(pdev); in nvgrace_gpu_probe()
959 * Device memory properties are identified in the host ACPI in nvgrace_gpu_probe()
968 ret = vfio_pci_core_register_device(&nvdev->core_device); in nvgrace_gpu_probe()
975 vfio_put_device(&nvdev->core_device.vdev); in nvgrace_gpu_probe()
981 struct vfio_pci_core_device *core_device = dev_get_drvdata(&pdev->dev); in nvgrace_gpu_remove()
984 vfio_put_device(&core_device->vdev); in nvgrace_gpu_remove()
1015 …E_DESCRIPTION("VFIO NVGRACE GPU PF - User Level driver for NVIDIA devices with CPU coherently acce…