Lines Matching +full:gfx +full:- +full:mem
1 // SPDX-License-Identifier: GPL-2.0 OR MIT
3 * Copyright 2014-2022 Advanced Micro Devices, Inc.
46 /* topology_device_list - Master list of all topology devices */
60 if (top_dev->proximity_domain == proximity_domain) { in kfd_topology_device_by_proximity_domain_no_lock()
90 if (top_dev->gpu_id == gpu_id) { in kfd_topology_device_by_id()
108 return top_dev->gpu; in kfd_device_by_id()
119 if (top_dev->gpu && top_dev->gpu->adev->pdev == pdev) { in kfd_device_by_pci_dev()
120 device = top_dev->gpu; in kfd_device_by_pci_dev()
132 struct kfd_mem_properties *mem; in kfd_release_topology_device() local
138 list_del(&dev->list); in kfd_release_topology_device()
140 while (dev->mem_props.next != &dev->mem_props) { in kfd_release_topology_device()
141 mem = container_of(dev->mem_props.next, in kfd_release_topology_device()
143 list_del(&mem->list); in kfd_release_topology_device()
144 kfree(mem); in kfd_release_topology_device()
147 while (dev->cache_props.next != &dev->cache_props) { in kfd_release_topology_device()
148 cache = container_of(dev->cache_props.next, in kfd_release_topology_device()
150 list_del(&cache->list); in kfd_release_topology_device()
154 while (dev->io_link_props.next != &dev->io_link_props) { in kfd_release_topology_device()
155 iolink = container_of(dev->io_link_props.next, in kfd_release_topology_device()
157 list_del(&iolink->list); in kfd_release_topology_device()
161 while (dev->p2p_link_props.next != &dev->p2p_link_props) { in kfd_release_topology_device()
162 p2plink = container_of(dev->p2p_link_props.next, in kfd_release_topology_device()
164 list_del(&p2plink->list); in kfd_release_topology_device()
168 while (dev->perf_props.next != &dev->perf_props) { in kfd_release_topology_device()
169 perf = container_of(dev->perf_props.next, in kfd_release_topology_device()
171 list_del(&perf->list); in kfd_release_topology_device()
206 INIT_LIST_HEAD(&dev->mem_props); in kfd_create_topology_device()
207 INIT_LIST_HEAD(&dev->cache_props); in kfd_create_topology_device()
208 INIT_LIST_HEAD(&dev->io_link_props); in kfd_create_topology_device()
209 INIT_LIST_HEAD(&dev->p2p_link_props); in kfd_create_topology_device()
210 INIT_LIST_HEAD(&dev->perf_props); in kfd_create_topology_device()
212 list_add_tail(&dev->list, device_list); in kfd_create_topology_device()
219 (offs += snprintf(buffer+offs, PAGE_SIZE-offs, \
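The fragment on file line 219 is the core of the show helpers used throughout this listing. A hedged reconstruction of how that macro family is plausibly defined, inferred from the call sites below (sysfs_show_32bit_prop, sysfs_show_64bit_prop, sysfs_show_32bit_val, sysfs_show_str_val); the exact definitions in the file may differ:

/* Reconstruction sketch, not matched lines from the file. */
#define sysfs_show_gen_prop(buffer, offs, fmt, ...) \
        (offs += snprintf(buffer+offs, PAGE_SIZE-offs, \
                          fmt, __VA_ARGS__))
#define sysfs_show_32bit_prop(buffer, offs, name, value) \
        sysfs_show_gen_prop(buffer, offs, "%s %u\n", name, value)
#define sysfs_show_64bit_prop(buffer, offs, name, value) \
        sysfs_show_gen_prop(buffer, offs, "%s %llu\n", name, value)
#define sysfs_show_32bit_val(buffer, offs, value) \
        sysfs_show_gen_prop(buffer, offs, "%u\n", value)
#define sysfs_show_str_val(buffer, offs, value) \
        sysfs_show_gen_prop(buffer, offs, "%s\n", value)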
249 offs = -EINVAL; in sysprops_show()
279 if (iolink->gpu && kfd_devcgroup_check_permission(iolink->gpu)) in iolink_show()
280 return -EPERM; in iolink_show()
281 sysfs_show_32bit_prop(buffer, offs, "type", iolink->iolink_type); in iolink_show()
282 sysfs_show_32bit_prop(buffer, offs, "version_major", iolink->ver_maj); in iolink_show()
283 sysfs_show_32bit_prop(buffer, offs, "version_minor", iolink->ver_min); in iolink_show()
284 sysfs_show_32bit_prop(buffer, offs, "node_from", iolink->node_from); in iolink_show()
285 sysfs_show_32bit_prop(buffer, offs, "node_to", iolink->node_to); in iolink_show()
286 sysfs_show_32bit_prop(buffer, offs, "weight", iolink->weight); in iolink_show()
287 sysfs_show_32bit_prop(buffer, offs, "min_latency", iolink->min_latency); in iolink_show()
288 sysfs_show_32bit_prop(buffer, offs, "max_latency", iolink->max_latency); in iolink_show()
290 iolink->min_bandwidth); in iolink_show()
292 iolink->max_bandwidth); in iolink_show()
294 iolink->rec_transfer_size); in iolink_show()
296 iolink->rec_sdma_eng_id_mask); in iolink_show()
297 sysfs_show_32bit_prop(buffer, offs, "flags", iolink->flags); in iolink_show()
315 struct kfd_mem_properties *mem; in mem_show() local
320 mem = container_of(attr, struct kfd_mem_properties, attr); in mem_show()
321 if (mem->gpu && kfd_devcgroup_check_permission(mem->gpu)) in mem_show()
322 return -EPERM; in mem_show()
323 sysfs_show_32bit_prop(buffer, offs, "heap_type", mem->heap_type); in mem_show()
325 mem->size_in_bytes); in mem_show()
326 sysfs_show_32bit_prop(buffer, offs, "flags", mem->flags); in mem_show()
327 sysfs_show_32bit_prop(buffer, offs, "width", mem->width); in mem_show()
329 mem->mem_clk_max); in mem_show()
353 if (cache->gpu && kfd_devcgroup_check_permission(cache->gpu)) in kfd_cache_show()
354 return -EPERM; in kfd_cache_show()
356 cache->processor_id_low); in kfd_cache_show()
357 sysfs_show_32bit_prop(buffer, offs, "level", cache->cache_level); in kfd_cache_show()
358 sysfs_show_32bit_prop(buffer, offs, "size", cache->cache_size); in kfd_cache_show()
360 cache->cacheline_size); in kfd_cache_show()
362 cache->cachelines_per_tag); in kfd_cache_show()
363 sysfs_show_32bit_prop(buffer, offs, "association", cache->cache_assoc); in kfd_cache_show()
364 sysfs_show_32bit_prop(buffer, offs, "latency", cache->cache_latency); in kfd_cache_show()
365 sysfs_show_32bit_prop(buffer, offs, "type", cache->cache_type); in kfd_cache_show()
367 offs += snprintf(buffer+offs, PAGE_SIZE-offs, "sibling_map "); in kfd_cache_show()
368 for (i = 0; i < cache->sibling_map_size; i++) in kfd_cache_show()
369 for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++) in kfd_cache_show()
371 offs += snprintf(buffer+offs, PAGE_SIZE-offs, "%d,", in kfd_cache_show()
372 (cache->sibling_map[i] >> j) & 1); in kfd_cache_show()
375 buffer[offs-1] = '\n'; in kfd_cache_show()
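In kfd_cache_show() above, the nested loops expand each sibling_map byte into comma-separated bits, and the final assignment swaps the trailing comma for a newline. A self-contained userspace sketch of that expansion, with an invented two-byte map:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint8_t sibling_map[2] = { 0x0b, 0x01 };        /* invented sample */
        char buffer[128];
        int offs = 0, i, j;

        offs += snprintf(buffer + offs, sizeof(buffer) - offs, "sibling_map ");
        for (i = 0; i < 2; i++)                         /* sibling_map_size */
                for (j = 0; j < 8; j++)                 /* bits per byte */
                        offs += snprintf(buffer + offs, sizeof(buffer) - offs,
                                         "%d,", (sibling_map[i] >> j) & 1);
        buffer[offs - 1] = '\n';  /* overwrite last comma, as the driver does */
        fputs(buffer, stdout);  /* -> sibling_map 1,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0 */
        return 0;
}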
403 if (!attr->data) /* invalid data for PMC */ in perf_show()
406 return sysfs_show_32bit_val(buf, offs, attr->data); in perf_show()
432 if (strcmp(attr->name, "gpu_id") == 0) { in node_show()
435 if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu)) in node_show()
436 return -EPERM; in node_show()
437 return sysfs_show_32bit_val(buffer, offs, dev->gpu_id); in node_show()
440 if (strcmp(attr->name, "name") == 0) { in node_show()
444 if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu)) in node_show()
445 return -EPERM; in node_show()
446 return sysfs_show_str_val(buffer, offs, dev->node_props.name); in node_show()
451 if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu)) in node_show()
452 return -EPERM; in node_show()
454 dev->node_props.cpu_cores_count); in node_show()
456 dev->gpu ? dev->node_props.simd_count : 0); in node_show()
458 dev->node_props.mem_banks_count); in node_show()
460 dev->node_props.caches_count); in node_show()
462 dev->node_props.io_links_count); in node_show()
464 dev->node_props.p2p_links_count); in node_show()
466 dev->node_props.cpu_core_id_base); in node_show()
468 dev->node_props.simd_id_base); in node_show()
470 dev->node_props.max_waves_per_simd); in node_show()
472 dev->node_props.lds_size_in_kb); in node_show()
474 dev->node_props.gds_size_in_kb); in node_show()
476 dev->node_props.num_gws); in node_show()
478 dev->node_props.wave_front_size); in node_show()
480 dev->gpu ? (dev->node_props.array_count * in node_show()
481 NUM_XCC(dev->gpu->xcc_mask)) : 0); in node_show()
483 dev->node_props.simd_arrays_per_engine); in node_show()
485 dev->node_props.cu_per_simd_array); in node_show()
487 dev->node_props.simd_per_cu); in node_show()
489 dev->node_props.max_slots_scratch_cu); in node_show()
491 dev->node_props.gfx_target_version); in node_show()
493 dev->node_props.vendor_id); in node_show()
495 dev->node_props.device_id); in node_show()
497 dev->node_props.location_id); in node_show()
499 dev->node_props.domain); in node_show()
501 dev->node_props.drm_render_minor); in node_show()
503 dev->node_props.hive_id); in node_show()
505 dev->node_props.num_sdma_engines); in node_show()
507 dev->node_props.num_sdma_xgmi_engines); in node_show()
509 dev->node_props.num_sdma_queues_per_engine); in node_show()
511 dev->node_props.num_cp_queues); in node_show()
513 if (dev->gpu) { in node_show()
515 __ilog2_u32(dev->gpu->kfd->device_info.num_of_watch_points); in node_show()
518 dev->node_props.capability |= in node_show()
521 dev->node_props.capability |= in node_show()
527 if (dev->gpu->adev->asic_type == CHIP_TONGA) in node_show()
528 dev->node_props.capability |= in node_show()
532 dev->node_props.max_engine_clk_fcompute); in node_show()
537 dev->gpu->kfd->mec_fw_version); in node_show()
539 dev->node_props.capability); in node_show()
541 dev->node_props.debug_prop); in node_show()
543 dev->gpu->kfd->sdma_fw_version); in node_show()
545 dev->gpu->adev->unique_id); in node_show()
547 NUM_XCC(dev->gpu->xcc_mask)); in node_show()
575 struct kfd_mem_properties *mem; in kfd_remove_sysfs_node_entry() local
578 if (dev->kobj_iolink) { in kfd_remove_sysfs_node_entry()
579 list_for_each_entry(iolink, &dev->io_link_props, list) in kfd_remove_sysfs_node_entry()
580 if (iolink->kobj) { in kfd_remove_sysfs_node_entry()
581 kfd_remove_sysfs_file(iolink->kobj, in kfd_remove_sysfs_node_entry()
582 &iolink->attr); in kfd_remove_sysfs_node_entry()
583 iolink->kobj = NULL; in kfd_remove_sysfs_node_entry()
585 kobject_del(dev->kobj_iolink); in kfd_remove_sysfs_node_entry()
586 kobject_put(dev->kobj_iolink); in kfd_remove_sysfs_node_entry()
587 dev->kobj_iolink = NULL; in kfd_remove_sysfs_node_entry()
590 if (dev->kobj_p2plink) { in kfd_remove_sysfs_node_entry()
591 list_for_each_entry(p2plink, &dev->p2p_link_props, list) in kfd_remove_sysfs_node_entry()
592 if (p2plink->kobj) { in kfd_remove_sysfs_node_entry()
593 kfd_remove_sysfs_file(p2plink->kobj, in kfd_remove_sysfs_node_entry()
594 &p2plink->attr); in kfd_remove_sysfs_node_entry()
595 p2plink->kobj = NULL; in kfd_remove_sysfs_node_entry()
597 kobject_del(dev->kobj_p2plink); in kfd_remove_sysfs_node_entry()
598 kobject_put(dev->kobj_p2plink); in kfd_remove_sysfs_node_entry()
599 dev->kobj_p2plink = NULL; in kfd_remove_sysfs_node_entry()
602 if (dev->kobj_cache) { in kfd_remove_sysfs_node_entry()
603 list_for_each_entry(cache, &dev->cache_props, list) in kfd_remove_sysfs_node_entry()
604 if (cache->kobj) { in kfd_remove_sysfs_node_entry()
605 kfd_remove_sysfs_file(cache->kobj, in kfd_remove_sysfs_node_entry()
606 &cache->attr); in kfd_remove_sysfs_node_entry()
607 cache->kobj = NULL; in kfd_remove_sysfs_node_entry()
609 kobject_del(dev->kobj_cache); in kfd_remove_sysfs_node_entry()
610 kobject_put(dev->kobj_cache); in kfd_remove_sysfs_node_entry()
611 dev->kobj_cache = NULL; in kfd_remove_sysfs_node_entry()
614 if (dev->kobj_mem) { in kfd_remove_sysfs_node_entry()
615 list_for_each_entry(mem, &dev->mem_props, list) in kfd_remove_sysfs_node_entry()
616 if (mem->kobj) { in kfd_remove_sysfs_node_entry()
617 kfd_remove_sysfs_file(mem->kobj, &mem->attr); in kfd_remove_sysfs_node_entry()
618 mem->kobj = NULL; in kfd_remove_sysfs_node_entry()
620 kobject_del(dev->kobj_mem); in kfd_remove_sysfs_node_entry()
621 kobject_put(dev->kobj_mem); in kfd_remove_sysfs_node_entry()
622 dev->kobj_mem = NULL; in kfd_remove_sysfs_node_entry()
625 if (dev->kobj_perf) { in kfd_remove_sysfs_node_entry()
626 list_for_each_entry(perf, &dev->perf_props, list) { in kfd_remove_sysfs_node_entry()
627 kfree(perf->attr_group); in kfd_remove_sysfs_node_entry()
628 perf->attr_group = NULL; in kfd_remove_sysfs_node_entry()
630 kobject_del(dev->kobj_perf); in kfd_remove_sysfs_node_entry()
631 kobject_put(dev->kobj_perf); in kfd_remove_sysfs_node_entry()
632 dev->kobj_perf = NULL; in kfd_remove_sysfs_node_entry()
635 if (dev->kobj_node) { in kfd_remove_sysfs_node_entry()
636 sysfs_remove_file(dev->kobj_node, &dev->attr_gpuid); in kfd_remove_sysfs_node_entry()
637 sysfs_remove_file(dev->kobj_node, &dev->attr_name); in kfd_remove_sysfs_node_entry()
638 sysfs_remove_file(dev->kobj_node, &dev->attr_props); in kfd_remove_sysfs_node_entry()
639 kobject_del(dev->kobj_node); in kfd_remove_sysfs_node_entry()
640 kobject_put(dev->kobj_node); in kfd_remove_sysfs_node_entry()
641 dev->kobj_node = NULL; in kfd_remove_sysfs_node_entry()
651 struct kfd_mem_properties *mem; in kfd_build_sysfs_node_entry() local
657 if (WARN_ON(dev->kobj_node)) in kfd_build_sysfs_node_entry()
658 return -EEXIST; in kfd_build_sysfs_node_entry()
663 dev->kobj_node = kfd_alloc_struct(dev->kobj_node); in kfd_build_sysfs_node_entry()
664 if (!dev->kobj_node) in kfd_build_sysfs_node_entry()
665 return -ENOMEM; in kfd_build_sysfs_node_entry()
667 ret = kobject_init_and_add(dev->kobj_node, &node_type, in kfd_build_sysfs_node_entry()
670 kobject_put(dev->kobj_node); in kfd_build_sysfs_node_entry()
674 dev->kobj_mem = kobject_create_and_add("mem_banks", dev->kobj_node); in kfd_build_sysfs_node_entry()
675 if (!dev->kobj_mem) in kfd_build_sysfs_node_entry()
676 return -ENOMEM; in kfd_build_sysfs_node_entry()
678 dev->kobj_cache = kobject_create_and_add("caches", dev->kobj_node); in kfd_build_sysfs_node_entry()
679 if (!dev->kobj_cache) in kfd_build_sysfs_node_entry()
680 return -ENOMEM; in kfd_build_sysfs_node_entry()
682 dev->kobj_iolink = kobject_create_and_add("io_links", dev->kobj_node); in kfd_build_sysfs_node_entry()
683 if (!dev->kobj_iolink) in kfd_build_sysfs_node_entry()
684 return -ENOMEM; in kfd_build_sysfs_node_entry()
686 dev->kobj_p2plink = kobject_create_and_add("p2p_links", dev->kobj_node); in kfd_build_sysfs_node_entry()
687 if (!dev->kobj_p2plink) in kfd_build_sysfs_node_entry()
688 return -ENOMEM; in kfd_build_sysfs_node_entry()
690 dev->kobj_perf = kobject_create_and_add("perf", dev->kobj_node); in kfd_build_sysfs_node_entry()
691 if (!dev->kobj_perf) in kfd_build_sysfs_node_entry()
692 return -ENOMEM; in kfd_build_sysfs_node_entry()
697 dev->attr_gpuid.name = "gpu_id"; in kfd_build_sysfs_node_entry()
698 dev->attr_gpuid.mode = KFD_SYSFS_FILE_MODE; in kfd_build_sysfs_node_entry()
699 sysfs_attr_init(&dev->attr_gpuid); in kfd_build_sysfs_node_entry()
700 dev->attr_name.name = "name"; in kfd_build_sysfs_node_entry()
701 dev->attr_name.mode = KFD_SYSFS_FILE_MODE; in kfd_build_sysfs_node_entry()
702 sysfs_attr_init(&dev->attr_name); in kfd_build_sysfs_node_entry()
703 dev->attr_props.name = "properties"; in kfd_build_sysfs_node_entry()
704 dev->attr_props.mode = KFD_SYSFS_FILE_MODE; in kfd_build_sysfs_node_entry()
705 sysfs_attr_init(&dev->attr_props); in kfd_build_sysfs_node_entry()
706 ret = sysfs_create_file(dev->kobj_node, &dev->attr_gpuid); in kfd_build_sysfs_node_entry()
709 ret = sysfs_create_file(dev->kobj_node, &dev->attr_name); in kfd_build_sysfs_node_entry()
712 ret = sysfs_create_file(dev->kobj_node, &dev->attr_props); in kfd_build_sysfs_node_entry()
717 list_for_each_entry(mem, &dev->mem_props, list) { in kfd_build_sysfs_node_entry()
718 mem->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); in kfd_build_sysfs_node_entry()
719 if (!mem->kobj) in kfd_build_sysfs_node_entry()
720 return -ENOMEM; in kfd_build_sysfs_node_entry()
721 ret = kobject_init_and_add(mem->kobj, &mem_type, in kfd_build_sysfs_node_entry()
722 dev->kobj_mem, "%d", i); in kfd_build_sysfs_node_entry()
724 kobject_put(mem->kobj); in kfd_build_sysfs_node_entry()
728 mem->attr.name = "properties"; in kfd_build_sysfs_node_entry()
729 mem->attr.mode = KFD_SYSFS_FILE_MODE; in kfd_build_sysfs_node_entry()
730 sysfs_attr_init(&mem->attr); in kfd_build_sysfs_node_entry()
731 ret = sysfs_create_file(mem->kobj, &mem->attr); in kfd_build_sysfs_node_entry()
738 list_for_each_entry(cache, &dev->cache_props, list) { in kfd_build_sysfs_node_entry()
739 cache->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); in kfd_build_sysfs_node_entry()
740 if (!cache->kobj) in kfd_build_sysfs_node_entry()
741 return -ENOMEM; in kfd_build_sysfs_node_entry()
742 ret = kobject_init_and_add(cache->kobj, &cache_type, in kfd_build_sysfs_node_entry()
743 dev->kobj_cache, "%d", i); in kfd_build_sysfs_node_entry()
745 kobject_put(cache->kobj); in kfd_build_sysfs_node_entry()
749 cache->attr.name = "properties"; in kfd_build_sysfs_node_entry()
750 cache->attr.mode = KFD_SYSFS_FILE_MODE; in kfd_build_sysfs_node_entry()
751 sysfs_attr_init(&cache->attr); in kfd_build_sysfs_node_entry()
752 ret = sysfs_create_file(cache->kobj, &cache->attr); in kfd_build_sysfs_node_entry()
759 list_for_each_entry(iolink, &dev->io_link_props, list) { in kfd_build_sysfs_node_entry()
760 iolink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); in kfd_build_sysfs_node_entry()
761 if (!iolink->kobj) in kfd_build_sysfs_node_entry()
762 return -ENOMEM; in kfd_build_sysfs_node_entry()
763 ret = kobject_init_and_add(iolink->kobj, &iolink_type, in kfd_build_sysfs_node_entry()
764 dev->kobj_iolink, "%d", i); in kfd_build_sysfs_node_entry()
766 kobject_put(iolink->kobj); in kfd_build_sysfs_node_entry()
770 iolink->attr.name = "properties"; in kfd_build_sysfs_node_entry()
771 iolink->attr.mode = KFD_SYSFS_FILE_MODE; in kfd_build_sysfs_node_entry()
772 sysfs_attr_init(&iolink->attr); in kfd_build_sysfs_node_entry()
773 ret = sysfs_create_file(iolink->kobj, &iolink->attr); in kfd_build_sysfs_node_entry()
780 list_for_each_entry(p2plink, &dev->p2p_link_props, list) { in kfd_build_sysfs_node_entry()
781 p2plink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); in kfd_build_sysfs_node_entry()
782 if (!p2plink->kobj) in kfd_build_sysfs_node_entry()
783 return -ENOMEM; in kfd_build_sysfs_node_entry()
784 ret = kobject_init_and_add(p2plink->kobj, &iolink_type, in kfd_build_sysfs_node_entry()
785 dev->kobj_p2plink, "%d", i); in kfd_build_sysfs_node_entry()
787 kobject_put(p2plink->kobj); in kfd_build_sysfs_node_entry()
791 p2plink->attr.name = "properties"; in kfd_build_sysfs_node_entry()
792 p2plink->attr.mode = KFD_SYSFS_FILE_MODE; in kfd_build_sysfs_node_entry()
793 sysfs_attr_init(&p2plink->attr); in kfd_build_sysfs_node_entry()
794 ret = sysfs_create_file(p2plink->kobj, &p2plink->attr); in kfd_build_sysfs_node_entry()
802 list_for_each_entry(perf, &dev->perf_props, list) { in kfd_build_sysfs_node_entry()
803 perf->attr_group = kzalloc(sizeof(struct kfd_perf_attr) in kfd_build_sysfs_node_entry()
806 if (!perf->attr_group) in kfd_build_sysfs_node_entry()
807 return -ENOMEM; in kfd_build_sysfs_node_entry()
809 attrs = (struct attribute **)(perf->attr_group + 1); in kfd_build_sysfs_node_entry()
810 if (!strcmp(perf->block_name, "iommu")) { in kfd_build_sysfs_node_entry()
815 perf_attr_iommu[0].data = perf->max_concurrent; in kfd_build_sysfs_node_entry()
819 perf->attr_group->name = perf->block_name; in kfd_build_sysfs_node_entry()
820 perf->attr_group->attrs = attrs; in kfd_build_sysfs_node_entry()
821 ret = sysfs_create_group(dev->kobj_perf, perf->attr_group); in kfd_build_sysfs_node_entry()
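Each property loop in kfd_build_sysfs_node_entry() follows one lifecycle: kzalloc a bare kobject, register it with kobject_init_and_add() under the per-node parent, then create a single "properties" attribute; kfd_remove_sysfs_node_entry() undoes the same steps. A condensed kernel-style sketch of that lifecycle, where props_ktype and the 0444 mode are placeholders for the driver's ktypes and KFD_SYSFS_FILE_MODE:

#include <linux/kobject.h>
#include <linux/slab.h>
#include <linux/sysfs.h>

static const struct kobj_type props_ktype;      /* placeholder ktype */

static int add_props_entry(struct kobject *parent, struct kobject **kobj,
                           struct attribute *attr, int idx)
{
        int ret;

        *kobj = kzalloc(sizeof(**kobj), GFP_KERNEL);
        if (!*kobj)
                return -ENOMEM;

        ret = kobject_init_and_add(*kobj, &props_ktype, parent, "%d", idx);
        if (ret < 0) {
                kobject_put(*kobj);     /* after init: put, never kfree */
                return ret;
        }

        attr->name = "properties";
        attr->mode = 0444;              /* stand-in for KFD_SYSFS_FILE_MODE */
        sysfs_attr_init(attr);
        return sysfs_create_file(*kobj, attr);
}

The kobject_put() on the failure path matches the pattern visible above: once kobject_init_and_add() has run, the embedded refcount owns the allocation.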
863 return -ENOMEM; in kfd_topology_update_sysfs()
866 &sysprops_type, &kfd_device->kobj, in kfd_topology_update_sysfs()
876 return -ENOMEM; in kfd_topology_update_sysfs()
924 list_move_tail(temp_list->next, master_list); in kfd_topology_update_device_list()
938 if (dev->node_props.cpu_cores_count && in kfd_debug_print_topology()
939 dev->node_props.simd_count) { in kfd_debug_print_topology()
941 dev->node_props.device_id, in kfd_debug_print_topology()
942 dev->node_props.vendor_id); in kfd_debug_print_topology()
943 } else if (dev->node_props.cpu_cores_count) in kfd_debug_print_topology()
945 else if (dev->node_props.simd_count) in kfd_debug_print_topology()
947 dev->node_props.device_id, in kfd_debug_print_topology()
948 dev->node_props.vendor_id); in kfd_debug_print_topology()
964 sys_props.platform_id = dev->oem_id64; in kfd_update_system_properties()
965 sys_props.platform_oem = *((uint64_t *)dev->oem_table_id); in kfd_update_system_properties()
966 sys_props.platform_rev = dev->oem_revision; in kfd_update_system_properties()
974 struct kfd_mem_properties *mem; in find_system_memory() local
980 if (dm->type == DMI_ENTRY_MEM_DEVICE && dm->length >= 0x15) { in find_system_memory()
983 list_for_each_entry(mem, &kdev->mem_props, list) { in find_system_memory()
985 mem->width = mem_width; in find_system_memory()
987 mem->mem_clk_max = mem_clock; in find_system_memory()
992 /* kfd_add_non_crat_information - Add information that is not currently
994 * @dev - topology device to which additional info is added
999 if (!kdev->gpu) { in kfd_add_non_crat_information()
1016 /* topology_device_list - Master list of all topology devices in kfd_topology_init()
1017 * temp_topology_device_list - temporary list created while parsing CRAT in kfd_topology_init()
1059 topology_crat_proximity_domain = sys_props.num_devices-1; in kfd_topology_init()
1108 local_mem_size = gpu->local_mem_info.local_mem_size_private + in kfd_generate_gpu_id()
1109 gpu->local_mem_info.local_mem_size_public; in kfd_generate_gpu_id()
1110 buf[0] = gpu->adev->pdev->devfn; in kfd_generate_gpu_id()
1111 buf[1] = gpu->adev->pdev->subsystem_vendor | in kfd_generate_gpu_id()
1112 (gpu->adev->pdev->subsystem_device << 16); in kfd_generate_gpu_id()
1113 buf[2] = pci_domain_nr(gpu->adev->pdev->bus); in kfd_generate_gpu_id()
1114 buf[3] = gpu->adev->pdev->device; in kfd_generate_gpu_id()
1115 buf[4] = gpu->adev->pdev->bus->number; in kfd_generate_gpu_id()
1118 buf[7] = (ffs(gpu->xcc_mask) - 1) | (NUM_XCC(gpu->xcc_mask) << 16); in kfd_generate_gpu_id()
1121 ((1 << KFD_GPU_ID_HASH_WIDTH) - 1); in kfd_generate_gpu_id()
1125 * that the value could be 0 or non-unique. So, check if in kfd_generate_gpu_id()
1126 * it is unique and non-zero. If not unique, increment till in kfd_generate_gpu_id()
1136 if (dev->gpu && dev->gpu_id == gpu_id) { in kfd_generate_gpu_id()
1143 ((1 << KFD_GPU_ID_HASH_WIDTH) - 1); in kfd_generate_gpu_id()
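Lines 1110-1143 above hash PCI identity plus local memory size into a gpu_id of KFD_GPU_ID_HASH_WIDTH bits, then walk the existing devices and, on a collision or a zero value, increment within the mask until the ID is unique. A small userspace sketch of that probe step (the 16-bit width is an assumption; the driver's actual collision walk differs in structure):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define ID_HASH_WIDTH 16        /* assumed value of KFD_GPU_ID_HASH_WIDTH */

static bool id_taken(uint32_t id, const uint32_t *used, int n)
{
        for (int i = 0; i < n; i++)
                if (used[i] == id)
                        return true;
        return false;
}

/* Mask the hash to the ID width, then bump until unique and non-zero. */
static uint32_t pick_gpu_id(uint32_t hash, const uint32_t *used, int n)
{
        const uint32_t mask = (1u << ID_HASH_WIDTH) - 1;
        uint32_t id = hash & mask;

        while (!id || id_taken(id, used, n))
                id = (id + 1) & mask;
        return id;
}

int main(void)
{
        uint32_t used[] = { 0x1a2b, 0x1a2c };

        printf("0x%x\n", pick_gpu_id(0x1a2b, used, 2));  /* -> 0x1a2d */
        return 0;
}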
1149 /* kfd_assign_gpu - Attach @gpu to the correct kfd topology device. If
1158 struct kfd_mem_properties *mem; in kfd_assign_gpu() local
1167 if (dev->node_props.cpu_cores_count) in kfd_assign_gpu()
1170 if (!dev->gpu && (dev->node_props.simd_count > 0)) { in kfd_assign_gpu()
1171 dev->gpu = gpu; in kfd_assign_gpu()
1174 list_for_each_entry(mem, &dev->mem_props, list) in kfd_assign_gpu()
1175 mem->gpu = dev->gpu; in kfd_assign_gpu()
1176 list_for_each_entry(cache, &dev->cache_props, list) in kfd_assign_gpu()
1177 cache->gpu = dev->gpu; in kfd_assign_gpu()
1178 list_for_each_entry(iolink, &dev->io_link_props, list) in kfd_assign_gpu()
1179 iolink->gpu = dev->gpu; in kfd_assign_gpu()
1180 list_for_each_entry(p2plink, &dev->p2p_link_props, list) in kfd_assign_gpu()
1181 p2plink->gpu = dev->gpu; in kfd_assign_gpu()
1196 /* kfd_fill_mem_clk_max_info - Since CRAT doesn't have memory clock info,
1201 struct kfd_mem_properties *mem; in kfd_fill_mem_clk_max_info() local
1209 * for dGPUs - VCRAT reports only one bank of Local Memory in kfd_fill_mem_clk_max_info()
1210 * for APUs - If CRAT from ACPI reports more than one bank, then in kfd_fill_mem_clk_max_info()
1213 amdgpu_amdkfd_get_local_mem_info(dev->gpu->adev, &local_mem_info, in kfd_fill_mem_clk_max_info()
1214 dev->gpu->xcp); in kfd_fill_mem_clk_max_info()
1216 list_for_each_entry(mem, &dev->mem_props, list) in kfd_fill_mem_clk_max_info()
1217 mem->mem_clk_max = local_mem_info.mem_clk_max; in kfd_fill_mem_clk_max_info()
1225 if (link->iolink_type == CRAT_IOLINK_TYPE_XGMI) in kfd_set_iolink_no_atomics()
1232 pcie_capability_read_dword(target_gpu_dev->gpu->adev->pdev, in kfd_set_iolink_no_atomics()
1237 link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT | in kfd_set_iolink_no_atomics()
1241 if (!dev->gpu->kfd->pci_atomic_requested || in kfd_set_iolink_no_atomics()
1242 dev->gpu->adev->asic_type == CHIP_HAWAII) in kfd_set_iolink_no_atomics()
1243 link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT | in kfd_set_iolink_no_atomics()
1252 /* CPU -> GPU with PCIe */ in kfd_set_iolink_non_coherent()
1253 if (!to_dev->gpu && in kfd_set_iolink_non_coherent()
1254 inbound_link->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) in kfd_set_iolink_non_coherent()
1255 inbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT; in kfd_set_iolink_non_coherent()
1257 if (to_dev->gpu) { in kfd_set_iolink_non_coherent()
1258 /* GPU <-> GPU with PCIe and in kfd_set_iolink_non_coherent()
1261 if (inbound_link->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS || in kfd_set_iolink_non_coherent()
1262 (inbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI && in kfd_set_iolink_non_coherent()
1263 KFD_GC_VERSION(to_dev->gpu) == IP_VERSION(9, 4, 0))) { in kfd_set_iolink_non_coherent()
1264 outbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT; in kfd_set_iolink_non_coherent()
1265 inbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT; in kfd_set_iolink_non_coherent()
1272 { -1, 14, 12, 2, 4, 8, 10, 6 },
1273 { 14, -1, 2, 10, 8, 4, 6, 12 },
1274 { 10, 2, -1, 12, 14, 6, 4, 8 },
1275 { 2, 12, 10, -1, 6, 14, 8, 4 },
1276 { 4, 8, 14, 6, -1, 10, 12, 2 },
1277 { 8, 4, 6, 14, 12, -1, 2, 10 },
1278 { 10, 6, 4, 8, 12, 2, -1, 14 },
1279 { 6, 12, 8, 4, 2, 10, 14, -1 }};
1285 struct kfd_node *gpu = outbound_link->gpu; in kfd_set_recommended_sdma_engines()
1286 struct amdgpu_device *adev = gpu->adev; in kfd_set_recommended_sdma_engines()
1287 int num_xgmi_nodes = adev->gmc.xgmi.num_physical_nodes; in kfd_set_recommended_sdma_engines()
1288 bool support_rec_eng = !amdgpu_sriov_vf(adev) && to_dev->gpu && in kfd_set_recommended_sdma_engines()
1289 adev->aid_mask && num_xgmi_nodes && gpu->kfd->num_nodes == 1 && in kfd_set_recommended_sdma_engines()
1291 (!(adev->flags & AMD_IS_APU) && num_xgmi_nodes == 8); in kfd_set_recommended_sdma_engines()
1294 int src_socket_id = adev->gmc.xgmi.physical_node_id; in kfd_set_recommended_sdma_engines()
1295 int dst_socket_id = to_dev->gpu->adev->gmc.xgmi.physical_node_id; in kfd_set_recommended_sdma_engines()
1297 outbound_link->rec_sdma_eng_id_mask = in kfd_set_recommended_sdma_engines()
1299 inbound_link->rec_sdma_eng_id_mask = in kfd_set_recommended_sdma_engines()
1305 if (outbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI && in kfd_set_recommended_sdma_engines()
1306 kfd_get_num_xgmi_sdma_engines(gpu) && to_dev->gpu) { in kfd_set_recommended_sdma_engines()
1312 outbound_link->rec_sdma_eng_id_mask |= (1 << (i + eng_offset)); in kfd_set_recommended_sdma_engines()
1313 inbound_link->rec_sdma_eng_id_mask |= (1 << (i + eng_offset)); in kfd_set_recommended_sdma_engines()
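The 8x8 table above appears to map a (source socket, destination socket) pair to a preferred XGMI SDMA engine, with -1 on the self-to-self diagonal; the exact mask construction on file lines 1297-1299 is elided by the matcher, so the shift below is an assumption based on the surrounding rec_sdma_eng_id_mask lines:

#include <stdio.h>

/* Copy of xgmi_sdma_eng_id_tbl from the listing; -1 marks src == dst. */
static const int xgmi_sdma_eng_id_tbl[8][8] = {
        { -1, 14, 12,  2,  4,  8, 10,  6 },
        { 14, -1,  2, 10,  8,  4,  6, 12 },
        { 10,  2, -1, 12, 14,  6,  4,  8 },
        {  2, 12, 10, -1,  6, 14,  8,  4 },
        {  4,  8, 14,  6, -1, 10, 12,  2 },
        {  8,  4,  6, 14, 12, -1,  2, 10 },
        { 10,  6,  4,  8, 12,  2, -1, 14 },
        {  6, 12,  8,  4,  2, 10, 14, -1 },
};

int main(void)
{
        int src = 1, dst = 3;   /* invented socket IDs */

        /* One recommended engine per direction, as the inbound/outbound
         * assignments above suggest. */
        unsigned int out_mask = 1u << xgmi_sdma_eng_id_tbl[src][dst];
        unsigned int in_mask  = 1u << xgmi_sdma_eng_id_tbl[dst][src];

        printf("out 0x%x in 0x%x\n", out_mask, in_mask);  /* engines 10 and 12 */
        return 0;
}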
1323 if (!dev || !dev->gpu) in kfd_fill_iolink_non_crat_info()
1327 list_for_each_entry(link, &dev->io_link_props, list) { in kfd_fill_iolink_non_crat_info()
1328 link->flags = CRAT_IOLINK_FLAGS_ENABLED; in kfd_fill_iolink_non_crat_info()
1331 link->node_to); in kfd_fill_iolink_non_crat_info()
1337 if (!peer_dev->gpu && in kfd_fill_iolink_non_crat_info()
1338 link->iolink_type == CRAT_IOLINK_TYPE_XGMI) { in kfd_fill_iolink_non_crat_info()
1343 if (!dev->node_props.hive_id) in kfd_fill_iolink_non_crat_info()
1344 dev->node_props.hive_id = pci_dev_id(dev->gpu->adev->pdev); in kfd_fill_iolink_non_crat_info()
1345 peer_dev->node_props.hive_id = dev->node_props.hive_id; in kfd_fill_iolink_non_crat_info()
1348 list_for_each_entry(inbound_link, &peer_dev->io_link_props, in kfd_fill_iolink_non_crat_info()
1350 if (inbound_link->node_to != link->node_from) in kfd_fill_iolink_non_crat_info()
1353 inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED; in kfd_fill_iolink_non_crat_info()
1361 list_for_each_entry(link, &dev->p2p_link_props, list) { in kfd_fill_iolink_non_crat_info()
1362 link->flags = CRAT_IOLINK_FLAGS_ENABLED; in kfd_fill_iolink_non_crat_info()
1365 link->node_to); in kfd_fill_iolink_non_crat_info()
1370 list_for_each_entry(inbound_link, &peer_dev->p2p_link_props, in kfd_fill_iolink_non_crat_info()
1372 if (inbound_link->node_to != link->node_from) in kfd_fill_iolink_non_crat_info()
1375 inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED; in kfd_fill_iolink_non_crat_info()
1387 p2plink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); in kfd_build_p2p_node_entry()
1388 if (!p2plink->kobj) in kfd_build_p2p_node_entry()
1389 return -ENOMEM; in kfd_build_p2p_node_entry()
1391 ret = kobject_init_and_add(p2plink->kobj, &iolink_type, in kfd_build_p2p_node_entry()
1392 dev->kobj_p2plink, "%d", dev->node_props.p2p_links_count - 1); in kfd_build_p2p_node_entry()
1394 kobject_put(p2plink->kobj); in kfd_build_p2p_node_entry()
1398 p2plink->attr.name = "properties"; in kfd_build_p2p_node_entry()
1399 p2plink->attr.mode = KFD_SYSFS_FILE_MODE; in kfd_build_p2p_node_entry()
1400 sysfs_attr_init(&p2plink->attr); in kfd_build_p2p_node_entry()
1401 ret = sysfs_create_file(p2plink->kobj, &p2plink->attr); in kfd_build_p2p_node_entry()
1418 if (cpu_dev->gpu) in kfd_create_indirect_link_prop()
1423 if (list_empty(&kdev->io_link_props)) in kfd_create_indirect_link_prop()
1424 return -ENODATA; in kfd_create_indirect_link_prop()
1426 gpu_link = list_first_entry(&kdev->io_link_props, in kfd_create_indirect_link_prop()
1430 /* CPU <--> GPU */ in kfd_create_indirect_link_prop()
1431 if (gpu_link->node_to == i) in kfd_create_indirect_link_prop()
1434 /* find CPU <--> CPU links */ in kfd_create_indirect_link_prop()
1439 &cpu_dev->io_link_props, list) { in kfd_create_indirect_link_prop()
1440 if (tmp_link->node_to == gpu_link->node_to) { in kfd_create_indirect_link_prop()
1448 return -ENOMEM; in kfd_create_indirect_link_prop()
1450 /* CPU <--> CPU <--> GPU, GPU node */ in kfd_create_indirect_link_prop()
1453 return -ENOMEM; in kfd_create_indirect_link_prop()
1456 props->weight = gpu_link->weight + cpu_link->weight; in kfd_create_indirect_link_prop()
1457 props->min_latency = gpu_link->min_latency + cpu_link->min_latency; in kfd_create_indirect_link_prop()
1458 props->max_latency = gpu_link->max_latency + cpu_link->max_latency; in kfd_create_indirect_link_prop()
1459 props->min_bandwidth = min(gpu_link->min_bandwidth, cpu_link->min_bandwidth); in kfd_create_indirect_link_prop()
1460 props->max_bandwidth = min(gpu_link->max_bandwidth, cpu_link->max_bandwidth); in kfd_create_indirect_link_prop()
1462 props->node_from = gpu_node; in kfd_create_indirect_link_prop()
1463 props->node_to = i; in kfd_create_indirect_link_prop()
1464 kdev->node_props.p2p_links_count++; in kfd_create_indirect_link_prop()
1465 list_add_tail(&props->list, &kdev->p2p_link_props); in kfd_create_indirect_link_prop()
1470 /* for small BAR, no CPU --> GPU indirect links */ in kfd_create_indirect_link_prop()
1471 if (kfd_dev_is_large_bar(kdev->gpu)) { in kfd_create_indirect_link_prop()
1472 /* CPU <--> CPU <--> GPU, CPU node */ in kfd_create_indirect_link_prop()
1475 return -ENOMEM; in kfd_create_indirect_link_prop()
1478 props2->node_from = i; in kfd_create_indirect_link_prop()
1479 props2->node_to = gpu_node; in kfd_create_indirect_link_prop()
1480 props2->kobj = NULL; in kfd_create_indirect_link_prop()
1481 cpu_dev->node_props.p2p_links_count++; in kfd_create_indirect_link_prop()
1482 list_add_tail(&props2->list, &cpu_dev->p2p_link_props); in kfd_create_indirect_link_prop()
1501 kdev->gpu->adev, in kfd_add_peer_prop()
1502 peer->gpu->adev)) in kfd_add_peer_prop()
1505 if (list_empty(&kdev->io_link_props)) in kfd_add_peer_prop()
1506 return -ENODATA; in kfd_add_peer_prop()
1508 iolink1 = list_first_entry(&kdev->io_link_props, in kfd_add_peer_prop()
1511 if (list_empty(&peer->io_link_props)) in kfd_add_peer_prop()
1512 return -ENODATA; in kfd_add_peer_prop()
1514 iolink2 = list_first_entry(&peer->io_link_props, in kfd_add_peer_prop()
1519 return -ENOMEM; in kfd_add_peer_prop()
1523 props->weight = iolink1->weight + iolink2->weight; in kfd_add_peer_prop()
1524 props->min_latency = iolink1->min_latency + iolink2->min_latency; in kfd_add_peer_prop()
1525 props->max_latency = iolink1->max_latency + iolink2->max_latency; in kfd_add_peer_prop()
1526 props->min_bandwidth = min(iolink1->min_bandwidth, iolink2->min_bandwidth); in kfd_add_peer_prop()
1527 props->max_bandwidth = min(iolink1->max_bandwidth, iolink2->max_bandwidth); in kfd_add_peer_prop()
1529 if (iolink1->node_to != iolink2->node_to) { in kfd_add_peer_prop()
1530 /* CPU->CPU link*/ in kfd_add_peer_prop()
1531 cpu_dev = kfd_topology_device_by_proximity_domain(iolink1->node_to); in kfd_add_peer_prop()
1533 list_for_each_entry(iolink3, &cpu_dev->io_link_props, list) { in kfd_add_peer_prop()
1534 if (iolink3->node_to != iolink2->node_to) in kfd_add_peer_prop()
1537 props->weight += iolink3->weight; in kfd_add_peer_prop()
1538 props->min_latency += iolink3->min_latency; in kfd_add_peer_prop()
1539 props->max_latency += iolink3->max_latency; in kfd_add_peer_prop()
1540 props->min_bandwidth = min(props->min_bandwidth, in kfd_add_peer_prop()
1541 iolink3->min_bandwidth); in kfd_add_peer_prop()
1542 props->max_bandwidth = min(props->max_bandwidth, in kfd_add_peer_prop()
1543 iolink3->max_bandwidth); in kfd_add_peer_prop()
1551 props->node_from = from; in kfd_add_peer_prop()
1552 props->node_to = to; in kfd_add_peer_prop()
1553 peer->node_props.p2p_links_count++; in kfd_add_peer_prop()
1554 list_add_tail(&props->list, &peer->p2p_link_props); in kfd_add_peer_prop()
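kfd_add_peer_prop() composes an indirect GPU <--> GPU link out of two direct hops (plus a CPU <--> CPU hop when the two GPUs hang off different CPU nodes): weights and latencies accumulate along the path, while min/max bandwidth are clamped to the narrowest hop. A tiny worked example of those rules with invented numbers:

#include <stdio.h>

struct hop { unsigned int weight, min_lat, max_lat, min_bw, max_bw; };

/* Fold one more hop into the accumulated path properties. */
static void add_hop(struct hop *path, const struct hop *h)
{
        path->weight  += h->weight;
        path->min_lat += h->min_lat;
        path->max_lat += h->max_lat;
        if (h->min_bw < path->min_bw)
                path->min_bw = h->min_bw;
        if (h->max_bw < path->max_bw)
                path->max_bw = h->max_bw;
}

int main(void)
{
        struct hop gpu_to_cpu = { 20, 1, 4, 16, 32 };   /* invented hops */
        struct hop cpu_to_gpu = { 20, 1, 6,  8, 16 };
        struct hop path = gpu_to_cpu;

        add_hop(&path, &cpu_to_gpu);
        /* -> w=40 lat=2..10 bw=8..16: the narrow hop caps bandwidth */
        printf("w=%u lat=%u..%u bw=%u..%u\n", path.weight,
               path.min_lat, path.max_lat, path.min_bw, path.max_bw);
        return 0;
}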
1578 if (WARN_ON(!new_dev->gpu)) in kfd_dev_create_p2p_links()
1581 k--; in kfd_dev_create_p2p_links()
1583 /* create indirect links */ in kfd_dev_create_p2p_links()
1594 if (!dev->gpu || !dev->gpu->adev || in kfd_dev_create_p2p_links()
1595 (dev->gpu->kfd->hive_id && in kfd_dev_create_p2p_links()
1596 dev->gpu->kfd->hive_id == new_dev->gpu->kfd->hive_id)) in kfd_dev_create_p2p_links()
1599 /* check if node(s) is/are peer accessible in one direction or both */ in kfd_dev_create_p2p_links()
1629 cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); in fill_in_l1_pcache()
1633 * CU, and in case of non-shared cache check if the CU is inactive. If in fill_in_l1_pcache()
1639 return -ENOMEM; in fill_in_l1_pcache()
1642 pcache->processor_id_low = cu_processor_id + (first_active_cu - 1); in fill_in_l1_pcache()
1643 pcache->cache_level = pcache_info[cache_type].cache_level; in fill_in_l1_pcache()
1644 pcache->cache_size = pcache_info[cache_type].cache_size; in fill_in_l1_pcache()
1645 pcache->cacheline_size = pcache_info[cache_type].cache_line_size; in fill_in_l1_pcache()
1648 pcache->cache_type |= HSA_CACHE_TYPE_DATA; in fill_in_l1_pcache()
1650 pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION; in fill_in_l1_pcache()
1652 pcache->cache_type |= HSA_CACHE_TYPE_CPU; in fill_in_l1_pcache()
1654 pcache->cache_type |= HSA_CACHE_TYPE_HSACU; in fill_in_l1_pcache()
1660 cu_sibling_map_mask >> (first_active_cu - 1); in fill_in_l1_pcache()
1662 pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF); in fill_in_l1_pcache()
1663 pcache->sibling_map[1] = in fill_in_l1_pcache()
1665 pcache->sibling_map[2] = in fill_in_l1_pcache()
1667 pcache->sibling_map[3] = in fill_in_l1_pcache()
1670 pcache->sibling_map_size = 4; in fill_in_l1_pcache()
1689 int num_xcc = NUM_XCC(knode->xcc_mask); in fill_in_l2_l3_pcache()
1692 struct amdgpu_device *adev = knode->adev; in fill_in_l2_l3_pcache()
1694 start = ffs(knode->xcc_mask) - 1; in fill_in_l2_l3_pcache()
1696 cu_sibling_map_mask = cu_info->bitmap[start][0][0]; in fill_in_l2_l3_pcache()
1698 ((1 << pcache_info[cache_type].num_cu_shared) - 1); in fill_in_l2_l3_pcache()
1702 * CU, and in case of non-shared cache check if the CU is inactive. If in fill_in_l2_l3_pcache()
1708 return -ENOMEM; in fill_in_l2_l3_pcache()
1711 pcache->processor_id_low = cu_processor_id in fill_in_l2_l3_pcache()
1712 + (first_active_cu - 1); in fill_in_l2_l3_pcache()
1713 pcache->cache_level = pcache_info[cache_type].cache_level; in fill_in_l2_l3_pcache()
1714 pcache->cacheline_size = pcache_info[cache_type].cache_line_size; in fill_in_l2_l3_pcache()
1719 mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); in fill_in_l2_l3_pcache()
1723 pcache->cache_size = pcache_info[cache_type].cache_size; in fill_in_l2_l3_pcache()
1725 if (mode && pcache->cache_level == 3) in fill_in_l2_l3_pcache()
1726 pcache->cache_size /= mode; in fill_in_l2_l3_pcache()
1729 pcache->cache_type |= HSA_CACHE_TYPE_DATA; in fill_in_l2_l3_pcache()
1731 pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION; in fill_in_l2_l3_pcache()
1733 pcache->cache_type |= HSA_CACHE_TYPE_CPU; in fill_in_l2_l3_pcache()
1735 pcache->cache_type |= HSA_CACHE_TYPE_HSACU; in fill_in_l2_l3_pcache()
1740 cu_sibling_map_mask = cu_sibling_map_mask >> (first_active_cu - 1); in fill_in_l2_l3_pcache()
1744 for (i = 0; i < gfx_info->max_shader_engines; i++) { in fill_in_l2_l3_pcache()
1745 for (j = 0; j < gfx_info->max_sh_per_se; j++) { in fill_in_l2_l3_pcache()
1746 pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF); in fill_in_l2_l3_pcache()
1747 pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); in fill_in_l2_l3_pcache()
1748 pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); in fill_in_l2_l3_pcache()
1749 pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); in fill_in_l2_l3_pcache()
1752 cu_sibling_map_mask = cu_info->bitmap[xcc][i % 4][j + i / 4]; in fill_in_l2_l3_pcache()
1753 cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); in fill_in_l2_l3_pcache()
1757 pcache->sibling_map_size = k; in fill_in_l2_l3_pcache()
1766 /* kfd_fill_cache_non_crat_info - Fill GPU cache info using kfd_gpu_cache_info
1777 struct amdgpu_cu_info *cu_info = &kdev->adev->gfx.cu_info; in kfd_fill_cache_non_crat_info()
1778 struct amdgpu_gfx_config *gfx_info = &kdev->adev->gfx.config; in kfd_fill_cache_non_crat_info()
1786 gpu_processor_id = dev->node_props.simd_id_base; in kfd_fill_cache_non_crat_info()
1805 start = ffs(kdev->xcc_mask) - 1; in kfd_fill_cache_non_crat_info()
1806 end = start + NUM_XCC(kdev->xcc_mask); in kfd_fill_cache_non_crat_info()
1812 for (i = 0; i < gfx_info->max_shader_engines; i++) { in kfd_fill_cache_non_crat_info()
1813 for (j = 0; j < gfx_info->max_sh_per_se; j++) { in kfd_fill_cache_non_crat_info()
1814 for (k = 0; k < gfx_info->max_cu_per_sh; k += pcache_info[ct].num_cu_shared) { in kfd_fill_cache_non_crat_info()
1817 cu_info->bitmap[xcc][i % 4][j + i / 4], ct, in kfd_fill_cache_non_crat_info()
1825 list_add_tail(&props_ext->list, &dev->cache_props); in kfd_fill_cache_non_crat_info()
1830 gfx_info->max_cu_per_sh) ? in kfd_fill_cache_non_crat_info()
1832 (gfx_info->max_cu_per_sh - k); in kfd_fill_cache_non_crat_info()
1847 list_add_tail(&props_ext->list, &dev->cache_props); in kfd_fill_cache_non_crat_info()
1851 dev->node_props.caches_count += num_of_entries; in kfd_fill_cache_non_crat_info()
1868 dev_err(gpu->adev->dev, "Error creating VCRAT\n"); in kfd_topology_add_device_locked()
1869 topology_crat_proximity_domain--; in kfd_topology_add_device_locked()
1879 dev_err(gpu->adev->dev, "Error parsing VCRAT\n"); in kfd_topology_add_device_locked()
1880 topology_crat_proximity_domain--; in kfd_topology_add_device_locked()
1889 res = -ENODEV; in kfd_topology_add_device_locked()
1905 dev_err(gpu->adev->dev, "Failed to update GPU to sysfs topology. res=%d\n", in kfd_topology_add_device_locked()
1917 if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0) && in kfd_topology_set_dbg_firmware_support()
1918 KFD_GC_VERSION(dev->gpu) < IP_VERSION(12, 0, 0)) { in kfd_topology_set_dbg_firmware_support()
1919 uint32_t mes_api_rev = (dev->gpu->adev->mes.sched_version & in kfd_topology_set_dbg_firmware_support()
1922 uint32_t mes_rev = dev->gpu->adev->mes.sched_version & in kfd_topology_set_dbg_firmware_support()
1933 switch (KFD_GC_VERSION(dev->gpu)) { in kfd_topology_set_dbg_firmware_support()
1935 firmware_supported = dev->gpu->kfd->mec_fw_version >= 459 + 32768; in kfd_topology_set_dbg_firmware_support()
1942 firmware_supported = dev->gpu->kfd->mec_fw_version >= 459; in kfd_topology_set_dbg_firmware_support()
1945 firmware_supported = dev->gpu->kfd->mec_fw_version >= 60; in kfd_topology_set_dbg_firmware_support()
1948 firmware_supported = dev->gpu->kfd->mec_fw_version >= 51; in kfd_topology_set_dbg_firmware_support()
1953 firmware_supported = dev->gpu->kfd->mec_fw_version >= 144; in kfd_topology_set_dbg_firmware_support()
1960 firmware_supported = dev->gpu->kfd->mec_fw_version >= 89; in kfd_topology_set_dbg_firmware_support()
1972 dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_FIRMWARE_SUPPORTED; in kfd_topology_set_dbg_firmware_support()
1977 dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 << in kfd_topology_set_capabilities()
1981 dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_SUPPORT | in kfd_topology_set_capabilities()
1985 if (kfd_dbg_has_ttmps_always_setup(dev->gpu)) in kfd_topology_set_capabilities()
1986 dev->node_props.debug_prop |= HSA_DBG_DISPATCH_INFO_ALWAYS_VALID; in kfd_topology_set_capabilities()
1988 if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0)) { in kfd_topology_set_capabilities()
1989 if (KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 3) || in kfd_topology_set_capabilities()
1990 KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 4)) in kfd_topology_set_capabilities()
1991 dev->node_props.debug_prop |= in kfd_topology_set_capabilities()
1995 dev->node_props.debug_prop |= in kfd_topology_set_capabilities()
1999 if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(9, 4, 2)) in kfd_topology_set_capabilities()
2000 dev->node_props.capability |= in kfd_topology_set_capabilities()
2003 dev->node_props.capability |= HSA_CAP_PER_QUEUE_RESET_SUPPORTED; in kfd_topology_set_capabilities()
2005 dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 | in kfd_topology_set_capabilities()
2008 if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0)) in kfd_topology_set_capabilities()
2009 dev->node_props.capability |= in kfd_topology_set_capabilities()
2012 if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(12, 0, 0)) in kfd_topology_set_capabilities()
2013 dev->node_props.capability |= in kfd_topology_set_capabilities()
2026 const char *asic_name = amdgpu_asic_name[gpu->adev->asic_type]; in kfd_topology_add_device()
2027 struct amdgpu_gfx_config *gfx_info = &gpu->adev->gfx.config; in kfd_topology_add_device()
2028 struct amdgpu_cu_info *cu_info = &gpu->adev->gfx.cu_info; in kfd_topology_add_device()
2030 if (gpu->xcp && !gpu->xcp->ddev) { in kfd_topology_add_device()
2031 dev_warn(gpu->adev->dev, in kfd_topology_add_device()
2035 dev_dbg(gpu->adev->dev, "Adding new GPU to topology\n"); in kfd_topology_add_device()
2053 dev->gpu_id = gpu_id; in kfd_topology_add_device()
2054 gpu->id = gpu_id; in kfd_topology_add_device()
2062 /* Fill in additional information that is not available in CRAT but in kfd_topology_add_device()
2065 for (i = 0; i < KFD_TOPOLOGY_PUBLIC_NAME_SIZE-1; i++) { in kfd_topology_add_device()
2066 dev->node_props.name[i] = __tolower(asic_name[i]); in kfd_topology_add_device()
2070 dev->node_props.name[i] = '\0'; in kfd_topology_add_device()
2072 dev->node_props.simd_arrays_per_engine = in kfd_topology_add_device()
2073 gfx_info->max_sh_per_se; in kfd_topology_add_device()
2075 dev->node_props.gfx_target_version = in kfd_topology_add_device()
2076 gpu->kfd->device_info.gfx_target_version; in kfd_topology_add_device()
2077 dev->node_props.vendor_id = gpu->adev->pdev->vendor; in kfd_topology_add_device()
2078 dev->node_props.device_id = gpu->adev->pdev->device; in kfd_topology_add_device()
2079 dev->node_props.capability |= in kfd_topology_add_device()
2080 ((dev->gpu->adev->rev_id << HSA_CAP_ASIC_REVISION_SHIFT) & in kfd_topology_add_device()
2083 dev->node_props.location_id = pci_dev_id(gpu->adev->pdev); in kfd_topology_add_device()
2084 if (gpu->kfd->num_nodes > 1) in kfd_topology_add_device()
2085 dev->node_props.location_id |= dev->gpu->node_id; in kfd_topology_add_device()
2087 dev->node_props.domain = pci_domain_nr(gpu->adev->pdev->bus); in kfd_topology_add_device()
2088 dev->node_props.max_engine_clk_fcompute = in kfd_topology_add_device()
2089 amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->adev); in kfd_topology_add_device()
2090 dev->node_props.max_engine_clk_ccompute = in kfd_topology_add_device()
2093 if (gpu->xcp) in kfd_topology_add_device()
2094 dev->node_props.drm_render_minor = gpu->xcp->ddev->render->index; in kfd_topology_add_device()
2096 dev->node_props.drm_render_minor = in kfd_topology_add_device()
2097 gpu->kfd->shared_resources.drm_render_minor; in kfd_topology_add_device()
2099 dev->node_props.hive_id = gpu->kfd->hive_id; in kfd_topology_add_device()
2100 dev->node_props.num_sdma_engines = kfd_get_num_sdma_engines(gpu); in kfd_topology_add_device()
2101 dev->node_props.num_sdma_xgmi_engines = in kfd_topology_add_device()
2103 dev->node_props.num_sdma_queues_per_engine = in kfd_topology_add_device()
2104 gpu->kfd->device_info.num_sdma_queues_per_engine - in kfd_topology_add_device()
2105 gpu->kfd->device_info.num_reserved_sdma_queues_per_engine; in kfd_topology_add_device()
2106 dev->node_props.num_gws = (dev->gpu->gws && in kfd_topology_add_device()
2107 dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ? in kfd_topology_add_device()
2108 dev->gpu->adev->gds.gws_size : 0; in kfd_topology_add_device()
2109 dev->node_props.num_cp_queues = get_cp_queues_num(dev->gpu->dqm); in kfd_topology_add_device()
2114 switch (dev->gpu->adev->asic_type) { in kfd_topology_add_device()
2118 dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_PRE_1_0 << in kfd_topology_add_device()
2129 dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_1_0 << in kfd_topology_add_device()
2134 if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(9, 0, 1)) in kfd_topology_add_device()
2136 dev->gpu->adev->asic_type); in kfd_topology_add_device()
2145 dev->node_props.capability &= ~HSA_CAP_ATS_PRESENT; in kfd_topology_add_device()
2152 if (dev->gpu->adev->asic_type == CHIP_CARRIZO) { in kfd_topology_add_device()
2153 dev->node_props.simd_count = in kfd_topology_add_device()
2154 cu_info->simd_per_cu * cu_info->number; in kfd_topology_add_device()
2155 dev->node_props.max_waves_per_simd = 10; in kfd_topology_add_device()
2158 /* kfd only cares about SRAM ECC on GFX and HBM ECC on UMC */ in kfd_topology_add_device()
2159 dev->node_props.capability |= in kfd_topology_add_device()
2160 ((dev->gpu->adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ? in kfd_topology_add_device()
2162 dev->node_props.capability |= in kfd_topology_add_device()
2163 ((dev->gpu->adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ? in kfd_topology_add_device()
2166 if (KFD_GC_VERSION(dev->gpu) != IP_VERSION(9, 0, 1)) in kfd_topology_add_device()
2167 dev->node_props.capability |= (dev->gpu->adev->ras_enabled != 0) ? in kfd_topology_add_device()
2170 if (KFD_IS_SVM_API_SUPPORTED(dev->gpu->adev)) in kfd_topology_add_device()
2171 dev->node_props.capability |= HSA_CAP_SVMAPI_SUPPORTED; in kfd_topology_add_device()
2173 if (dev->gpu->adev->gmc.is_app_apu || in kfd_topology_add_device()
2174 dev->gpu->adev->gmc.xgmi.connected_to_cpu) in kfd_topology_add_device()
2175 dev->node_props.capability |= HSA_CAP_FLAGS_COHERENTHOSTACCESS; in kfd_topology_add_device()
2187 * kfd_topology_update_io_links() - Update IO links after device removal.
2211 if (dev->proximity_domain > proximity_domain) in kfd_topology_update_io_links()
2212 dev->proximity_domain--; in kfd_topology_update_io_links()
2214 list_for_each_entry_safe(iolink, tmp, &dev->io_link_props, list) { in kfd_topology_update_io_links()
2219 if (iolink->node_to == proximity_domain) { in kfd_topology_update_io_links()
2220 list_del(&iolink->list); in kfd_topology_update_io_links()
2221 dev->node_props.io_links_count--; in kfd_topology_update_io_links()
2223 if (iolink->node_from > proximity_domain) in kfd_topology_update_io_links()
2224 iolink->node_from--; in kfd_topology_update_io_links()
2225 if (iolink->node_to > proximity_domain) in kfd_topology_update_io_links()
2226 iolink->node_to--; in kfd_topology_update_io_links()
2230 list_for_each_entry_safe(p2plink, tmp, &dev->p2p_link_props, list) { in kfd_topology_update_io_links()
2235 if (p2plink->node_to == proximity_domain) { in kfd_topology_update_io_links()
2236 list_del(&p2plink->list); in kfd_topology_update_io_links()
2237 dev->node_props.p2p_links_count--; in kfd_topology_update_io_links()
2239 if (p2plink->node_from > proximity_domain) in kfd_topology_update_io_links()
2240 p2plink->node_from--; in kfd_topology_update_io_links()
2241 if (p2plink->node_to > proximity_domain) in kfd_topology_update_io_links()
2242 p2plink->node_to--; in kfd_topology_update_io_links()
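kfd_topology_update_io_links() renumbers after a removal: every proximity domain above the removed one shifts down by one, links pointing at the removed node are deleted, and surviving endpoints above it are decremented. A compact array-based illustration of the same renumbering (purely illustrative, not driver code):

#include <stdio.h>

int main(void)
{
        /* node_from -> node_to pairs before removing proximity domain 1;
         * links originating at the removed node go away with its device. */
        int links[][2] = { {0, 1}, {0, 2}, {2, 0} };
        int n = 3, removed = 1, kept = 0;

        for (int i = 0; i < n; i++) {
                int from = links[i][0], to = links[i][1];

                if (to == removed)
                        continue;       /* drop links to the dead node */
                if (from > removed)
                        from--;         /* shift endpoints above it down */
                if (to > removed)
                        to--;
                links[kept][0] = from;
                links[kept][1] = to;
                kept++;
        }
        for (int i = 0; i < kept; i++)
                printf("%d -> %d\n", links[i][0], links[i][1]);  /* 0->1, 1->0 */
        return 0;
}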
2252 int res = -ENODEV; in kfd_topology_remove_device()
2258 if (dev->gpu == gpu) { in kfd_topology_remove_device()
2259 gpu_id = dev->gpu_id; in kfd_topology_remove_device()
2262 sys_props.num_devices--; in kfd_topology_remove_device()
2264 topology_crat_proximity_domain = sys_props.num_devices-1; in kfd_topology_remove_device()
2282 /* kfd_topology_enum_kfd_devices - Enumerate through all devices in KFD
2285 * Return - 0: On success (@kdev will be NULL for non-GPU nodes)
2286 * -1: If end of list
2299 *kdev = top_dev->gpu; in kfd_topology_enum_kfd_devices()
2309 return -1; in kfd_topology_enum_kfd_devices()
2318 return -1; in kfd_cpumask_to_apic_id()
2321 return -1; in kfd_cpumask_to_apic_id()
2329 /* kfd_numa_node_to_apic_id - Returns the APIC ID of the first logical processor
2331 * Return -1 on failure
2335 if (numa_node_id == -1) { in kfd_numa_node_to_apic_id()
2353 if (!dev->gpu) { in kfd_debugfs_hqds_by_device()
2358 seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id); in kfd_debugfs_hqds_by_device()
2359 r = dqm_debugfs_hqds(m, dev->gpu->dqm); in kfd_debugfs_hqds_by_device()
2378 if (!dev->gpu) { in kfd_debugfs_rls_by_device()
2383 seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id); in kfd_debugfs_rls_by_device()
2384 r = pm_debugfs_runlist(m, &dev->gpu->dqm->packet_mgr); in kfd_debugfs_rls_by_device()