1# Copyright © 2024 Intel Corporation 2 3# Permission is hereby granted, free of charge, to any person obtaining a 4# copy of this software and associated documentation files (the "Software"), 5# to deal in the Software without restriction, including without limitation 6# the rights to use, copy, modify, merge, publish, distribute, sublicense, 7# and/or sell copies of the Software, and to permit persons to whom the 8# Software is furnished to do so, subject to the following conditions: 9 10# The above copyright notice and this permission notice (including the next 11# paragraph) shall be included in all copies or substantial portions of the 12# Software. 13 14# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20# IN THE SOFTWARE. 21 22from textwrap import dedent 23 24# TYPES is an ordered list of all declarations in this file. 
TYPES = []  # every Define, Enum and Struct appends itself here on construction

# TYPES_BY_NAME allows the lookup of any declaration
TYPES_BY_NAME = {}


class Define:
    """Specifies a c macro definition.

    Constructing a Define appends it to TYPES.  Unlike Enum and Struct it is
    not registered in TYPES_BY_NAME.
    """
    def __init__(self, name, value, comment=None):
        self.name = name
        self.value = value
        self.comment = comment
        TYPES.append(self)


class EnumValue:
    """allows comments and setting of enum values

    value, comment, group_begin and group_end are all optional and are stored
    unchanged; str() of an EnumValue is its name.
    """
    def __init__(self, name, value=None, comment=None,
                 group_begin=None, group_end=None):
        self.name = name
        self.value = value
        self.comment = comment
        self.group_begin = group_begin
        self.group_end = group_end

    def __str__(self):
        return self.name


class Enum:
    """Stores details needed to declare and serialize an enumeration

    `values` may mix EnumValue objects and plain names; plain names are
    wrapped in EnumValue so that self.values is uniformly EnumValue.
    Constructing an Enum appends it to TYPES and registers it in
    TYPES_BY_NAME under `name`.
    """
    def __init__(self, name, values, external=False):
        self.name = name
        self.values = [v if isinstance(v, EnumValue) else EnumValue(v)
                       for v in values]
        self.external = external
        TYPES.append(self)
        TYPES_BY_NAME[name] = self


class Member:
    """Stores details needed to declare and serialize the member of a struct."""
    def __init__(self, member_type, name, array=None,
                 compiler_field=False, ray_tracing_field=False,
                 comment=None):
        self.member_type = member_type
        self.name = name
        self.array = array
        # indicates whether this field is used by the compiler, and whether it
        # should be included in the shader compiler cache hash function.
        self.compiler_field = compiler_field
        self.ray_tracing_field = ray_tracing_field
        self.comment = comment


class Struct:
    """Stores details needed to declare and serialize a struct

    `members` is stored as given.  Constructing a Struct appends it to TYPES
    and registers it in TYPES_BY_NAME under `name`.
    """
    def __init__(self, name, members):
        self.name = name
        self.members = members
        TYPES.append(self)
        TYPES_BY_NAME[name] = self


INT_TYPES = {
    "uint8_t",
    "uint16_t",
    "uint32_t",
    "uint64_t",
    "unsigned",
    "int",
}

FUNDAMENTAL_TYPES = {"char", "bool"} | INT_TYPES

Define("INTEL_DEVICE_MAX_NAME_SIZE", 64)
Define("INTEL_DEVICE_MAX_SLICES", 8)
Define("INTEL_DEVICE_MAX_SUBSLICES", 8, "Maximum on gfx11")
Define("INTEL_DEVICE_MAX_EUS_PER_SUBSLICE", 16, "Maximum on gfx11")
Define("INTEL_DEVICE_MAX_PIXEL_PIPES", 16, "Maximum on DG2")

Enum("intel_platform",
     [EnumValue("INTEL_PLATFORM_GFX3", value=1),
      "INTEL_PLATFORM_I965",
      "INTEL_PLATFORM_ILK",
      "INTEL_PLATFORM_G4X",
      "INTEL_PLATFORM_SNB",
      "INTEL_PLATFORM_IVB",
      "INTEL_PLATFORM_BYT",
      "INTEL_PLATFORM_HSW",
      "INTEL_PLATFORM_BDW",
      "INTEL_PLATFORM_CHV",
      "INTEL_PLATFORM_SKL",
      "INTEL_PLATFORM_BXT",
      "INTEL_PLATFORM_KBL",
      "INTEL_PLATFORM_GLK",
      "INTEL_PLATFORM_CFL",
      "INTEL_PLATFORM_ICL",
      "INTEL_PLATFORM_EHL",
      "INTEL_PLATFORM_TGL",
      "INTEL_PLATFORM_RKL",
      "INTEL_PLATFORM_DG1",
      "INTEL_PLATFORM_ADL",
      "INTEL_PLATFORM_RPL",
      EnumValue("INTEL_PLATFORM_DG2_G10", group_begin="DG2"),
      "INTEL_PLATFORM_DG2_G11",
      EnumValue("INTEL_PLATFORM_DG2_G12", group_end="DG2"),
      EnumValue("INTEL_PLATFORM_ATSM_G10", group_begin="ATSM"),
      EnumValue("INTEL_PLATFORM_ATSM_G11", group_end="ATSM"),
      EnumValue("INTEL_PLATFORM_MTL_U", group_begin="MTL"),
      EnumValue("INTEL_PLATFORM_MTL_H", group_end="MTL"),
      EnumValue("INTEL_PLATFORM_ARL_U", group_begin="ARL"),
      EnumValue("INTEL_PLATFORM_ARL_H", group_end="ARL"),
      "INTEL_PLATFORM_LNL",
      "INTEL_PLATFORM_BMG",
      ])

# --- Declarations below are registered in TYPES in the order written. ---

Struct(
    "intel_memory_class_instance",
    [
        Member(
            "int", "klass",
            comment="Kernel backend specific class value, no translation needed yet",
        ),
        Member("int", "instance"),
    ],
)

Enum(
    "intel_device_info_mmap_mode",
    [
        EnumValue("INTEL_DEVICE_INFO_MMAP_MODE_UC", value=0),
        EnumValue("INTEL_DEVICE_INFO_MMAP_MODE_WC"),
        EnumValue("INTEL_DEVICE_INFO_MMAP_MODE_WB"),
        EnumValue(
            "INTEL_DEVICE_INFO_MMAP_MODE_XD",
            comment=dedent("""\
                Xe2+ only. Only supported in GPU side and used for displayable
                buffers."""),
        ),
    ],
)

Struct(
    "intel_device_info_pat_entry",
    [
        Member("uint8_t", "index"),
        Member(
            "intel_device_info_mmap_mode", "mmap",
            comment=dedent("""\
                This tells KMD what caching mode the CPU mapping should use.
                It has nothing to do with any PAT cache modes."""),
        ),
    ],
)

# Cooperative-matrix enums; they are referenced by the
# intel_cooperative_matrix_configuration struct further down.
Enum(
    "intel_cmat_scope",
    [
        EnumValue("INTEL_CMAT_SCOPE_NONE", value=0),
        "INTEL_CMAT_SCOPE_SUBGROUP",
    ],
)

Enum(
    "intel_cooperative_matrix_component_type",
    [
        "INTEL_CMAT_FLOAT16",
        "INTEL_CMAT_FLOAT32",
        "INTEL_CMAT_SINT32",
        "INTEL_CMAT_SINT8",
        "INTEL_CMAT_UINT32",
        "INTEL_CMAT_UINT8",
    ],
)

# INTEL_ENGINE_CLASS_INVALID is the last enumerator and is also used as an
# array length by two members of intel_device_info.
Enum(
    "intel_engine_class",
    [
        "INTEL_ENGINE_CLASS_RENDER",
        "INTEL_ENGINE_CLASS_COPY",
        "INTEL_ENGINE_CLASS_VIDEO",
        "INTEL_ENGINE_CLASS_VIDEO_ENHANCE",
        "INTEL_ENGINE_CLASS_COMPUTE",
        "INTEL_ENGINE_CLASS_INVALID",
    ],
)

Struct(
    "intel_cooperative_matrix_configuration",
    [
        Member(
            "intel_cmat_scope", "scope",
            comment=dedent("""\
                Matrix A is MxK.
                Matrix B is KxN.
                Matrix C and Matrix Result are MxN.

                Result = A * B + C;"""),
        ),
        Member("uint8_t", "m"),
        Member("uint8_t", "n"),
        Member("uint8_t", "k"),
        Member("intel_cooperative_matrix_component_type", "a"),
        Member("intel_cooperative_matrix_component_type", "b"),
        Member("intel_cooperative_matrix_component_type", "c"),
        Member("intel_cooperative_matrix_component_type", "result"),
    ],
)

# The only enum in this section created with external=True.
# NOTE(review): how consumers of TYPES treat `external` is not visible here.
Enum(
    "intel_kmd_type",
    [
        "INTEL_KMD_TYPE_INVALID",
        "INTEL_KMD_TYPE_I915",
        "INTEL_KMD_TYPE_XE",
        "INTEL_KMD_TYPE_STUB",
        "INTEL_KMD_TYPE_LAST",
    ],
    external=True,
)

# Memory description structs, declared before the structs that embed them.
Struct(
    "intel_device_info_mem_region",
    [
        Member("uint64_t", "size"),
        Member("uint64_t", "free"),
    ],
)

Struct(
    "intel_device_info_ram_desc",
    [
        Member("intel_memory_class_instance", "mem"),
        Member("intel_device_info_mem_region", "mappable"),
        Member("intel_device_info_mem_region", "unmappable"),
    ],
)

Struct(
    "intel_device_info_mem_desc",
    [
        Member("bool", "use_class_instance"),
        Member("intel_device_info_ram_desc", "sram"),
        Member("intel_device_info_ram_desc", "vram"),
    ],
)

Struct(
    "intel_device_info_urb_desc",
    [
        Member("int", "size"),
        Member("int", "min_entries", array=4),
        Member("int", "max_entries", array=4),
    ],
)

Struct(
    "intel_device_info_pat_desc",
    [
        Member(
            "intel_device_info_pat_entry", "cached_coherent",
            comment="To be used when CPU access is frequent, WB + 1 or 2 way coherent",
        ),
        Member(
            "intel_device_info_pat_entry", "scanout",
            comment="scanout and external BOs",
        ),
        Member(
            "intel_device_info_pat_entry", "compressed",
            comment="Only supported in Xe2, compressed + WC",
        ),
        Member(
            "intel_device_info_pat_entry", "writeback_incoherent",
            comment=("BOs without special needs, can be WB not coherent "
                     "or WC it depends on the platforms and KMD"),
        ),
        Member("intel_device_info_pat_entry", "writecombining"),
    ],
)

# Top-level device description.  Member order is kept exactly as declared.
Struct(
    "intel_device_info",
    [
        Member("intel_kmd_type", "kmd_type"),
        Member(
            "int", "ver",
            compiler_field=True,
            comment="Driver internal numbers used to differentiate platforms.",
        ),
        Member("int", "verx10", compiler_field=True),
        Member(
            "uint32_t", "gfx_ip_ver",
            compiler_field=True,
            comment=dedent("""\
                This is the run-time hardware GFX IP version that may be more specific
                than ver/verx10. ver/verx10 may be more useful for comparing a class
                of devices whereas gfx_ip_ver may be more useful for precisely
                checking for a graphics ip type. GFX_IP_VER(major, minor) should be
                used to compare IP versions."""),
        ),
        Member(
            "int", "revision",
            comment=dedent("""\
                This revision is queried from KMD unlike
                pci_revision_id from drm device. Its value is not always
                same as the pci_revision_id.
                For LNL+ this is the stepping of GT IP/GMD RevId."""),
        ),
        Member("int", "gt"),

        # PCI identification
        Member("uint16_t", "pci_domain", comment="PCI info"),
        Member("uint8_t", "pci_bus"),
        Member("uint8_t", "pci_dev"),
        Member("uint8_t", "pci_func"),
        Member("uint16_t", "pci_device_id"),
        Member("uint8_t", "pci_revision_id"),

        Member("intel_platform", "platform", compiler_field=True),

        # Boolean capability flags
        Member("bool", "has_hiz_and_separate_stencil"),
        Member("bool", "must_use_separate_stencil"),
        Member("bool", "has_sample_with_hiz"),
        Member("bool", "has_bit6_swizzle"),
        Member("bool", "has_llc"),
        Member("bool", "has_pln", compiler_field=True),
        Member("bool", "has_64bit_float", compiler_field=True),
        Member("bool", "has_64bit_float_via_math_pipe", compiler_field=True),
        Member("bool", "has_64bit_int", compiler_field=True),
        Member("bool", "has_integer_dword_mul", compiler_field=True),
        Member("bool", "has_compr4", compiler_field=True),
        Member("bool", "has_surface_tile_offset"),
        Member("bool", "supports_simd16_3src", compiler_field=True),
        Member("bool", "disable_ccs_repack"),
        Member(
            "bool", "has_illegal_ccs_values",
            comment="True if CCS needs to be initialized before use.",
        ),
        Member(
            "bool", "has_flat_ccs",
            comment=dedent("""\
                True if CCS uses a flat virtual address translation to a memory
                carve-out, rather than aux map translations, or additional surfaces."""),
        ),
        Member("bool", "has_aux_map"),
        Member("bool", "has_caching_uapi"),
        Member("bool", "has_tiling_uapi"),
        Member("bool", "has_ray_tracing", compiler_field=True),
        Member("bool", "has_ray_query"),
        Member("bool", "has_local_mem"),
        Member("bool", "has_lsc", compiler_field=True),
        Member("bool", "has_mesh_shading"),
        Member("bool", "has_mmap_offset"),
        Member("bool", "has_userptr_probe"),
        Member("bool", "has_context_isolation"),
        Member("bool", "has_set_pat_uapi"),
        Member("bool", "has_indirect_unroll"),
        Member(
            "bool", "has_negative_rhw_bug",
            compiler_field=True,
            comment="Intel hardware quirks",
        ),
        Member(
            "bool", "has_coarse_pixel_primitive_and_cb",
            compiler_field=True,
            comment=dedent("""\
                Whether this platform supports fragment shading rate controlled by a
                primitive in geometry shaders and by a control buffer."""),
        ),
        Member(
            "bool", "has_compute_engine",
            comment="Whether this platform has compute engine",
        ),
        Member(
            "bool", "needs_unlit_centroid_workaround",
            compiler_field=True,
            comment=dedent("""\
                Some versions of Gen hardware don't do centroid interpolation correctly
                on unlit pixels, causing incorrect values for derivatives near triangle
                edges. Enabling this flag causes the fragment shader to use
                non-centroid interpolation for unlit pixels, at the expense of two extra
                fragment shader instructions."""),
        ),
        Member(
            "bool", "needs_null_push_constant_tbimr_workaround",
            comment=dedent("""\
                Whether the platform needs an undocumented workaround for a hardware bug
                that affects draw calls with a pixel shader that has 0 push constant cycles
                when TBIMR is enabled, which has been seen to lead to hangs. To avoid the
                issue we simply pad the push constant payload to be at least 1 register."""),
        ),
        Member(
            "bool", "is_adl_n",
            comment="We need this for ADL-N specific Wa_14014966230.",
        ),

        # Slice / subslice / EU topology
        Member(
            "unsigned", "num_slices",
            comment=dedent("""\
                GPU hardware limits

                In general, you can find shader thread maximums by looking at the "Maximum
                Number of Threads" field in the Intel PRM description of the 3DSTATE_VS,
                3DSTATE_GS, 3DSTATE_HS, 3DSTATE_DS, and 3DSTATE_PS commands. URB entry
                limits come from the "Number of URB Entries" field in the
                3DSTATE_URB_VS command and friends.

                These fields are used to calculate the scratch space to allocate. The
                amount of scratch space can be larger without being harmful on modern
                GPUs, however, prior to Haswell, programming the maximum number of threads
                to greater than the hardware maximum would cause GPU performance to tank.

                Total number of slices present on the device whether or not they've been
                fused off.

                XXX: CS thread counts are limited by the inability to do cross subslice
                communication. It is the effectively the number of logical threads which
                can be executed in a subslice. Fuse configurations may cause this number
                to change, so we program @max_cs_threads as the lower maximum."""),
        ),
        Member(
            "unsigned", "max_slices",
            compiler_field=True,
            comment=dedent("""\
                Maximum number of slices present on this device (can be more than
                num_slices if some slices are fused)."""),
        ),
        Member(
            "unsigned", "num_subslices",
            array="INTEL_DEVICE_MAX_SLICES",
            comment="Number of subslices for each slice (used to be uniform until CNL).",
        ),
        Member(
            "unsigned", "max_subslices_per_slice",
            compiler_field=True,
            comment=dedent("""\
                Maximum number of subslices per slice present on this device (can be
                more than the maximum value in the num_subslices[] array if some
                subslices are fused).

                This is GT_SS_PER_SLICE in SKU."""),
        ),
        Member(
            "unsigned", "ppipe_subslices",
            array="INTEL_DEVICE_MAX_PIXEL_PIPES",
            comment="Number of subslices on each pixel pipe (ICL).",
        ),
        Member(
            "unsigned", "max_eus_per_subslice",
            compiler_field=True,
            comment="Maximum number of EUs per subslice (some EUs can be fused off).",
        ),
        Member(
            "unsigned", "num_thread_per_eu",
            compiler_field=True,
            comment="Number of threads per eu, varies between 4 and 8 between generations.",
        ),
        Member(
            "uint8_t", "grf_size",
            comment="Size of a register from the EU GRF file in bytes.",
        ),
        Member(
            "uint8_t", "slice_masks",
            comment="A bit mask of the slices available.",
        ),
        Member(
            "uint8_t", "subslice_masks",
            array="INTEL_DEVICE_MAX_SLICES * DIV_ROUND_UP(INTEL_DEVICE_MAX_SUBSLICES, 8)",
            compiler_field=True,
            ray_tracing_field=True,
            comment=dedent("""\
                An array of bit mask of the subslices available, use subslice_slice_stride
                to access this array."""),
        ),
        Member(
            "unsigned", "subslice_total",
            comment=dedent("""\
                The number of enabled subslices (considering fusing). For exactly which
                subslices are enabled, see subslice_masks[]."""),
        ),
        Member(
            "uint8_t", "eu_masks",
            array=("INTEL_DEVICE_MAX_SLICES * INTEL_DEVICE_MAX_SUBSLICES * "
                   "DIV_ROUND_UP(INTEL_DEVICE_MAX_EUS_PER_SUBSLICE, 8)"),
            comment=dedent("""\
                An array of bit mask of EUs available, use eu_slice_stride &
                eu_subslice_stride to access this array."""),
        ),
        Member(
            "uint16_t", "subslice_slice_stride",
            compiler_field=True,
            comment="Stride to access subslice_masks[].",
        ),
        Member(
            "uint16_t", "eu_slice_stride",
            comment="Strides to access eu_masks[].",
        ),
        Member("uint16_t", "eu_subslice_stride"),
        Member("unsigned", "l3_banks"),

        # Per-stage thread limits
        Member(
            "unsigned", "max_vs_threads",
            comment="Maximum Vertex Shader threads",
        ),
        Member(
            "unsigned", "max_tcs_threads",
            comment="Maximum Hull Shader threads",
        ),
        Member(
            "unsigned", "max_tes_threads",
            comment="Maximum Domain Shader threads",
        ),
        Member(
            "unsigned", "max_gs_threads",
            comment="Maximum Geometry Shader threads",
        ),
        Member(
            "unsigned", "max_wm_threads",
            comment=dedent("""\
                Theoretical maximum number of Pixel Shader threads.

                PSD means Pixel Shader Dispatcher. On modern Intel GPUs, hardware will
                automatically scale pixel shader thread count, based on a single value
                programmed into 3DSTATE_PS.

                To calculate the maximum number of threads for Gfx8 beyond (which have
                multiple Pixel Shader Dispatchers):

                - Look up 3DSTATE_PS and find "Maximum Number of Threads Per PSD"
                - Usually there's only one PSD per subslice, so use the number of
                  subslices for number of PSDs.
                - For max_wm_threads, the total should be PSD threads * #PSDs."""),
        ),
        Member("unsigned", "max_threads_per_psd"),
        Member(
            "unsigned", "max_cs_threads",
            comment=dedent("""\
                Maximum Compute Shader threads.

                Thread count * number of EUs per subslice"""),
        ),
        Member(
            "unsigned", "max_cs_workgroup_threads",
            compiler_field=True,
            comment=dedent("""\
                Maximum number of threads per workgroup supported by the GPGPU_WALKER or
                COMPUTE_WALKER command.

                This may be smaller than max_cs_threads as it takes into account added
                restrictions on the GPGPU/COMPUTE_WALKER commands. While max_cs_threads
                expresses the total parallelism of the GPU, this expresses the maximum
                number of threads we can dispatch in a single workgroup."""),
        ),
        Member(
            "unsigned", "max_scratch_ids",
            array="MESA_SHADER_STAGES",
            compiler_field=True,
            comment=dedent("""\
                The maximum number of potential scratch ids. Due to hardware
                implementation details, the range of scratch ids may be larger than the
                number of subslices."""),
        ),
        Member("uint32_t", "max_scratch_size_per_thread", compiler_field=True),

        # URB, engines, memory and miscellaneous
        Member("intel_device_info_urb_desc", "urb"),
        Member("unsigned", "max_constant_urb_size_kb"),
        Member("unsigned", "mesh_max_constant_urb_size_kb"),
        Member("unsigned", "engine_class_prefetch", array="INTEL_ENGINE_CLASS_INVALID"),
        Member("unsigned", "engine_class_supported_count", array="INTEL_ENGINE_CLASS_INVALID"),
        Member("unsigned", "mem_alignment"),
        Member("uint64_t", "timestamp_frequency"),
        Member("uint64_t", "aperture_bytes"),
        Member("uint64_t", "gtt_size"),
        Member("int", "simulator_id"),
        Member("char", "name", array="INTEL_DEVICE_MAX_NAME_SIZE"),
        Member("bool", "no_hw"),
        Member("bool", "probe_forced", comment="Device needed INTEL_FORCE_PROBE"),
        Member("intel_device_info_mem_desc", "mem"),
        Member("intel_device_info_pat_desc", "pat"),
        Member(
            "intel_cooperative_matrix_configuration",
            "cooperative_matrix_configurations",
            array=4,
        ),
    ],
)