xref: /aosp_15_r20/external/mesa3d/src/intel/dev/intel_device_info.py (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1# Copyright © 2024 Intel Corporation
2
3# Permission is hereby granted, free of charge, to any person obtaining a
4# copy of this software and associated documentation files (the "Software"),
5# to deal in the Software without restriction, including without limitation
6# the rights to use, copy, modify, merge, publish, distribute, sublicense,
7# and/or sell copies of the Software, and to permit persons to whom the
8# Software is furnished to do so, subject to the following conditions:
9
10# The above copyright notice and this permission notice (including the next
11# paragraph) shall be included in all copies or substantial portions of the
12# Software.
13
14# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20# IN THE SOFTWARE.
21
22from textwrap import dedent
23
# TYPES is an ordered list of all declarations in this file: every Define,
# Enum and Struct appends itself here when it is constructed.
TYPES = []

# TYPES_BY_NAME allows the lookup of a declaration by its name.  Only Enum and
# Struct register themselves here; Define objects are reachable through TYPES
# only.
TYPES_BY_NAME = {}
29
class Define:
    """A C macro definition: a name, its value and an optional comment.

    Constructing an instance appends it to the module-level TYPES list.
    Unlike Enum and Struct, a Define is not entered into TYPES_BY_NAME.
    """

    def __init__(self, name, value, comment=None):
        self.name, self.value, self.comment = name, value, comment
        # Registration happens as a side effect of construction.
        TYPES.append(self)
37
class EnumValue:
    """A single enumerator, optionally carrying a value and a comment.

    All arguments are stored unchanged as attributes of the same name.
    group_begin / group_end are only recorded here; presumably they label the
    first and last enumerator of a named group, but the code that consumes
    them is not part of this file.
    """

    def __init__(self, name, value=None, comment=None,
                 group_begin=None, group_end=None):
        self.name, self.value, self.comment = name, value, comment
        self.group_begin, self.group_end = group_begin, group_end

    def __str__(self):
        # An enumerator prints as its bare name.
        return self.name
50
class Enum:
    """Holds what is needed to declare and serialize an enumeration.

    name:     name of the enumeration; also its key in TYPES_BY_NAME.
    values:   iterable of enumerators.  Items that are not already EnumValue
              instances are wrapped in EnumValue(item).
    external: stored as given (defaults to False); how it is interpreted is
              decided by code outside this file.

    Constructing an instance appends it to TYPES and registers it in
    TYPES_BY_NAME under `name`.
    """

    def __init__(self, name, values, external=False):
        self.name = name
        # Normalize so that every entry of self.values is an EnumValue.
        self.values = [entry if isinstance(entry, EnumValue) else EnumValue(entry)
                       for entry in values]
        self.external = external

        TYPES.append(self)
        TYPES_BY_NAME[name] = self
65
class Member:
    """Holds what is needed to declare and serialize one member of a struct.

    member_type:       type name of the member, as a string.
    name:              member name.
    array:             array length, or None (the default).  Declarations in
                       this file pass either an integer or a string
                       expression.
    compiler_field:    see the note in __init__.
    ray_tracing_field: stored as given; its consumer is not visible in this
                       file.
    comment:           optional descriptive text for the member.
    """

    def __init__(self, member_type, name, array=None,
                 compiler_field=False, ray_tracing_field=False,
                 comment=None):
        self.member_type, self.name, self.array = member_type, name, array
        # compiler_field indicates whether this field is used by the compiler,
        # and whether it should be included in the shader compiler cache hash
        # function.
        self.compiler_field = compiler_field
        self.ray_tracing_field = ray_tracing_field
        self.comment = comment
79
class Struct:
    """Holds what is needed to declare and serialize a struct.

    name:    name of the struct; also its key in TYPES_BY_NAME.
    members: the struct's members, stored as passed in (no copy is made).

    Constructing an instance appends it to TYPES and registers it in
    TYPES_BY_NAME under `name`.
    """

    def __init__(self, name, members):
        self.name, self.members = name, members

        TYPES.append(self)
        TYPES_BY_NAME[name] = self
87
# Names of the integer member types used by declarations in this file.
INT_TYPES = {
    "uint8_t",
    "uint16_t",
    "uint32_t",
    "uint64_t",
    "unsigned",
    "int",
}

# INT_TYPES plus "char" and "bool".
FUNDAMENTAL_TYPES = {"char", "bool"} | INT_TYPES
96
# Size limits recorded as C macro definitions.  The intel_device_info struct
# further down refers to them by name in its array lengths.
Define(name="INTEL_DEVICE_MAX_NAME_SIZE", value=64)
Define(name="INTEL_DEVICE_MAX_SLICES", value=8)
Define(name="INTEL_DEVICE_MAX_SUBSLICES", value=8, comment="Maximum on gfx11")
Define(name="INTEL_DEVICE_MAX_EUS_PER_SUBSLICE", value=16, comment="Maximum on gfx11")
Define(name="INTEL_DEVICE_MAX_PIXEL_PIPES", value=16, comment="Maximum on DG2")
102
# Platform identifiers.  Only the first enumerator has an explicit value (1);
# the rest carry none, so the order of this list is significant.  Entries
# tagged with group_begin / group_end bracket the DG2, ATSM, MTL and ARL
# groups.
Enum("intel_platform", [
    EnumValue("INTEL_PLATFORM_GFX3", value=1),
    "INTEL_PLATFORM_I965",
    "INTEL_PLATFORM_ILK",
    "INTEL_PLATFORM_G4X",
    "INTEL_PLATFORM_SNB",
    "INTEL_PLATFORM_IVB",
    "INTEL_PLATFORM_BYT",
    "INTEL_PLATFORM_HSW",
    "INTEL_PLATFORM_BDW",
    "INTEL_PLATFORM_CHV",
    "INTEL_PLATFORM_SKL",
    "INTEL_PLATFORM_BXT",
    "INTEL_PLATFORM_KBL",
    "INTEL_PLATFORM_GLK",
    "INTEL_PLATFORM_CFL",
    "INTEL_PLATFORM_ICL",
    "INTEL_PLATFORM_EHL",
    "INTEL_PLATFORM_TGL",
    "INTEL_PLATFORM_RKL",
    "INTEL_PLATFORM_DG1",
    "INTEL_PLATFORM_ADL",
    "INTEL_PLATFORM_RPL",
    EnumValue("INTEL_PLATFORM_DG2_G10", group_begin="DG2"),
    "INTEL_PLATFORM_DG2_G11",
    EnumValue("INTEL_PLATFORM_DG2_G12", group_end="DG2"),
    EnumValue("INTEL_PLATFORM_ATSM_G10", group_begin="ATSM"),
    EnumValue("INTEL_PLATFORM_ATSM_G11", group_end="ATSM"),
    EnumValue("INTEL_PLATFORM_MTL_U", group_begin="MTL"),
    EnumValue("INTEL_PLATFORM_MTL_H", group_end="MTL"),
    EnumValue("INTEL_PLATFORM_ARL_U", group_begin="ARL"),
    EnumValue("INTEL_PLATFORM_ARL_H", group_end="ARL"),
    "INTEL_PLATFORM_LNL",
    "INTEL_PLATFORM_BMG",
])
138
# A (class, instance) pair of ints identifying a memory class instance.
Struct("intel_memory_class_instance", [
    Member("int", "klass",
           comment="Kernel backend specific class value, no translation needed yet"),
    Member("int", "instance"),
])
143
# Mmap modes.  The first enumerator is pinned to 0; the others have no
# explicit value.
Enum("intel_device_info_mmap_mode", [
    EnumValue("INTEL_DEVICE_INFO_MMAP_MODE_UC", value=0),
    EnumValue("INTEL_DEVICE_INFO_MMAP_MODE_WC"),
    EnumValue("INTEL_DEVICE_INFO_MMAP_MODE_WB"),
    EnumValue("INTEL_DEVICE_INFO_MMAP_MODE_XD", comment=dedent("""\
                 Xe2+ only. Only supported in GPU side and used for displayable
                 buffers.""")),
])
153
# One PAT entry: an index plus the mmap mode to use for CPU mappings.
Struct("intel_device_info_pat_entry", [
    Member("uint8_t", "index"),
    Member("intel_device_info_mmap_mode", "mmap", comment=dedent("""\
               This tells KMD what caching mode the CPU mapping should use.
               It has nothing to do with any PAT cache modes.""")),
])
160
# Cooperative matrix scope; NONE is pinned to 0.
Enum("intel_cmat_scope", [
    EnumValue("INTEL_CMAT_SCOPE_NONE", value=0),
    "INTEL_CMAT_SCOPE_SUBGROUP",
])
164
# Component types available for cooperative matrices.  No explicit values are
# given, so the order of the list matters.
Enum("intel_cooperative_matrix_component_type", [
    "INTEL_CMAT_FLOAT16",
    "INTEL_CMAT_FLOAT32",
    "INTEL_CMAT_SINT32",
    "INTEL_CMAT_SINT8",
    "INTEL_CMAT_UINT32",
    "INTEL_CMAT_UINT8",
])
172
# Engine classes.  INTEL_ENGINE_CLASS_INVALID must stay last: the
# intel_device_info struct uses it as the length of its per-engine-class
# arrays.
Enum("intel_engine_class", [
    "INTEL_ENGINE_CLASS_RENDER",
    "INTEL_ENGINE_CLASS_COPY",
    "INTEL_ENGINE_CLASS_VIDEO",
    "INTEL_ENGINE_CLASS_VIDEO_ENHANCE",
    "INTEL_ENGINE_CLASS_COMPUTE",
    "INTEL_ENGINE_CLASS_INVALID",
])
180
# One cooperative matrix configuration: scope, the M/N/K dimensions and the
# component type of each of the four matrices.  The explanation of the
# dimensions is attached to the first member's comment.
Struct("intel_cooperative_matrix_configuration", [
    Member("intel_cmat_scope", "scope", comment=dedent("""\
           Matrix A is MxK.
           Matrix B is KxN.
           Matrix C and Matrix Result are MxN.

           Result = A * B + C;""")),
    Member("uint8_t", "m"),
    Member("uint8_t", "n"),
    Member("uint8_t", "k"),
    Member("intel_cooperative_matrix_component_type", "a"),
    Member("intel_cooperative_matrix_component_type", "b"),
    Member("intel_cooperative_matrix_component_type", "c"),
    Member("intel_cooperative_matrix_component_type", "result"),
])
196
# Kernel-mode driver type.  This is the only enumeration in this file created
# with external=True.
# NOTE(review): presumably that means the C enum is declared elsewhere -
# confirm against the generator that consumes TYPES.
Enum("intel_kmd_type", [
    "INTEL_KMD_TYPE_INVALID",
    "INTEL_KMD_TYPE_I915",
    "INTEL_KMD_TYPE_XE",
    "INTEL_KMD_TYPE_STUB",
    "INTEL_KMD_TYPE_LAST",
], external=True)
204
# Size and free amount of one memory region, both 64-bit.
Struct("intel_device_info_mem_region", [
    Member("uint64_t", "size"),
    Member("uint64_t", "free"),
])
208
# One kind of RAM: its memory class instance plus a mappable and an
# unmappable region.
Struct("intel_device_info_ram_desc", [
    Member("intel_memory_class_instance", "mem"),
    Member("intel_device_info_mem_region", "mappable"),
    Member("intel_device_info_mem_region", "unmappable"),
])
213
# Memory description: a use_class_instance flag and the sram / vram
# descriptors.
Struct("intel_device_info_mem_desc", [
    Member("bool", "use_class_instance"),
    Member("intel_device_info_ram_desc", "sram"),
    Member("intel_device_info_ram_desc", "vram"),
])
218
# URB description: a size plus 4-element min/max entry arrays.
Struct("intel_device_info_urb_desc", [
    Member("int", "size"),
    Member("int", "min_entries", array=4),
    Member("int", "max_entries", array=4),
])
223
# The PAT entries, one member per usage category.
Struct("intel_device_info_pat_desc", [
    Member("intel_device_info_pat_entry", "cached_coherent",
           comment="To be used when CPU access is frequent, WB + 1 or 2 way coherent"),
    Member("intel_device_info_pat_entry", "scanout",
           comment="scanout and external BOs"),
    Member("intel_device_info_pat_entry", "compressed",
           comment="Only supported in Xe2, compressed + WC"),
    Member("intel_device_info_pat_entry", "writeback_incoherent",
           comment=("BOs without special needs, can be WB not coherent "
                    "or WC it depends on the platforms and KMD")),
    Member("intel_device_info_pat_entry", "writecombining"),
])
239
# The top-level device description.  The Struct constructor appends it to
# TYPES and registers it in TYPES_BY_NAME under "intel_device_info".
#
# - The list below is the member order of the struct; keep it stable.
# - Members created with compiler_field=True are the ones flagged (see Member)
#   as used by the compiler and included in the shader compiler cache hash.
# - Array lengths given as strings name the Define macros and enumerators
#   declared earlier in this file.  MESA_SHADER_STAGES and DIV_ROUND_UP are
#   not declared in the visible part of this file.
Struct("intel_device_info",
       [Member("intel_kmd_type", "kmd_type"),

        Member("int", "ver", compiler_field=True,
               comment="Driver internal numbers used to differentiate platforms."),

        Member("int", "verx10", compiler_field=True),

        Member("uint32_t", "gfx_ip_ver", compiler_field=True,
               comment=dedent("""\
               This is the run-time hardware GFX IP version that may be more specific
               than ver/verx10. ver/verx10 may be more useful for comparing a class
               of devices whereas gfx_ip_ver may be more useful for precisely
               checking for a graphics ip type. GFX_IP_VER(major, minor) should be
               used to compare IP versions.""")),

        Member("int", "revision",
               comment=dedent("""\
               This revision is queried from KMD unlike
               pci_revision_id from drm device. Its value is not always
               same as the pci_revision_id.
               For LNL+ this is the stepping of GT IP/GMD RevId.""")),

        Member("int", "gt"),
        Member("uint16_t", "pci_domain", comment="PCI info"),
        Member("uint8_t", "pci_bus"),
        Member("uint8_t", "pci_dev"),
        Member("uint8_t", "pci_func"),
        Member("uint16_t", "pci_device_id"),
        Member("uint8_t", "pci_revision_id"),
        Member("intel_platform", "platform", compiler_field=True),
        # Boolean feature / capability flags.
        Member("bool", "has_hiz_and_separate_stencil"),
        Member("bool", "must_use_separate_stencil"),
        Member("bool", "has_sample_with_hiz"),
        Member("bool", "has_bit6_swizzle"),
        Member("bool", "has_llc"),
        Member("bool", "has_pln", compiler_field=True),
        Member("bool", "has_64bit_float", compiler_field=True),
        Member("bool", "has_64bit_float_via_math_pipe", compiler_field=True),
        Member("bool", "has_64bit_int", compiler_field=True),
        Member("bool", "has_integer_dword_mul", compiler_field=True),
        Member("bool", "has_compr4", compiler_field=True),
        Member("bool", "has_surface_tile_offset"),
        Member("bool", "supports_simd16_3src", compiler_field=True),
        Member("bool", "disable_ccs_repack"),

        Member("bool", "has_illegal_ccs_values",
               comment="True if CCS needs to be initialized before use."),

        Member("bool", "has_flat_ccs",
               comment=dedent("""\
               True if CCS uses a flat virtual address translation to a memory
               carve-out, rather than aux map translations, or additional surfaces.""")),

        Member("bool", "has_aux_map"),
        Member("bool", "has_caching_uapi"),
        Member("bool", "has_tiling_uapi"),
        Member("bool", "has_ray_tracing", compiler_field=True),
        Member("bool", "has_ray_query"),
        Member("bool", "has_local_mem"),
        Member("bool", "has_lsc", compiler_field=True),
        Member("bool", "has_mesh_shading"),
        Member("bool", "has_mmap_offset"),
        Member("bool", "has_userptr_probe"),
        Member("bool", "has_context_isolation"),
        Member("bool", "has_set_pat_uapi"),
        Member("bool", "has_indirect_unroll"),
        Member("bool", "has_negative_rhw_bug", compiler_field=True,
               comment="Intel hardware quirks"),

        Member("bool", "has_coarse_pixel_primitive_and_cb", compiler_field=True,
               comment=dedent("""\
               Whether this platform supports fragment shading rate controlled by a
               primitive in geometry shaders and by a control buffer.""")),

        Member("bool", "has_compute_engine", comment="Whether this platform has compute engine"),

        Member("bool", "needs_unlit_centroid_workaround", compiler_field=True,
               comment=dedent("""\
               Some versions of Gen hardware don't do centroid interpolation correctly
               on unlit pixels, causing incorrect values for derivatives near triangle
               edges.  Enabling this flag causes the fragment shader to use
               non-centroid interpolation for unlit pixels, at the expense of two extra
               fragment shader instructions.""")),

        Member("bool", "needs_null_push_constant_tbimr_workaround",
               comment=dedent("""\
               Whether the platform needs an undocumented workaround for a hardware bug
               that affects draw calls with a pixel shader that has 0 push constant cycles
               when TBIMR is enabled, which has been seen to lead to hangs.  To avoid the
               issue we simply pad the push constant payload to be at least 1 register.""")),

        Member("bool", "is_adl_n", comment="We need this for ADL-N specific Wa_14014966230."),

        # NOTE(review): the comment attached to num_slices appears to double as
        # a section header ("GPU hardware limits") for the members that follow;
        # only its "Total number of slices ..." paragraph describes num_slices
        # itself.
        Member("unsigned", "num_slices",
               comment=dedent("""\
               GPU hardware limits

               In general, you can find shader thread maximums by looking at the "Maximum
               Number of Threads" field in the Intel PRM description of the 3DSTATE_VS,
               3DSTATE_GS, 3DSTATE_HS, 3DSTATE_DS, and 3DSTATE_PS commands. URB entry
               limits come from the "Number of URB Entries" field in the
               3DSTATE_URB_VS command and friends.

               These fields are used to calculate the scratch space to allocate.  The
               amount of scratch space can be larger without being harmful on modern
               GPUs, however, prior to Haswell, programming the maximum number of threads
               to greater than the hardware maximum would cause GPU performance to tank.

               Total number of slices present on the device whether or not they've been
               fused off.

               XXX: CS thread counts are limited by the inability to do cross subslice
               communication. It is the effectively the number of logical threads which
               can be executed in a subslice. Fuse configurations may cause this number
               to change, so we program @max_cs_threads as the lower maximum.""")),

        Member("unsigned", "max_slices", compiler_field=True,
               comment=dedent("""\
               Maximum number of slices present on this device (can be more than
               num_slices if some slices are fused).""")),

        Member("unsigned", "num_subslices", array="INTEL_DEVICE_MAX_SLICES",
               comment="Number of subslices for each slice (used to be uniform until CNL)."),

        Member("unsigned", "max_subslices_per_slice", compiler_field=True,
               comment=dedent("""\
               Maximum number of subslices per slice present on this device (can be
               more than the maximum value in the num_subslices[] array if some
               subslices are fused).

               This is GT_SS_PER_SLICE in SKU.""")),

        Member("unsigned", "ppipe_subslices", array="INTEL_DEVICE_MAX_PIXEL_PIPES",
               comment="Number of subslices on each pixel pipe (ICL)."),

        Member("unsigned", "max_eus_per_subslice", compiler_field=True,
               comment="Maximum number of EUs per subslice (some EUs can be fused off)."),

        Member("unsigned", "num_thread_per_eu", compiler_field=True,
               comment="Number of threads per eu, varies between 4 and 8 between generations."),

        Member("uint8_t", "grf_size",
               comment="Size of a register from the EU GRF file in bytes."),

        Member("uint8_t", "slice_masks",
               comment="A bit mask of the slices available."),

        # subslice_masks is the only member in this struct that also sets
        # ray_tracing_field=True.
        Member("uint8_t", "subslice_masks",
               array="INTEL_DEVICE_MAX_SLICES * DIV_ROUND_UP(INTEL_DEVICE_MAX_SUBSLICES, 8)",
               compiler_field=True,
               ray_tracing_field=True,
               comment=dedent("""\
               An array of bit mask of the subslices available, use subslice_slice_stride
               to access this array.""")),

        Member("unsigned", "subslice_total",
               comment=dedent("""\
               The number of enabled subslices (considering fusing). For exactly which
               subslices are enabled, see subslice_masks[].""")),

        Member("uint8_t", "eu_masks",
               array=("INTEL_DEVICE_MAX_SLICES * INTEL_DEVICE_MAX_SUBSLICES * "
                      "DIV_ROUND_UP(INTEL_DEVICE_MAX_EUS_PER_SUBSLICE, 8)"),
               comment=dedent("""\
               An array of bit mask of EUs available, use eu_slice_stride &
               eu_subslice_stride to access this array.""")),

        Member("uint16_t", "subslice_slice_stride", compiler_field=True,
               comment="Stride to access subslice_masks[]."),

        Member("uint16_t", "eu_slice_stride",
               comment="Strides to access eu_masks[]."),

        Member("uint16_t", "eu_subslice_stride"),
        Member("unsigned", "l3_banks"),

        Member("unsigned", "max_vs_threads",
               comment="Maximum Vertex Shader threads"),

        Member("unsigned", "max_tcs_threads",
               comment="Maximum Hull Shader threads"),

        Member("unsigned", "max_tes_threads",
               comment="Maximum Domain Shader threads"),

        Member("unsigned", "max_gs_threads",
               comment="Maximum Geometry Shader threads"),

        Member("unsigned", "max_wm_threads",
               comment=dedent("""\
               Theoretical maximum number of Pixel Shader threads.

               PSD means Pixel Shader Dispatcher. On modern Intel GPUs, hardware will
               automatically scale pixel shader thread count, based on a single value
               programmed into 3DSTATE_PS.

               To calculate the maximum number of threads for Gfx8 beyond (which have
               multiple Pixel Shader Dispatchers):

               - Look up 3DSTATE_PS and find "Maximum Number of Threads Per PSD"
               - Usually there's only one PSD per subslice, so use the number of
                 subslices for number of PSDs.
               - For max_wm_threads, the total should be PSD threads * #PSDs.""")),

        Member("unsigned", "max_threads_per_psd"),

        Member("unsigned", "max_cs_threads",
               comment=dedent("""\
               Maximum Compute Shader threads.

               Thread count * number of EUs per subslice""")),

        Member("unsigned", "max_cs_workgroup_threads", compiler_field=True,
               comment=dedent("""\
               Maximum number of threads per workgroup supported by the GPGPU_WALKER or
               COMPUTE_WALKER command.

               This may be smaller than max_cs_threads as it takes into account added
               restrictions on the GPGPU/COMPUTE_WALKER commands.  While max_cs_threads
               expresses the total parallelism of the GPU, this expresses the maximum
               number of threads we can dispatch in a single workgroup.""")),


        Member("unsigned", "max_scratch_ids", array="MESA_SHADER_STAGES", compiler_field=True,
               comment=dedent("""\
               The maximum number of potential scratch ids. Due to hardware
               implementation details, the range of scratch ids may be larger than the
               number of subslices.""")),

        Member("uint32_t", "max_scratch_size_per_thread", compiler_field=True),

        Member("intel_device_info_urb_desc", "urb"),
        Member("unsigned", "max_constant_urb_size_kb"),
        Member("unsigned", "mesh_max_constant_urb_size_kb"),
        # Both arrays are sized by INTEL_ENGINE_CLASS_INVALID, the last
        # enumerator of intel_engine_class.
        Member("unsigned", "engine_class_prefetch", array="INTEL_ENGINE_CLASS_INVALID"),
        Member("unsigned", "engine_class_supported_count", array="INTEL_ENGINE_CLASS_INVALID"),
        Member("unsigned", "mem_alignment"),
        Member("uint64_t", "timestamp_frequency"),
        Member("uint64_t", "aperture_bytes"),
        Member("uint64_t", "gtt_size"),
        Member("int", "simulator_id"),
        Member("char", "name", array="INTEL_DEVICE_MAX_NAME_SIZE"),
        Member("bool", "no_hw"),
        Member("bool", "probe_forced", comment="Device needed INTEL_FORCE_PROBE"),
        # Nested structs declared earlier in this file.
        Member("intel_device_info_mem_desc", "mem"),
        Member("intel_device_info_pat_desc", "pat"),
        Member("intel_cooperative_matrix_configuration",
               "cooperative_matrix_configurations", array=4)]
       )
490