1 
2 /*===================== begin_copyright_notice ==================================
3 
4 * Copyright (c) 2024, Intel Corporation
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 
24 ======================= end_copyright_notice ==================================*/
25 //!
26 //! \file     mhw_render_hwcmd_xe2_hpg_next.h
27 //! \brief    Auto-generated constructors for MHW and states.
28 //! \details  This file may not be included outside of xe2_hpg as other components
29 //!           should use MHW interface to interact with MHW commands and states.
30 //!
31 
32 // DO NOT EDIT
33 
34 #ifndef __MHW_RENDER_HWCMD_XE2_HPG_NEXT_H__
35 #define __MHW_RENDER_HWCMD_XE2_HPG_NEXT_H__
36 
37 #include "mhw_hwcmd.h"
38 #pragma once
39 #pragma pack(1)
40 
41 #include <cstdint>
42 #include <cstddef>
43 
44 namespace mhw
45 {
46 namespace render
47 {
48 namespace xe2_hpg_next
49 {
50 struct Cmd
51 {
52 public:
53     // Internal Macros
       //
       // Helper macros used by the command definitions that follow.
       // NOTE(review): "mhw_hwcmd.h" is included near the top of this file; if it
       // also defines these macros, the replacement lists here must stay
       // token-identical to avoid an incompatible redefinition - confirm before
       // editing any of them.

       // __CODEGEN_MAX(_a, _b): evaluates to the larger of its two arguments.
       // Each argument appears twice in the expansion, so arguments with side
       // effects may be evaluated more than once.
54     #define __CODEGEN_MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b))
       // __CODEGEN_BITFIELD(l, h): number of bits in a field spanning bit l through
       // bit h inclusive (h - l + 1). The expansion is not wrapped in parentheses,
       // so it is only safe where no neighbouring operator can bind to it; in this
       // file it is used as a bit-field width.
55     #define __CODEGEN_BITFIELD(l, h) (h) - (l) + 1
       // __CODEGEN_OP_LENGTH_BIAS: constant subtracted by __CODEGEN_OP_LENGTH.
56     #define __CODEGEN_OP_LENGTH_BIAS 2
       // __CODEGEN_OP_LENGTH(x): x minus the bias, cast to uint32_t. x is first
       // raised to at least the bias via __CODEGEN_MAX, so inputs below the bias
       // yield 0 instead of wrapping around.
57     #define __CODEGEN_OP_LENGTH(x) (uint32_t)((__CODEGEN_MAX(x, __CODEGEN_OP_LENGTH_BIAS)) - __CODEGEN_OP_LENGTH_BIAS)
58 
GetOpLengthCmd59     static uint32_t GetOpLength(uint32_t uiLength) { return __CODEGEN_OP_LENGTH(uiLength); }
60 
61     //!
62     //! \brief PIPELINE_SELECT
63     //! \details
64     //!     The PIPELINE_SELECT command is used to specify which GPE pipeline is to
65     //!     be considered the 'current' active pipeline. Issuing
66     //!     3D-pipeline-specific commands when the Media pipeline is selected, or
67     //!     vice versa, is UNDEFINED.
68     //!
69     //!     Issuing 3D-pipeline-specific commands when the GPGPU pipeline is
70     //!     selected, or vice versa, is UNDEFINED.
71     //!
72     //!     Programming common non pipeline commands (e.g., STATE_BASE_ADDRESS) is
73     //!     allowed in all pipeline modes.
74     //!
75     //!     Software must ensure Render Cache, Depth Cache and Dataport are flushed
76     //!     through a stalling PIPE_CONTROL command prior to programming of
77     //!     PIPELINE_SELECT command transitioning Pipeline Select from 3D to
78     //!     GPGPU/Media. Similarly software must ensure Dataport flush is issued
79     //!     through a stalling PIPE_CONTROL command prior to programming of
80     //!     PIPELINE_SELECT command transitioning Pipeline Select from GPGPU/Media
81     //!     to 3D.
82     //!     Example:
83     //!
84     //!     Workload-3Dmode,
85     //!
86     //!     PIPE_CONTROL (CS Stall, Depth Cache Flush Enable, Render Target Cache
87     //!     Flush Enable, Dataport Flush Enable) ,
88     //!
89     //!     PIPELINE_SELECT ( GPGPU),
90     //!
91     //!     Workload-GPGPUmode,
92     //!
93     //!     PIPE_CONTROL (CS Stall, Dataport Flush Enable),
94     //!
95     //!     PIPELINE_SELECT ( 3D) ...
96     //!
97     //!
98     //!
99     //!     Workaround:
100     //!
101     //!     This command must be followed by a PIPE_CONTROL with CS Stall bit
102     //!     set.
103     //!
104     //!     "Pipe Selection" must be never set to "3D" in PIPELINE_SELECT command
105     //!     programmed for workloads submitted to ComputeCS.
106     //!
107     //!     While GPU is operating in GPGPU mode of operation and when a Mid
108     //!     Thread Preemption (if enabled) occurs on a PIPELINE_SELECT command
109     //!     with Media Sampler DOP CG Enable reset along with Pipeline Select Mode
110     //!     set to 3D and on resubmission of this context on context restore
111     //!     Sampler DOP CG Enable will be reset. This would mean the GPGPU mid
112     //!     thread preempted threads restored will get executed with media sampler
113     //!     DOP clock not gated consuming media sampler DOP power until all GPGPU
114     //!     threads have retired.
115     //!
116     //!     Programming of the PIPELINE_SELECT can be modified to avoid the above
117     //!     inefficiency. This can be done by programming Pipeline Selection and
118     //!     Media Sampler DOP CG Enable fields in two different PIPELINE_SELECT
119     //!     commands instead of a single PIPELINE_SELECT command.
120     //!
121     //!     Example:
122     //!     PIPELINE_SELECT ( Pipeline Selection = 3D, Media Sampler DOP CG Enable = False)
123     //!     To
124     //!     PIPELINE_SELECT ( Pipeline Selection = 3D)
125     //!     PIPELINE_SELECT (Media Sampler DOP CG Enable = False)
126     //!
127     struct PIPELINE_SELECT_CMD
128     {
129         union
130         {
131             struct
132             {
133                 uint32_t                 PipelineSelection                                : __CODEGEN_BITFIELD( 0,  1)    ; //!< PIPELINE_SELECTION
134                 uint32_t                 RenderSliceCommonPowerGateEnable                 : __CODEGEN_BITFIELD( 2,  2)    ; //!< RENDER_SLICE_COMMON_POWER_GATE_ENABLE
135                 uint32_t                 RenderSamplerPowerGateEnable                     : __CODEGEN_BITFIELD( 3,  3)    ; //!< RENDER_SAMPLER_POWER_GATE_ENABLE
136                 uint32_t                 Reserved4                                        : __CODEGEN_BITFIELD( 4,  4)    ; //!< Reserved
137                 uint32_t                 EnableComputeTo3DPerformanceMode                 : __CODEGEN_BITFIELD( 5,  5)    ; //!< Enable Compute to 3D performance mode
138                 uint32_t                 Reserved6                                        : __CODEGEN_BITFIELD( 6,  6)    ; //!< Reserved
139                 uint32_t                 SystolicModeEnable                               : __CODEGEN_BITFIELD( 7,  7)    ; //!< SYSTOLIC_MODE_ENABLE
140                 uint32_t                 MaskBits                                         : __CODEGEN_BITFIELD( 8, 15)    ; //!< Mask Bits
141                 uint32_t                 _3DCommandSubOpcode                              : __CODEGEN_BITFIELD(16, 23)    ; //!< _3D_COMMAND_SUB_OPCODE
142                 uint32_t                 _3DCommandOpcode                                 : __CODEGEN_BITFIELD(24, 26)    ; //!< _3D_COMMAND_OPCODE
143                 uint32_t                 CommandSubtype                                   : __CODEGEN_BITFIELD(27, 28)    ; //!< COMMAND_SUBTYPE
144                 uint32_t                 CommandType                                      : __CODEGEN_BITFIELD(29, 31)    ; //!< COMMAND_TYPE
145             };
146             uint32_t                     Value;
147         } DW0;
148 
149         //! \name Local enumerations
150 
151         //! \brief PIPELINE_SELECTION
152         //! \details
153         //!     Mask bits [9:8] has to be set for HW to look at this field when
154         //!     PIPELINE_SELECT command is parsed. Setting only one of the mask bit [9]
155         //!     or [8] is illegal.
156         enum PIPELINE_SELECTION
157         {
158             PIPELINE_SELECTION_3D                                            = 0, //!< 3D pipeline is selected
159             PIPELINE_SELECTION_GPGPU                                         = 2, //!< GPGPU pipeline is selected
160         };
161 
162         //! \brief RENDER_SLICE_COMMON_POWER_GATE_ENABLE
163         //! \details
164         //!     Mask bit [10] has to be set for HW to look at this field when
165         //!     PIPELINE_SELECT command is parsed.
166         enum RENDER_SLICE_COMMON_POWER_GATE_ENABLE
167         {
168             RENDER_SLICE_COMMON_POWER_GATE_ENABLE_DISABLED                   = 0, //!< Command Streamer sends message to PM to disable render slice common Power Gating.
169             RENDER_SLICE_COMMON_POWER_GATE_ENABLE_ENABLED                    = 1, //!< Command Streamer sends message to PM to enable render slice common Power Gating.
170         };
171 
172         //! \brief RENDER_SAMPLER_POWER_GATE_ENABLE
173         //! \details
174         //!     Mask bit [11] has to be set for HW to look at this field when
175         //!     PIPELINE_SELECT command is parsed.
176         enum RENDER_SAMPLER_POWER_GATE_ENABLE
177         {
178             RENDER_SAMPLER_POWER_GATE_ENABLE_DISABLED                        = 0, //!< Command Streamer sends message to PM to disable render sampler Power Gating.
179             RENDER_SAMPLER_POWER_GATE_ENABLE_ENABLED                         = 1, //!< Command Streamer sends message to PM to enable render sampler Power Gating.
180         };
181 
182         //! \brief SYSTOLIC_MODE_ENABLE
183         //! \details
184         //!     When set, this will enable systolic mode for the following
185         //!     COMPUTE_WALKER commands. This will lower the Fmax to avoid ICC current
186         //!     issues when executing systolic array commands in the execution units. If
187         //!     this is not set prior to executing systolic array operations, the
188         //!     context will be halted to avoid any ICC issues.
189         enum SYSTOLIC_MODE_ENABLE
190         {
191             SYSTOLIC_MODE_ENABLE_SYSTOLICMODEDISABLED                        = 0, //!< No additional details
192             SYSTOLIC_MODE_ENABLE_SYSTOLICMODEENABLED                         = 1, //!< No additional details
193         };
194 
195         enum _3D_COMMAND_SUB_OPCODE
196         {
197             _3D_COMMAND_SUB_OPCODE_PIPELINESELECT                            = 4, //!< No additional details
198         };
199 
200         enum _3D_COMMAND_OPCODE
201         {
202             _3D_COMMAND_OPCODE_GFXPIPENONPIPELINED                           = 1, //!< No additional details
203         };
204 
205         enum COMMAND_SUBTYPE
206         {
207             COMMAND_SUBTYPE_GFXPIPESINGLEDW                                  = 1, //!< No additional details
208         };
209 
210         enum COMMAND_TYPE
211         {
212             COMMAND_TYPE_GFXPIPE                                             = 3, //!< No additional details
213         };
214 
215         //! \name Initializations
216 
217         //! \brief Explicit member initialization function
218         PIPELINE_SELECT_CMD();
219 
220         static const size_t dwSize = 1;
221         static const size_t byteSize = 4;
222     };
223 
224     //!
225     //! \brief STATE_BASE_ADDRESS
226     //! \details
227     //!     The STATE_BASE_ADDRESS command sets the base pointers for subsequent
228     //!     state, instruction, and media indirect object accesses by the GPE. For
229     //!     more information see the Base Address Utilization table in the Memory
230     //!     Access Indirection narrative topic.
231     //!
232     //!     The following commands must be reissued following any change to the base
233     //!     addresses:  3DSTATE_CC_POINTERS
234     //!     3DSTATE_BINDING_TABLE_POINTERS
235     //!     3DSTATE_SAMPLER_STATE_POINTERS
236     //!     3DSTATE_VIEWPORT_STATE_POINTERS
237     //!     Execution of this command causes a full pipeline flush, thus its
238     //!     use should be minimized for higher performance.
239     //!
240     //!     If 3DSTATE_PS_EXTRA::Pixel Shader Is Per Coarse Pixel == 1, the
241     //!     3DSTATE_CPS_POINTERS command must be reissued following any change to
242     //!     the dynamic state base address.
243     //!
244     //!     SW must always program PIPE_CONTROL with "CS Stall" and "Render Target
245     //!     Cache Flush Enable" set before programming STATE_BASE_ADDRESS command
246     //!     for GPGPU workloads i.e when pipeline select is GPGPU via
247     //!     PIPELINE_SELECT command. This is required to achieve better GPGPU
248     //!     preemption latencies in certain workload programming sequences. If
249     //!     programming PIPE_CONTROL has performance implications then preemption
250     //!     latencies can be traded off against performance by not implementing this
251     //!     programming note.
252     //!
253     //!     SW must always program PIPE_CONTROL command with HDC Pipeline Flush set
254     //!     prior to programming of STATE_BASE_ADDRESS command for GPGPU/Media
255     //!     workloads i.e when pipeline select is GPGPU or Media via PIPELINE_SELECT
256     //!     command. This is required to ensure the write data out of the prior
257     //!     thread group are flushed out prior to the state changes due to the
258     //!     programming of STATE_BASE_ADDRESS command take place.
259     //!
260     struct STATE_BASE_ADDRESS_CMD
261     {
262         union
263         {
264             struct
265             {
266                 uint32_t                 DwordLength                                      : __CODEGEN_BITFIELD( 0,  7)    ; //!< DWORD_LENGTH
267                 uint32_t                 Reserved8                                        : __CODEGEN_BITFIELD( 8, 15)    ; //!< Reserved
268                 uint32_t                 _3DCommandSubOpcode                              : __CODEGEN_BITFIELD(16, 23)    ; //!< _3D_COMMAND_SUB_OPCODE
269                 uint32_t                 _3DCommandOpcode                                 : __CODEGEN_BITFIELD(24, 26)    ; //!< _3D_COMMAND_OPCODE
270                 uint32_t                 CommandSubtype                                   : __CODEGEN_BITFIELD(27, 28)    ; //!< COMMAND_SUBTYPE
271                 uint32_t                 CommandType                                      : __CODEGEN_BITFIELD(29, 31)    ; //!< COMMAND_TYPE
272             };
273             uint32_t                     Value;
274         } DW0;
275         union
276         {
277             struct
278             {
279                 uint64_t                 GeneralStateBaseAddressModifyEnable              : __CODEGEN_BITFIELD( 0,  0)    ; //!< GENERAL_STATE_BASE_ADDRESS_MODIFY_ENABLE
280                 uint64_t                 Reserved33                                       : __CODEGEN_BITFIELD( 1,  3)    ; //!< Reserved
281                 uint64_t                 GeneralStateMemoryObjectControlState             : __CODEGEN_BITFIELD( 4, 10)    ; //!< General State Memory Object Control State
282                 uint64_t                 Reserved43                                       : __CODEGEN_BITFIELD(11, 11)    ; //!< Reserved
283                 uint64_t                 GeneralStateBaseAddress                          : __CODEGEN_BITFIELD(12, 63)    ; //!< General State Base Address
284             };
285             uint32_t                     Value[2];
286         } DW1_2;
287         union
288         {
289             struct
290             {
291                 uint32_t                 CoherencySettingModifyEnable                     : __CODEGEN_BITFIELD( 0,  0)    ; //!< COHERENCY_SETTING_MODIFY_ENABLE
292                 uint32_t                 Reserved97                                       : __CODEGEN_BITFIELD( 1, 13)    ; //!< Reserved
293                 uint32_t                 DisableSupportForMultiGpuAtomicsForStatelessAccesses : __CODEGEN_BITFIELD(14, 14)    ; //!< DISABLE_SUPPORT_FOR_MULTI_GPU_ATOMICS_FOR_STATELESS_ACCESSES
294                 uint32_t                 DisableSupportForMultiGpuPartialWritesForStatelessMessages : __CODEGEN_BITFIELD(15, 15)    ; //!< DISABLE_SUPPORT_FOR_MULTI_GPU_PARTIAL_WRITES_FOR_STATELESS_MESSAGES
295                 uint32_t                 StatelessDataPortAccessMemoryObjectControlState  : __CODEGEN_BITFIELD(16, 22)    ; //!< Stateless Data Port Access Memory Object Control State
296                 uint32_t                 L1CacheControl                                   : __CODEGEN_BITFIELD(23, 25)    ; //!< L1 Cache Control
297                 uint32_t                 Reserved122                                      : __CODEGEN_BITFIELD(26, 31)    ; //!< Reserved
298             };
299             uint32_t                     Value;
300         } DW3;
301         union
302         {
303             struct
304             {
305                 uint64_t                 SurfaceStateBaseAddressModifyEnable              : __CODEGEN_BITFIELD( 0,  0)    ; //!< SURFACE_STATE_BASE_ADDRESS_MODIFY_ENABLE
306                 uint64_t                 Reserved129                                      : __CODEGEN_BITFIELD( 1,  3)    ; //!< Reserved
307                 uint64_t                 SurfaceStateMemoryObjectControlState             : __CODEGEN_BITFIELD( 4, 10)    ; //!< Surface State Memory Object Control State
308                 uint64_t                 Reserved139                                      : __CODEGEN_BITFIELD(11, 11)    ; //!< Reserved
309                 uint64_t                 SurfaceStateBaseAddress                          : __CODEGEN_BITFIELD(12, 63)    ; //!< Surface State Base Address
310             };
311             uint32_t                     Value[2];
312         } DW4_5;
313         union
314         {
315             struct
316             {
317                 uint64_t                 DynamicStateBaseAddressModifyEnable              : __CODEGEN_BITFIELD( 0,  0)    ; //!< DYNAMIC_STATE_BASE_ADDRESS_MODIFY_ENABLE
318                 uint64_t                 Reserved193                                      : __CODEGEN_BITFIELD( 1,  3)    ; //!< Reserved
319                 uint64_t                 DynamicStateMemoryObjectControlState             : __CODEGEN_BITFIELD( 4, 10)    ; //!< Dynamic State Memory Object Control State
320                 uint64_t                 Reserved203                                      : __CODEGEN_BITFIELD(11, 11)    ; //!< Reserved
321                 uint64_t                 DynamicStateBaseAddress                          : __CODEGEN_BITFIELD(12, 63)    ; //!< Dynamic State Base Address
322             };
323             uint32_t                     Value[2];
324         } DW6_7;
325         union
326         {
327             struct
328             {
329                 uint64_t                 IndirectObjectBaseAddressModifyEnable            : __CODEGEN_BITFIELD( 0,  0)    ; //!< INDIRECT_OBJECT_BASE_ADDRESS_MODIFY_ENABLE
330                 uint64_t                 Reserved257                                      : __CODEGEN_BITFIELD( 1,  3)    ; //!< Reserved
331                 uint64_t                 IndirectObjectMemoryObjectControlState           : __CODEGEN_BITFIELD( 4, 10)    ; //!< Indirect Object Memory Object Control State
332                 uint64_t                 Reserved267                                      : __CODEGEN_BITFIELD(11, 11)    ; //!< Reserved
333                 uint64_t                 IndirectObjectBaseAddress                        : __CODEGEN_BITFIELD(12, 63)    ; //!< Indirect Object Base Address
334             };
335             uint32_t                     Value[2];
336         } DW8_9;
337         union
338         {
339             struct
340             {
341                 uint64_t                 InstructionBaseAddressModifyEnable               : __CODEGEN_BITFIELD( 0,  0)    ; //!< INSTRUCTION_BASE_ADDRESS_MODIFY_ENABLE
342                 uint64_t                 Reserved321                                      : __CODEGEN_BITFIELD( 1,  3)    ; //!< Reserved
343                 uint64_t                 InstructionMemoryObjectControlState              : __CODEGEN_BITFIELD( 4, 10)    ; //!< Instruction Memory Object Control State
344                 uint64_t                 Reserved331                                      : __CODEGEN_BITFIELD(11, 11)    ; //!< Reserved
345                 uint64_t                 InstructionBaseAddress                           : __CODEGEN_BITFIELD(12, 63)    ; //!< Instruction Base Address
346             };
347             uint32_t                     Value[2];
348         } DW10_11;
349         union
350         {
351             struct
352             {
353                 uint32_t                 GeneralStateBufferSizeModifyEnable               : __CODEGEN_BITFIELD( 0,  0)    ; //!< GENERAL_STATE_BUFFER_SIZE_MODIFY_ENABLE
354                 uint32_t                 Reserved385                                      : __CODEGEN_BITFIELD( 1, 11)    ; //!< Reserved
355                 uint32_t                 GeneralStateBufferSize                           : __CODEGEN_BITFIELD(12, 31)    ; //!< General State Buffer Size
356             };
357             uint32_t                     Value;
358         } DW12;
359         union
360         {
361             struct
362             {
363                 uint32_t                 DynamicStateBufferSizeModifyEnable               : __CODEGEN_BITFIELD( 0,  0)    ; //!< DYNAMIC_STATE_BUFFER_SIZE_MODIFY_ENABLE
364                 uint32_t                 Reserved417                                      : __CODEGEN_BITFIELD( 1, 11)    ; //!< Reserved
365                 uint32_t                 DynamicStateBufferSize                           : __CODEGEN_BITFIELD(12, 31)    ; //!< Dynamic State Buffer Size
366             };
367             uint32_t                     Value;
368         } DW13;
369         union
370         {
371             struct
372             {
373                 uint32_t                 IndirectObjectBufferSizeModifyEnable             : __CODEGEN_BITFIELD( 0,  0)    ; //!< INDIRECT_OBJECT_BUFFER_SIZE_MODIFY_ENABLE
374                 uint32_t                 Reserved449                                      : __CODEGEN_BITFIELD( 1, 11)    ; //!< Reserved
375                 uint32_t                 IndirectObjectBufferSize                         : __CODEGEN_BITFIELD(12, 31)    ; //!< Indirect Object Buffer Size
376             };
377             uint32_t                     Value;
378         } DW14;
379         union
380         {
381             struct
382             {
383                 uint32_t                 InstructionBufferSizeModifyEnable                : __CODEGEN_BITFIELD( 0,  0)    ; //!< INSTRUCTION_BUFFER_SIZE_MODIFY_ENABLE
384                 uint32_t                 Reserved481                                      : __CODEGEN_BITFIELD( 1, 11)    ; //!< Reserved
385                 uint32_t                 InstructionBufferSize                            : __CODEGEN_BITFIELD(12, 31)    ; //!< Instruction Buffer Size
386             };
387             uint32_t                     Value;
388         } DW15;
389         union
390         {
391             struct
392             {
393                 uint64_t                 BindlessSurfaceStateBaseAddressModifyEnable      : __CODEGEN_BITFIELD( 0,  0)    ; //!< BINDLESS_SURFACE_STATE_BASE_ADDRESS_MODIFY_ENABLE
394                 uint64_t                 Reserved513                                      : __CODEGEN_BITFIELD( 1,  3)    ; //!< Reserved
395                 uint64_t                 BindlessSurfaceStateMemoryObjectControlState     : __CODEGEN_BITFIELD( 4, 10)    ; //!< Bindless Surface State Memory Object Control State
396                 uint64_t                 Reserved523                                      : __CODEGEN_BITFIELD(11, 11)    ; //!< Reserved
397                 uint64_t                 BindlessSurfaceStateBaseAddress                  : __CODEGEN_BITFIELD(12, 63)    ; //!< Bindless Surface State Base Address
398             };
399             uint32_t                     Value[2];
400         } DW16_17;
401         union
402         {
403             struct
404             {
405                 uint32_t                 BindlessSurfaceStateSize                                                         ; //!< Bindless Surface State Size
406             };
407             uint32_t                     Value;
408         } DW18;
409         union
410         {
411             struct
412             {
413                 uint64_t                 BindlessSamplerStateBaseAddressModifyEnable      : __CODEGEN_BITFIELD( 0,  0)    ; //!< BINDLESS_SAMPLER_STATE_BASE_ADDRESS_MODIFY_ENABLE
414                 uint64_t                 Reserved609                                      : __CODEGEN_BITFIELD( 1,  3)    ; //!< Reserved
415                 uint64_t                 BindlessSamplerStateMemoryObjectControlState     : __CODEGEN_BITFIELD( 4, 10)    ; //!< Bindless Sampler State Memory Object Control State
416                 uint64_t                 Reserved619                                      : __CODEGEN_BITFIELD(11, 11)    ; //!< Reserved
417                 uint64_t                 BindlessSamplerStateBaseAddress                  : __CODEGEN_BITFIELD(12, 63)    ; //!< Bindless Sampler State Base Address
418             };
419             uint32_t                     Value[2];
420         } DW19_20;
421         union
422         {
423             struct
424             {
425                 uint32_t                 Reserved672                                      : __CODEGEN_BITFIELD( 0, 11)    ; //!< Reserved
426                 uint32_t                 BindlessSamplerStateBufferSize                   : __CODEGEN_BITFIELD(12, 31)    ; //!< Bindless Sampler State Buffer Size
427             };
428             uint32_t                     Value;
429         } DW21;
430 
431         //! \name Local enumerations
432 
433         enum _3D_COMMAND_SUB_OPCODE
434         {
435             _3D_COMMAND_SUB_OPCODE_STATEBASEADDRESS                          = 1, //!< No additional details
436         };
437 
438         enum _3D_COMMAND_OPCODE
439         {
440             _3D_COMMAND_OPCODE_GFXPIPENONPIPELINED                           = 1, //!< No additional details
441         };
442 
443         enum COMMAND_SUBTYPE
444         {
445             COMMAND_SUBTYPE_GFXPIPECOMMON                                    = 0, //!< No additional details
446         };
447 
448         enum COMMAND_TYPE
449         {
450             COMMAND_TYPE_GFXPIPE                                             = 3, //!< No additional details
451         };
452 
453         //! \brief GENERAL_STATE_BASE_ADDRESS_MODIFY_ENABLE
454         //! \details
455         //!     The other fields in this DWord and the following DWord are updated
456         //!     only when this bit is set.
457         enum GENERAL_STATE_BASE_ADDRESS_MODIFY_ENABLE
458         {
459             GENERAL_STATE_BASE_ADDRESS_MODIFY_ENABLE_DISABLE                 = 0, //!< Ignore the updated address.
460             GENERAL_STATE_BASE_ADDRESS_MODIFY_ENABLE_ENABLE                  = 1, //!< Modify the address.
461         };
462 
463         //! \brief COHERENCY_SETTING_MODIFY_ENABLE
464         //! \details
465         //!     All the fields in this DW is only updated when this bit is set.
466         enum COHERENCY_SETTING_MODIFY_ENABLE
467         {
468             COHERENCY_SETTING_MODIFY_ENABLE_DISABLEWRITETOTHISDW             = 0, //!< No additional details
469             COHERENCY_SETTING_MODIFY_ENABLE_ENABLEWRITETOTHISDW              = 1, //!< No additional details
470         };
471 
472         //! \brief DISABLE_SUPPORT_FOR_MULTI_GPU_ATOMICS_FOR_STATELESS_ACCESSES
473         //! \details
474         //!     Specifies whether sequential consistency of atomic memory operations are
475         //!     supported across multiple GPUs.
476         enum DISABLE_SUPPORT_FOR_MULTI_GPU_ATOMICS_FOR_STATELESS_ACCESSES
477         {
478             DISABLE_SUPPORT_FOR_MULTI_GPU_ATOMICS_FOR_STATELESS_ACCESSES_ENABLE = 0, //!< Atomic memory operations from all GPUs to the same address is sequentially consistent.
479             DISABLE_SUPPORT_FOR_MULTI_GPU_ATOMICS_FOR_STATELESS_ACCESSES_DISABLE = 1, //!< Disable multi-GPU Atomic consistency. Atomic memory operations to the same address is sequentially consistent only if the operations are from the same GPU.
480         };
481 
482         //! \brief DISABLE_SUPPORT_FOR_MULTI_GPU_PARTIAL_WRITES_FOR_STATELESS_MESSAGES
483         //! \details
484         //!     Specifies whether data-consistency on partial memory write operations
485         //!     are supported across multiple GPUs.
486         enum DISABLE_SUPPORT_FOR_MULTI_GPU_PARTIAL_WRITES_FOR_STATELESS_MESSAGES
487         {
488             DISABLE_SUPPORT_FOR_MULTI_GPU_PARTIAL_WRITES_FOR_STATELESS_MESSAGES_ENABLED = 0, //!< Enable data consistency on multi-GPU partial memory writes.
489             DISABLE_SUPPORT_FOR_MULTI_GPU_PARTIAL_WRITES_FOR_STATELESS_MESSAGES_DISABLED = 1, //!< Disable data consistency on multi-GPU partial memory writes. If multiple GPUs write different bytes of the same cacheline, the data may be corrupted.
490         };
491 
492         //! \brief SURFACE_STATE_BASE_ADDRESS_MODIFY_ENABLE
493         //! \details
494         //!     The other fields in this DWord and the following DWord are updated only
495         //!     when this bit is set.
496         enum SURFACE_STATE_BASE_ADDRESS_MODIFY_ENABLE
497         {
498             SURFACE_STATE_BASE_ADDRESS_MODIFY_ENABLE_DISABLE                 = 0, //!< Ignore the updated address.
499             SURFACE_STATE_BASE_ADDRESS_MODIFY_ENABLE_ENABLE                  = 1, //!< Modify the address.
500         };
501 
502         //! \brief DYNAMIC_STATE_BASE_ADDRESS_MODIFY_ENABLE
503         //! \details
504         //!     The other fields in this DWord and the following DWord are updated only
505         //!     when this bit is set.
506         enum DYNAMIC_STATE_BASE_ADDRESS_MODIFY_ENABLE
507         {
508             DYNAMIC_STATE_BASE_ADDRESS_MODIFY_ENABLE_DISABLE                 = 0, //!< Ignore the updated address.
509             DYNAMIC_STATE_BASE_ADDRESS_MODIFY_ENABLE_ENABLE                  = 1, //!< Modify the address.
510         };
511 
512         //! \brief INDIRECT_OBJECT_BASE_ADDRESS_MODIFY_ENABLE
513         //! \details
514         //!     The other fields in this DWord and the following DWord are updated
515         //!     only when this bit is set.
516         enum INDIRECT_OBJECT_BASE_ADDRESS_MODIFY_ENABLE
517         {
518             INDIRECT_OBJECT_BASE_ADDRESS_MODIFY_ENABLE_DISABLE               = 0, //!< Ignore the updated address.
519             INDIRECT_OBJECT_BASE_ADDRESS_MODIFY_ENABLE_ENABLE                = 1, //!< Modify the address.
520         };
521 
522         //! \brief INSTRUCTION_BASE_ADDRESS_MODIFY_ENABLE
523         //! \details
524         //!     The other fields in this DWord and the following DWord are updated
525         //!     only when this bit is set.
526         enum INSTRUCTION_BASE_ADDRESS_MODIFY_ENABLE
527         {
528             INSTRUCTION_BASE_ADDRESS_MODIFY_ENABLE_DISABLE                   = 0, //!< Ignore the updated address.
529             INSTRUCTION_BASE_ADDRESS_MODIFY_ENABLE_ENABLE                    = 1, //!< Modify the address.
530         };
531 
532         //! \brief GENERAL_STATE_BUFFER_SIZE_MODIFY_ENABLE
533         //! \details
534         //!     The fields in this DWord are updated only when this bit is set.
535         enum GENERAL_STATE_BUFFER_SIZE_MODIFY_ENABLE
536         {
537             GENERAL_STATE_BUFFER_SIZE_MODIFY_ENABLE_DISABLE                  = 0, //!< Ignore the updated bound.
538             GENERAL_STATE_BUFFER_SIZE_MODIFY_ENABLE_ENABLE                   = 1, //!< Modify the updated bound.
539         };
540 
541         //! \brief DYNAMIC_STATE_BUFFER_SIZE_MODIFY_ENABLE
542         //! \details
543         //!     FormatDesc
544         enum DYNAMIC_STATE_BUFFER_SIZE_MODIFY_ENABLE
545         {
546             DYNAMIC_STATE_BUFFER_SIZE_MODIFY_ENABLE_DISABLE                  = 0, //!< Ignore the updated bound.
547             DYNAMIC_STATE_BUFFER_SIZE_MODIFY_ENABLE_ENABLE                   = 1, //!< Modify the updated bound.
548         };
549 
550         //! \brief INDIRECT_OBJECT_BUFFER_SIZE_MODIFY_ENABLE
551         //! \details
552         //!     FormatDesc
553         enum INDIRECT_OBJECT_BUFFER_SIZE_MODIFY_ENABLE
554         {
555             INDIRECT_OBJECT_BUFFER_SIZE_MODIFY_ENABLE_DISABLE                = 0, //!< Ignore the updated bound.
556             INDIRECT_OBJECT_BUFFER_SIZE_MODIFY_ENABLE_ENABLE                 = 1, //!< Modify the updated bound.
557         };
558 
559         //! \brief INSTRUCTION_BUFFER_SIZE_MODIFY_ENABLE
560         //! \details
561         //!     FormatDesc
562         enum INSTRUCTION_BUFFER_SIZE_MODIFY_ENABLE
563         {
564             INSTRUCTION_BUFFER_SIZE_MODIFY_ENABLE_DISABLE                    = 0, //!< Ignore the updated bound.
565         };
566 
567         //! \brief BINDLESS_SURFACE_STATE_BASE_ADDRESS_MODIFY_ENABLE
568         //! \details
569         //!     The other fields in this DWord and the following two DWords are
570         //!     updated only when this bit is set.
571         enum BINDLESS_SURFACE_STATE_BASE_ADDRESS_MODIFY_ENABLE
572         {
573             BINDLESS_SURFACE_STATE_BASE_ADDRESS_MODIFY_ENABLE_DISABLE        = 0, //!< Ignore the updated address
574             BINDLESS_SURFACE_STATE_BASE_ADDRESS_MODIFY_ENABLE_ENABLE         = 1, //!< Modify the address
575         };
576 
577         //! \brief BINDLESS_SAMPLER_STATE_BASE_ADDRESS_MODIFY_ENABLE
578         //! \details
579         //!     The other fields in this DWord and the following two DWords are
580         //!     updated only when this bit is set.
581         enum BINDLESS_SAMPLER_STATE_BASE_ADDRESS_MODIFY_ENABLE
582         {
583             BINDLESS_SAMPLER_STATE_BASE_ADDRESS_MODIFY_ENABLE_DISABLE        = 0, //!< Ignore the updated address
584             BINDLESS_SAMPLER_STATE_BASE_ADDRESS_MODIFY_ENABLE_ENABLE         = 1, //!< Modify the address
585         };
586 
587         //! \name Initializations
588 
589         //! \brief Explicit member initialization function
590         STATE_BASE_ADDRESS_CMD();
591 
592         static const size_t dwSize = 22;
593         static const size_t byteSize = 88;
594     };
595 
596     //!
597     //! \brief _3DSTATE_CHROMA_KEY
598     //! \details
599     //!     The 3DSTATE_CHROMA_KEY instruction is used to program texture
600     //!     color/chroma-key key values. A table containing four set of values is
601     //!     supported. The ChromaKey Index sampler state variable is used to select
602     //!     which table entry is associated with the map. Texture chromakey
603     //!     functions are enabled and controlled via use of the ChromaKey Enable
604     //!     texture sampler state variable.Texture Color Key (keying on a paletted
605     //!     texture index) is not supported.
606     //!
607     //!     Workaround
608     //!
609     //!     : This command must be followed by a PIPE_CONTROL with CS Stall bit
610     //!     set.,
611     //!
612     struct _3DSTATE_CHROMA_KEY_CMD
613     {
614         union
615         {
616             struct
617             {
618                 uint32_t                 DwordLength                                      : __CODEGEN_BITFIELD( 0,  7)    ; //!< DWORD_LENGTH
619                 uint32_t                 Reserved8                                        : __CODEGEN_BITFIELD( 8, 15)    ; //!< Reserved
620                 uint32_t                 _3DCommandSubOpcode                              : __CODEGEN_BITFIELD(16, 23)    ; //!< _3D_COMMAND_SUB_OPCODE
621                 uint32_t                 _3DCommandOpcode                                 : __CODEGEN_BITFIELD(24, 26)    ; //!< _3D_COMMAND_OPCODE
622                 uint32_t                 CommandSubtype                                   : __CODEGEN_BITFIELD(27, 28)    ; //!< COMMAND_SUBTYPE
623                 uint32_t                 CommandType                                      : __CODEGEN_BITFIELD(29, 31)    ; //!< COMMAND_TYPE
624             };
625             uint32_t                     Value;
626         } DW0;
627         union
628         {
629             struct
630             {
631                 uint32_t                 Reserved32                                       : __CODEGEN_BITFIELD( 0, 29)    ; //!< Reserved
632                 uint32_t                 ChromakeyTableIndex                              : __CODEGEN_BITFIELD(30, 31)    ; //!< ChromaKey Table Index
633             };
634             uint32_t                     Value;
635         } DW1;
636         union
637         {
638             struct
639             {
640                 uint32_t                 ChromakeyLowValue                                                                ; //!< ChromaKey Low Value
641             };
642             uint32_t                     Value;
643         } DW2;
644         union
645         {
646             struct
647             {
648                 uint32_t                 ChromakeyHighValue                                                               ; //!< ChromaKey High Value
649             };
650             uint32_t                     Value;
651         } DW3;
652 
653         //! \name Local enumerations
654 
655         enum _3D_COMMAND_SUB_OPCODE
656         {
657             _3D_COMMAND_SUB_OPCODE_3DSTATECHROMAKEY                          = 4, //!< No additional details
658         };
659 
660         enum _3D_COMMAND_OPCODE
661         {
662             _3D_COMMAND_OPCODE_3DSTATENONPIPELINED                           = 1, //!< No additional details
663         };
664 
665         enum COMMAND_SUBTYPE
666         {
667             COMMAND_SUBTYPE_GFXPIPE3D                                        = 3, //!< No additional details
668         };
669 
670         enum COMMAND_TYPE
671         {
672             COMMAND_TYPE_GFXPIPE                                             = 3, //!< No additional details
673         };
674 
675         //! \name Initializations
676 
677         //! \brief Explicit member initialization function
678         _3DSTATE_CHROMA_KEY_CMD();
679 
680         static const size_t dwSize = 4;
681         static const size_t byteSize = 16;
682     };
683 
684     //!
685     //! \brief STATE_SIP
686     //! \details
687     //!     The STATE_SIP command specifies the starting instruction location of the
688     //!     System Routine that is shared by all threads in execution.
689     //!
690     //!     Workaround
691     //!
692     //!     : This command must be followed by a PIPE_CONTROL with CS Stall bit
693     //!     set.,
694     //!
695     struct STATE_SIP_CMD
696     {
697         union
698         {
699             struct
700             {
701                 uint32_t                 DwordLength                                      : __CODEGEN_BITFIELD( 0,  7)    ; //!< DWORD_LENGTH
702                 uint32_t                 Reserved8                                        : __CODEGEN_BITFIELD( 8, 15)    ; //!< Reserved
703                 uint32_t                 _3DCommandSubOpcode                              : __CODEGEN_BITFIELD(16, 23)    ; //!< _3D_COMMAND_SUB_OPCODE
704                 uint32_t                 _3DCommandOpcode                                 : __CODEGEN_BITFIELD(24, 26)    ; //!< _3D_COMMAND_OPCODE
705                 uint32_t                 CommandSubtype                                   : __CODEGEN_BITFIELD(27, 28)    ; //!< COMMAND_SUBTYPE
706                 uint32_t                 CommandType                                      : __CODEGEN_BITFIELD(29, 31)    ; //!< COMMAND_TYPE
707             };
708             uint32_t                     Value;
709         } DW0;
710         union
711         {
712             struct
713             {
714                 uint64_t                 Reserved32                                       : __CODEGEN_BITFIELD( 0,  3)    ; //!< Reserved
715                 uint64_t                 SystemInstructionPointer                         : __CODEGEN_BITFIELD( 4, 63)    ; //!< System Instruction Pointer
716             };
717             uint32_t                     Value[2];
718         } DW1_2;
719 
720         //! \name Local enumerations
721 
722         enum _3D_COMMAND_SUB_OPCODE
723         {
724             _3D_COMMAND_SUB_OPCODE_STATESIP                                  = 2, //!< No additional details
725         };
726 
727         enum _3D_COMMAND_OPCODE
728         {
729             _3D_COMMAND_OPCODE_GFXPIPENONPIPELINED                           = 1, //!< No additional details
730         };
731 
732         enum COMMAND_SUBTYPE
733         {
734             COMMAND_SUBTYPE_GFXPIPECOMMON                                    = 0, //!< No additional details
735         };
736 
737         enum COMMAND_TYPE
738         {
739             COMMAND_TYPE_GFXPIPE                                             = 3, //!< No additional details
740         };
741 
742         //! \name Initializations
743 
744         //! \brief Explicit member initialization function
745         STATE_SIP_CMD();
746 
747         static const size_t dwSize = 3;
748         static const size_t byteSize = 12;
749     };
750 
751     //!
752     //! \brief _3DSTATE_BINDING_TABLE_POOL_ALLOC
753     //! \details
754     //!     This command is to program the base address and size of the binding
755     //!     table pool. The address to fetch the binding table is based on the
756     //!     Binding Table Pool Base Address and the binding table pointer if the
757     //!     Binding Table Pool is enabled. Otherwise the binding table pointer is an
758     //!     offset from the Surface Base Address.
759     //!
760     struct _3DSTATE_BINDING_TABLE_POOL_ALLOC_CMD
761     {
762         union
763         {
764             struct
765             {
766                 uint32_t                 DwordLength                                      : __CODEGEN_BITFIELD( 0,  7)    ; //!< DWORD_LENGTH
767                 uint32_t                 Reserved8                                        : __CODEGEN_BITFIELD( 8, 15)    ; //!< Reserved
768                 uint32_t                 _3DCommandSubOpcode                              : __CODEGEN_BITFIELD(16, 23)    ; //!< _3D_COMMAND_SUB_OPCODE
769                 uint32_t                 _3DCommandOpcode                                 : __CODEGEN_BITFIELD(24, 26)    ; //!< _3D_COMMAND_OPCODE
770                 uint32_t                 CommandSubtype                                   : __CODEGEN_BITFIELD(27, 28)    ; //!< COMMAND_SUBTYPE
771                 uint32_t                 CommandType                                      : __CODEGEN_BITFIELD(29, 31)    ; //!< COMMAND_TYPE
772             };
773             uint32_t                     Value;
774         } DW0;
775         union
776         {
777             struct
778             {
779                 uint64_t                 SurfaceObjectControlState                        : __CODEGEN_BITFIELD( 0,  6)    ; //!< Surface Object Control State
780                 uint64_t                 Reserved39                                       : __CODEGEN_BITFIELD( 7, 11)    ; //!< Reserved
781                 uint64_t                 BindingTablePoolBaseAddress                      : __CODEGEN_BITFIELD(12, 63)    ; //!< Binding Table Pool Base Address
782             };
783             uint32_t                     Value[2];
784         } DW1_2;
785         union
786         {
787             struct
788             {
789                 uint32_t                 Reserved96                                       : __CODEGEN_BITFIELD( 0, 11)    ; //!< Reserved
790                 uint32_t                 BindingTablePoolBufferSize                       : __CODEGEN_BITFIELD(12, 31)    ; //!< BINDING_TABLE_POOL_BUFFER_SIZE
791             };
792             uint32_t                     Value;
793         } DW3;
794 
795         //! \name Local enumerations
796 
797         enum _3D_COMMAND_SUB_OPCODE
798         {
799             _3D_COMMAND_SUB_OPCODE_3DSTATEBINDINGTABLEPOOLALLOC              = 25, //!< No additional details
800         };
801 
802         enum _3D_COMMAND_OPCODE
803         {
804             _3D_COMMAND_OPCODE_3DSTATENONPIPELINED                           = 1, //!< No additional details
805         };
806 
807         enum COMMAND_SUBTYPE
808         {
809             COMMAND_SUBTYPE_GFXPIPE3D                                        = 3, //!< No additional details
810         };
811 
812         enum COMMAND_TYPE
813         {
814             COMMAND_TYPE_GFXPIPE                                             = 3, //!< No additional details
815         };
816 
817         //! \brief BINDING_TABLE_POOL_BUFFER_SIZE
818         //! \details
819         //!     This field specifies the size of the buffer in 4K pages. Any access
820         //!     which straddle or go past the end of the buffer will return 0.
821         enum BINDING_TABLE_POOL_BUFFER_SIZE
822         {
823             BINDING_TABLE_POOL_BUFFER_SIZE_NOVALIDDATA                       = 0, //!< There is no valid data in the buffer
824         };
825 
826         //! \name Initializations
827 
828         //! \brief Explicit member initialization function
829         _3DSTATE_BINDING_TABLE_POOL_ALLOC_CMD();
830 
831         static const size_t dwSize = 4;
832         static const size_t byteSize = 16;
833     };
834 
835     //!
836     //! \brief COMPUTE_WALKER
837     //! \details
838     //!     COMPUTE_WALKER spawns threadgroups in 1, 2, or 3 dimensions (X, Y, Z).
839     //!     Each threadgroup is described by Interface Descriptor in this command.
840     //!       Each dispatched thread has a standard payload delivered in R0,
841     //!     including the Indirect Address to fetch the thread's parameters.
842     //!       After the Walker completes dispatching its threads and those threads
843     //!     have completed running, a PostSync operation can write a completion code
844     //!     or a timestamp.
845     //!
846     //!     If the threads spawned by this command are required to observe memory
847     //!     writes performed by threads spawned from a previous command, and if
848     //!     those threads did not perform a Memory Fence before they exited, then
849     //!     software must precede this command with a PIPE_CONTROL with Dataport
850     //!     Flush control.
851     //!
852     struct COMPUTE_WALKER_CMD
853     {
854         union
855         {
856             struct
857             {
858                 uint32_t                 DwordLength                                      : __CODEGEN_BITFIELD( 0,  7)    ; //!< DWORD_LENGTH
859                 uint32_t                 PredicateEnable                                  : __CODEGEN_BITFIELD( 8,  8)    ; //!< Predicate Enable
860                 uint32_t                 WorkloadPartitionEnable                          : __CODEGEN_BITFIELD( 9,  9)    ; //!< Workload Partition Enable
861                 uint32_t                 IndirectParameterEnable                          : __CODEGEN_BITFIELD(10, 10)    ; //!< Indirect Parameter Enable
862                 uint32_t                 Reserved11                                       : __CODEGEN_BITFIELD(11, 12)    ; //!< Reserved
863                 uint32_t                 DispatchComplete                                 : __CODEGEN_BITFIELD(13, 13)    ; //!< Dispatch Complete
864                 uint32_t                 SystolicModeEnable                               : __CODEGEN_BITFIELD(14, 14)    ; //!< Systolic Mode Enable
865                 uint32_t                 CfeSubopcodeVariant                              : __CODEGEN_BITFIELD(15, 17)    ; //!< CFE_SUBOPCODE_VARIANT
866                 uint32_t                 CfeSubopcode                                     : __CODEGEN_BITFIELD(18, 23)    ; //!< CFE_SUBOPCODE
867                 uint32_t                 ComputeCommandOpcode                             : __CODEGEN_BITFIELD(24, 26)    ; //!< COMPUTE_COMMAND_OPCODE
868                 uint32_t                 Pipeline                                         : __CODEGEN_BITFIELD(27, 28)    ; //!< PIPELINE
869                 uint32_t                 CommandType                                      : __CODEGEN_BITFIELD(29, 31)    ; //!< COMMAND_TYPE
870             };
871             uint32_t                     Value;
872         } DW0;
873         union
874         {
875             struct
876             {
877                 uint32_t                 Reserved32                                       : __CODEGEN_BITFIELD( 0,  7)    ; //!< Reserved
878                 uint32_t                 DebugObjectId                                    : __CODEGEN_BITFIELD( 8, 31)    ; //!< Debug: Object ID
879             };
880             uint32_t                     Value;
881         } DW1;
882         union
883         {
884             struct
885             {
886                 uint32_t                 IndirectDataLength                               : __CODEGEN_BITFIELD( 0, 16)    ; //!< Indirect Data Length
887                 uint32_t                 L3PrefetchDisable                                : __CODEGEN_BITFIELD(17, 17)    ; //!< L3 prefetch disable
888                 uint32_t                 PartitionDispatchParameter                       : __CODEGEN_BITFIELD(18, 29)    ; //!< Partition Dispatch Parameter
889                 uint32_t                 PartitionType                                    : __CODEGEN_BITFIELD(30, 31)    ; //!< PARTITION_TYPE
890             };
891             uint32_t                     Value;
892         } DW2;
893         union
894         {
895             struct
896             {
897                 uint32_t                 Reserved96                                       : __CODEGEN_BITFIELD( 0,  5)    ; //!< Reserved
898                 uint32_t                 IndirectDataStartAddress                         : __CODEGEN_BITFIELD( 6, 31)    ; //!< Indirect Data Start Address
899             };
900             uint32_t                     Value;
901         } DW3;
902         union
903         {
904             struct
905             {
906                 uint32_t                 Reserved128                                      : __CODEGEN_BITFIELD( 0, 16)    ; //!< Reserved
907                 uint32_t                 MessageSIMD                                      : __CODEGEN_BITFIELD(17, 18)    ; //!< MESSAGE_SIMD
908                 uint32_t                 TileLayout                                       : __CODEGEN_BITFIELD(19, 21)    ; //!< TILE_LAYOUT
909                 uint32_t                 WalkOrder                                        : __CODEGEN_BITFIELD(22, 24)    ; //!< WALK_ORDER
910                 uint32_t                 EmitInlineParameter                              : __CODEGEN_BITFIELD(25, 25)    ; //!< Emit Inline Parameter
911                 uint32_t                 EmitLocal                                        : __CODEGEN_BITFIELD(26, 28)    ; //!< EMIT_LOCAL
912                 uint32_t                 GenerateLocalId                                  : __CODEGEN_BITFIELD(29, 29)    ; //!< Generate Local ID
913                 uint32_t                 SIMDSize                                         : __CODEGEN_BITFIELD(30, 31)    ; //!< SIMD_SIZE
914             };
915             uint32_t                     Value;
916         } DW4;
917         union
918         {
919             struct
920             {
921                 uint32_t                 ExecutionMask                                                                    ; //!< Execution Mask
922             };
923             uint32_t                     Value;
924         } DW5;
925         union
926         {
927             struct
928             {
929                 uint32_t                 LocalXMaximum                                    : __CODEGEN_BITFIELD( 0,  9)    ; //!< Local X Maximum
930                 uint32_t                 LocalYMaximum                                    : __CODEGEN_BITFIELD(10, 19)    ; //!< Local Y Maximum
931                 uint32_t                 LocalZMaximum                                    : __CODEGEN_BITFIELD(20, 29)    ; //!< Local Z Maximum
932                 uint32_t                 Reserved222                                      : __CODEGEN_BITFIELD(30, 31)    ; //!< Reserved
933             };
934             uint32_t                     Value;
935         } DW6;
936         union
937         {
938             struct
939             {
940                 uint32_t                 ThreadGroupIDXDimension                                                          ; //!< Thread Group ID X Dimension
941             };
942             uint32_t                     Value;
943         } DW7;
944         union
945         {
946             struct
947             {
948                 uint32_t                 ThreadGroupIDYDimension                                                          ; //!< Thread Group ID Y Dimension
949             };
950             uint32_t                     Value;
951         } DW8;
952         union
953         {
954             struct
955             {
956                 uint32_t                 ThreadGroupIDZDimension                                                          ; //!< Thread Group ID Z Dimension
957             };
958             uint32_t                     Value;
959         } DW9;
960         union
961         {
962             struct
963             {
964                 uint32_t                 ThreadGroupIDStartingX                                                           ; //!< Thread Group ID Starting X
965             };
966             uint32_t                     Value;
967         } DW10;
968         union
969         {
970             struct
971             {
972                 uint32_t                 ThreadGroupIDStartingY                                                           ; //!< Thread Group ID Starting Y
973             };
974             uint32_t                     Value;
975         } DW11;
976         union
977         {
978             struct
979             {
980                 uint32_t                 ThreadGroupIDStartingZ                                                           ; //!< Thread Group ID Starting Z
981             };
982             uint32_t                     Value;
983         } DW12;
984         union
985         {
986             struct
987             {
988                 uint32_t                 PartitionId                                                                      ; //!< Partition ID
989             };
990             uint32_t                     Value;
991         } DW13;
992         union
993         {
994             struct
995             {
996                 uint32_t                 PartitionSize                                                                    ; //!< Partition Size
997             };
998             uint32_t                     Value;
999         } DW14;
1000         union
1001         {
1002             struct
1003             {
1004                 uint32_t                 PreemptX                                                                         ; //!< Preempt X
1005             };
1006             uint32_t                     Value;
1007         } DW15;
1008         union
1009         {
1010             struct
1011             {
1012                 uint32_t                 PreemptY                                                                         ; //!< Preempt Y
1013             };
1014             uint32_t                     Value;
1015         } DW16;
1016         union
1017         {
1018             struct
1019             {
1020                 uint32_t                 PreemptZ                                                                         ; //!< Preempt Z
1021             };
1022             uint32_t                     Value;
1023         } DW17;
1024         union
1025         {
1026             struct
1027             {
1028                 uint32_t                 WalkerId                                         : __CODEGEN_BITFIELD( 0,  3)    ; //!< Walker ID
1029                 uint32_t                 Reserved580                                      : __CODEGEN_BITFIELD( 4,  7)    ; //!< Reserved
1030                 uint32_t                 OverDispatchTgCount                              : __CODEGEN_BITFIELD( 8, 23)    ; //!< Over dispatch TG count
1031                 uint32_t                 Reserved600                                      : __CODEGEN_BITFIELD(24, 31)    ; //!< Reserved
1032             };
1033             uint32_t                     Value;
1034         } DW18;
1035 
1036         //!
1037         //! \brief INTERFACE_DESCRIPTOR_DATA
1038         //! \details
1039         //!
1040         //!
1041         struct INTERFACE_DESCRIPTOR_DATA_CMD
1042         {
1043             union
1044             {
1045                 struct
1046                 {
1047                     uint32_t                 Reserved0                                        : __CODEGEN_BITFIELD( 0,  5)    ; //!< Reserved
1048                     uint32_t                 KernelStartPointer                               : __CODEGEN_BITFIELD( 6, 31)    ; //!< Kernel Start Pointer
1049                 };
1050                 uint32_t                     Value;
1051             } DW0;
1052             union
1053             {
1054                 struct
1055                 {
1056                     uint32_t                 Reserved32                                                                       ; //!< Reserved
1057                 };
1058                 uint32_t                     Value;
1059             } DW1;
1060             union
1061             {
1062                 struct
1063                 {
1064                     uint32_t                 Reserved64                                       : __CODEGEN_BITFIELD( 0,  6)    ; //!< Reserved
1065                     uint32_t                 SoftwareExceptionEnable                          : __CODEGEN_BITFIELD( 7,  7)    ; //!< Software Exception Enable
1066                     uint32_t                 Reserved72                                       : __CODEGEN_BITFIELD( 8, 10)    ; //!< Reserved
1067                     uint32_t                 MaskStackExceptionEnable                         : __CODEGEN_BITFIELD(11, 11)    ; //!< Mask Stack Exception Enable
1068                     uint32_t                 Reserved76                                       : __CODEGEN_BITFIELD(12, 12)    ; //!< Reserved
1069                     uint32_t                 IllegalOpcodeExceptionEnable                     : __CODEGEN_BITFIELD(13, 13)    ; //!< Illegal Opcode Exception Enable
1070                     uint32_t                 Reserved78                                       : __CODEGEN_BITFIELD(14, 15)    ; //!< Reserved
1071                     uint32_t                 FloatingPointMode                                : __CODEGEN_BITFIELD(16, 16)    ; //!< FLOATING_POINT_MODE
1072                     uint32_t                 Reserved81                                       : __CODEGEN_BITFIELD(17, 17)    ; //!< Reserved
1073                     uint32_t                 SingleProgramFlow                                : __CODEGEN_BITFIELD(18, 18)    ; //!< SINGLE_PROGRAM_FLOW
1074                     uint32_t                 DenormMode                                       : __CODEGEN_BITFIELD(19, 19)    ; //!< DENORM_MODE
1075                     uint32_t                 ThreadPreemption                                 : __CODEGEN_BITFIELD(20, 20)    ; //!< THREAD_PREEMPTION
1076                     uint32_t                 Reserved85                                       : __CODEGEN_BITFIELD(21, 31)    ; //!< Reserved
1077                 };
1078                 uint32_t                     Value;
1079             } DW2;
1080             union
1081             {
1082                 struct
1083                 {
1084                     uint32_t                 Reserved96                                       : __CODEGEN_BITFIELD( 0,  1)    ; //!< Reserved
1085                     uint32_t                 SamplerCount                                     : __CODEGEN_BITFIELD( 2,  4)    ; //!< SAMPLER_COUNT
1086                     uint32_t                 SamplerStatePointer                              : __CODEGEN_BITFIELD( 5, 31)    ; //!< Sampler State Pointer
1087                 };
1088                 uint32_t                     Value;
1089             } DW3;
1090             union
1091             {
1092                 struct
1093                 {
1094                     uint32_t                 BindingTableEntryCount                           : __CODEGEN_BITFIELD( 0,  4)    ; //!< BINDING_TABLE_ENTRY_COUNT
1095                     uint32_t                 BindingTablePointer                              : __CODEGEN_BITFIELD( 5, 20)    ; //!< Binding Table Pointer
1096                     uint32_t                 Reserved149                                      : __CODEGEN_BITFIELD(21, 31)    ; //!< Reserved
1097                 };
1098                 uint32_t                     Value;
1099             } DW4;
1100             union
1101             {
1102                 struct
1103                 {
1104                     uint32_t                 NumberOfThreadsInGpgpuThreadGroup                : __CODEGEN_BITFIELD( 0,  9)    ; //!< Number of Threads in GPGPU Thread Group
1105                     uint32_t                 Reserved170                                      : __CODEGEN_BITFIELD(10, 12)    ; //!< Reserved
1106                     uint32_t                 ThreadGroupForwardProgressGuarantee              : __CODEGEN_BITFIELD(13, 13)    ; //!< THREAD_GROUP_FORWARD_PROGRESS_GUARANTEE
1107                     uint32_t                 Reserved174                                      : __CODEGEN_BITFIELD(14, 15)    ; //!< Reserved
1108                     uint32_t                 SharedLocalMemorySize                            : __CODEGEN_BITFIELD(16, 20)    ; //!< SHARED_LOCAL_MEMORY_SIZE
1109                     uint32_t                 Reserved181                                      : __CODEGEN_BITFIELD(21, 21)    ; //!< Reserved
1110                     uint32_t                 RoundingMode                                     : __CODEGEN_BITFIELD(22, 23)    ; //!< ROUNDING_MODE
1111                     uint32_t                 Reserved184                                      : __CODEGEN_BITFIELD(24, 25)    ; //!< Reserved
1112                     uint32_t                 ThreadGroupDispatchSize                          : __CODEGEN_BITFIELD(26, 27)    ; //!< THREAD_GROUP_DISPATCH_SIZE
1113                     uint32_t                 NumberOfBarriers                                 : __CODEGEN_BITFIELD(28, 30)    ; //!< NUMBER_OF_BARRIERS
1114                     uint32_t                 BtdMode                                          : __CODEGEN_BITFIELD(31, 31)    ; //!< BTD_MODE
1115                 };
1116                 uint32_t                     Value;
1117             } DW5;
1118             union
1119             {
1120                 struct
1121                 {
1122                     uint32_t                 Reserved192                                                                      ; //!< Reserved
1123                 };
1124                 uint32_t                     Value;
1125             } DW6;
1126             union
1127             {
1128                 struct
1129                 {
1130                     uint32_t                 PreferredSlmAllocationSizePerSubslice            : __CODEGEN_BITFIELD( 0,  3)    ; //!< PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE
1131                     uint32_t                 Reserved228                                      : __CODEGEN_BITFIELD( 4, 31)    ; //!< Reserved
1132                 };
1133                 uint32_t                     Value;
1134             } DW7;
1135 
1136             //! \name Local enumerations
1137 
//! \brief FLOATING_POINT_MODE
//! \details
//!     Specifies the floating point mode used by the dispatched thread.
enum FLOATING_POINT_MODE
{
    FLOATING_POINT_MODE_IEEE_754  = 0, //!< No additional details
    FLOATING_POINT_MODE_ALTERNATE = 1, //!< No additional details
};
1146 
//! \brief SINGLE_PROGRAM_FLOW
//! \details
//!     Specifies whether the kernel program has a single program flow (SIMDnxm
//!     with m = 1) or multiple program flows (SIMDnxm with m > 1).
enum SINGLE_PROGRAM_FLOW
{
    SINGLE_PROGRAM_FLOW_MULTIPLE = 0, //!< No additional details
    SINGLE_PROGRAM_FLOW_SINGLE   = 1, //!< No additional details
};
1156 
//! \brief DENORM_MODE
//! \details
//!     This field specifies how Float denormalized numbers are handled in the
//!     dispatched thread.
enum DENORM_MODE
{
    DENORM_MODE_FTZ         = 0, //!< Float denorms will be flushed to zero when appearing as inputs; denorms will never come out of instructions. Double precision float and half precision float numbers are not flushed to zero.
    DENORM_MODE_SETBYKERNEL = 1, //!< Denorms will be handled by the kernel.
};
1166 
//! \brief THREAD_PREEMPTION
//! \details
//!     This field specifies whether, when dispatched, the thread is allowed to
//!     stop in the middle on receiving a mid-thread pre-emption request.
enum THREAD_PREEMPTION
{
    THREAD_PREEMPTION_DISABLE = 0, //!< Thread is pre-empted only in case of page-fault.
    THREAD_PREEMPTION_ENABLE  = 1, //!< Thread is pre-empted on receiving pre-emption indication.
};
1176 
//! \brief SAMPLER_COUNT
//! \details
//!     Specifies how many samplers (in multiples of 4) the kernel uses. Used
//!     only for prefetching the associated sampler state entries.
enum SAMPLER_COUNT
{
    SAMPLER_COUNT_NOSAMPLERSUSED             = 0, //!< No samplers used
    SAMPLER_COUNT_BETWEEN1AND4SAMPLERSUSED   = 1, //!< Between 1 and 4 samplers used
    SAMPLER_COUNT_BETWEEN5AND8SAMPLERSUSED   = 2, //!< Between 5 and 8 samplers used
    SAMPLER_COUNT_BETWEEN9AND12SAMPLERSUSED  = 3, //!< Between 9 and 12 samplers used
    SAMPLER_COUNT_BETWEEN13AND16SAMPLERSUSED = 4, //!< Between 13 and 16 samplers used
};
1189 
//! \brief BINDING_TABLE_ENTRY_COUNT
//! \details
//!     Specifies how many binding table entries the kernel uses. Used only for
//!     prefetching of the binding table entries and associated surface state.
enum BINDING_TABLE_ENTRY_COUNT
{
    BINDING_TABLE_ENTRY_COUNT_PREFETCHDISABLED = 0, //!< Prefetch disabled; the only value given a name here
};
1198 
//! \brief THREAD_GROUP_FORWARD_PROGRESS_GUARANTEE
//! \details
//!     Set by the kernel if TG requires synchronization at memory. If this bit
//!     is set, HW must enable the barrier when mid thread preemption is
//!     enabled. Values for DW5 bit 13 (ThreadGroupForwardProgressGuarantee).
enum THREAD_GROUP_FORWARD_PROGRESS_GUARANTEE
{
    THREAD_GROUP_FORWARD_PROGRESS_GUARANTEE_DISABLE = 0, //!< CFEG HW does not set an implicit barrier when WMTP is enabled.
    THREAD_GROUP_FORWARD_PROGRESS_GUARANTEE_ENABLE  = 1, //!< CFEG HW forces an implicit barrier when WMTP is enabled.
};
1209 
//! \brief SHARED_LOCAL_MEMORY_SIZE
//! \details
//!     This field indicates how much Shared Local Memory the thread group
//!     requires.
//!     If the barriers are not enabled, HW will enable at least 1 barrier for
//!     Mid thread preemption to work.
//!     Values for DW5 bits 16..20 (SharedLocalMemorySize). Note that the
//!     encodings are not ordered by size: 24K/48K follow 64K.
enum SHARED_LOCAL_MEMORY_SIZE
{
    SHARED_LOCAL_MEMORY_SIZE_SLMENCODES0K   = 0,  //!< 0K
    SHARED_LOCAL_MEMORY_SIZE_SLMENCODES1K   = 1,  //!< 1K
    SHARED_LOCAL_MEMORY_SIZE_SLMENCODES2K   = 2,  //!< 2K
    SHARED_LOCAL_MEMORY_SIZE_SLMENCODES4K   = 3,  //!< 4K
    SHARED_LOCAL_MEMORY_SIZE_SLMENCODES8K   = 4,  //!< 8K
    SHARED_LOCAL_MEMORY_SIZE_SLMENCODES16K  = 5,  //!< 16K
    SHARED_LOCAL_MEMORY_SIZE_SLMENCODES32K  = 6,  //!< 32K
    SHARED_LOCAL_MEMORY_SIZE_SLMENCODES64K  = 7,  //!< 64K
    SHARED_LOCAL_MEMORY_SIZE_SLMENCODES24K  = 8,  //!< 24K
    SHARED_LOCAL_MEMORY_SIZE_SLMENCODES48K  = 9,  //!< 48K
    SHARED_LOCAL_MEMORY_SIZE_SLMENCODES96K  = 10, //!< 96K
    SHARED_LOCAL_MEMORY_SIZE_SLMENCODES128K = 11, //!< 128K
    SHARED_LOCAL_MEMORY_SIZE_SLMENCODES192K = 12, //!< 192K
    SHARED_LOCAL_MEMORY_SIZE_SLMENCODES256K = 13, //!< 256K
    SHARED_LOCAL_MEMORY_SIZE_SLMENCODES384K = 14, //!< 384K
};
1234 
//! \brief ROUNDING_MODE
//! \details
//!     Values for DW5 bits 22..23 (RoundingMode).
enum ROUNDING_MODE
{
    ROUNDING_MODE_RTNE = 0, //!< Round to Nearest Even
    ROUNDING_MODE_RU   = 1, //!< Round toward +Infinity
    ROUNDING_MODE_RD   = 2, //!< Round toward -Infinity
    ROUNDING_MODE_RTZ  = 3, //!< Round toward Zero
};
1242 
//! \brief THREAD_GROUP_DISPATCH_SIZE
//! \details
//!     Provides a mechanism for Software to tune the settings based on WLs
//!     to evenly distribute the threads across the entire m/c.
//!     The recommended setting is just guidance and not a programming
//!     requirement.
//!     Values for DW5 bits 26..27 (ThreadGroupDispatchSize); a larger
//!     encoding selects a smaller dispatch size.
enum THREAD_GROUP_DISPATCH_SIZE
{
    THREAD_GROUP_DISPATCH_SIZE_TGSIZE8 = 0, //!< The dispatch size is 8 thread groups.
    THREAD_GROUP_DISPATCH_SIZE_TGSIZE4 = 1, //!< The dispatch size is 4 thread groups.
    THREAD_GROUP_DISPATCH_SIZE_TGSIZE2 = 2, //!< The dispatch size is 2 thread groups.
    THREAD_GROUP_DISPATCH_SIZE_TGSIZE1 = 3, //!< The dispatch size is 1 thread group.
};
1256 
//! \brief NUMBER_OF_BARRIERS
//! \details
//!     Specifies number of barriers in the threadgroup.
//!     Values for DW5 bits 28..30 (NumberOfBarriers); the encoding is an
//!     index, not the barrier count itself.
enum NUMBER_OF_BARRIERS
{
    NUMBER_OF_BARRIERS_NONE = 0, //!< No barriers
    NUMBER_OF_BARRIERS_B1   = 1, //!< 1 barrier
    NUMBER_OF_BARRIERS_B2   = 2, //!< 2 barriers
    NUMBER_OF_BARRIERS_B4   = 3, //!< 4 barriers
    NUMBER_OF_BARRIERS_B8   = 4, //!< 8 barriers
    NUMBER_OF_BARRIERS_B16  = 5, //!< 16 barriers
    NUMBER_OF_BARRIERS_B24  = 6, //!< 24 barriers
    NUMBER_OF_BARRIERS_B32  = 7, //!< 32 barriers
};
1271 
//! \brief BTD_MODE
//! \details
//!     If this field is valid, it means that the Compute pipeline is
//!     dispatching BTD threads. Values for DW5 bit 31 (BtdMode).
enum BTD_MODE
{
    BTD_MODE_DISABLE = 0, //!< Normal thread dispatch
    BTD_MODE_ENABLE  = 1, //!< When walker dispatched compute kernels either perform messages to the Bindless Thread Dispatch (BTD) shared function or Ray Tracing HW shared function, this bit must be enabled. When this bit is enabled, neither SLM nor barrier is available.
};
1281 
//! \brief PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE
//! \details
//!     For products where SLM and Subslice L1 cache share a common,
//!     re-partitionable RAM, this field indicates the preferred SLM size per
//!     Subslice for this dispatch. The SLM size programmed here should be >=
//!     the per thread-group SLM size programmed in DW[5][20:16].
//!     Values for DW7 bits 0..3 (PreferredSlmAllocationSizePerSubslice).
enum PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE
{
    PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODES0K   = 0,  //!< 0K
    PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODES16K  = 1,  //!< 16K
    PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODES32K  = 2,  //!< 32K
    PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODES64K  = 3,  //!< 64K
    PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODES96K  = 4,  //!< 96K
    PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODES128K = 5,  //!< 128K
    PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODES160K = 6,  //!< 160K
    PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODES192K = 7,  //!< 192K
    PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODES224K = 8,  //!< 224K
    PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODES256K = 9,  //!< 256K
    PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODES384K = 10, //!< 384K
};
1302 
1303             //! \name Initializations
1304 
1305             //! \brief Explicit member initialization function
1306             INTERFACE_DESCRIPTOR_DATA_CMD();
1307 
1308             static const size_t dwSize = 8;
1309             static const size_t byteSize = 32;
1310         } InterfaceDescriptor;
1311 
1312         //!
1313         //! \brief POSTSYNC_DATA
1314         //! \details
1315         //!
1316         //!
1317         struct POSTSYNC_DATA_CMD
1318         {
1319             union
1320             {
1321                 struct
1322                 {
1323                     uint32_t                 Operation                                        : __CODEGEN_BITFIELD( 0,  1)    ; //!< OPERATION
1324                     uint32_t                 DataportPipelineFlush                            : __CODEGEN_BITFIELD( 2,  2)    ; //!< Dataport Pipeline Flush
1325                     uint32_t                 Reserved3                                        : __CODEGEN_BITFIELD( 3,  3)    ; //!< Reserved
1326                     uint32_t                 Mocs                                             : __CODEGEN_BITFIELD( 4, 10)    ; //!< MOCS
1327                     uint32_t                 SystemMemoryFenceRequest                         : __CODEGEN_BITFIELD(11, 11)    ; //!< System Memory Fence Request
1328                     uint32_t                 DataportSubsliceCacheFlush                       : __CODEGEN_BITFIELD(12, 12)    ; //!< Dataport Subslice Cache Flush
1329                     uint32_t                 Reserved13                                       : __CODEGEN_BITFIELD(13, 31)    ; //!< Reserved
1330                 };
1331                 uint32_t                     Value;
1332             } DW0;
1333             union
1334             {
1335                 struct
1336                 {
1337                     uint64_t                 DestinationAddress                                                               ; //!< Destination Address
1338                 };
1339                 uint32_t                     Value[2];
1340             } DW1_2;
1341             union
1342             {
1343                 struct
1344                 {
1345                     uint64_t                 ImmediateData                                                                    ; //!< Immediate Data
1346                 };
1347                 uint32_t                     Value[2];
1348             } DW3_4;
1349 
1350             //! \name Local enumerations
1351 
1352             enum OPERATION
1353             {
1354                 OPERATION_NOWRITE                                                = 0, //!< The Destination Address and Immediate Data fields are ignored.
1355                 OPERATION_WRITEIMMEDIATEDATA                                     = 1, //!< Writes 8 bytes (64 bits) of Immediate Data to the Destination Address.
1356                 OPERATION_WRITETIMESTAMP                                         = 3, //!< Writes 32 bytes (256bits) of Timestamp Data to the Destination Address.The Immediate Data field is ignored.The timestamp layout :[0] = 64b Context Timestamp Start[1] = 64b Global Timestamp Start[2] = 64b Context Timestamp End[3] = 64b Global Timestamp End
1357             };
1358 
1359             //! \name Initializations
1360 
1361             //! \brief Explicit member initialization function
1362             POSTSYNC_DATA_CMD();
1363 
1364             static const size_t dwSize = 5;
1365             static const size_t byteSize = 20;
1366         } PostSync;
1367 
//! \brief INLINE_DATA
//! \details
//!     Eight dwords of raw data, zero-initialized by the default member
//!     initializer.
struct INLINE_DATA_CMD
{
    uint32_t Value[8] = {0}; //!< Raw payload dwords; all eight start as zero

    //! \brief Explicit member initialization function
    //! \details
    //!     Empty on purpose: Value is already zeroed by its in-class initializer.
    INLINE_DATA_CMD() {}
    static const size_t dwSize   = 8;  //!< Structure size in dwords
    static const size_t byteSize = 32; //!< Structure size in bytes (dwSize * 4)
} InlineData;
1377 
1378         //! \name Local enumerations
1379 
//! \brief CFE_SUBOPCODE_VARIANT
//! \details
//!     Distinguishes a standard walker from the resume/completion variants
//!     described per value below. Encoding 6 is not named here.
enum CFE_SUBOPCODE_VARIANT
{
    CFE_SUBOPCODE_VARIANT_STANDARD          = 0, //!< No additional details
    CFE_SUBOPCODE_VARIANT_PASS1_RESUME      = 1, //!< Resumption of Compute Walkers that has Thread groups only in Pass1 (EOT Preempt Saved), recorded in context image to continue execution after preemption.
    CFE_SUBOPCODE_VARIANT_PASS2_RESUME      = 2, //!< Resumption of Compute Walkers that only has Pass2 (EOT NOT-RUN) thread groups recorded in context image to continue execution after preemption.
    CFE_SUBOPCODE_VARIANT_BTDPASS2          = 3, //!< This encoding is used by BTD over dispatched threads. This value is not saved in the Walker Context image.
    CFE_SUBOPCODE_VARIANT_PASS1PASS2_RESUME = 4, //!< Resumption of Compute Walkers that has a Mix of Pass1 (EOT Preempt Saved) and Pass2 (EOT NOT-RUN) thread groups recorded in context image to continue execution after preemption.
    CFE_SUBOPCODE_VARIANT_TG_RESUME         = 5, //!< Resumption of COMPUTE_WALKER that was preempted at a Thread group and has completed execution of previous TGs, recorded in context image to continue execution after preemption.
    CFE_SUBOPCODE_VARIANT_WDONE             = 7, //!< Walker completed execution of all TGs.
};
1390 
//! \brief CFE_SUBOPCODE
//! \details
//!     Single named sub-opcode value for this command.
enum CFE_SUBOPCODE
{
    CFE_SUBOPCODE_COMPUTEWALKER = 2, //!< No additional details
};

//! \brief COMPUTE_COMMAND_OPCODE
enum COMPUTE_COMMAND_OPCODE
{
    COMPUTE_COMMAND_OPCODE_NEWCFECOMMAND = 2, //!< No additional details
};

//! \brief PIPELINE
enum PIPELINE
{
    PIPELINE_COMPUTE = 2, //!< No additional details
};

//! \brief COMMAND_TYPE
enum COMMAND_TYPE
{
    COMMAND_TYPE_GFXPIPE = 3, //!< No additional details
};
1410 
//! \brief PARTITION_TYPE
//! \details
//!     Specifies whether the command is executed by multiple partitions.
//!     When partitioned, the X or Y or Z dispatches are split at
//!     Partition Size boundaries.
enum PARTITION_TYPE
{
    PARTITION_TYPE_DISABLED = 0, //!< The command is not partitioned. Partition ID and Partition Size are ignored.
    PARTITION_TYPE_X        = 1, //!< The command is partitioned in the X dimension. The X walk is between (PartitionID * PartitionSize) <= X < ((PartitionID+1)*PartitionSize). All Y and Z walks are performed in this partition.
    PARTITION_TYPE_Y        = 2, //!< The command is partitioned in the Y dimension. The Y walk is between (PartitionID * PartitionSize) <= Y < ((PartitionID+1)*PartitionSize). All X and Z walks are performed in this partition.
    PARTITION_TYPE_Z        = 3, //!< The command is partitioned in the Z dimension. The Z walk is between (PartitionID * PartitionSize) <= Z < ((PartitionID+1)*PartitionSize). All X and Y walks are performed in this partition.
};
1423 
//! \brief MESSAGE_SIMD
//! \details
//!     Specifies the SIMD size of the messages used to access the local data.
//!     When the message size is less than the thread SIMD size, then the Local
//!     IDs are batched so that the smaller message SIMD size keeps full cache
//!     lines together in fused threads.
//!     Encoding 0 is not named here.
enum MESSAGE_SIMD
{
    MESSAGE_SIMD_SIMT16 = 1, //!< No additional details
    MESSAGE_SIMD_SIMT32 = 2, //!< No additional details
};
1435 
//! \brief TILE_LAYOUT
//! \details
//!     Specifies whether 2D and 3D surfaces are stored in Linear or TileY
//!     layouts. The local ID values are batched together to keep full cache
//!     lines together in the same SIMD thread.
enum TILE_LAYOUT
{
    TILE_LAYOUT_LINEAR      = 0, //!< No additional details
    TILE_LAYOUT_TILEY32BPE  = 1, //!< No additional details
    TILE_LAYOUT_TILEY64BPE  = 2, //!< No additional details
    TILE_LAYOUT_TILEY128BPE = 3, //!< No additional details
};
1448 
//! \brief WALK_ORDER
//! \details
//!     Specifies which dimensions are the first and second priority order for
//!     binding together in SIMD threads. In the values below, 0 is the first
//!     priority and 1 is the second priority.
enum WALK_ORDER
{
    WALK_ORDER_WALK012 = 0, //!< Normal Linear walk order
    WALK_ORDER_WALK021 = 1, //!< No additional details
    WALK_ORDER_WALK102 = 2, //!< Normal TileY walk order
    WALK_ORDER_WALK120 = 3, //!< No additional details
    WALK_ORDER_WALK201 = 4, //!< No additional details
    WALK_ORDER_WALK210 = 5, //!< No additional details
};
1463 
//! \brief EMIT_LOCAL
//! \details
//!     These bits identify whether the register payload for Local X/Y/Z
//!     indices will be present. Bit 26 is X, Bit 27 is Y, and Bit 28 is Z.
//!     Separate <a
//!     href="https://gfxspecs.intel.com/Predator/Home/Index/55403">GPGPU_LOCALID</a>
//!     register payloads are generated when the corresponding bit is set.
//!     If Generate Local ID is enabled, then the thread dispatcher generates
//!     the corresponding Local X/Y/Z index values, using the Local X/Y/Z
//!     Maximum values from DW6 of this command. For any enable bit that is not
//!     set, the corresponding Local ID will not be generated and that register
//!     will not be emitted into the per-thread payload. When an enable bit is
//!     not set, its corresponding Local Maximum value in DW6 must be 0.
//!     The named values are cumulative bit masks (X, X|Y, X|Y|Z).
enum EMIT_LOCAL
{
    EMIT_LOCAL_EMITNONE = 0, //!< No local ID payload emitted
    EMIT_LOCAL_EMITX    = 1, //!< X only (binary 001)
    EMIT_LOCAL_EMITXY   = 3, //!< X and Y (binary 011)
    EMIT_LOCAL_EMITXYZ  = 7, //!< X, Y and Z (binary 111)
};
1484 
//! \brief SIMD_SIZE
//! \details
//!     This field determines the size of the payload and the number of bits of
//!     the execution mask that are expected. The kernel pointed to by the
//!     interface descriptor should match the SIMD declared here.
//!     Encoding 0 is not named here.
enum SIMD_SIZE
{
    SIMD_SIZE_SIMT16 = 1, //!< 16 LSBs of the execution mask are used
    SIMD_SIZE_SIMT32 = 2, //!< 32 bits of execution mask used
};
1495 
1496         //! \name Initializations
1497 
1498         //! \brief Explicit member initialization function
1499         COMPUTE_WALKER_CMD();
1500 
1501         static const size_t dwSize = 40;
1502         static const size_t byteSize = 160;
1503     };
1504 
1505     //!
1506     //! \brief CFE_STATE
1507     //! \details
1508     //!     Set the compute pipeline state.
1509     //!
1510     struct CFE_STATE_CMD
1511     {
1512         union
1513         {
1514             struct
1515             {
1516                 uint32_t                 DwordLength                                      : __CODEGEN_BITFIELD( 0,  7)    ; //!< DWORD_LENGTH
1517                 uint32_t                 Reserved8                                        : __CODEGEN_BITFIELD( 8, 15)    ; //!< Reserved
1518                 uint32_t                 CfeSubopcodeVariant                              : __CODEGEN_BITFIELD(16, 17)    ; //!< CFE_SUBOPCODE_VARIANT
1519                 uint32_t                 CfeSubopcode                                     : __CODEGEN_BITFIELD(18, 23)    ; //!< CFE_SUBOPCODE
1520                 uint32_t                 ComputeCommandOpcode                             : __CODEGEN_BITFIELD(24, 26)    ; //!< COMPUTE_COMMAND_OPCODE
1521                 uint32_t                 Pipeline                                         : __CODEGEN_BITFIELD(27, 28)    ; //!< PIPELINE
1522                 uint32_t                 CommandType                                      : __CODEGEN_BITFIELD(29, 31)    ; //!< COMMAND_TYPE
1523             };
1524             uint32_t                     Value;
1525         } DW0;
1526         union
1527         {
1528             struct
1529             {
1530                 uint32_t                 Reserved32                                       : __CODEGEN_BITFIELD( 0,  9)    ; //!< Reserved
1531                 uint32_t                 ScratchSpaceBuffer                               : __CODEGEN_BITFIELD(10, 31)    ; //!< Scratch Space Buffer
1532             };
1533             uint32_t                     Value;
1534         } DW1;
1535         union
1536         {
1537             struct
1538             {
1539                 uint32_t                 Reserved64                                                                       ; //!< Reserved
1540             };
1541             uint32_t                     Value;
1542         } DW2;
1543         union
1544         {
1545             struct
1546             {
1547                 uint32_t                 ControlsTheNumberOfStackidsForRayTracingSubsystem : __CODEGEN_BITFIELD( 0,  1)    ; //!< CONTROLS_THE_NUMBER_OF_STACKIDS_FOR_RAY_TRACING_SUBSYSTEM
1548                 uint32_t                 Reserved98                                       : __CODEGEN_BITFIELD( 2,  9)    ; //!< Reserved
1549                 uint32_t                 LargeGrfThreadAdjustDisable                      : __CODEGEN_BITFIELD(10, 10)    ; //!< LARGE_GRF_THREAD_ADJUST_DISABLE
1550                 uint32_t                 ComputeOverdispatchDisable                       : __CODEGEN_BITFIELD(11, 11)    ; //!< COMPUTE_OVERDISPATCH_DISABLE
1551                 uint32_t                 ComputeDispatchAllWalkerEnable                   : __CODEGEN_BITFIELD(12, 12)    ; //!< COMPUTE_DISPATCH_ALL_WALKER_ENABLE
1552                 uint32_t                 Reserved109                                      : __CODEGEN_BITFIELD(13, 13)    ; //!< Reserved
1553                 uint32_t                 OverDispatchControl                              : __CODEGEN_BITFIELD(14, 15)    ; //!< OVER_DISPATCH_CONTROL
1554                 uint32_t                 MaximumNumberOfThreads                           : __CODEGEN_BITFIELD(16, 31)    ; //!< Maximum Number of Threads
1555             };
1556             uint32_t                     Value;
1557         } DW3;
1558         union
1559         {
1560             struct
1561             {
1562                 uint32_t                 Reserved128                                                                      ; //!< Reserved
1563             };
1564             uint32_t                     Value;
1565         } DW4;
1566         union
1567         {
1568             struct
1569             {
1570                 uint32_t                 ResumeIndicatorDebugkey                          : __CODEGEN_BITFIELD( 0,  0)    ; //!< Resume indicator debugkey
1571                 uint32_t                 WalkerNumberDebugkey                             : __CODEGEN_BITFIELD( 1, 10)    ; //!< Walker number debugkey
1572                 uint32_t                 Reserved171                                      : __CODEGEN_BITFIELD(11, 31)    ; //!< Reserved
1573             };
1574             uint32_t                     Value;
1575         } DW5;
1576 
1577         //! \name Local enumerations
1578 
//! \brief CFE_SUBOPCODE_VARIANT
//! \details
//!     Values for DW0 bits 16..17 (CfeSubopcodeVariant).
enum CFE_SUBOPCODE_VARIANT
{
    CFE_SUBOPCODE_VARIANT_STANDARD = 0, //!< No additional details
};

//! \brief CFE_SUBOPCODE
//! \details
//!     Values for DW0 bits 18..23 (CfeSubopcode).
enum CFE_SUBOPCODE
{
    CFE_SUBOPCODE_CFESTATE = 0, //!< No additional details
};

//! \brief COMPUTE_COMMAND_OPCODE
//! \details
//!     Values for DW0 bits 24..26 (ComputeCommandOpcode).
enum COMPUTE_COMMAND_OPCODE
{
    COMPUTE_COMMAND_OPCODE_NEWCFECOMMAND = 2, //!< No additional details
};

//! \brief PIPELINE
//! \details
//!     Values for DW0 bits 27..28 (Pipeline).
enum PIPELINE
{
    PIPELINE_COMPUTE = 2, //!< No additional details
};

//! \brief COMMAND_TYPE
//! \details
//!     Values for DW0 bits 29..31 (CommandType).
enum COMMAND_TYPE
{
    COMMAND_TYPE_GFXPIPE = 3, //!< No additional details
};
1603 
//! \brief CONTROLS_THE_NUMBER_OF_STACKIDS_FOR_RAY_TRACING_SUBSYSTEM
//! \details
//!     This field allows controlling the number of stackIDs (i.e. #unique
//!     rays) in the Ray Tracing subsystem.
//!     Values for DW3 bits 0..1; each step up in encoding halves the count.
enum CONTROLS_THE_NUMBER_OF_STACKIDS_FOR_RAY_TRACING_SUBSYSTEM
{
    CONTROLS_THE_NUMBER_OF_STACKIDS_FOR_RAY_TRACING_SUBSYSTEM_2K  = 0, //!< Number of stackIDs = 2048
    CONTROLS_THE_NUMBER_OF_STACKIDS_FOR_RAY_TRACING_SUBSYSTEM_1K  = 1, //!< Number of stackIDs = 1024
    CONTROLS_THE_NUMBER_OF_STACKIDS_FOR_RAY_TRACING_SUBSYSTEM_512 = 2, //!< Number of stackIDs = 512
    CONTROLS_THE_NUMBER_OF_STACKIDS_FOR_RAY_TRACING_SUBSYSTEM_256 = 3, //!< Number of stackIDs = 256
};
1615 
1616         //! \brief LARGE_GRF_THREAD_ADJUST_DISABLE
1617         //! \details
1618         //!     When this bit is set, the thread dispatch logic will not cap the
1619         //!     max_outstanding_threads based on the large grf mode. This is a Chicken
1620         //!     bit.
1621         enum LARGE_GRF_THREAD_ADJUST_DISABLE
1622         {
1623             LARGE_GRF_THREAD_ADJUST_DISABLE_ENABLED                          = 0, //!< No additional details
1624             LARGE_GRF_THREAD_ADJUST_DISABLE_DISABLED                         = 1, //!< No additional details
1625         };
1626 
1627         //! \brief COMPUTE_OVERDISPATCH_DISABLE
1628         //! \details
1629         //!     When this bit is set, the thread dispatch logic will disable over
1630         //!     dispatching of threads to the DSS.
1631         enum COMPUTE_OVERDISPATCH_DISABLE
1632         {
1633             COMPUTE_OVERDISPATCH_DISABLE_ENABLED                             = 0, //!< No additional details
1634             COMPUTE_OVERDISPATCH_DISABLE_DISABLED                            = 1, //!< No additional details
1635         };
1636 
1637         //! \brief COMPUTE_DISPATCH_ALL_WALKER_ENABLE
1638         //! \details
1639         //!     When this bit is set, the thread dispatch logic does a forced
1640         //!     round-robin dispatch to all the enabled DSS in this context.
1641         enum COMPUTE_DISPATCH_ALL_WALKER_ENABLE
1642         {
1643             COMPUTE_DISPATCH_ALL_WALKER_ENABLE_DISABLED                      = 0, //!< No additional details
1644             COMPUTE_DISPATCH_ALL_WALKER_ENABLE_ENABLED                       = 1, //!< No additional details
1645         };
1646 
1647         //! \brief OVER_DISPATCH_CONTROL
1648         //! \details
1649         //!     Enables the amount of GPGPU thread over dispatch.
1650         enum OVER_DISPATCH_CONTROL
1651         {
1652             OVER_DISPATCH_CONTROL_NONE                                       = 0, //!< 0% overdispatch
1653             OVER_DISPATCH_CONTROL_LOW                                        = 1, //!< 6.25% overdispatch
1654             OVER_DISPATCH_CONTROL_NORMAL                                     = 2, //!< 12.5% overdispatch
1655             OVER_DISPATCH_CONTROL_HIGH                                       = 3, //!< 25% overdispatch
1656         };
1657 
1658         //! \name Initializations
1659 
1660         //! \brief Explicit member initialization function
1661         CFE_STATE_CMD();
1662 
1663         static const size_t dwSize = 6;
1664         static const size_t byteSize = 24;
1665     };
1666 
1667     //!
1668     //! \brief STATE_COMPUTE_MODE
1669     //! \details
1670     //!     This is a non-pipeline state command and is a general compute
1671     //!     programming state that can be shared from the top to bottom of the
1672     //!     pipeline.
1673     //!
1674     struct STATE_COMPUTE_MODE_CMD
1675     {
1676         union
1677         {
1678             struct
1679             {
1680                 uint32_t                 DwordLength                                      : __CODEGEN_BITFIELD( 0,  7)    ; //!< DWORD_LENGTH
1681                 uint32_t                 Reserved8                                        : __CODEGEN_BITFIELD( 8, 15)    ; //!< Reserved
1682                 uint32_t                 _3DCommandSubOpcode                              : __CODEGEN_BITFIELD(16, 23)    ; //!< _3D_COMMAND_SUB_OPCODE
1683                 uint32_t                 _3DCommandOpcode                                 : __CODEGEN_BITFIELD(24, 26)    ; //!< _3D_COMMAND_OPCODE
1684                 uint32_t                 CommandSubtype                                   : __CODEGEN_BITFIELD(27, 28)    ; //!< COMMAND_SUBTYPE
1685                 uint32_t                 CommandType                                      : __CODEGEN_BITFIELD(29, 31)    ; //!< COMMAND_TYPE
1686             };
1687             uint32_t                     Value;
1688         } DW0;
1689         union
1690         {
1691             struct
1692             {
1693                 uint32_t                 ZPassAsyncComputeThreadLimit                     : __CODEGEN_BITFIELD( 0,  2)    ; //!< Z_PASS_ASYNC_COMPUTE_THREAD_LIMIT
1694                 uint32_t                 NpZAsyncThrottleSettings                         : __CODEGEN_BITFIELD( 3,  4)    ; //!< NP_Z_ASYNC_THROTTLE_SETTINGS
1695                 uint32_t                 Reserved37                                       : __CODEGEN_BITFIELD( 5,  6)    ; //!< Reserved
1696                 uint32_t                 AsyncComputeThreadLimit                          : __CODEGEN_BITFIELD( 7,  9)    ; //!< ASYNC_COMPUTE_THREAD_LIMIT
1697                 uint32_t                 Reserved42                                       : __CODEGEN_BITFIELD(10, 12)    ; //!< Reserved
1698                 uint32_t                 EuThreadSchedulingModeOverride                   : __CODEGEN_BITFIELD(13, 14)    ; //!< EU_THREAD_SCHEDULING_MODE_OVERRIDE
1699                 uint32_t                 LargeGrfMode                                     : __CODEGEN_BITFIELD(15, 15)    ; //!< LARGE_GRF_MODE
1700                 uint32_t                 Mask1                                            : __CODEGEN_BITFIELD(16, 31)    ; //!< Mask1
1701             };
1702             uint32_t                     Value;
1703         } DW1;
1704         union
1705         {
1706             struct
1707             {
1708                 uint32_t                 MidthreadPreemptionDelayTimer                    : __CODEGEN_BITFIELD( 0,  2)    ; //!< MIDTHREAD_PREEMPTION_DELAY_TIMER
1709                 uint32_t                 MidthreadPreemptionOverdispatchThreadGroupCount  : __CODEGEN_BITFIELD( 3,  4)    ; //!< MIDTHREAD_PREEMPTION_OVERDISPATCH_THREAD_GROUP_COUNT
1710                 uint32_t                 MidthreadPreemptionOverdispatchTestMode          : __CODEGEN_BITFIELD( 5,  5)    ; //!< MIDTHREAD_PREEMPTION_OVERDISPATCH_TEST_MODE
1711                 uint32_t                 UavCoherencyMode                                 : __CODEGEN_BITFIELD( 6,  6)    ; //!< UAV_COHERENCY_MODE
1712                 uint32_t                 Reserved71                                       : __CODEGEN_BITFIELD( 7, 10)    ; //!< Reserved
1713                 uint32_t                 MemoryAllocationForScratchAndMidthreadPreemptionBuffers : __CODEGEN_BITFIELD(11, 11)    ; //!< MEMORY_ALLOCATION_FOR_SCRATCH_AND_MIDTHREAD_PREEMPTION_BUFFERS
1714                 uint32_t                 Reserved76                                       : __CODEGEN_BITFIELD(12, 15)    ; //!< Reserved
1715                 uint32_t                 Mask2                                            : __CODEGEN_BITFIELD(16, 31)    ; //!< Mask2
1716             };
1717             uint32_t                     Value;
1718         } DW2;
1719 
1720         //! \name Local enumerations
1721 
1722         enum _3D_COMMAND_SUB_OPCODE
1723         {
1724             _3D_COMMAND_SUB_OPCODE_STATECOMPUTEMODE                          = 5, //!< No additional details
1725         };
1726 
1727         enum _3D_COMMAND_OPCODE
1728         {
1729             _3D_COMMAND_OPCODE_GFXPIPENONPIPELINED                           = 1, //!< No additional details
1730         };
1731 
1732         enum COMMAND_SUBTYPE
1733         {
1734             COMMAND_SUBTYPE_GFXPIPECOMMON                                    = 0, //!< No additional details
1735         };
1736 
1737         enum COMMAND_TYPE
1738         {
1739             COMMAND_TYPE_GFXPIPE                                             = 3, //!< No additional details
1740         };
1741 
1742         //! \brief Z_PASS_ASYNC_COMPUTE_THREAD_LIMIT
1743         //! \details
1744         //!     Specifies the maximum number of active Compute CS threads to run in a
1745         //!     DSS when the 3D Pipe is active and a Z-pass is not running. When the 3D
1746         //!     Pipe is not active or when a Z-pass is running, the maximum number of
1747         //!     active Compute CS threads is specified by Maximum Number of
1748         //!     Threads in CFE_STATE command.
1749         enum Z_PASS_ASYNC_COMPUTE_THREAD_LIMIT
1750         {
1751             Z_PASS_ASYNC_COMPUTE_THREAD_LIMIT_MAX60                          = 0, //!< Maximum of upto 1 thread per fused EU reserved for 3D.
1752             Z_PASS_ASYNC_COMPUTE_THREAD_LIMIT_MAX64                          = 1, //!< No limit applied. Maximum Number of Threads is the only limit on Compute CS threads.
1753             Z_PASS_ASYNC_COMPUTE_THREAD_LIMIT_MAX56                          = 2, //!< Maximum of 1 thread per fused EU reserved for 3D .
1754             Z_PASS_ASYNC_COMPUTE_THREAD_LIMIT_MAX48                          = 3, //!< Maximum of 2 thread per fused EU reserved for 3D .
1755             Z_PASS_ASYNC_COMPUTE_THREAD_LIMIT_MAX40                          = 4, //!< Maximum of 3 thread per fused EU reserved for 3D .
1756             Z_PASS_ASYNC_COMPUTE_THREAD_LIMIT_MAX32                          = 5, //!< Maximum of 4 thread per fused EU reserved for 3D .
1757         };
1758 
1759         enum NP_Z_ASYNC_THROTTLE_SETTINGS
1760         {
1761             NP_Z_ASYNC_THROTTLE_SETTINGS_UNNAMED0                            = 0, //!< Use the same settings as the Pixel shader Async compute settings i.e bits[9:7] of this DW.
1762             NP_Z_ASYNC_THROTTLE_SETTINGS_MAX32                               = 1, //!< Maximum of 4 thread per fused EU reserved for 3D .
1763             NP_Z_ASYNC_THROTTLE_SETTINGS_MAX40                               = 2, //!< Maximum of 3 thread per fused EU reserved for 3D .
1764             NP_Z_ASYNC_THROTTLE_SETTINGS_MAX48                               = 3, //!< Maximum of 2 thread per fused EU reserved for 3D .
1765         };
1766 
1767         //! \brief ASYNC_COMPUTE_THREAD_LIMIT
1768         //! \details
1769         //!     Specifies the maximum number of active Compute CS threads to run in a
1770         //!     DSS when the 3D Pipe is active and a Z-pass is not running. When the 3D
1771         //!     Pipe is not active or when a Z-pass is running, the maximum number of
1772         //!     active Compute CS threads is specified by Maximum Number of
1773         //!     Threads in CFE_STATE command.
1774         enum ASYNC_COMPUTE_THREAD_LIMIT
1775         {
1776             ASYNC_COMPUTE_THREAD_LIMIT_DISABLED                              = 0, //!< No limit applied. Maximum Number of Threads is the only limit on Compute CS threads.
1777             ASYNC_COMPUTE_THREAD_LIMIT_MAX2                                  = 1, //!< Maximum of 2 EU threads per DSS, when 3D Pipe is active. This sets the Async Compute thread limit to about 1 thread per EU row.
1778             ASYNC_COMPUTE_THREAD_LIMIT_MAX8                                  = 2, //!< Maximum of 8 EU threads per DSS, when 3D Pipe is active. This sets the Async Compute thread limit to about 1 thread per EU .
1779             ASYNC_COMPUTE_THREAD_LIMIT_MAX16                                 = 3, //!< Maximum of 16 EU threads per DSS, when 3D Pipe is active. This sets the Async Compute thread limit to about 2 threads per EU .
1780             ASYNC_COMPUTE_THREAD_LIMIT_MAX24                                 = 4, //!< Maximum of 24 EU threads per DSS, when 3D Pipe is active.
1781             ASYNC_COMPUTE_THREAD_LIMIT_MAX32                                 = 5, //!< Maximum of 32 EU threads per DSS, when 3D Pipe is active.
1782             ASYNC_COMPUTE_THREAD_LIMIT_MAX40                                 = 6, //!< Maximum of 40 EU threads per DSS, when 3D Pipe is active.
1783             ASYNC_COMPUTE_THREAD_LIMIT_MAX48                                 = 7, //!< Maximum of 48 EU threads per DSS, when 3D Pipe is active.
1784         };
1785 
1786         //! \brief EU_THREAD_SCHEDULING_MODE_OVERRIDE
1787         //! \details
1788         //!     Override the thread scheduling policy in EU.
1789         enum EU_THREAD_SCHEDULING_MODE_OVERRIDE
1790         {
1791             EU_THREAD_SCHEDULING_MODE_OVERRIDE_HWDEFAULT                     = 0, //!< No override - HW selects optimal scheduling policy. For DGT and PVC, HW uses Oldest First scheduling.
1792             EU_THREAD_SCHEDULING_MODE_OVERRIDE_OLDESTFIRST                   = 1, //!< EU will always schedule the oldest ready thread.
1793             EU_THREAD_SCHEDULING_MODE_OVERRIDE_ROUNDROBIN                    = 2, //!< EU will schedule threads in a round-robin manner, switching to the next ready thread every cycle.
1794             EU_THREAD_SCHEDULING_MODE_OVERRIDE_STALLBASEDROUNDROBIN          = 3, //!< EU will schedule threads in a round-robin manner, but scheduling will only switch when the current thread is stalled due to dependency.
1795         };
1796 
1797         //! \brief LARGE_GRF_MODE
1798         //! \details
1799         //!     This bit controls the Large GRF Mode Vs Regular GRF Mode in Execution
1800         //!     Units.
1801         enum LARGE_GRF_MODE
1802         {
1803             LARGE_GRF_MODE_UNNAMED0                                          = 0, //!< Regular GRF mode of operation.
1804             LARGE_GRF_MODE_UNNAMED1                                          = 1, //!< Large GRF mode of operation.
1805         };
1806 
1807         //! \brief MIDTHREAD_PREEMPTION_DELAY_TIMER
1808         //! \details
1809         //!     The delay timer gives the option of giving time for threads that are on
1810         //!     the verge of exiting to completely exit thereby avoiding saving the
1811         //!     thread state.
1812         enum MIDTHREAD_PREEMPTION_DELAY_TIMER
1813         {
1814             MIDTHREAD_PREEMPTION_DELAY_TIMER_MTPTIMERVAL0                    = 0, //!< Mid thread preemption event is signalled to the EU as soon as CFEG receives it from CS.
1815             MIDTHREAD_PREEMPTION_DELAY_TIMER_MTPTIMERVAL50                   = 1, //!< Mid thread preemption timer of 50usec.
1816             MIDTHREAD_PREEMPTION_DELAY_TIMER_MTPTIMERVAL100                  = 2, //!< Mid thread preemption timer of 100usec
1817             MIDTHREAD_PREEMPTION_DELAY_TIMER_MTPTIMERVAL150                  = 3, //!< Mid thread preemption timer of 150usec
1818         };
1819 
1820         //! \brief MIDTHREAD_PREEMPTION_OVERDISPATCH_THREAD_GROUP_COUNT
1821         //! \details
1822         //!     The overdispatch TG buffer a.k.a ODB buffer stores the XYZ ID of the
1823         //!     TG that were dispatched by HW but did not land on the EU after the
1824         //!     preemption is signaled. This buffer is in PPGGT space and it has to be
1825         //!     managed by the CFEG to avoid overflowing.
1826         //!     The ODB buffer size is programmed based on the Physical machine
1827         //!     size.
1828         enum MIDTHREAD_PREEMPTION_OVERDISPATCH_THREAD_GROUP_COUNT
1829         {
1830             MIDTHREAD_PREEMPTION_OVERDISPATCH_THREAD_GROUP_COUNT_ODTGM2      = 0, //!< HW Caps the Over dispatched TG count in ODB buffer to MAX_NUM_THRDS / 2.
1831             MIDTHREAD_PREEMPTION_OVERDISPATCH_THREAD_GROUP_COUNT_ODTGM4      = 1, //!< HW Caps the Over dispatched TG count in ODB buffer to MAX_NUM_THRDS / 4.
1832             MIDTHREAD_PREEMPTION_OVERDISPATCH_THREAD_GROUP_COUNT_ODTGM8      = 2, //!< HW Caps the Over dispatched TG count in ODB buffer to MAX_NUM_THRDS / 8
1833             MIDTHREAD_PREEMPTION_OVERDISPATCH_THREAD_GROUP_COUNT_ODTGM16     = 3, //!< HW Caps the Over dispatched TG count in ODB buffer to MAX_NUM_THRDS / 16
1834         };
1835 
1836         //! \brief MIDTHREAD_PREEMPTION_OVERDISPATCH_TEST_MODE
1837         //! \details
1838         //!     This bit provides a mechanism to limit the number of ODB TGs to a
1839         //!     much smaller number in simulation.
1840         enum MIDTHREAD_PREEMPTION_OVERDISPATCH_TEST_MODE
1841         {
1842             MIDTHREAD_PREEMPTION_OVERDISPATCH_TEST_MODE_REGULAR              = 0, //!< Regular mode operation.  The MAX_ODB_CNT is derived from the value programmed in bits [4:3]
1843             MIDTHREAD_PREEMPTION_OVERDISPATCH_TEST_MODE_TESTMODE             = 1, //!< Caps the MAX_ODB_CNT to 64 .
1844         };
1845 
1846         //! \brief UAV_COHERENCY_MODE
1847         //! \details
1848         //!     This field controls whether UAV operations in the HW will flush the
1849         //!     L1 Dataport Cache or only drain the dataport pipe. If UAV's are cached,
1850         //!     SW must set this bit to ensure coherency of UAV's that are made coherent
1851         //!     through UAV barrier and UAV resource changes with RESOURCE_BARRIER(L1
1852         //!     Dataport UAV Flush).
1853         enum UAV_COHERENCY_MODE
1854         {
1855             UAV_COHERENCY_MODE_DRAIN_DATAPORT_MODE                            = 0, //!< Untyped L1 is neither flushed or invalidated for both UAV Barrier Coherency and BARRIER_RESOURCE with L1 Dataport UAV Flush.
1856             UAV_COHERENCY_MODE_FLUSH_DATAPORTL1                               = 1, //!< Untyped L1 is flushed,for both UAV Barrier Coherency and BARRIER_RESOURCE with L1 Dataport UAV Flush.
1857         };
1858 
1859         //! \brief MEMORY_ALLOCATION_FOR_SCRATCH_AND_MIDTHREAD_PREEMPTION_BUFFERS
1860         //! \details
1861         //!     This bit only applies for Compute-only contexts initiated from
1862         //!     CCS.
1863         //!     If set to 1, HW uses the Virtual Subslice ID instead of the Physical
1864         //!     Subslice ID to index the scratch and Midthread Preemption Thread
1865         //!     statebuffers (TSB). This reduces the memory footprint when running
1866         //!     multiple compute contexts.
1867         enum MEMORY_ALLOCATION_FOR_SCRATCH_AND_MIDTHREAD_PREEMPTION_BUFFERS
1868         {
1869             MEMORY_ALLOCATION_FOR_SCRATCH_AND_MIDTHREAD_PREEMPTION_BUFFERS_FULL = 0, //!< SW allocates the MAX memory (full GPU size) for scratch and Midthread Preemption buffers (TSB) for all CCS contexts
1870             MEMORY_ALLOCATION_FOR_SCRATCH_AND_MIDTHREAD_PREEMPTION_BUFFERS_MIN = 1, //!< SW allocates the minimum required memory for scratch and Midthread Preemption buffers (TSB) based on the number of Subslices assigned to this CCS context as defined in the CCS_MODE register .
1871         };
1872 
1873         //! \name Initializations
1874 
1875         //! \brief Explicit member initialization function
1876         STATE_COMPUTE_MODE_CMD();
1877 
1878         static const size_t dwSize = 3;
1879         static const size_t byteSize = 12;
1880     };
1881 
1882     //!
1883     //! \brief PALETTE_ENTRY
1884     //! \details
1885     //!
1886     //!
1887     struct PALETTE_ENTRY_CMD
1888     {
1889         union
1890         {
1891             struct
1892             {
1893                 uint32_t                 Blue : __CODEGEN_BITFIELD(0, 7); //!< Blue
1894                 uint32_t                 Green : __CODEGEN_BITFIELD(8, 15); //!< Green
1895                 uint32_t                 Red : __CODEGEN_BITFIELD(16, 23); //!< Red
1896                 uint32_t                 Alpha : __CODEGEN_BITFIELD(24, 31); //!< Alpha
1897             };
1898             uint32_t                     Value;
1899         } DW0;
1900 
1901         //! \name Local enumerations
1902 
1903         //! \name Initializations
1904 
1905         //! \brief Explicit member initialization function
1906         PALETTE_ENTRY_CMD();
1907 
1908         static const size_t dwSize = 1;
1909         static const size_t byteSize = 4;
1910     };
1911 
1912     //!
1913     //! \brief GPGPU_CSR_BASE_ADDRESS
1914     //! \details
1915     //!     The GPGPU_CSR_BASE_ADDRESS command sets the base pointers for EU and L3
1916     //!     to Context Save and Restore EU State and SLM for GPGPU mid-thread
1917     //!     preemption.
1918     //!
1919     //!     Execution of this command causes a full pipeline flush, thus its use
1920     //!     should be minimized for higher performance. State and instruction caches
1921     //!     are flushed on completion of the flush.
1922     //!
1923     //!     SW must always program PIPE_CONTROL with "CS Stall" and "Render Target
1924     //!     Cache Flush Enable" set prior to programming GPGPU_CSR_BASE_ADDRESS
1925     //!     command for GPGPU workloads i.e when pipeline select is GPGPU via
1926     //!     PIPELINE_SELECT command. This is required to achieve better GPGPU
1927     //!     preemption latencies for certain programming sequences. If programming
1928     //!     PIPE_CONTROL has performance implications then preemption latencies can
1929     //!     be trade off against performance by not implementing this programming
1930     //!     note.
1931     //!
1932     struct GPGPU_CSR_BASE_ADDRESS_CMD
1933     {
1934         union
1935         {
1936             struct
1937             {
1938                 uint32_t                 DwordLength : __CODEGEN_BITFIELD(0, 7); //!< DWORD_LENGTH
1939                 uint32_t                 Reserved8 : __CODEGEN_BITFIELD(8, 15); //!< Reserved
1940                 uint32_t                 _3DCommandSubOpcode : __CODEGEN_BITFIELD(16, 23); //!< _3D_COMMAND_SUB_OPCODE
1941                 uint32_t                 _3DCommandOpcode : __CODEGEN_BITFIELD(24, 26); //!< _3D_COMMAND_OPCODE
1942                 uint32_t                 CommandSubtype : __CODEGEN_BITFIELD(27, 28); //!< COMMAND_SUBTYPE
1943                 uint32_t                 CommandType : __CODEGEN_BITFIELD(29, 31); //!< COMMAND_TYPE
1944             };
1945             uint32_t                     Value;
1946         } DW0;
1947         union
1948         {
1949             struct
1950             {
1951                 uint64_t                 Reserved32 : __CODEGEN_BITFIELD(0, 11); //!< Reserved
1952                 uint64_t                 GpgpuCsrBaseAddress : __CODEGEN_BITFIELD(12, 63); //!< GPGPU CSR Base Address
1953             };
1954             uint32_t                     Value[2];
1955         } DW1_2;
1956 
1957         //! \name Local enumerations
1958 
1959         enum _3D_COMMAND_SUB_OPCODE
1960         {
1961             _3D_COMMAND_SUB_OPCODE_GPGPUCSRBASEADDRESS = 4, //!< No additional details
1962         };
1963 
1964         enum _3D_COMMAND_OPCODE
1965         {
1966             _3D_COMMAND_OPCODE_GFXPIPENONPIPELINED = 1, //!< No additional details
1967         };
1968 
1969         enum COMMAND_SUBTYPE
1970         {
1971             COMMAND_SUBTYPE_GFXPIPECOMMON = 0, //!< No additional details
1972         };
1973 
1974         enum COMMAND_TYPE
1975         {
1976             COMMAND_TYPE_GFXPIPE = 3, //!< No additional details
1977         };
1978 
1979         //! \name Initializations
1980 
1981         //! \brief Explicit member initialization function
1982         GPGPU_CSR_BASE_ADDRESS_CMD();
1983 
1984         static const size_t dwSize = 3;
1985         static const size_t byteSize = 12;
1986     };
1987 };
1988 }  // namespace xe2_hpg_next
1989 }  // namespace render
1990 }  // namespace mhw
1991 #pragma pack()
1992 
1993 #endif  // __MHW_RENDER_HWCMD_XE2_HPG_NEXT_H__