1 2 /*===================== begin_copyright_notice ================================== 3 4 * Copyright (c) 2024, Intel Corporation 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included 14 * in all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 * OTHER DEALINGS IN THE SOFTWARE. 23 24 ======================= end_copyright_notice ==================================*/ 25 //! 26 //! \file mhw_render_hwcmd_xe2_hpg_next.h 27 //! \brief Auto-generated constructors for MHW and states. 28 //! \details This file may not be included outside of xe2_hpg as other components 29 //! should use MHW interface to interact with MHW commands and states. 30 //! 
31 32 // DO NOT EDIT 33 34 #ifndef __MHW_RENDER_HWCMD_XE2_HPG_NEXT_H__ 35 #define __MHW_RENDER_HWCMD_XE2_HPG_NEXT_H__ 36 37 #include "mhw_hwcmd.h" 38 #pragma once 39 #pragma pack(1) 40 41 #include <cstdint> 42 #include <cstddef> 43 44 namespace mhw 45 { 46 namespace render 47 { 48 namespace xe2_hpg_next 49 { 50 struct Cmd 51 { 52 public: 53 // Internal Macros 54 #define __CODEGEN_MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b)) 55 #define __CODEGEN_BITFIELD(l, h) (h) - (l) + 1 56 #define __CODEGEN_OP_LENGTH_BIAS 2 57 #define __CODEGEN_OP_LENGTH(x) (uint32_t)((__CODEGEN_MAX(x, __CODEGEN_OP_LENGTH_BIAS)) - __CODEGEN_OP_LENGTH_BIAS) 58 GetOpLengthCmd59 static uint32_t GetOpLength(uint32_t uiLength) { return __CODEGEN_OP_LENGTH(uiLength); } 60 61 //! 62 //! \brief PIPELINE_SELECT 63 //! \details 64 //! The PIPELINE_SELECT command is used to specify which GPE pipeline is to 65 //! be considered the 'current' active pipeline. Issuing 66 //! 3D-pipeline-specific commands when the Media pipeline is selected, or 67 //! viceversa, is UNDEFINED. 68 //! 69 //! Issuing 3D-pipeline-specific commands when the GPGPU pipeline is 70 //! selected, or vice versa, is UNDEFINED. 71 //! 72 //! Programming common non pipeline commands (e.g., STATE_BASE_ADDRESS) is 73 //! allowed in all pipeline modes. 74 //! 75 //! Software must ensure Render Cache, Depth Cache and Dataport are flushed 76 //! through a stalling PIPE_CONTROL command prior to programming of 77 //! PIPELINE_SELECT command transitioning Pipeline Select from 3D to 78 //! GPGPU/Media. Similarly software must ensure Dataport flush is issued 79 //! through a stalling PIPE_CONTROL command prior to programming of 80 //! PIPELINE_SELECT command transitioning Pipeline Select from GPGPU/Media 81 //! to 3D. 82 //! Example: 83 //! 84 //! Workload-3Dmode, 85 //! 86 //! PIPE_CONTROL (CS Stall, Depth Cache Flush Enable, Render Target Cache 87 //! Flush Enable, Dataport Flush Enable) , 88 //! 89 //! PIPELINE_SELECT ( GPGPU), 90 //! 91 //! 
//!     Workload-GPGPUmode,
//!
//!     PIPE_CONTROL (CS Stall, Dataport Flush Enable),
//!
//!     PIPELINE_SELECT ( 3D) ...
//!
//!
//!
//!     Workaround:
//!     This command must be followed by a PIPE_CONTROL with CS Stall bit
//!     set.
//!
//!     "Pipe Selection" must never be set to "3D" in a PIPELINE_SELECT command
//!     programmed for workloads submitted to ComputeCS.
//!
//!     While GPU is operating in GPGPU mode
//!     of operation and when a Mid Thread Preemption (if enabled) occurs on a
//!     PIPELINE_SELECT command with Media Sampler DOP CG Enable reset along
//!     with Pipeline Select Mode set to 3D and on resubmission of this context
//!     on context restore Sampler DOP CG Enable will be reset. This would mean
//!     the GPGPU mid thread preempted threads restored will get executed with
//!     media sampler DOP clock not gated consuming media sampler DOP power
//!     until all GPGPU threads have retired.
//!
//!     Programming of the PIPELINE_SELECT
//!     can be modified to avoid the above inefficiency. This can be done by
//!     programming Pipeline Selection and Media Sampler DOP CG Enable fields in
//!     two different PIPELINE_SELECT commands instead of in a single
//!     PIPELINE_SELECT command.
//!
//!     Example:
//!     PIPELINE_SELECT ( Pipeline Selection = 3D, Media Sampler DOP
//!     CG Enable = False)
//!     To
//!     PIPELINE_SELECT ( Pipeline Selection = 3D)
//!     PIPELINE_SELECT (Media Sampler DOP CG Enable = False)
//!
127 struct PIPELINE_SELECT_CMD 128 { 129 union 130 { 131 struct 132 { 133 uint32_t PipelineSelection : __CODEGEN_BITFIELD( 0, 1) ; //!< PIPELINE_SELECTION 134 uint32_t RenderSliceCommonPowerGateEnable : __CODEGEN_BITFIELD( 2, 2) ; //!< RENDER_SLICE_COMMON_POWER_GATE_ENABLE 135 uint32_t RenderSamplerPowerGateEnable : __CODEGEN_BITFIELD( 3, 3) ; //!< RENDER_SAMPLER_POWER_GATE_ENABLE 136 uint32_t Reserved4 : __CODEGEN_BITFIELD( 4, 4) ; //!< Reserved 137 uint32_t EnableComputeTo3DPerformanceMode : __CODEGEN_BITFIELD( 5, 5) ; //!< Enable Compute to 3D performance mode 138 uint32_t Reserved6 : __CODEGEN_BITFIELD( 6, 6) ; //!< Reserved 139 uint32_t SystolicModeEnable : __CODEGEN_BITFIELD( 7, 7) ; //!< SYSTOLIC_MODE_ENABLE 140 uint32_t MaskBits : __CODEGEN_BITFIELD( 8, 15) ; //!< Mask Bits 141 uint32_t _3DCommandSubOpcode : __CODEGEN_BITFIELD(16, 23) ; //!< _3D_COMMAND_SUB_OPCODE 142 uint32_t _3DCommandOpcode : __CODEGEN_BITFIELD(24, 26) ; //!< _3D_COMMAND_OPCODE 143 uint32_t CommandSubtype : __CODEGEN_BITFIELD(27, 28) ; //!< COMMAND_SUBTYPE 144 uint32_t CommandType : __CODEGEN_BITFIELD(29, 31) ; //!< COMMAND_TYPE 145 }; 146 uint32_t Value; 147 } DW0; 148 149 //! \name Local enumerations 150 151 //! \brief PIPELINE_SELECTION 152 //! \details 153 //! Mask bits [9:8] has to be set for HW to look at this field when 154 //! PIPELINE_SELECT command is parsed. Setting only one of the mask bit [9] 155 //! or [8] is illegal. 156 enum PIPELINE_SELECTION 157 { 158 PIPELINE_SELECTION_3D = 0, //!< 3D pipeline is selected 159 PIPELINE_SELECTION_GPGPU = 2, //!< GPGPU pipeline is selected 160 }; 161 162 //! \brief RENDER_SLICE_COMMON_POWER_GATE_ENABLE 163 //! \details 164 //! Mask bit [10] has to be set for HW to look at this field when 165 //! PIPELINE_SELECT command is parsed. 166 enum RENDER_SLICE_COMMON_POWER_GATE_ENABLE 167 { 168 RENDER_SLICE_COMMON_POWER_GATE_ENABLE_DISABLED = 0, //!< Command Streamer sends message to PM to disable render slice common Power Gating. 
169 RENDER_SLICE_COMMON_POWER_GATE_ENABLE_ENABLED = 1, //!< Command Streamer sends message to PM to enable render slice common Power Gating. 170 }; 171 172 //! \brief RENDER_SAMPLER_POWER_GATE_ENABLE 173 //! \details 174 //! Mask bit [11] has to be set for HW to look at this field when 175 //! PIPELINE_SELECT command is parsed. 176 enum RENDER_SAMPLER_POWER_GATE_ENABLE 177 { 178 RENDER_SAMPLER_POWER_GATE_ENABLE_DISABLED = 0, //!< Command Streamer sends message to PM to disable render sampler Power Gating. 179 RENDER_SAMPLER_POWER_GATE_ENABLE_ENABLED = 1, //!< Command Streamer sends message to PM to enable render sampler Power Gating. 180 }; 181 182 //! \brief SYSTOLIC_MODE_ENABLE 183 //! \details 184 //! When set, this will enable systolic mode for the following 185 //! COMPUTE_WALKER commands. This will lower the Fmax to avoid ICC current 186 //! issues when executing systolic array commands in the execution units. If 187 //! this is not set prior to executing systolic array operations, the 188 //! context will be halted to avoid any ICC issues. 189 enum SYSTOLIC_MODE_ENABLE 190 { 191 SYSTOLIC_MODE_ENABLE_SYSTOLICMODEDISABLED = 0, //!< No additional details 192 SYSTOLIC_MODE_ENABLE_SYSTOLICMODEENABLED = 1, //!< No additional details 193 }; 194 195 enum _3D_COMMAND_SUB_OPCODE 196 { 197 _3D_COMMAND_SUB_OPCODE_PIPELINESELECT = 4, //!< No additional details 198 }; 199 200 enum _3D_COMMAND_OPCODE 201 { 202 _3D_COMMAND_OPCODE_GFXPIPENONPIPELINED = 1, //!< No additional details 203 }; 204 205 enum COMMAND_SUBTYPE 206 { 207 COMMAND_SUBTYPE_GFXPIPESINGLEDW = 1, //!< No additional details 208 }; 209 210 enum COMMAND_TYPE 211 { 212 COMMAND_TYPE_GFXPIPE = 3, //!< No additional details 213 }; 214 215 //! \name Initializations 216 217 //! \brief Explicit member initialization function 218 PIPELINE_SELECT_CMD(); 219 220 static const size_t dwSize = 1; 221 static const size_t byteSize = 4; 222 }; 223 224 //! 225 //! \brief STATE_BASE_ADDRESS 226 //! \details 227 //! 
//!     The STATE_BASE_ADDRESS command sets the base pointers for subsequent
//!     state, instruction, and media indirect object accesses by the GPE. For
//!     more information see the Base Address Utilization table in the Memory
//!     Access Indirection narrative topic.
//!
//!     The following commands must be reissued following any change to the base
//!     addresses: 3DSTATE_CC_POINTERS
//!     3DSTATE_BINDING_TABLE_POINTERS
//!     3DSTATE_SAMPLER_STATE_POINTERS
//!     3DSTATE_VIEWPORT_STATE_POINTERS
//!     Execution of this command causes a full pipeline flush, thus its
//!     use should be minimized for higher performance.
//!
//!     If 3DSTATE_PS_EXTRA::Pixel Shader Is Per Coarse Pixel == 1, the
//!     3DSTATE_CPS_POINTERS command must be reissued following any change to
//!     the dynamic state base address.
//!
//!     SW must always program PIPE_CONTROL with "CS Stall" and "Render Target
//!     Cache Flush Enable" set before programming STATE_BASE_ADDRESS command
//!     for GPGPU workloads, i.e. when pipeline select is GPGPU via
//!     PIPELINE_SELECT command. This is required to achieve better GPGPU
//!     preemption latencies in certain workload programming sequences. If
//!     programming PIPE_CONTROL has performance implications then preemption
//!     latencies can be traded off against performance by not implementing this
//!     programming note.
//!
//!     SW must always program PIPE_CONTROL command with HDC Pipeline Flush set
//!     prior to programming of STATE_BASE_ADDRESS command for GPGPU/Media
//!     workloads, i.e. when pipeline select is GPGPU or Media via PIPELINE_SELECT
//!     command. This is required to ensure the write data out of the prior
//!     thread group are flushed out prior to the state changes due to the
//!     programming of STATE_BASE_ADDRESS command take place.
//!
260 struct STATE_BASE_ADDRESS_CMD 261 { 262 union 263 { 264 struct 265 { 266 uint32_t DwordLength : __CODEGEN_BITFIELD( 0, 7) ; //!< DWORD_LENGTH 267 uint32_t Reserved8 : __CODEGEN_BITFIELD( 8, 15) ; //!< Reserved 268 uint32_t _3DCommandSubOpcode : __CODEGEN_BITFIELD(16, 23) ; //!< _3D_COMMAND_SUB_OPCODE 269 uint32_t _3DCommandOpcode : __CODEGEN_BITFIELD(24, 26) ; //!< _3D_COMMAND_OPCODE 270 uint32_t CommandSubtype : __CODEGEN_BITFIELD(27, 28) ; //!< COMMAND_SUBTYPE 271 uint32_t CommandType : __CODEGEN_BITFIELD(29, 31) ; //!< COMMAND_TYPE 272 }; 273 uint32_t Value; 274 } DW0; 275 union 276 { 277 struct 278 { 279 uint64_t GeneralStateBaseAddressModifyEnable : __CODEGEN_BITFIELD( 0, 0) ; //!< GENERAL_STATE_BASE_ADDRESS_MODIFY_ENABLE 280 uint64_t Reserved33 : __CODEGEN_BITFIELD( 1, 3) ; //!< Reserved 281 uint64_t GeneralStateMemoryObjectControlState : __CODEGEN_BITFIELD( 4, 10) ; //!< General State Memory Object Control State 282 uint64_t Reserved43 : __CODEGEN_BITFIELD(11, 11) ; //!< Reserved 283 uint64_t GeneralStateBaseAddress : __CODEGEN_BITFIELD(12, 63) ; //!< General State Base Address 284 }; 285 uint32_t Value[2]; 286 } DW1_2; 287 union 288 { 289 struct 290 { 291 uint32_t CoherencySettingModifyEnable : __CODEGEN_BITFIELD( 0, 0) ; //!< COHERENCY_SETTING_MODIFY_ENABLE 292 uint32_t Reserved97 : __CODEGEN_BITFIELD( 1, 13) ; //!< Reserved 293 uint32_t DisableSupportForMultiGpuAtomicsForStatelessAccesses : __CODEGEN_BITFIELD(14, 14) ; //!< DISABLE_SUPPORT_FOR_MULTI_GPU_ATOMICS_FOR_STATELESS_ACCESSES 294 uint32_t DisableSupportForMultiGpuPartialWritesForStatelessMessages : __CODEGEN_BITFIELD(15, 15) ; //!< DISABLE_SUPPORT_FOR_MULTI_GPU_PARTIAL_WRITES_FOR_STATELESS_MESSAGES 295 uint32_t StatelessDataPortAccessMemoryObjectControlState : __CODEGEN_BITFIELD(16, 22) ; //!< Stateless Data Port Access Memory Object Control State 296 uint32_t L1CacheControl : __CODEGEN_BITFIELD(23, 25) ; //!< L1 Cache Control 297 uint32_t Reserved122 : __CODEGEN_BITFIELD(26, 31) ; //!< 
Reserved 298 }; 299 uint32_t Value; 300 } DW3; 301 union 302 { 303 struct 304 { 305 uint64_t SurfaceStateBaseAddressModifyEnable : __CODEGEN_BITFIELD( 0, 0) ; //!< SURFACE_STATE_BASE_ADDRESS_MODIFY_ENABLE 306 uint64_t Reserved129 : __CODEGEN_BITFIELD( 1, 3) ; //!< Reserved 307 uint64_t SurfaceStateMemoryObjectControlState : __CODEGEN_BITFIELD( 4, 10) ; //!< Surface State Memory Object Control State 308 uint64_t Reserved139 : __CODEGEN_BITFIELD(11, 11) ; //!< Reserved 309 uint64_t SurfaceStateBaseAddress : __CODEGEN_BITFIELD(12, 63) ; //!< Surface State Base Address 310 }; 311 uint32_t Value[2]; 312 } DW4_5; 313 union 314 { 315 struct 316 { 317 uint64_t DynamicStateBaseAddressModifyEnable : __CODEGEN_BITFIELD( 0, 0) ; //!< DYNAMIC_STATE_BASE_ADDRESS_MODIFY_ENABLE 318 uint64_t Reserved193 : __CODEGEN_BITFIELD( 1, 3) ; //!< Reserved 319 uint64_t DynamicStateMemoryObjectControlState : __CODEGEN_BITFIELD( 4, 10) ; //!< Dynamic State Memory Object Control State 320 uint64_t Reserved203 : __CODEGEN_BITFIELD(11, 11) ; //!< Reserved 321 uint64_t DynamicStateBaseAddress : __CODEGEN_BITFIELD(12, 63) ; //!< Dynamic State Base Address 322 }; 323 uint32_t Value[2]; 324 } DW6_7; 325 union 326 { 327 struct 328 { 329 uint64_t IndirectObjectBaseAddressModifyEnable : __CODEGEN_BITFIELD( 0, 0) ; //!< INDIRECT_OBJECT_BASE_ADDRESS_MODIFY_ENABLE 330 uint64_t Reserved257 : __CODEGEN_BITFIELD( 1, 3) ; //!< Reserved 331 uint64_t IndirectObjectMemoryObjectControlState : __CODEGEN_BITFIELD( 4, 10) ; //!< Indirect Object Memory Object Control State 332 uint64_t Reserved267 : __CODEGEN_BITFIELD(11, 11) ; //!< Reserved 333 uint64_t IndirectObjectBaseAddress : __CODEGEN_BITFIELD(12, 63) ; //!< Indirect Object Base Address 334 }; 335 uint32_t Value[2]; 336 } DW8_9; 337 union 338 { 339 struct 340 { 341 uint64_t InstructionBaseAddressModifyEnable : __CODEGEN_BITFIELD( 0, 0) ; //!< INSTRUCTION_BASE_ADDRESS_MODIFY_ENABLE 342 uint64_t Reserved321 : __CODEGEN_BITFIELD( 1, 3) ; //!< Reserved 343 uint64_t 
InstructionMemoryObjectControlState : __CODEGEN_BITFIELD( 4, 10) ; //!< Instruction Memory Object Control State 344 uint64_t Reserved331 : __CODEGEN_BITFIELD(11, 11) ; //!< Reserved 345 uint64_t InstructionBaseAddress : __CODEGEN_BITFIELD(12, 63) ; //!< Instruction Base Address 346 }; 347 uint32_t Value[2]; 348 } DW10_11; 349 union 350 { 351 struct 352 { 353 uint32_t GeneralStateBufferSizeModifyEnable : __CODEGEN_BITFIELD( 0, 0) ; //!< GENERAL_STATE_BUFFER_SIZE_MODIFY_ENABLE 354 uint32_t Reserved385 : __CODEGEN_BITFIELD( 1, 11) ; //!< Reserved 355 uint32_t GeneralStateBufferSize : __CODEGEN_BITFIELD(12, 31) ; //!< General State Buffer Size 356 }; 357 uint32_t Value; 358 } DW12; 359 union 360 { 361 struct 362 { 363 uint32_t DynamicStateBufferSizeModifyEnable : __CODEGEN_BITFIELD( 0, 0) ; //!< DYNAMIC_STATE_BUFFER_SIZE_MODIFY_ENABLE 364 uint32_t Reserved417 : __CODEGEN_BITFIELD( 1, 11) ; //!< Reserved 365 uint32_t DynamicStateBufferSize : __CODEGEN_BITFIELD(12, 31) ; //!< Dynamic State Buffer Size 366 }; 367 uint32_t Value; 368 } DW13; 369 union 370 { 371 struct 372 { 373 uint32_t IndirectObjectBufferSizeModifyEnable : __CODEGEN_BITFIELD( 0, 0) ; //!< INDIRECT_OBJECT_BUFFER_SIZE_MODIFY_ENABLE 374 uint32_t Reserved449 : __CODEGEN_BITFIELD( 1, 11) ; //!< Reserved 375 uint32_t IndirectObjectBufferSize : __CODEGEN_BITFIELD(12, 31) ; //!< Indirect Object Buffer Size 376 }; 377 uint32_t Value; 378 } DW14; 379 union 380 { 381 struct 382 { 383 uint32_t InstructionBufferSizeModifyEnable : __CODEGEN_BITFIELD( 0, 0) ; //!< INSTRUCTION_BUFFER_SIZE_MODIFY_ENABLE 384 uint32_t Reserved481 : __CODEGEN_BITFIELD( 1, 11) ; //!< Reserved 385 uint32_t InstructionBufferSize : __CODEGEN_BITFIELD(12, 31) ; //!< Instruction Buffer Size 386 }; 387 uint32_t Value; 388 } DW15; 389 union 390 { 391 struct 392 { 393 uint64_t BindlessSurfaceStateBaseAddressModifyEnable : __CODEGEN_BITFIELD( 0, 0) ; //!< BINDLESS_SURFACE_STATE_BASE_ADDRESS_MODIFY_ENABLE 394 uint64_t Reserved513 : __CODEGEN_BITFIELD( 
1, 3) ; //!< Reserved 395 uint64_t BindlessSurfaceStateMemoryObjectControlState : __CODEGEN_BITFIELD( 4, 10) ; //!< Bindless Surface State Memory Object Control State 396 uint64_t Reserved523 : __CODEGEN_BITFIELD(11, 11) ; //!< Reserved 397 uint64_t BindlessSurfaceStateBaseAddress : __CODEGEN_BITFIELD(12, 63) ; //!< Bindless Surface State Base Address 398 }; 399 uint32_t Value[2]; 400 } DW16_17; 401 union 402 { 403 struct 404 { 405 uint32_t BindlessSurfaceStateSize ; //!< Bindless Surface State Size 406 }; 407 uint32_t Value; 408 } DW18; 409 union 410 { 411 struct 412 { 413 uint64_t BindlessSamplerStateBaseAddressModifyEnable : __CODEGEN_BITFIELD( 0, 0) ; //!< BINDLESS_SAMPLER_STATE_BASE_ADDRESS_MODIFY_ENABLE 414 uint64_t Reserved609 : __CODEGEN_BITFIELD( 1, 3) ; //!< Reserved 415 uint64_t BindlessSamplerStateMemoryObjectControlState : __CODEGEN_BITFIELD( 4, 10) ; //!< Bindless Sampler State Memory Object Control State 416 uint64_t Reserved619 : __CODEGEN_BITFIELD(11, 11) ; //!< Reserved 417 uint64_t BindlessSamplerStateBaseAddress : __CODEGEN_BITFIELD(12, 63) ; //!< Bindless Sampler State Base Address 418 }; 419 uint32_t Value[2]; 420 } DW19_20; 421 union 422 { 423 struct 424 { 425 uint32_t Reserved672 : __CODEGEN_BITFIELD( 0, 11) ; //!< Reserved 426 uint32_t BindlessSamplerStateBufferSize : __CODEGEN_BITFIELD(12, 31) ; //!< Bindless Sampler State Buffer Size 427 }; 428 uint32_t Value; 429 } DW21; 430 431 //! \name Local enumerations 432 433 enum _3D_COMMAND_SUB_OPCODE 434 { 435 _3D_COMMAND_SUB_OPCODE_STATEBASEADDRESS = 1, //!< No additional details 436 }; 437 438 enum _3D_COMMAND_OPCODE 439 { 440 _3D_COMMAND_OPCODE_GFXPIPENONPIPELINED = 1, //!< No additional details 441 }; 442 443 enum COMMAND_SUBTYPE 444 { 445 COMMAND_SUBTYPE_GFXPIPECOMMON = 0, //!< No additional details 446 }; 447 448 enum COMMAND_TYPE 449 { 450 COMMAND_TYPE_GFXPIPE = 3, //!< No additional details 451 }; 452 453 //! \brief GENERAL_STATE_BASE_ADDRESS_MODIFY_ENABLE 454 //! \details 455 //! 
The other fields in this DWord and the following DWord are updated 456 //! only when this bit is set. 457 enum GENERAL_STATE_BASE_ADDRESS_MODIFY_ENABLE 458 { 459 GENERAL_STATE_BASE_ADDRESS_MODIFY_ENABLE_DISABLE = 0, //!< Ignore the updated address. 460 GENERAL_STATE_BASE_ADDRESS_MODIFY_ENABLE_ENABLE = 1, //!< Modify the address. 461 }; 462 463 //! \brief COHERENCY_SETTING_MODIFY_ENABLE 464 //! \details 465 //! All the fields in this DW is only updated when this bit is set. 466 enum COHERENCY_SETTING_MODIFY_ENABLE 467 { 468 COHERENCY_SETTING_MODIFY_ENABLE_DISABLEWRITETOTHISDW = 0, //!< No additional details 469 COHERENCY_SETTING_MODIFY_ENABLE_ENABLEWRITETOTHISDW = 1, //!< No additional details 470 }; 471 472 //! \brief DISABLE_SUPPORT_FOR_MULTI_GPU_ATOMICS_FOR_STATELESS_ACCESSES 473 //! \details 474 //! Specifies whether sequential consistency of atomic memory operations are 475 //! supported across multiple GPUs. 476 enum DISABLE_SUPPORT_FOR_MULTI_GPU_ATOMICS_FOR_STATELESS_ACCESSES 477 { 478 DISABLE_SUPPORT_FOR_MULTI_GPU_ATOMICS_FOR_STATELESS_ACCESSES_ENABLE = 0, //!< Atomic memory operations from all GPUs to the same address is sequentially consistent. 479 DISABLE_SUPPORT_FOR_MULTI_GPU_ATOMICS_FOR_STATELESS_ACCESSES_DISABLE = 1, //!< Disable multi-GPU Atomic consistency. Atomic memory operations to the same address is sequentially consistent only if the operations are from the same GPU. 480 }; 481 482 //! \brief DISABLE_SUPPORT_FOR_MULTI_GPU_PARTIAL_WRITES_FOR_STATELESS_MESSAGES 483 //! \details 484 //! Specifies whether data-consistency on partial memory write operations 485 //! are supported across multiple GPUs. 486 enum DISABLE_SUPPORT_FOR_MULTI_GPU_PARTIAL_WRITES_FOR_STATELESS_MESSAGES 487 { 488 DISABLE_SUPPORT_FOR_MULTI_GPU_PARTIAL_WRITES_FOR_STATELESS_MESSAGES_ENABLED = 0, //!< Enable data consistency on multi-GPU partial memory writes. 
489 DISABLE_SUPPORT_FOR_MULTI_GPU_PARTIAL_WRITES_FOR_STATELESS_MESSAGES_DISABLED = 1, //!< Disable data consistency on multi-GPU partial memory writes. If multiple GPUs write different bytes of the same cacheline, the data may be corrupted. 490 }; 491 492 //! \brief SURFACE_STATE_BASE_ADDRESS_MODIFY_ENABLE 493 //! \details 494 //! The other fields in this DWord and the following DWord are updated only 495 //! when this bit is set. 496 enum SURFACE_STATE_BASE_ADDRESS_MODIFY_ENABLE 497 { 498 SURFACE_STATE_BASE_ADDRESS_MODIFY_ENABLE_DISABLE = 0, //!< Ignore the updated address. 499 SURFACE_STATE_BASE_ADDRESS_MODIFY_ENABLE_ENABLE = 1, //!< Modify the address. 500 }; 501 502 //! \brief DYNAMIC_STATE_BASE_ADDRESS_MODIFY_ENABLE 503 //! \details 504 //! The other fields in this DWord and the following DWord are updated only 505 //! when this bit is set. 506 enum DYNAMIC_STATE_BASE_ADDRESS_MODIFY_ENABLE 507 { 508 DYNAMIC_STATE_BASE_ADDRESS_MODIFY_ENABLE_DISABLE = 0, //!< Ignore the updated address. 509 DYNAMIC_STATE_BASE_ADDRESS_MODIFY_ENABLE_ENABLE = 1, //!< Modify the address. 510 }; 511 512 //! \brief INDIRECT_OBJECT_BASE_ADDRESS_MODIFY_ENABLE 513 //! \details 514 //! The other fields in this DWord and the following DWord are updated 515 //! only when this bit is set. 516 enum INDIRECT_OBJECT_BASE_ADDRESS_MODIFY_ENABLE 517 { 518 INDIRECT_OBJECT_BASE_ADDRESS_MODIFY_ENABLE_DISABLE = 0, //!< Ignore the updated address. 519 INDIRECT_OBJECT_BASE_ADDRESS_MODIFY_ENABLE_ENABLE = 1, //!< Modify the address. 520 }; 521 522 //! \brief INSTRUCTION_BASE_ADDRESS_MODIFY_ENABLE 523 //! \details 524 //! The other fields in this DWord and the following DWord are updated 525 //! only when this bit is set. 526 enum INSTRUCTION_BASE_ADDRESS_MODIFY_ENABLE 527 { 528 INSTRUCTION_BASE_ADDRESS_MODIFY_ENABLE_DISABLE = 0, //!< Ignore the updated address. 529 INSTRUCTION_BASE_ADDRESS_MODIFY_ENABLE_ENABLE = 1, //!< Modify the address. 530 }; 531 532 //! 
\brief GENERAL_STATE_BUFFER_SIZE_MODIFY_ENABLE 533 //! \details 534 //! The fields in this DWord are updated only when this bit is set. 535 enum GENERAL_STATE_BUFFER_SIZE_MODIFY_ENABLE 536 { 537 GENERAL_STATE_BUFFER_SIZE_MODIFY_ENABLE_DISABLE = 0, //!< Ignore the updated bound. 538 GENERAL_STATE_BUFFER_SIZE_MODIFY_ENABLE_ENABLE = 1, //!< Modify the updated bound. 539 }; 540 541 //! \brief DYNAMIC_STATE_BUFFER_SIZE_MODIFY_ENABLE 542 //! \details 543 //! FormatDesc 544 enum DYNAMIC_STATE_BUFFER_SIZE_MODIFY_ENABLE 545 { 546 DYNAMIC_STATE_BUFFER_SIZE_MODIFY_ENABLE_DISABLE = 0, //!< Ignore the updated bound. 547 DYNAMIC_STATE_BUFFER_SIZE_MODIFY_ENABLE_ENABLE = 1, //!< Modify the updated bound. 548 }; 549 550 //! \brief INDIRECT_OBJECT_BUFFER_SIZE_MODIFY_ENABLE 551 //! \details 552 //! FormatDesc 553 enum INDIRECT_OBJECT_BUFFER_SIZE_MODIFY_ENABLE 554 { 555 INDIRECT_OBJECT_BUFFER_SIZE_MODIFY_ENABLE_DISABLE = 0, //!< Ignore the updated bound. 556 INDIRECT_OBJECT_BUFFER_SIZE_MODIFY_ENABLE_ENABLE = 1, //!< Modify the updated bound. 557 }; 558 559 //! \brief INSTRUCTION_BUFFER_SIZE_MODIFY_ENABLE 560 //! \details 561 //! FormatDesc 562 enum INSTRUCTION_BUFFER_SIZE_MODIFY_ENABLE 563 { 564 INSTRUCTION_BUFFER_SIZE_MODIFY_ENABLE_DISABLE = 0, //!< Ignore the updated bound. 565 }; 566 567 //! \brief BINDLESS_SURFACE_STATE_BASE_ADDRESS_MODIFY_ENABLE 568 //! \details 569 //! The other fields in this DWord and the following two DWords are 570 //! updated only when this bit is set. 571 enum BINDLESS_SURFACE_STATE_BASE_ADDRESS_MODIFY_ENABLE 572 { 573 BINDLESS_SURFACE_STATE_BASE_ADDRESS_MODIFY_ENABLE_DISABLE = 0, //!< Ignore the updated address 574 BINDLESS_SURFACE_STATE_BASE_ADDRESS_MODIFY_ENABLE_ENABLE = 1, //!< Modify the address 575 }; 576 577 //! \brief BINDLESS_SAMPLER_STATE_BASE_ADDRESS_MODIFY_ENABLE 578 //! \details 579 //! The other fields in this DWord and the following two DWords are 580 //! updated only when this bit is set. 
581 enum BINDLESS_SAMPLER_STATE_BASE_ADDRESS_MODIFY_ENABLE 582 { 583 BINDLESS_SAMPLER_STATE_BASE_ADDRESS_MODIFY_ENABLE_DISABLE = 0, //!< Ignore the updated address 584 BINDLESS_SAMPLER_STATE_BASE_ADDRESS_MODIFY_ENABLE_ENABLE = 1, //!< Modify the address 585 }; 586 587 //! \name Initializations 588 589 //! \brief Explicit member initialization function 590 STATE_BASE_ADDRESS_CMD(); 591 592 static const size_t dwSize = 22; 593 static const size_t byteSize = 88; 594 }; 595 596 //! 597 //! \brief _3DSTATE_CHROMA_KEY 598 //! \details 599 //! The 3DSTATE_CHROMA_KEY instruction is used to program texture 600 //! color/chroma-key key values. A table containing four set of values is 601 //! supported. The ChromaKey Index sampler state variable is used to select 602 //! which table entry is associated with the map. Texture chromakey 603 //! functions are enabled and controlled via use of the ChromaKey Enable 604 //! texture sampler state variable.Texture Color Key (keying on a paletted 605 //! texture index) is not supported. 606 //! 607 //! Workaround 608 //! 609 //! : This command must be followed by a PIPE_CONTROL with CS Stall bit 610 //! set., 611 //! 
612 struct _3DSTATE_CHROMA_KEY_CMD 613 { 614 union 615 { 616 struct 617 { 618 uint32_t DwordLength : __CODEGEN_BITFIELD( 0, 7) ; //!< DWORD_LENGTH 619 uint32_t Reserved8 : __CODEGEN_BITFIELD( 8, 15) ; //!< Reserved 620 uint32_t _3DCommandSubOpcode : __CODEGEN_BITFIELD(16, 23) ; //!< _3D_COMMAND_SUB_OPCODE 621 uint32_t _3DCommandOpcode : __CODEGEN_BITFIELD(24, 26) ; //!< _3D_COMMAND_OPCODE 622 uint32_t CommandSubtype : __CODEGEN_BITFIELD(27, 28) ; //!< COMMAND_SUBTYPE 623 uint32_t CommandType : __CODEGEN_BITFIELD(29, 31) ; //!< COMMAND_TYPE 624 }; 625 uint32_t Value; 626 } DW0; 627 union 628 { 629 struct 630 { 631 uint32_t Reserved32 : __CODEGEN_BITFIELD( 0, 29) ; //!< Reserved 632 uint32_t ChromakeyTableIndex : __CODEGEN_BITFIELD(30, 31) ; //!< ChromaKey Table Index 633 }; 634 uint32_t Value; 635 } DW1; 636 union 637 { 638 struct 639 { 640 uint32_t ChromakeyLowValue ; //!< ChromaKey Low Value 641 }; 642 uint32_t Value; 643 } DW2; 644 union 645 { 646 struct 647 { 648 uint32_t ChromakeyHighValue ; //!< ChromaKey High Value 649 }; 650 uint32_t Value; 651 } DW3; 652 653 //! \name Local enumerations 654 655 enum _3D_COMMAND_SUB_OPCODE 656 { 657 _3D_COMMAND_SUB_OPCODE_3DSTATECHROMAKEY = 4, //!< No additional details 658 }; 659 660 enum _3D_COMMAND_OPCODE 661 { 662 _3D_COMMAND_OPCODE_3DSTATENONPIPELINED = 1, //!< No additional details 663 }; 664 665 enum COMMAND_SUBTYPE 666 { 667 COMMAND_SUBTYPE_GFXPIPE3D = 3, //!< No additional details 668 }; 669 670 enum COMMAND_TYPE 671 { 672 COMMAND_TYPE_GFXPIPE = 3, //!< No additional details 673 }; 674 675 //! \name Initializations 676 677 //! \brief Explicit member initialization function 678 _3DSTATE_CHROMA_KEY_CMD(); 679 680 static const size_t dwSize = 4; 681 static const size_t byteSize = 16; 682 }; 683 684 //! 685 //! \brief STATE_SIP 686 //! \details 687 //! The STATE_SIP command specifies the starting instruction location of the 688 //! System Routine that is shared by all threads in execution. 689 //! 690 //! 
Workaround 691 //! 692 //! : This command must be followed by a PIPE_CONTROL with CS Stall bit 693 //! set., 694 //! 695 struct STATE_SIP_CMD 696 { 697 union 698 { 699 struct 700 { 701 uint32_t DwordLength : __CODEGEN_BITFIELD( 0, 7) ; //!< DWORD_LENGTH 702 uint32_t Reserved8 : __CODEGEN_BITFIELD( 8, 15) ; //!< Reserved 703 uint32_t _3DCommandSubOpcode : __CODEGEN_BITFIELD(16, 23) ; //!< _3D_COMMAND_SUB_OPCODE 704 uint32_t _3DCommandOpcode : __CODEGEN_BITFIELD(24, 26) ; //!< _3D_COMMAND_OPCODE 705 uint32_t CommandSubtype : __CODEGEN_BITFIELD(27, 28) ; //!< COMMAND_SUBTYPE 706 uint32_t CommandType : __CODEGEN_BITFIELD(29, 31) ; //!< COMMAND_TYPE 707 }; 708 uint32_t Value; 709 } DW0; 710 union 711 { 712 struct 713 { 714 uint64_t Reserved32 : __CODEGEN_BITFIELD( 0, 3) ; //!< Reserved 715 uint64_t SystemInstructionPointer : __CODEGEN_BITFIELD( 4, 63) ; //!< System Instruction Pointer 716 }; 717 uint32_t Value[2]; 718 } DW1_2; 719 720 //! \name Local enumerations 721 722 enum _3D_COMMAND_SUB_OPCODE 723 { 724 _3D_COMMAND_SUB_OPCODE_STATESIP = 2, //!< No additional details 725 }; 726 727 enum _3D_COMMAND_OPCODE 728 { 729 _3D_COMMAND_OPCODE_GFXPIPENONPIPELINED = 1, //!< No additional details 730 }; 731 732 enum COMMAND_SUBTYPE 733 { 734 COMMAND_SUBTYPE_GFXPIPECOMMON = 0, //!< No additional details 735 }; 736 737 enum COMMAND_TYPE 738 { 739 COMMAND_TYPE_GFXPIPE = 3, //!< No additional details 740 }; 741 742 //! \name Initializations 743 744 //! \brief Explicit member initialization function 745 STATE_SIP_CMD(); 746 747 static const size_t dwSize = 3; 748 static const size_t byteSize = 12; 749 }; 750 751 //! 752 //! \brief _3DSTATE_BINDING_TABLE_POOL_ALLOC 753 //! \details 754 //! This command is to program the base address and size of the binding 755 //! table pool. The address to fetch the binding table is based on the 756 //! Binding Table Pool Base Address and the binding table pointer if the 757 //! Binding Table Pool is enabled. 
//! Otherwise the binding table pointer is an
//! offset from the Surface Base Address.
//!
struct _3DSTATE_BINDING_TABLE_POOL_ALLOC_CMD
{
    // DW0: command header - DWORD length plus the opcode/sub-opcode/subtype/type
    // selectors. `Value` aliases the same 32 bits for raw access.
    union
    {
        struct
        {
            uint32_t DwordLength : __CODEGEN_BITFIELD( 0, 7) ; //!< DWORD_LENGTH
            uint32_t Reserved8 : __CODEGEN_BITFIELD( 8, 15) ; //!< Reserved
            uint32_t _3DCommandSubOpcode : __CODEGEN_BITFIELD(16, 23) ; //!< _3D_COMMAND_SUB_OPCODE
            uint32_t _3DCommandOpcode : __CODEGEN_BITFIELD(24, 26) ; //!< _3D_COMMAND_OPCODE
            uint32_t CommandSubtype : __CODEGEN_BITFIELD(27, 28) ; //!< COMMAND_SUBTYPE
            uint32_t CommandType : __CODEGEN_BITFIELD(29, 31) ; //!< COMMAND_TYPE
        };
        uint32_t Value;
    } DW0;
    // DW1..DW2: a single 64-bit group - 7-bit surface object control state,
    // 5 reserved bits, then the 52-bit pool base address field.
    union
    {
        struct
        {
            uint64_t SurfaceObjectControlState : __CODEGEN_BITFIELD( 0, 6) ; //!< Surface Object Control State
            uint64_t Reserved39 : __CODEGEN_BITFIELD( 7, 11) ; //!< Reserved
            uint64_t BindingTablePoolBaseAddress : __CODEGEN_BITFIELD(12, 63) ; //!< Binding Table Pool Base Address
        };
        uint32_t Value[2];
    } DW1_2;
    // DW3: 12 reserved bits followed by the 20-bit pool buffer size field.
    union
    {
        struct
        {
            uint32_t Reserved96 : __CODEGEN_BITFIELD( 0, 11) ; //!< Reserved
            uint32_t BindingTablePoolBufferSize : __CODEGEN_BITFIELD(12, 31) ; //!< BINDING_TABLE_POOL_BUFFER_SIZE
        };
        uint32_t Value;
    } DW3;

    //! \name Local enumerations

    // Encoding for DW0._3DCommandSubOpcode.
    enum _3D_COMMAND_SUB_OPCODE
    {
        _3D_COMMAND_SUB_OPCODE_3DSTATEBINDINGTABLEPOOLALLOC = 25, //!< No additional details
    };

    // Encoding for DW0._3DCommandOpcode.
    enum _3D_COMMAND_OPCODE
    {
        _3D_COMMAND_OPCODE_3DSTATENONPIPELINED = 1, //!< No additional details
    };

    // Encoding for DW0.CommandSubtype.
    enum COMMAND_SUBTYPE
    {
        COMMAND_SUBTYPE_GFXPIPE3D = 3, //!< No additional details
    };

    // Encoding for DW0.CommandType.
    enum COMMAND_TYPE
    {
        COMMAND_TYPE_GFXPIPE = 3, //!< No additional details
    };

    //! \brief BINDING_TABLE_POOL_BUFFER_SIZE
    //! \details
    //! This field specifies the size of the buffer in 4K pages. Any accesses
    //! which straddle or go past the end of the buffer will return 0.
    enum BINDING_TABLE_POOL_BUFFER_SIZE
    {
        BINDING_TABLE_POOL_BUFFER_SIZE_NOVALIDDATA = 0, //!< There is no valid data in the buffer
    };

    //! \name Initializations

    //! \brief Explicit member initialization function
    // Declared only; the definition is not part of this header section.
    _3DSTATE_BINDING_TABLE_POOL_ALLOC_CMD();

    static const size_t dwSize = 4; //!< Command size in DWORDs (DW0 + DW1_2 + DW3)
    static const size_t byteSize = 16; //!< Command size in bytes (dwSize * 4)
};

//!
//! \brief COMPUTE_WALKER
//! \details
//! COMPUTE_WALKER spawns threadgroups in 1, 2, or 3 dimensions (X, Y, Z).
//! Each threadgroup is described by Interface Descriptor in this command.
//! Each dispatched thread has a standard payload delivered in R0,
//! including the Indirect Address to fetch the thread's parameters.
//! After the Walker completes dispatching its threads and those threads
//! have completed running, a PostSync operation can write a completion code
//! or a timestamp.
//!
//! If the threads spawned by this command are required to observe memory
//! writes performed by threads spawned from a previous command, and if
//! those threads did not perform a Memory Fence before they exited, then
//! software must precede this command with a PIPE_CONTROL with Dataport
//! Flush control.
//!
struct COMPUTE_WALKER_CMD
{
    // Layout summary (member order is the command layout):
    //   DW0..DW18            19 DWORDs of walker parameters
    //   InterfaceDescriptor   8 DWORDs (INTERFACE_DESCRIPTOR_DATA_CMD::dwSize)
    //   PostSync              5 DWORDs (POSTSYNC_DATA_CMD::dwSize)
    //   InlineData            8 DWORDs (INLINE_DATA_CMD::dwSize)
    // 19 + 8 + 5 + 8 = 40 = dwSize.
    // Each DWn union exposes named bit-fields and a raw `Value` alias.

    // DW0: command header and dispatch control bits.
    union
    {
        struct
        {
            uint32_t DwordLength : __CODEGEN_BITFIELD( 0, 7) ; //!< DWORD_LENGTH
            uint32_t PredicateEnable : __CODEGEN_BITFIELD( 8, 8) ; //!< Predicate Enable
            uint32_t WorkloadPartitionEnable : __CODEGEN_BITFIELD( 9, 9) ; //!< Workload Partition Enable
            uint32_t IndirectParameterEnable : __CODEGEN_BITFIELD(10, 10) ; //!< Indirect Parameter Enable
            uint32_t Reserved11 : __CODEGEN_BITFIELD(11, 12) ; //!< Reserved
            uint32_t DispatchComplete : __CODEGEN_BITFIELD(13, 13) ; //!< Dispatch Complete
            uint32_t SystolicModeEnable : __CODEGEN_BITFIELD(14, 14) ; //!< Systolic Mode Enable
            uint32_t CfeSubopcodeVariant : __CODEGEN_BITFIELD(15, 17) ; //!< CFE_SUBOPCODE_VARIANT
            uint32_t CfeSubopcode : __CODEGEN_BITFIELD(18, 23) ; //!< CFE_SUBOPCODE
            uint32_t ComputeCommandOpcode : __CODEGEN_BITFIELD(24, 26) ; //!< COMPUTE_COMMAND_OPCODE
            uint32_t Pipeline : __CODEGEN_BITFIELD(27, 28) ; //!< PIPELINE
            uint32_t CommandType : __CODEGEN_BITFIELD(29, 31) ; //!< COMMAND_TYPE
        };
        uint32_t Value;
    } DW0;
    // DW1: debug object ID.
    union
    {
        struct
        {
            uint32_t Reserved32 : __CODEGEN_BITFIELD( 0, 7) ; //!< Reserved
            uint32_t DebugObjectId : __CODEGEN_BITFIELD( 8, 31) ; //!< Debug: Object ID
        };
        uint32_t Value;
    } DW1;
    // DW2: indirect data length and partitioning controls.
    union
    {
        struct
        {
            uint32_t IndirectDataLength : __CODEGEN_BITFIELD( 0, 16) ; //!< Indirect Data Length
            uint32_t L3PrefetchDisable : __CODEGEN_BITFIELD(17, 17) ; //!< L3 prefetch disable
            uint32_t PartitionDispatchParameter : __CODEGEN_BITFIELD(18, 29) ; //!< Partition Dispatch Parameter
            uint32_t PartitionType : __CODEGEN_BITFIELD(30, 31) ; //!< PARTITION_TYPE
        };
        uint32_t Value;
    } DW2;
    // DW3: indirect data start address (upper 26 bits of the DWORD).
    union
    {
        struct
        {
            uint32_t Reserved96 : __CODEGEN_BITFIELD( 0, 5) ; //!< Reserved
            uint32_t IndirectDataStartAddress : __CODEGEN_BITFIELD( 6, 31) ; //!< Indirect Data Start Address
        };
        uint32_t Value;
    } DW3;
    // DW4: SIMD size, walk order and local-ID generation controls.
    union
    {
        struct
        {
            uint32_t Reserved128 : __CODEGEN_BITFIELD( 0, 16) ; //!< Reserved
            uint32_t MessageSIMD : __CODEGEN_BITFIELD(17, 18) ; //!< MESSAGE_SIMD
            uint32_t TileLayout : __CODEGEN_BITFIELD(19, 21) ; //!< TILE_LAYOUT
            uint32_t WalkOrder : __CODEGEN_BITFIELD(22, 24) ; //!< WALK_ORDER
            uint32_t EmitInlineParameter : __CODEGEN_BITFIELD(25, 25) ; //!< Emit Inline Parameter
            uint32_t EmitLocal : __CODEGEN_BITFIELD(26, 28) ; //!< EMIT_LOCAL
            uint32_t GenerateLocalId : __CODEGEN_BITFIELD(29, 29) ; //!< Generate Local ID
            uint32_t SIMDSize : __CODEGEN_BITFIELD(30, 31) ; //!< SIMD_SIZE
        };
        uint32_t Value;
    } DW4;
    // DW5: full 32-bit execution mask.
    union
    {
        struct
        {
            uint32_t ExecutionMask ; //!< Execution Mask
        };
        uint32_t Value;
    } DW5;
    // DW6: local X/Y/Z maximums, 10 bits each.
    union
    {
        struct
        {
            uint32_t LocalXMaximum : __CODEGEN_BITFIELD( 0, 9) ; //!< Local X Maximum
            uint32_t LocalYMaximum : __CODEGEN_BITFIELD(10, 19) ; //!< Local Y Maximum
            uint32_t LocalZMaximum : __CODEGEN_BITFIELD(20, 29) ; //!< Local Z Maximum
            uint32_t Reserved222 : __CODEGEN_BITFIELD(30, 31) ; //!< Reserved
        };
        uint32_t Value;
    } DW6;
    // DW7..DW9: thread group ID dimensions (X, Y, Z), one DWORD each.
    union
    {
        struct
        {
            uint32_t ThreadGroupIDXDimension ; //!< Thread Group ID X Dimension
        };
        uint32_t Value;
    } DW7;
    union
    {
        struct
        {
            uint32_t ThreadGroupIDYDimension ; //!< Thread Group ID Y Dimension
        };
        uint32_t Value;
    } DW8;
    union
    {
        struct
        {
            uint32_t ThreadGroupIDZDimension ; //!< Thread Group ID Z Dimension
        };
        uint32_t Value;
    } DW9;
    // DW10..DW12: thread group ID starting values (X, Y, Z), one DWORD each.
    union
    {
        struct
        {
            uint32_t ThreadGroupIDStartingX ; //!< Thread Group ID Starting X
        };
        uint32_t Value;
    } DW10;
    union
    {
        struct
        {
            uint32_t ThreadGroupIDStartingY ; //!< Thread Group ID Starting Y
        };
        uint32_t Value;
    } DW11;
    union
    {
        struct
        {
            uint32_t ThreadGroupIDStartingZ ; //!< Thread Group ID Starting Z
        };
        uint32_t Value;
    } DW12;
    // DW13..DW14: partition ID and partition size.
    union
    {
        struct
        {
            uint32_t PartitionId ; //!< Partition ID
        };
        uint32_t Value;
    } DW13;
    union
    {
        struct
        {
            uint32_t PartitionSize ; //!< Partition Size
        };
        uint32_t Value;
    } DW14;
    // DW15..DW17: preempt X/Y/Z, one DWORD each.
    union
    {
        struct
        {
            uint32_t PreemptX ; //!< Preempt X
        };
        uint32_t Value;
    } DW15;
    union
    {
        struct
        {
            uint32_t PreemptY ; //!< Preempt Y
        };
        uint32_t Value;
    } DW16;
    union
    {
        struct
        {
            uint32_t PreemptZ ; //!< Preempt Z
        };
        uint32_t Value;
    } DW17;
    // DW18: walker ID and over-dispatch thread group count.
    union
    {
        struct
        {
            uint32_t WalkerId : __CODEGEN_BITFIELD( 0, 3) ; //!< Walker ID
            uint32_t Reserved580 : __CODEGEN_BITFIELD( 4, 7) ; //!< Reserved
            uint32_t OverDispatchTgCount : __CODEGEN_BITFIELD( 8, 23) ; //!< Over dispatch TG count
            uint32_t Reserved600 : __CODEGEN_BITFIELD(24, 31) ; //!< Reserved
        };
        uint32_t Value;
    } DW18;

    //!
    //! \brief INTERFACE_DESCRIPTOR_DATA
    //! \details
    //!
    //!
    // Nested 8-DWORD structure, embedded below as the `InterfaceDescriptor` member.
    struct INTERFACE_DESCRIPTOR_DATA_CMD
    {
        // DW0: kernel start pointer (upper 26 bits of the DWORD).
        union
        {
            struct
            {
                uint32_t Reserved0 : __CODEGEN_BITFIELD( 0, 5) ; //!< Reserved
                uint32_t KernelStartPointer : __CODEGEN_BITFIELD( 6, 31) ; //!< Kernel Start Pointer
            };
            uint32_t Value;
        } DW0;
        // DW1: fully reserved.
        union
        {
            struct
            {
                uint32_t Reserved32 ; //!< Reserved
            };
            uint32_t Value;
        } DW1;
        // DW2: exception enables and per-thread execution mode bits.
        union
        {
            struct
            {
                uint32_t Reserved64 : __CODEGEN_BITFIELD( 0, 6) ; //!< Reserved
                uint32_t SoftwareExceptionEnable : __CODEGEN_BITFIELD( 7, 7) ; //!< Software Exception Enable
                uint32_t Reserved72 : __CODEGEN_BITFIELD( 8, 10) ; //!< Reserved
                uint32_t MaskStackExceptionEnable : __CODEGEN_BITFIELD(11, 11) ; //!< Mask Stack Exception Enable
                uint32_t Reserved76 : __CODEGEN_BITFIELD(12, 12) ; //!< Reserved
                uint32_t IllegalOpcodeExceptionEnable : __CODEGEN_BITFIELD(13, 13) ; //!< Illegal Opcode Exception Enable
                uint32_t Reserved78 : __CODEGEN_BITFIELD(14, 15) ; //!< Reserved
                uint32_t FloatingPointMode : __CODEGEN_BITFIELD(16, 16) ; //!< FLOATING_POINT_MODE
                uint32_t Reserved81 : __CODEGEN_BITFIELD(17, 17) ; //!< Reserved
                uint32_t SingleProgramFlow : __CODEGEN_BITFIELD(18, 18) ; //!< SINGLE_PROGRAM_FLOW
                uint32_t DenormMode : __CODEGEN_BITFIELD(19, 19) ; //!< DENORM_MODE
                uint32_t ThreadPreemption : __CODEGEN_BITFIELD(20, 20) ; //!< THREAD_PREEMPTION
                uint32_t Reserved85 : __CODEGEN_BITFIELD(21, 31) ; //!< Reserved
            };
            uint32_t Value;
        } DW2;
        // DW3: sampler count and sampler state pointer.
        union
        {
            struct
            {
                uint32_t Reserved96 : __CODEGEN_BITFIELD( 0, 1) ; //!< Reserved
                uint32_t SamplerCount : __CODEGEN_BITFIELD( 2, 4) ; //!< SAMPLER_COUNT
                uint32_t SamplerStatePointer : __CODEGEN_BITFIELD( 5, 31) ; //!< Sampler State Pointer
            };
            uint32_t Value;
        } DW3;
        // DW4: binding table entry count and binding table pointer.
        union
        {
            struct
            {
                uint32_t BindingTableEntryCount : __CODEGEN_BITFIELD( 0, 4) ; //!< BINDING_TABLE_ENTRY_COUNT
                uint32_t BindingTablePointer : __CODEGEN_BITFIELD( 5, 20) ; //!< Binding Table Pointer
                uint32_t Reserved149 : __CODEGEN_BITFIELD(21, 31) ; //!< Reserved
            };
            uint32_t Value;
        } DW4;
        // DW5: thread group sizing, SLM size, rounding, barriers and BTD mode.
        union
        {
            struct
            {
                uint32_t NumberOfThreadsInGpgpuThreadGroup : __CODEGEN_BITFIELD( 0, 9) ; //!< Number of Threads in GPGPU Thread Group
                uint32_t Reserved170 : __CODEGEN_BITFIELD(10, 12) ; //!< Reserved
                uint32_t ThreadGroupForwardProgressGuarantee : __CODEGEN_BITFIELD(13, 13) ; //!< THREAD_GROUP_FORWARD_PROGRESS_GUARANTEE
                uint32_t Reserved174 : __CODEGEN_BITFIELD(14, 15) ; //!< Reserved
                uint32_t SharedLocalMemorySize : __CODEGEN_BITFIELD(16, 20) ; //!< SHARED_LOCAL_MEMORY_SIZE
                uint32_t Reserved181 : __CODEGEN_BITFIELD(21, 21) ; //!< Reserved
                uint32_t RoundingMode : __CODEGEN_BITFIELD(22, 23) ; //!< ROUNDING_MODE
                uint32_t Reserved184 : __CODEGEN_BITFIELD(24, 25) ; //!< Reserved
                uint32_t ThreadGroupDispatchSize : __CODEGEN_BITFIELD(26, 27) ; //!< THREAD_GROUP_DISPATCH_SIZE
                uint32_t NumberOfBarriers : __CODEGEN_BITFIELD(28, 30) ; //!< NUMBER_OF_BARRIERS
                uint32_t BtdMode : __CODEGEN_BITFIELD(31, 31) ; //!< BTD_MODE
            };
            uint32_t Value;
        } DW5;
        // DW6: fully reserved.
        union
        {
            struct
            {
                uint32_t Reserved192 ; //!< Reserved
            };
            uint32_t Value;
        } DW6;
        // DW7: preferred SLM allocation size per subslice (low 4 bits).
        union
        {
            struct
            {
                uint32_t PreferredSlmAllocationSizePerSubslice : __CODEGEN_BITFIELD( 0, 3) ; //!< PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE
                uint32_t Reserved228 : __CODEGEN_BITFIELD( 4, 31) ; //!< Reserved
            };
            uint32_t Value;
        } DW7;

        //! \name Local enumerations

        //! \brief FLOATING_POINT_MODE
        //! \details
        //! Specifies the floating point mode used by the dispatched thread.
        enum FLOATING_POINT_MODE
        {
            FLOATING_POINT_MODE_IEEE_754 = 0, //!< No additional details
            FLOATING_POINT_MODE_ALTERNATE = 1, //!< No additional details
        };

        //! \brief SINGLE_PROGRAM_FLOW
        //! \details
        //! Specifies whether the kernel program has a single program flow (SIMDnxm
        //! with m = 1) or multiple program flows (SIMDnxm with m > 1).
        enum SINGLE_PROGRAM_FLOW
        {
            SINGLE_PROGRAM_FLOW_MULTIPLE = 0, //!< No additional details
            SINGLE_PROGRAM_FLOW_SINGLE = 1, //!< No additional details
        };

        //! \brief DENORM_MODE
        //! \details
        //! This field specifies how Float denormalized numbers are handled in the
        //! dispatched thread.
        enum DENORM_MODE
        {
            DENORM_MODE_FTZ = 0, //!< Float denorms will be flushed to zero when appearing as inputs; denorms will never come out of instructions. Double precision float and half precision float numbers are not flushed to zero.
            DENORM_MODE_SETBYKERNEL = 1, //!< Denorms will be handled by the kernel.
        };

        //! \brief THREAD_PREEMPTION
        //! \details
        //! This field specifies whether, when dispatched, the thread is allowed to
        //! stop in middle on receiving mid-thread pre-emption request.
        enum THREAD_PREEMPTION
        {
            THREAD_PREEMPTION_DISABLE = 0, //!< Thread is pre-empted only in case of page-fault.
            THREAD_PREEMPTION_ENABLE = 1, //!< Thread is pre-empted on receiving pre-emption indication.
        };

        //! \brief SAMPLER_COUNT
        //! \details
        //! Specifies how many samplers (in multiples of 4) the kernel uses. Used
        //! only for prefetching the associated sampler state entries.
        enum SAMPLER_COUNT
        {
            SAMPLER_COUNT_NOSAMPLERSUSED = 0, //!< No additional details
            SAMPLER_COUNT_BETWEEN1AND4SAMPLERSUSED = 1, //!< No additional details
            SAMPLER_COUNT_BETWEEN5AND8SAMPLERSUSED = 2, //!< No additional details
            SAMPLER_COUNT_BETWEEN9AND12SAMPLERSUSED = 3, //!< No additional details
            SAMPLER_COUNT_BETWEEN13AND16SAMPLERSUSED = 4, //!< No additional details
        };

        //! \brief BINDING_TABLE_ENTRY_COUNT
        //! \details
        //! Specifies how many binding table entries the kernel uses. Used only for
        //! prefetching of the binding table entries and associated surface state.
        enum BINDING_TABLE_ENTRY_COUNT
        {
            BINDING_TABLE_ENTRY_COUNT_PREFETCHDISABLED = 0, //!< No additional details
        };

        //! \brief THREAD_GROUP_FORWARD_PROGRESS_GUARANTEE
        //! \details
        //! Set by the kernel if TG requires synchronization at memory. If this bit
        //! is set, HW must enable the barrier when mid thread preemption is
        //! enabled.
        enum THREAD_GROUP_FORWARD_PROGRESS_GUARANTEE
        {
            THREAD_GROUP_FORWARD_PROGRESS_GUARANTEE_DISABLE = 0, //!< CFEG HW does not set an implicit barrier when WMTP is enabled
            THREAD_GROUP_FORWARD_PROGRESS_GUARANTEE_ENABLE = 1, //!< CFEG HW forces an implicit barrier when WMTP is enabled.
        };

        //! \brief SHARED_LOCAL_MEMORY_SIZE
        //! \details
        //! This field indicates how much Shared Local Memory the thread group
        //! requires.
        //! If the barriers are not enabled, HW will enable at least 1 barrier for
        //! Mid thread preemption to work.
        // Note: the encoding is not monotonic in size (e.g. 7 = 64K but 8 = 24K).
        enum SHARED_LOCAL_MEMORY_SIZE
        {
            SHARED_LOCAL_MEMORY_SIZE_SLMENCODES0K = 0, //!< No additional details
            SHARED_LOCAL_MEMORY_SIZE_SLMENCODES1K = 1, //!< No additional details
            SHARED_LOCAL_MEMORY_SIZE_SLMENCODES2K = 2, //!< No additional details
            SHARED_LOCAL_MEMORY_SIZE_SLMENCODES4K = 3, //!< No additional details
            SHARED_LOCAL_MEMORY_SIZE_SLMENCODES8K = 4, //!< No additional details
            SHARED_LOCAL_MEMORY_SIZE_SLMENCODES16K = 5, //!< No additional details
            SHARED_LOCAL_MEMORY_SIZE_SLMENCODES32K = 6, //!< No additional details
            SHARED_LOCAL_MEMORY_SIZE_SLMENCODES64K = 7, //!< No additional details
            SHARED_LOCAL_MEMORY_SIZE_SLMENCODES24K = 8, //!< No additional details
            SHARED_LOCAL_MEMORY_SIZE_SLMENCODES48K = 9, //!< No additional details
            SHARED_LOCAL_MEMORY_SIZE_SLMENCODES96K = 10, //!< No additional details
            SHARED_LOCAL_MEMORY_SIZE_SLMENCODES128K = 11, //!< No additional details
            SHARED_LOCAL_MEMORY_SIZE_SLMENCODES192K = 12, //!< No additional details
            SHARED_LOCAL_MEMORY_SIZE_SLMENCODES256K = 13, //!< No additional details
            SHARED_LOCAL_MEMORY_SIZE_SLMENCODES384K = 14, //!< No additional details
        };

        // Encoding for DW5.RoundingMode.
        enum ROUNDING_MODE
        {
            ROUNDING_MODE_RTNE = 0, //!< Round to Nearest Even
            ROUNDING_MODE_RU = 1, //!< Round toward +Infinity
            ROUNDING_MODE_RD = 2, //!< Round toward -Infinity
            ROUNDING_MODE_RTZ = 3, //!< Round toward Zero
        };

        //! \brief THREAD_GROUP_DISPATCH_SIZE
        //! \details
        //! Provides a mechanism for Software to tune the settings based on WLs
        //! to evenly distribute the threads across the entire m/c.
        //! The recommended settings is just a guidance and not a programming
        //! requirement.
        enum THREAD_GROUP_DISPATCH_SIZE
        {
            THREAD_GROUP_DISPATCH_SIZE_TGSIZE8 = 0, //!< The dispatch size is 8 thread groups.
            THREAD_GROUP_DISPATCH_SIZE_TGSIZE4 = 1, //!< The dispatch size is 4 thread groups.
            THREAD_GROUP_DISPATCH_SIZE_TGSIZE2 = 2, //!< The dispatch size is 2 thread groups.
            THREAD_GROUP_DISPATCH_SIZE_TGSIZE1 = 3, //!< The dispatch size is 1 thread group.
        };

        //! \brief NUMBER_OF_BARRIERS
        //! \details
        //! Specifies number of barriers in the threadgroup.
        enum NUMBER_OF_BARRIERS
        {
            NUMBER_OF_BARRIERS_NONE = 0, //!< No additional details
            NUMBER_OF_BARRIERS_B1 = 1, //!< No additional details
            NUMBER_OF_BARRIERS_B2 = 2, //!< No additional details
            NUMBER_OF_BARRIERS_B4 = 3, //!< No additional details
            NUMBER_OF_BARRIERS_B8 = 4, //!< No additional details
            NUMBER_OF_BARRIERS_B16 = 5, //!< No additional details
            NUMBER_OF_BARRIERS_B24 = 6, //!< No additional details
            NUMBER_OF_BARRIERS_B32 = 7, //!< No additional details
        };

        //! \brief BTD_MODE
        //! \details
        //! If this field is valid, it means that the Compute pipeline is
        //! dispatching BTD threads.
        enum BTD_MODE
        {
            BTD_MODE_DISABLE = 0, //!< Normal thread dispatch
            BTD_MODE_ENABLE = 1, //!< When walker dispatched compute kernels either perform messages to the Bindless Thread Dispatch (BTD) shared function or Ray Tracing HW shared function, this bit must be enabled. When this bit is enabled, neither SLM nor barrier is available.
        };

        //! \brief PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE
        //! \details
        //! For products where SLM and Subslice L1 cache share a common,
        //! re-partitionable RAM, this field indicates the preferred SLM size per
        //! Subslice for this dispatch. The SLM size programmed here should be >=
        //! the per thread-group SLM size programmed in DW[5][20:16].
        enum PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE
        {
            PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODES0K = 0, //!< No additional details
            PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODES16K = 1, //!< No additional details
            PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODES32K = 2, //!< No additional details
            PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODES64K = 3, //!< No additional details
            PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODES96K = 4, //!< No additional details
            PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODES128K = 5, //!< No additional details
            PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODES160K = 6, //!< No additional details
            PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODES192K = 7, //!< No additional details
            PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODES224K = 8, //!< No additional details
            PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODES256K = 9, //!< No additional details
            PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODES384K = 10, //!< No additional details
        };

        //! \name Initializations

        //! \brief Explicit member initialization function
        // Declared only; the definition is not part of this header section.
        INTERFACE_DESCRIPTOR_DATA_CMD();

        static const size_t dwSize = 8; //!< Structure size in DWORDs (DW0..DW7)
        static const size_t byteSize = 32; //!< Structure size in bytes (dwSize * 4)
    } InterfaceDescriptor;

    //!
    //! \brief POSTSYNC_DATA
    //! \details
    //!
    //!
    // Nested 5-DWORD structure, embedded below as the `PostSync` member.
    struct POSTSYNC_DATA_CMD
    {
        // DW0: post-sync operation selector, flush requests and MOCS.
        union
        {
            struct
            {
                uint32_t Operation : __CODEGEN_BITFIELD( 0, 1) ; //!< OPERATION
                uint32_t DataportPipelineFlush : __CODEGEN_BITFIELD( 2, 2) ; //!< Dataport Pipeline Flush
                uint32_t Reserved3 : __CODEGEN_BITFIELD( 3, 3) ; //!< Reserved
                uint32_t Mocs : __CODEGEN_BITFIELD( 4, 10) ; //!< MOCS
                uint32_t SystemMemoryFenceRequest : __CODEGEN_BITFIELD(11, 11) ; //!< System Memory Fence Request
                uint32_t DataportSubsliceCacheFlush : __CODEGEN_BITFIELD(12, 12) ; //!< Dataport Subslice Cache Flush
                uint32_t Reserved13 : __CODEGEN_BITFIELD(13, 31) ; //!< Reserved
            };
            uint32_t Value;
        } DW0;
        // DW1..DW2: 64-bit destination address; `Value[2]` aliases it as two DWORDs.
        union
        {
            struct
            {
                uint64_t DestinationAddress ; //!< Destination Address
            };
            uint32_t Value[2];
        } DW1_2;
        // DW3..DW4: 64-bit immediate data; `Value[2]` aliases it as two DWORDs.
        union
        {
            struct
            {
                uint64_t ImmediateData ; //!< Immediate Data
            };
            uint32_t Value[2];
        } DW3_4;

        //! \name Local enumerations

        // Encoding for DW0.Operation (value 2 is not defined here).
        enum OPERATION
        {
            OPERATION_NOWRITE = 0, //!< The Destination Address and Immediate Data fields are ignored.
            OPERATION_WRITEIMMEDIATEDATA = 1, //!< Writes 8 bytes (64 bits) of Immediate Data to the Destination Address.
            OPERATION_WRITETIMESTAMP = 3, //!< Writes 32 bytes (256 bits) of Timestamp Data to the Destination Address. The Immediate Data field is ignored. The timestamp layout: [0] = 64b Context Timestamp Start, [1] = 64b Global Timestamp Start, [2] = 64b Context Timestamp End, [3] = 64b Global Timestamp End
        };

        //! \name Initializations

        //! \brief Explicit member initialization function
        // Declared only; the definition is not part of this header section.
        POSTSYNC_DATA_CMD();

        static const size_t dwSize = 5; //!< Structure size in DWORDs (DW0 + DW1_2 + DW3_4)
        static const size_t byteSize = 20; //!< Structure size in bytes (dwSize * 4)
    } PostSync;

    // Nested 8-DWORD raw payload, embedded below as the `InlineData` member.
    struct INLINE_DATA_CMD
    {
        uint32_t Value[8] = {0}; // all eight DWORDs are zero-initialized in place

        //! \brief Explicit member initialization function
        // Empty body: the default member initializer above does the work.
        INLINE_DATA_CMD() {}
        static const size_t dwSize = 8; //!< Structure size in DWORDs
        static const size_t byteSize = 32; //!< Structure size in bytes (dwSize * 4)
    } InlineData;

    //! \name Local enumerations

    // Encoding for DW0.CfeSubopcodeVariant (value 6 is not defined here).
    enum CFE_SUBOPCODE_VARIANT
    {
        CFE_SUBOPCODE_VARIANT_STANDARD = 0, //!< No additional details
        CFE_SUBOPCODE_VARIANT_PASS1_RESUME = 1, //!< Resumption of Compute Walkers that has Thread groups only in Pass1 (EOT Preempt Saved), recorded in context image to continue execution after preemption.
        CFE_SUBOPCODE_VARIANT_PASS2_RESUME = 2, //!< Resumption of Compute Walkers that only has Pass2 (EOT NOT-RUN) thread groups recorded in context image to continue execution after preemption.
        CFE_SUBOPCODE_VARIANT_BTDPASS2 = 3, //!< This encoding is used by BTD over dispatched threads. This value is not saved in the Walker Context image
        CFE_SUBOPCODE_VARIANT_PASS1PASS2_RESUME = 4, //!< Resumption of Compute Walkers that has a Mix of Pass1 (EOT Preempt Saved) and Pass2 (EOT NOT-RUN) thread groups recorded in context image to continue execution after preemption.
        CFE_SUBOPCODE_VARIANT_TG_RESUME = 5, //!< Resumption of COMPUTE_WALKER that was preempted at a Thread group and has completed execution of previous TGs, recorded in context image to continue execution after preemption.
        CFE_SUBOPCODE_VARIANT_WDONE = 7, //!< Walker completed execution of all TGs.
    };

    // Encoding for DW0.CfeSubopcode.
    enum CFE_SUBOPCODE
    {
        CFE_SUBOPCODE_COMPUTEWALKER = 2, //!< No additional details
    };

    // Encoding for DW0.ComputeCommandOpcode.
    enum COMPUTE_COMMAND_OPCODE
    {
        COMPUTE_COMMAND_OPCODE_NEWCFECOMMAND = 2, //!< No additional details
    };

    // Encoding for DW0.Pipeline.
    enum PIPELINE
    {
        PIPELINE_COMPUTE = 2, //!< No additional details
    };

    // Encoding for DW0.CommandType.
    enum COMMAND_TYPE
    {
        COMMAND_TYPE_GFXPIPE = 3, //!< No additional details
    };

    //! \brief PARTITION_TYPE
    //! \details
    //! Specifies whether the command is executed by multiple partitions.
    //! When partitioned, the X or Y or Z dispatches are split at
    //! Partition Size boundaries.
    enum PARTITION_TYPE
    {
        PARTITION_TYPE_DISABLED = 0, //!< The command is not partitioned. Partition ID and Partition Size are ignored.
        PARTITION_TYPE_X = 1, //!< The command is partitioned in the X dimension. The X walk is between (PartitionID * PartitionSize) <= X < ((PartitionID+1)*PartitionSize). All Y and Z walks are performed in this partition.
        PARTITION_TYPE_Y = 2, //!< The command is partitioned in the Y dimension. The Y walk is between (PartitionID * PartitionSize) <= Y < ((PartitionID+1)*PartitionSize). All X and Z walks are performed in this partition.
        PARTITION_TYPE_Z = 3, //!< The command is partitioned in the Z dimension. The Z walk is between (PartitionID * PartitionSize) <= Z < ((PartitionID+1)*PartitionSize). All X and Y walks are performed in this partition.
    };

    //! \brief MESSAGE_SIMD
    //! \details
    //! Specifies the SIMD size of the messages used to access the local data.
    //! When the message size is less than the thread SIMD size, then the Local
    //! ID are batched so that the smaller message SIMD size keep full cache
    //! lines together in fused threads.
    enum MESSAGE_SIMD
    {
        MESSAGE_SIMD_SIMT16 = 1, //!< No additional details
        MESSAGE_SIMD_SIMT32 = 2, //!< No additional details
    };

    //! \brief TILE_LAYOUT
    //! \details
    //! Specifies whether 2D and 3D surfaces are stored in Linear or TileY
    //! layouts. The local ID values are batched together to keep full cache
    //! lines together in the same SIMD thread.
    enum TILE_LAYOUT
    {
        TILE_LAYOUT_LINEAR = 0, //!< No additional details
        TILE_LAYOUT_TILEY32BPE = 1, //!< No additional details
        TILE_LAYOUT_TILEY64BPE = 2, //!< No additional details
        TILE_LAYOUT_TILEY128BPE = 3, //!< No additional details
    };

    //! \brief WALK_ORDER
    //! \details
    //! Specifies which dimensions are the first and second priority order for
    //! binding together in SIMD threads. In the values below, 0 is the first
    //! priority and 1 is the second priority.
    enum WALK_ORDER
    {
        WALK_ORDER_WALK012 = 0, //!< Normal Linear walk order
        WALK_ORDER_WALK021 = 1, //!< No additional details
        WALK_ORDER_WALK102 = 2, //!< Normal TileY walk order
        WALK_ORDER_WALK120 = 3, //!< No additional details
        WALK_ORDER_WALK201 = 4, //!< No additional details
        WALK_ORDER_WALK210 = 5, //!< No additional details
    };

    //! \brief EMIT_LOCAL
    //! \details
    //! These bits identify whether the register payload for Local X/Y/Z
    //! indices will be present. Bit 26 is X, Bit 27 is Y, and Bit 28 is Z.
    //! Separate <a
    //! href="https://gfxspecs.intel.com/Predator/Home/Index/55403">GPGPU_LOCALID</a>
    //! register payloads are generated when the corresponding bit is set.
    //! If Generate Local ID is enabled, then the thread dispatcher generates
    //! the corresponding Local X/Y/Z index values, using the Local X/Y/Z
    //! Maximum values from DW6 of this command. For any enable bit that is not
    //! set, the corresponding Local ID will not be generated and that register
    //! will not be emitted into the per-thread payload. When an enable bit is
    //! not set, its corresponding Local Maximum value in DW6 must be 0.
    enum EMIT_LOCAL
    {
        EMIT_LOCAL_EMITNONE = 0, //!< No additional details
        EMIT_LOCAL_EMITX = 1, //!< No additional details
        EMIT_LOCAL_EMITXY = 3, //!< No additional details
        EMIT_LOCAL_EMITXYZ = 7, //!< No additional details
    };

    //! \brief SIMD_SIZE
    //! \details
    //! This field determines the size of the payload and the number of bits of
    //! the execution mask that are expected. The kernel pointed to by the
    //! interface descriptor should match the SIMD declared here.
    enum SIMD_SIZE
    {
        SIMD_SIZE_SIMT16 = 1, //!< 16 LSBs of the execution mask are used
        SIMD_SIZE_SIMT32 = 2, //!< 32 bits of execution mask used
    };

    //! \name Initializations

    //! \brief Explicit member initialization function
    // Declared only; the definition is not part of this header section.
    COMPUTE_WALKER_CMD();

    static const size_t dwSize = 40; //!< Command size in DWORDs (19 + 8 + 5 + 8, see layout summary)
    static const size_t byteSize = 160; //!< Command size in bytes (dwSize * 4)
};

//!
//! \brief CFE_STATE
//! \details
//! Set the compute pipeline state.
//!
struct CFE_STATE_CMD
{
    // Layout: six DWORDs, DW0..DW5. Each union exposes named bit-fields and a
    // raw `Value` alias over the same 32 bits.

    // DW0: command header - DWORD length plus CFE sub-opcode, opcode, pipeline
    // and command type selectors.
    union
    {
        struct
        {
            uint32_t DwordLength : __CODEGEN_BITFIELD( 0, 7) ; //!< DWORD_LENGTH
            uint32_t Reserved8 : __CODEGEN_BITFIELD( 8, 15) ; //!< Reserved
            uint32_t CfeSubopcodeVariant : __CODEGEN_BITFIELD(16, 17) ; //!< CFE_SUBOPCODE_VARIANT
            uint32_t CfeSubopcode : __CODEGEN_BITFIELD(18, 23) ; //!< CFE_SUBOPCODE
            uint32_t ComputeCommandOpcode : __CODEGEN_BITFIELD(24, 26) ; //!< COMPUTE_COMMAND_OPCODE
            uint32_t Pipeline : __CODEGEN_BITFIELD(27, 28) ; //!< PIPELINE
            uint32_t CommandType : __CODEGEN_BITFIELD(29, 31) ; //!< COMMAND_TYPE
        };
        uint32_t Value;
    } DW0;
    // DW1: scratch space buffer field (upper 22 bits of the DWORD).
    union
    {
        struct
        {
            uint32_t Reserved32 : __CODEGEN_BITFIELD( 0, 9) ; //!< Reserved
            uint32_t ScratchSpaceBuffer : __CODEGEN_BITFIELD(10, 31) ; //!< Scratch Space Buffer
        };
        uint32_t Value;
    } DW1;
    // DW2: fully reserved.
    union
    {
        struct
        {
            uint32_t Reserved64 ; //!< Reserved
        };
        uint32_t Value;
    } DW2;
    // DW3: dispatch tuning bits and the maximum thread count (upper 16 bits).
    union
    {
        struct
        {
            uint32_t ControlsTheNumberOfStackidsForRayTracingSubsystem : __CODEGEN_BITFIELD( 0, 1) ; //!< CONTROLS_THE_NUMBER_OF_STACKIDS_FOR_RAY_TRACING_SUBSYSTEM
            uint32_t Reserved98 : __CODEGEN_BITFIELD( 2, 9) ; //!< Reserved
            uint32_t LargeGrfThreadAdjustDisable : __CODEGEN_BITFIELD(10, 10) ; //!< LARGE_GRF_THREAD_ADJUST_DISABLE
            uint32_t ComputeOverdispatchDisable : __CODEGEN_BITFIELD(11, 11) ; //!< COMPUTE_OVERDISPATCH_DISABLE
            uint32_t ComputeDispatchAllWalkerEnable : __CODEGEN_BITFIELD(12, 12) ; //!< COMPUTE_DISPATCH_ALL_WALKER_ENABLE
            uint32_t Reserved109 : __CODEGEN_BITFIELD(13, 13) ; //!< Reserved
            uint32_t OverDispatchControl : __CODEGEN_BITFIELD(14, 15) ; //!< OVER_DISPATCH_CONTROL
            uint32_t MaximumNumberOfThreads : __CODEGEN_BITFIELD(16, 31) ; //!< Maximum Number of Threads
        };
        uint32_t Value;
    } DW3;
    // DW4: fully reserved.
    union
    {
        struct
        {
            uint32_t Reserved128 ; //!< Reserved
        };
        uint32_t Value;
    } DW4;
    // DW5: debug keys (resume indicator and walker number).
    union
    {
        struct
        {
            uint32_t ResumeIndicatorDebugkey : __CODEGEN_BITFIELD( 0, 0) ; //!< Resume indicator debugkey
            uint32_t WalkerNumberDebugkey : __CODEGEN_BITFIELD( 1, 10) ; //!< Walker number debugkey
            uint32_t Reserved171 : __CODEGEN_BITFIELD(11, 31) ; //!< Reserved
        };
        uint32_t Value;
    } DW5;

    //! \name Local enumerations

    // Encoding for DW0.CfeSubopcodeVariant.
    enum CFE_SUBOPCODE_VARIANT
    {
        CFE_SUBOPCODE_VARIANT_STANDARD = 0, //!< No additional details
    };

    // Encoding for DW0.CfeSubopcode.
    enum CFE_SUBOPCODE
    {
        CFE_SUBOPCODE_CFESTATE = 0, //!< No additional details
    };

    // Encoding for DW0.ComputeCommandOpcode.
    enum COMPUTE_COMMAND_OPCODE
    {
        COMPUTE_COMMAND_OPCODE_NEWCFECOMMAND = 2, //!< No additional details
    };

    // Encoding for DW0.Pipeline.
    enum PIPELINE
    {
        PIPELINE_COMPUTE = 2, //!< No additional details
    };

    // Encoding for DW0.CommandType.
    enum COMMAND_TYPE
    {
        COMMAND_TYPE_GFXPIPE = 3, //!< No additional details
    };

    //! \brief CONTROLS_THE_NUMBER_OF_STACKIDS_FOR_RAY_TRACING_SUBSYSTEM
    //! \details
    //! This field allows controlling the number stackIDs (i.e. #unique rays) in
    //! the Ray Tracing subsystem.
    enum CONTROLS_THE_NUMBER_OF_STACKIDS_FOR_RAY_TRACING_SUBSYSTEM
    {
        CONTROLS_THE_NUMBER_OF_STACKIDS_FOR_RAY_TRACING_SUBSYSTEM_2K = 0, //!< Number of stackIDs = 2048
        CONTROLS_THE_NUMBER_OF_STACKIDS_FOR_RAY_TRACING_SUBSYSTEM_1K = 1, //!< Number of stackIDs = 1024
        CONTROLS_THE_NUMBER_OF_STACKIDS_FOR_RAY_TRACING_SUBSYSTEM_512 = 2, //!< Number of stackIDs = 512
        CONTROLS_THE_NUMBER_OF_STACKIDS_FOR_RAY_TRACING_SUBSYSTEM_256 = 3, //!< Number of stackIDs = 256
    };

    //! \brief LARGE_GRF_THREAD_ADJUST_DISABLE
    //! \details
    //! When this bit is set, the thread dispatch logic will not cap the
    //! max_outstanding_threads based on the large grf mode. This is a Chicken
    //! bit.
    // Note: the field is a "disable" bit, so the _DISABLED enumerator is the
    // value 1 (bit set) and _ENABLED is 0 (bit clear).
    enum LARGE_GRF_THREAD_ADJUST_DISABLE
    {
        LARGE_GRF_THREAD_ADJUST_DISABLE_ENABLED = 0, //!< No additional details
        LARGE_GRF_THREAD_ADJUST_DISABLE_DISABLED = 1, //!< No additional details
    };

    //! \brief COMPUTE_OVERDISPATCH_DISABLE
    //! \details
    //! When this bit is set, the thread dispatch logic will disable over
    //! dispatching of threads to the DSS.
    // Note: the field is a "disable" bit, so the _DISABLED enumerator is the
    // value 1 (bit set) and _ENABLED is 0 (bit clear).
    enum COMPUTE_OVERDISPATCH_DISABLE
    {
        COMPUTE_OVERDISPATCH_DISABLE_ENABLED = 0, //!< No additional details
        COMPUTE_OVERDISPATCH_DISABLE_DISABLED = 1, //!< No additional details
    };

    //! \brief COMPUTE_DISPATCH_ALL_WALKER_ENABLE
    //! \details
    //! When this bit is set, the thread dispatch logic does a forced
    //! round-robin dispatch to all the enabled DSS in this context.
    enum COMPUTE_DISPATCH_ALL_WALKER_ENABLE
    {
        COMPUTE_DISPATCH_ALL_WALKER_ENABLE_DISABLED = 0, //!< No additional details
        COMPUTE_DISPATCH_ALL_WALKER_ENABLE_ENABLED = 1, //!< No additional details
    };

    //! \brief OVER_DISPATCH_CONTROL
    //! \details
    //! Enables the amount of GPGPU thread over dispatch.
    enum OVER_DISPATCH_CONTROL
    {
        OVER_DISPATCH_CONTROL_NONE = 0, //!< 0% overdispatch
        OVER_DISPATCH_CONTROL_LOW = 1, //!< 6.25% overdispatch
        OVER_DISPATCH_CONTROL_NORMAL = 2, //!< 12.5% overdispatch
        OVER_DISPATCH_CONTROL_HIGH = 3, //!< 25% overdispatch
    };

    //! \name Initializations

    //! \brief Explicit member initialization function
    // Declared only; the definition is not part of this header section.
    CFE_STATE_CMD();

    static const size_t dwSize = 6; //!< Command size in DWORDs (DW0..DW5)
    static const size_t byteSize = 24; //!< Command size in bytes (dwSize * 4)
};

//!
//! \brief STATE_COMPUTE_MODE
//! \details
//! This is a non-pipeline state command and is a general compute
//! programming state that can be shared from the top to bottom of the
//! pipeline.
//!
1674 struct STATE_COMPUTE_MODE_CMD 1675 { 1676 union 1677 { 1678 struct 1679 { 1680 uint32_t DwordLength : __CODEGEN_BITFIELD( 0, 7) ; //!< DWORD_LENGTH 1681 uint32_t Reserved8 : __CODEGEN_BITFIELD( 8, 15) ; //!< Reserved 1682 uint32_t _3DCommandSubOpcode : __CODEGEN_BITFIELD(16, 23) ; //!< _3D_COMMAND_SUB_OPCODE 1683 uint32_t _3DCommandOpcode : __CODEGEN_BITFIELD(24, 26) ; //!< _3D_COMMAND_OPCODE 1684 uint32_t CommandSubtype : __CODEGEN_BITFIELD(27, 28) ; //!< COMMAND_SUBTYPE 1685 uint32_t CommandType : __CODEGEN_BITFIELD(29, 31) ; //!< COMMAND_TYPE 1686 }; 1687 uint32_t Value; 1688 } DW0; 1689 union 1690 { 1691 struct 1692 { 1693 uint32_t ZPassAsyncComputeThreadLimit : __CODEGEN_BITFIELD( 0, 2) ; //!< Z_PASS_ASYNC_COMPUTE_THREAD_LIMIT 1694 uint32_t NpZAsyncThrottleSettings : __CODEGEN_BITFIELD( 3, 4) ; //!< NP_Z_ASYNC_THROTTLE_SETTINGS 1695 uint32_t Reserved37 : __CODEGEN_BITFIELD( 5, 6) ; //!< Reserved 1696 uint32_t AsyncComputeThreadLimit : __CODEGEN_BITFIELD( 7, 9) ; //!< ASYNC_COMPUTE_THREAD_LIMIT 1697 uint32_t Reserved42 : __CODEGEN_BITFIELD(10, 12) ; //!< Reserved 1698 uint32_t EuThreadSchedulingModeOverride : __CODEGEN_BITFIELD(13, 14) ; //!< EU_THREAD_SCHEDULING_MODE_OVERRIDE 1699 uint32_t LargeGrfMode : __CODEGEN_BITFIELD(15, 15) ; //!< LARGE_GRF_MODE 1700 uint32_t Mask1 : __CODEGEN_BITFIELD(16, 31) ; //!< Mask1 1701 }; 1702 uint32_t Value; 1703 } DW1; 1704 union 1705 { 1706 struct 1707 { 1708 uint32_t MidthreadPreemptionDelayTimer : __CODEGEN_BITFIELD( 0, 2) ; //!< MIDTHREAD_PREEMPTION_DELAY_TIMER 1709 uint32_t MidthreadPreemptionOverdispatchThreadGroupCount : __CODEGEN_BITFIELD( 3, 4) ; //!< MIDTHREAD_PREEMPTION_OVERDISPATCH_THREAD_GROUP_COUNT 1710 uint32_t MidthreadPreemptionOverdispatchTestMode : __CODEGEN_BITFIELD( 5, 5) ; //!< MIDTHREAD_PREEMPTION_OVERDISPATCH_TEST_MODE 1711 uint32_t UavCoherencyMode : __CODEGEN_BITFIELD( 6, 6) ; //!< UAV_COHERENCY_MODE 1712 uint32_t Reserved71 : __CODEGEN_BITFIELD( 7, 10) ; //!< Reserved 1713 uint32_t 
MemoryAllocationForScratchAndMidthreadPreemptionBuffers : __CODEGEN_BITFIELD(11, 11) ; //!< MEMORY_ALLOCATION_FOR_SCRATCH_AND_MIDTHREAD_PREEMPTION_BUFFERS 1714 uint32_t Reserved76 : __CODEGEN_BITFIELD(12, 15) ; //!< Reserved 1715 uint32_t Mask2 : __CODEGEN_BITFIELD(16, 31) ; //!< Mask2 1716 }; 1717 uint32_t Value; 1718 } DW2; 1719 1720 //! \name Local enumerations 1721 1722 enum _3D_COMMAND_SUB_OPCODE 1723 { 1724 _3D_COMMAND_SUB_OPCODE_STATECOMPUTEMODE = 5, //!< No additional details 1725 }; 1726 1727 enum _3D_COMMAND_OPCODE 1728 { 1729 _3D_COMMAND_OPCODE_GFXPIPENONPIPELINED = 1, //!< No additional details 1730 }; 1731 1732 enum COMMAND_SUBTYPE 1733 { 1734 COMMAND_SUBTYPE_GFXPIPECOMMON = 0, //!< No additional details 1735 }; 1736 1737 enum COMMAND_TYPE 1738 { 1739 COMMAND_TYPE_GFXPIPE = 3, //!< No additional details 1740 }; 1741 1742 //! \brief Z_PASS_ASYNC_COMPUTE_THREAD_LIMIT 1743 //! \details 1744 //! Specifies the maximum number of active Compute CS threads to run in a 1745 //! DSS when the 3D Pipe is active and a Z-pass is not running. When the 3D 1746 //! Pipe is not active or when a Z-pass is running, the maximum number of 1747 //! active Compute CS threads is specified by Maximum Number of 1748 //! Threads in CFE_STATE command. 1749 enum Z_PASS_ASYNC_COMPUTE_THREAD_LIMIT 1750 { 1751 Z_PASS_ASYNC_COMPUTE_THREAD_LIMIT_MAX60 = 0, //!< Maximum of upto 1 thread per fused EU reserved for 3D. 1752 Z_PASS_ASYNC_COMPUTE_THREAD_LIMIT_MAX64 = 1, //!< No limit applied. Maximum Number of Threads is the only limit on Compute CS threads. 1753 Z_PASS_ASYNC_COMPUTE_THREAD_LIMIT_MAX56 = 2, //!< Maximum of 1 thread per fused EU reserved for 3D . 1754 Z_PASS_ASYNC_COMPUTE_THREAD_LIMIT_MAX48 = 3, //!< Maximum of 2 thread per fused EU reserved for 3D . 1755 Z_PASS_ASYNC_COMPUTE_THREAD_LIMIT_MAX40 = 4, //!< Maximum of 3 thread per fused EU reserved for 3D . 1756 Z_PASS_ASYNC_COMPUTE_THREAD_LIMIT_MAX32 = 5, //!< Maximum of 4 thread per fused EU reserved for 3D . 
1757 }; 1758 1759 enum NP_Z_ASYNC_THROTTLE_SETTINGS 1760 { 1761 NP_Z_ASYNC_THROTTLE_SETTINGS_UNNAMED0 = 0, //!< Use the same settings as the Pixel shader Async compute settings i.e bits[9:7] of this DW. 1762 NP_Z_ASYNC_THROTTLE_SETTINGS_MAX32 = 1, //!< Maximum of 4 thread per fused EU reserved for 3D . 1763 NP_Z_ASYNC_THROTTLE_SETTINGS_MAX40 = 2, //!< Maximum of 3 thread per fused EU reserved for 3D . 1764 NP_Z_ASYNC_THROTTLE_SETTINGS_MAX48 = 3, //!< Maximum of 2 thread per fused EU reserved for 3D . 1765 }; 1766 1767 //! \brief ASYNC_COMPUTE_THREAD_LIMIT 1768 //! \details 1769 //! Specifies the maximum number of active Compute CS threads to run in a 1770 //! DSS when the 3D Pipe is active and a Z-pass is not running. When the 3D 1771 //! Pipe is not active or when a Z-pass is running, the maximum number of 1772 //! active Compute CS threads is specified by Maximum Number of 1773 //! Threads in CFE_STATE command. 1774 enum ASYNC_COMPUTE_THREAD_LIMIT 1775 { 1776 ASYNC_COMPUTE_THREAD_LIMIT_DISABLED = 0, //!< No limit applied. Maximum Number of Threads is the only limit on Compute CS threads. 1777 ASYNC_COMPUTE_THREAD_LIMIT_MAX2 = 1, //!< Maximum of 2 EU threads per DSS, when 3D Pipe is active. This sets the Async Compute thread limit to about 1 thread per EU row. 1778 ASYNC_COMPUTE_THREAD_LIMIT_MAX8 = 2, //!< Maximum of 8 EU threads per DSS, when 3D Pipe is active. This sets the Async Compute thread limit to about 1 thread per EU . 1779 ASYNC_COMPUTE_THREAD_LIMIT_MAX16 = 3, //!< Maximum of 16 EU threads per DSS, when 3D Pipe is active. This sets the Async Compute thread limit to about 2 threads per EU . 1780 ASYNC_COMPUTE_THREAD_LIMIT_MAX24 = 4, //!< Maximum of 24 EU threads per DSS, when 3D Pipe is active. 1781 ASYNC_COMPUTE_THREAD_LIMIT_MAX32 = 5, //!< Maximum of 32 EU threads per DSS, when 3D Pipe is active. 1782 ASYNC_COMPUTE_THREAD_LIMIT_MAX40 = 6, //!< Maximum of 40 EU threads per DSS, when 3D Pipe is active. 
1783 ASYNC_COMPUTE_THREAD_LIMIT_MAX48 = 7, //!< Maximum of 48 EU threads per DSS, when 3D Pipe is active. 1784 }; 1785 1786 //! \brief EU_THREAD_SCHEDULING_MODE_OVERRIDE 1787 //! \details 1788 //! Override the thread scheduling policy in EU. 1789 enum EU_THREAD_SCHEDULING_MODE_OVERRIDE 1790 { 1791 EU_THREAD_SCHEDULING_MODE_OVERRIDE_HWDEFAULT = 0, //!< No override - HW selects optimal scheduling policy. For DGT and PVC, HW uses Oldest First scheduling. 1792 EU_THREAD_SCHEDULING_MODE_OVERRIDE_OLDESTFIRST = 1, //!< EU will always schedule the oldest ready thread. 1793 EU_THREAD_SCHEDULING_MODE_OVERRIDE_ROUNDROBIN = 2, //!< EU will schedule threads in a round-robin manner, switching to the next ready thread every cycle. 1794 EU_THREAD_SCHEDULING_MODE_OVERRIDE_STALLBASEDROUNDROBIN = 3, //!< EU will schedule threads in a round-robin manner, but scheduling will only switch when the current thread is stalled due to dependency. 1795 }; 1796 1797 //! \brief LARGE_GRF_MODE 1798 //! \details 1799 //! This bit controls the Large GRF Mode Vs Regular GRF Mode in Execution 1800 //! Units. 1801 enum LARGE_GRF_MODE 1802 { 1803 LARGE_GRF_MODE_UNNAMED0 = 0, //!< Regular GRF mode of operation. 1804 LARGE_GRF_MODE_UNNAMED1 = 1, //!< Large GRF mode of operation. 1805 }; 1806 1807 //! \brief MIDTHREAD_PREEMPTION_DELAY_TIMER 1808 //! \details 1809 //! The delay timer gives the option of giving time for threads that are on 1810 //! the verge of exiting to completely exit thereby avoiding saving the 1811 //! thread state. 1812 enum MIDTHREAD_PREEMPTION_DELAY_TIMER 1813 { 1814 MIDTHREAD_PREEMPTION_DELAY_TIMER_MTPTIMERVAL0 = 0, //!< Mid thread preemption event is signalled to the EU as soon as CFEG receives it from CS. 1815 MIDTHREAD_PREEMPTION_DELAY_TIMER_MTPTIMERVAL50 = 1, //!< Mid thread preemption timer of 50usec. 
1816 MIDTHREAD_PREEMPTION_DELAY_TIMER_MTPTIMERVAL100 = 2, //!< Mid thread preemption timer of 100usec 1817 MIDTHREAD_PREEMPTION_DELAY_TIMER_MTPTIMERVAL150 = 3, //!< Mid thread preemption timer of 150usec 1818 }; 1819 1820 //! \brief MIDTHREAD_PREEMPTION_OVERDISPATCH_THREAD_GROUP_COUNT 1821 //! \details 1822 //! The overdispatch TG buffer a.k.a ODB buffer stores the XYZ ID of the 1823 //! TG that were dispatched by HW but did not land on the EU after the 1824 //! preemption is signaled. This buffer is in PPGGT space and it has to be 1825 //! managed by the CFEG to avoid overflowing. 1826 //! The ODB buffer size is programmed based on the Physical machine 1827 //! size. 1828 enum MIDTHREAD_PREEMPTION_OVERDISPATCH_THREAD_GROUP_COUNT 1829 { 1830 MIDTHREAD_PREEMPTION_OVERDISPATCH_THREAD_GROUP_COUNT_ODTGM2 = 0, //!< HW Caps the Over dispatched TG count in ODB buffer to MAX_NUM_THRDS / 2. 1831 MIDTHREAD_PREEMPTION_OVERDISPATCH_THREAD_GROUP_COUNT_ODTGM4 = 1, //!< HW Caps the Over dispatched TG count in ODB buffer to MAX_NUM_THRDS / 4. 1832 MIDTHREAD_PREEMPTION_OVERDISPATCH_THREAD_GROUP_COUNT_ODTGM8 = 2, //!< HW Caps the Over dispatched TG count in ODB buffer to MAX_NUM_THRDS / 8 1833 MIDTHREAD_PREEMPTION_OVERDISPATCH_THREAD_GROUP_COUNT_ODTGM16 = 3, //!< HW Caps the Over dispatched TG count in ODB buffer to MAX_NUM_THRDS / 16 1834 }; 1835 1836 //! \brief MIDTHREAD_PREEMPTION_OVERDISPATCH_TEST_MODE 1837 //! \details 1838 //! This bit provides a mechanism to limit the number of ODB TGs to a 1839 //! much smaller number in simulation. 1840 enum MIDTHREAD_PREEMPTION_OVERDISPATCH_TEST_MODE 1841 { 1842 MIDTHREAD_PREEMPTION_OVERDISPATCH_TEST_MODE_REGULAR = 0, //!< Regular mode operation. The MAX_ODB_CNT is derived from the value programmed in bits [4:3] 1843 MIDTHREAD_PREEMPTION_OVERDISPATCH_TEST_MODE_TESTMODE = 1, //!< Caps the MAX_ODB_CNT to 64 . 1844 }; 1845 1846 //! \brief UAV_COHERENCY_MODE 1847 //! \details 1848 //! 
This field controls whether UAV operations in the HW will flush the 1849 //! L1 Dataport Cache or only drain the dataport pipe. If UAV's are cached, 1850 //! SW must set this bit to ensure coherency of UAV's that are made coherent 1851 //! through UAV barrier and UAV resource changes with RESOURCE_BARRIER(L1 1852 //! Dataport UAV Flush). 1853 enum UAV_COHERENCY_MODE 1854 { 1855 UAV_COHERENCY_MODE_DRAIN_DATAPORT_MODE = 0, //!< Untyped L1 is neither flushed or invalidated for both UAV Barrier Coherency and BARRIER_RESOURCE with L1 Dataport UAV Flush. 1856 UAV_COHERENCY_MODE_FLUSH_DATAPORTL1 = 1, //!< Untyped L1 is flushed,for both UAV Barrier Coherency and BARRIER_RESOURCE with L1 Dataport UAV Flush. 1857 }; 1858 1859 //! \brief MEMORY_ALLOCATION_FOR_SCRATCH_AND_MIDTHREAD_PREEMPTION_BUFFERS 1860 //! \details 1861 //! This bit only applies for Compute-only contexts initiated from 1862 //! CCS. 1863 //! If set to 1, HW uses the Virtual Subslice ID instead of the Physical 1864 //! Subslice ID to index the scratch and Midthread Preemption Thread 1865 //! statebuffers (TSB). This reduces the memory footprint when running 1866 //! multiple compute contexts. 1867 enum MEMORY_ALLOCATION_FOR_SCRATCH_AND_MIDTHREAD_PREEMPTION_BUFFERS 1868 { 1869 MEMORY_ALLOCATION_FOR_SCRATCH_AND_MIDTHREAD_PREEMPTION_BUFFERS_FULL = 0, //!< SW allocates the MAX memory (full GPU size) for scratch and Midthread Preemption buffers (TSB) for all CCS contexts 1870 MEMORY_ALLOCATION_FOR_SCRATCH_AND_MIDTHREAD_PREEMPTION_BUFFERS_MIN = 1, //!< SW allocates the minimum required memory for scratch and Midthread Preemption buffers (TSB) based on the number of Subslices assigned to this CCS context as defined in the CCS_MODE register . 1871 }; 1872 1873 //! \name Initializations 1874 1875 //! \brief Explicit member initialization function 1876 STATE_COMPUTE_MODE_CMD(); 1877 1878 static const size_t dwSize = 3; 1879 static const size_t byteSize = 12; 1880 }; 1881 1882 //! 1883 //! \brief PALETTE_ENTRY 1884 //! 
\details 1885 //! 1886 //! 1887 struct PALETTE_ENTRY_CMD 1888 { 1889 union 1890 { 1891 struct 1892 { 1893 uint32_t Blue : __CODEGEN_BITFIELD(0, 7); //!< Blue 1894 uint32_t Green : __CODEGEN_BITFIELD(8, 15); //!< Green 1895 uint32_t Red : __CODEGEN_BITFIELD(16, 23); //!< Red 1896 uint32_t Alpha : __CODEGEN_BITFIELD(24, 31); //!< Alpha 1897 }; 1898 uint32_t Value; 1899 } DW0; 1900 1901 //! \name Local enumerations 1902 1903 //! \name Initializations 1904 1905 //! \brief Explicit member initialization function 1906 PALETTE_ENTRY_CMD(); 1907 1908 static const size_t dwSize = 1; 1909 static const size_t byteSize = 4; 1910 }; 1911 1912 //! 1913 //! \brief GPGPU_CSR_BASE_ADDRESS 1914 //! \details 1915 //! The GPGPU_CSR_BASE_ADDRESS command sets the base pointers for EU and L3 1916 //! to Context Save and Restore EU State and SLM for GPGPU mid-thread 1917 //! preemption. 1918 //! 1919 //! Execution of this command causes a full pipeline flush, thus its use 1920 //! should be minimized for higher performance. State and instruction caches 1921 //! are flushed on completion of the flush. 1922 //! 1923 //! SW must always program PIPE_CONTROL with "CS Stall" and "Render Target 1924 //! Cache Flush Enable" set prior to programming GPGPU_CSR_BASE_ADDRESS 1925 //! command for GPGPU workloads i.e when pipeline select is GPGPU via 1926 //! PIPELINE_SELECT command. This is required to achieve better GPGPU 1927 //! preemption latencies for certain programming sequences. If programming 1928 //! PIPE_CONTROL has performance implications then preemption latencies can 1929 //! be trade off against performance by not implementing this programming 1930 //! note. 1931 //! 
1932 struct GPGPU_CSR_BASE_ADDRESS_CMD 1933 { 1934 union 1935 { 1936 struct 1937 { 1938 uint32_t DwordLength : __CODEGEN_BITFIELD(0, 7); //!< DWORD_LENGTH 1939 uint32_t Reserved8 : __CODEGEN_BITFIELD(8, 15); //!< Reserved 1940 uint32_t _3DCommandSubOpcode : __CODEGEN_BITFIELD(16, 23); //!< _3D_COMMAND_SUB_OPCODE 1941 uint32_t _3DCommandOpcode : __CODEGEN_BITFIELD(24, 26); //!< _3D_COMMAND_OPCODE 1942 uint32_t CommandSubtype : __CODEGEN_BITFIELD(27, 28); //!< COMMAND_SUBTYPE 1943 uint32_t CommandType : __CODEGEN_BITFIELD(29, 31); //!< COMMAND_TYPE 1944 }; 1945 uint32_t Value; 1946 } DW0; 1947 union 1948 { 1949 struct 1950 { 1951 uint64_t Reserved32 : __CODEGEN_BITFIELD(0, 11); //!< Reserved 1952 uint64_t GpgpuCsrBaseAddress : __CODEGEN_BITFIELD(12, 63); //!< GPGPU CSR Base Address 1953 }; 1954 uint32_t Value[2]; 1955 } DW1_2; 1956 1957 //! \name Local enumerations 1958 1959 enum _3D_COMMAND_SUB_OPCODE 1960 { 1961 _3D_COMMAND_SUB_OPCODE_GPGPUCSRBASEADDRESS = 4, //!< No additional details 1962 }; 1963 1964 enum _3D_COMMAND_OPCODE 1965 { 1966 _3D_COMMAND_OPCODE_GFXPIPENONPIPELINED = 1, //!< No additional details 1967 }; 1968 1969 enum COMMAND_SUBTYPE 1970 { 1971 COMMAND_SUBTYPE_GFXPIPECOMMON = 0, //!< No additional details 1972 }; 1973 1974 enum COMMAND_TYPE 1975 { 1976 COMMAND_TYPE_GFXPIPE = 3, //!< No additional details 1977 }; 1978 1979 //! \name Initializations 1980 1981 //! \brief Explicit member initialization function 1982 GPGPU_CSR_BASE_ADDRESS_CMD(); 1983 1984 static const size_t dwSize = 3; 1985 static const size_t byteSize = 12; 1986 }; 1987 }; 1988 } // namespace xe2_hpg_next 1989 } // namespace render 1990 } // namespace mhw 1991 #pragma pack() 1992 1993 #endif // __MHW_RENDER_HWCMD_XE2_HPG_NEXT_H__