/*
 * Copyright © 2024 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

#ifndef ELK_EU_OPCODES_H
#define ELK_EU_OPCODES_H

#ifdef __cplusplus
extern "C" {
#endif

/**
 * Opcode space shared by hardware instructions and compiler pseudo-opcodes.
 *
 * The enumerators before NUM_ELK_OPCODES are the actual hardware
 * instructions, numbered consecutively from 0.  NUM_ELK_OPCODES is the count
 * of those.  The compiler backend opcodes follow and are numbered starting at
 * NUM_ELK_OPCODES (ELK_FS_OPCODE_FB_WRITE has the same value as
 * NUM_ELK_OPCODES).
 *
 * Enumerator values are assigned by position, so inserting, removing or
 * reordering entries changes the value of every later entry.
 */
enum elk_opcode {
   /* These are the actual hardware instructions. */
   ELK_OPCODE_ILLEGAL,
   ELK_OPCODE_MOV,
   ELK_OPCODE_SEL,
   ELK_OPCODE_MOVI, /**< G45+ */
   ELK_OPCODE_NOT,
   ELK_OPCODE_AND,
   ELK_OPCODE_OR,
   ELK_OPCODE_XOR,
   ELK_OPCODE_SHR,
   ELK_OPCODE_SHL,
   ELK_OPCODE_DIM, /**< Gfx7.5 only */
   ELK_OPCODE_SMOV, /**< Gfx8+ */
   ELK_OPCODE_ASR,
   ELK_OPCODE_CMP,
   ELK_OPCODE_CMPN,
   ELK_OPCODE_CSEL, /**< Gfx8+ */
   ELK_OPCODE_F32TO16, /**< Gfx7 only */
   ELK_OPCODE_F16TO32, /**< Gfx7 only */
   ELK_OPCODE_BFREV, /**< Gfx7+ */
   ELK_OPCODE_BFE, /**< Gfx7+ */
   ELK_OPCODE_BFI1, /**< Gfx7+ */
   ELK_OPCODE_BFI2, /**< Gfx7+ */
   ELK_OPCODE_JMPI,
   ELK_OPCODE_BRD, /**< Gfx7+ */
   ELK_OPCODE_IF,
   ELK_OPCODE_IFF, /**< Pre-Gfx6 */
   ELK_OPCODE_BRC, /**< Gfx7+ */
   ELK_OPCODE_ELSE,
   ELK_OPCODE_ENDIF,
   ELK_OPCODE_DO, /**< Pre-Gfx6 */
   ELK_OPCODE_CASE, /**< Gfx6 only */
   ELK_OPCODE_WHILE,
   ELK_OPCODE_BREAK,
   ELK_OPCODE_CONTINUE,
   ELK_OPCODE_HALT,
   ELK_OPCODE_CALLA, /**< Gfx7.5+ */
   ELK_OPCODE_MSAVE, /**< Pre-Gfx6 */
   ELK_OPCODE_CALL, /**< Gfx6+ */
   ELK_OPCODE_MREST, /**< Pre-Gfx6 */
   ELK_OPCODE_RET, /**< Gfx6+ */
   ELK_OPCODE_PUSH, /**< Pre-Gfx6 */
   ELK_OPCODE_FORK, /**< Gfx6 only */
   ELK_OPCODE_GOTO, /**< Gfx8+ */
   ELK_OPCODE_POP, /**< Pre-Gfx6 */
   ELK_OPCODE_WAIT,
   ELK_OPCODE_SEND,
   ELK_OPCODE_SENDC,
   ELK_OPCODE_MATH, /**< Gfx6+ */
   ELK_OPCODE_ADD,
   ELK_OPCODE_MUL,
   ELK_OPCODE_AVG,
   ELK_OPCODE_FRC,
   ELK_OPCODE_RNDU,
   ELK_OPCODE_RNDD,
   ELK_OPCODE_RNDE,
   ELK_OPCODE_RNDZ,
   ELK_OPCODE_MAC,
   ELK_OPCODE_MACH,
   ELK_OPCODE_LZD,
   ELK_OPCODE_FBH, /**< Gfx7+ */
   ELK_OPCODE_FBL, /**< Gfx7+ */
   ELK_OPCODE_CBIT, /**< Gfx7+ */
   ELK_OPCODE_ADDC, /**< Gfx7+ */
   ELK_OPCODE_SUBB, /**< Gfx7+ */
   ELK_OPCODE_SAD2,
   ELK_OPCODE_SADA2,
   ELK_OPCODE_DP4,
   ELK_OPCODE_DPH,
   ELK_OPCODE_DP3,
   ELK_OPCODE_DP2,
   ELK_OPCODE_LINE,
   ELK_OPCODE_PLN, /**< G45+ */
   ELK_OPCODE_MAD, /**< Gfx6+ */
   ELK_OPCODE_LRP, /**< Gfx6+ */
   ELK_OPCODE_MADM, /**< Gfx8+ */
   ELK_OPCODE_NENOP, /**< G45 only */
   ELK_OPCODE_NOP,

   NUM_ELK_OPCODES,

   /* These are compiler backend opcodes that get translated into other
    * instructions.
    */
   ELK_FS_OPCODE_FB_WRITE = NUM_ELK_OPCODES,

   /**
    * Same as ELK_FS_OPCODE_FB_WRITE but expects its arguments separately as
    * individual sources instead of as a single payload blob. The
    * position/ordering of the arguments are defined by the enum
    * fb_write_logical_srcs.
    */
   ELK_FS_OPCODE_FB_WRITE_LOGICAL,

   ELK_FS_OPCODE_REP_FB_WRITE,

   ELK_SHADER_OPCODE_RCP,
   ELK_SHADER_OPCODE_RSQ,
   ELK_SHADER_OPCODE_SQRT,
   ELK_SHADER_OPCODE_EXP2,
   ELK_SHADER_OPCODE_LOG2,
   ELK_SHADER_OPCODE_POW,
   ELK_SHADER_OPCODE_INT_QUOTIENT,
   ELK_SHADER_OPCODE_INT_REMAINDER,
   ELK_SHADER_OPCODE_SIN,
   ELK_SHADER_OPCODE_COS,

   /**
    * A generic "send" opcode.  The first source is the descriptor and
    * the second source is the message payload.
    */
   ELK_SHADER_OPCODE_SEND,

   /**
    * An "undefined" write which does nothing but indicates to liveness that
    * we don't care about any values in the register which predate this
    * instruction.  Used to prevent partial writes from causing issues with
    * live ranges.
    */
   ELK_SHADER_OPCODE_UNDEF,

   /**
    * Texture sampling opcodes.
    *
    * LOGICAL opcodes are eventually translated to the matching non-LOGICAL
    * opcode but instead of taking a single payload blob they expect their
    * arguments separately as individual sources. The position/ordering of the
    * arguments are defined by the enum tex_logical_srcs.
    */
   ELK_SHADER_OPCODE_TEX,
   ELK_SHADER_OPCODE_TEX_LOGICAL,
   ELK_SHADER_OPCODE_TXD,
   ELK_SHADER_OPCODE_TXD_LOGICAL,
   ELK_SHADER_OPCODE_TXF,
   ELK_SHADER_OPCODE_TXF_LOGICAL,
   ELK_SHADER_OPCODE_TXF_LZ,
   ELK_SHADER_OPCODE_TXL,
   ELK_SHADER_OPCODE_TXL_LOGICAL,
   ELK_SHADER_OPCODE_TXL_LZ,
   ELK_SHADER_OPCODE_TXS,
   ELK_SHADER_OPCODE_TXS_LOGICAL,
   ELK_FS_OPCODE_TXB,
   ELK_FS_OPCODE_TXB_LOGICAL,
   ELK_SHADER_OPCODE_TXF_CMS,
   ELK_SHADER_OPCODE_TXF_CMS_LOGICAL,
   ELK_SHADER_OPCODE_TXF_CMS_W,
   ELK_SHADER_OPCODE_TXF_CMS_W_LOGICAL,
   ELK_SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL,
   ELK_SHADER_OPCODE_TXF_UMS,
   ELK_SHADER_OPCODE_TXF_UMS_LOGICAL,
   ELK_SHADER_OPCODE_TXF_MCS,
   ELK_SHADER_OPCODE_TXF_MCS_LOGICAL,
   ELK_SHADER_OPCODE_LOD,
   ELK_SHADER_OPCODE_LOD_LOGICAL,
   ELK_SHADER_OPCODE_TG4,
   ELK_SHADER_OPCODE_TG4_LOGICAL,
   ELK_SHADER_OPCODE_TG4_OFFSET,
   ELK_SHADER_OPCODE_TG4_OFFSET_LOGICAL,
   ELK_SHADER_OPCODE_SAMPLEINFO,
   ELK_SHADER_OPCODE_SAMPLEINFO_LOGICAL,

   ELK_SHADER_OPCODE_IMAGE_SIZE_LOGICAL,

   /**
    * Combines multiple sources of size 1 into a larger virtual GRF.
    * For example, parameters for a send-from-GRF message.  Or, updating
    * channels of a size 4 VGRF used to store vec4s such as texturing results.
    *
    * This will be lowered into MOVs from each source to consecutive offsets
    * of the destination VGRF.
    *
    * src[0] may be BAD_FILE.  If so, the lowering pass skips emitting the MOV,
    * but still reserves the first channel of the destination VGRF.  This can be
    * used to reserve space for, say, a message header set up by the generators.
    */
   ELK_SHADER_OPCODE_LOAD_PAYLOAD,

   /**
    * Packs a number of sources into a single value. Unlike LOAD_PAYLOAD, this
    * acts intra-channel, obtaining the final value for each channel by
    * combining the sources values for the same channel, the first source
    * occupying the lowest bits and the last source occupying the highest
    * bits.
    */
   ELK_FS_OPCODE_PACK,

   /**
    * Typed and untyped surface access opcodes.
    *
    * LOGICAL opcodes are eventually translated to the matching non-LOGICAL
    * opcode but instead of taking a single payload blob they expect their
    * arguments separately as individual sources:
    *
    * Source 0: [required] Surface coordinates.
    * Source 1: [optional] Operation source.
    * Source 2: [required] Surface index.
    * Source 3: [required] Number of coordinate components (as UD immediate).
    * Source 4: [required] Opcode-specific control immediate, same as source 2
    *                      of the matching non-LOGICAL opcode.
    */
   ELK_VEC4_OPCODE_UNTYPED_ATOMIC,
   ELK_SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL,
   ELK_VEC4_OPCODE_UNTYPED_SURFACE_READ,
   ELK_SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL,
   ELK_VEC4_OPCODE_UNTYPED_SURFACE_WRITE,
   ELK_SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL,

   ELK_SHADER_OPCODE_UNALIGNED_OWORD_BLOCK_READ_LOGICAL,
   ELK_SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL,

   /**
    * Untyped A64 surface access opcodes.
    *
    * Source 0: 64-bit address
    * Source 1: Operational source
    * Source 2: [required] Opcode-specific control immediate, same as source 2
    *                      of the matching non-LOGICAL opcode.
    */
   ELK_SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL,
   ELK_SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL,
   ELK_SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL,
   ELK_SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL,
   ELK_SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL,
   ELK_SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL,
   ELK_SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL,
   ELK_SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL,

   ELK_SHADER_OPCODE_TYPED_ATOMIC_LOGICAL,
   ELK_SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL,
   ELK_SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL,

   ELK_SHADER_OPCODE_RND_MODE,
   ELK_SHADER_OPCODE_FLOAT_CONTROL_MODE,

   /**
    * Byte scattered write/read opcodes.
    *
    * LOGICAL opcodes are eventually translated to the matching non-LOGICAL
    * opcode, but instead of taking a single payload blob they expect their
    * arguments separately as individual sources, like untyped write/read.
    */
   ELK_SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL,
   ELK_SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL,
   ELK_SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL,
   ELK_SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL,

   /**
    * Memory fence messages.
    *
    * Source 0: Must be register g0, used as header.
    * Source 1: Immediate bool to indicate whether control is returned to the
    *           thread only after the fence has been honored.
    * Source 2: Immediate byte indicating which memory to fence.  Zero means
    *           global memory; GFX7_BTI_SLM means SLM (for Gfx11+ only).
    *
    * Vec4 backend only uses Source 0.
    */
   ELK_SHADER_OPCODE_MEMORY_FENCE,

   /**
    * Scheduling-only fence.
    *
    * Sources can be used to force a stall until the registers in those are
    * available.  This might generate MOVs or SYNC_NOPs (Gfx12+).
    */
   ELK_FS_OPCODE_SCHEDULING_FENCE,

   ELK_SHADER_OPCODE_GFX4_SCRATCH_READ,
   ELK_SHADER_OPCODE_GFX4_SCRATCH_WRITE,
   ELK_SHADER_OPCODE_GFX7_SCRATCH_READ,

   ELK_SHADER_OPCODE_SCRATCH_HEADER,

   /**
    * Gfx8+ SIMD8 URB messages.
    */
   ELK_SHADER_OPCODE_URB_READ_LOGICAL,
   ELK_SHADER_OPCODE_URB_WRITE_LOGICAL,

   /**
    * Return the index of the first enabled live channel and assign it to
    * the first component of the destination.  Frequently used as input
    * for the BROADCAST pseudo-opcode.
    */
   ELK_SHADER_OPCODE_FIND_LIVE_CHANNEL,

   /**
    * Return the index of the last enabled live channel and assign it to
    * the first component of the destination.
    */
   ELK_SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL,

   /**
    * Return the current execution mask in the specified flag subregister.
    * Can be CSE'ed more easily than a plain MOV from the ce0 ARF register.
    */
   ELK_FS_OPCODE_LOAD_LIVE_CHANNELS,

   /**
    * Pick the channel from its first source register given by the index
    * specified as second source.  Useful for variable indexing of surfaces.
    *
    * Note that because the result of this instruction is by definition
    * uniform and it can always be splatted to multiple channels using a
    * scalar regioning mode, only the first channel of the destination region
    * is guaranteed to be updated, which implies that BROADCAST instructions
    * should usually be marked force_writemask_all.
    */
   ELK_SHADER_OPCODE_BROADCAST,

   /**
    * Pick the channel from its first source register given by the index
    * specified as second source.
    *
    * This is similar to the BROADCAST instruction except that it takes a
    * dynamic index and potentially puts a different value in each output
    * channel.
    */
   ELK_SHADER_OPCODE_SHUFFLE,

   /**
    * Select between src0 and src1 based on channel enables.
    *
    * This instruction copies src0 into the enabled channels of the
    * destination and copies src1 into the disabled channels.
    */
   ELK_SHADER_OPCODE_SEL_EXEC,

   /**
    * This turns into an align16 mov from src0 to dst with a swizzle
    * provided as an immediate in src1.
    */
   ELK_SHADER_OPCODE_QUAD_SWIZZLE,

   /**
    * Take every Nth element in src0 and broadcast it to the group of N
    * channels in which it lives in the destination.  The offset within the
    * cluster is given by src1 and the cluster size is given by src2.
    */
   ELK_SHADER_OPCODE_CLUSTER_BROADCAST,

   ELK_SHADER_OPCODE_GET_BUFFER_SIZE,

   ELK_SHADER_OPCODE_INTERLOCK,

   /** Target for a HALT
    *
    * All HALT instructions in a shader must target the same jump point and
    * that point is denoted by a HALT_TARGET instruction.
    */
   ELK_SHADER_OPCODE_HALT_TARGET,

   ELK_VEC4_OPCODE_MOV_BYTES,
   ELK_VEC4_OPCODE_PACK_BYTES,
   ELK_VEC4_OPCODE_UNPACK_UNIFORM,
   ELK_VEC4_OPCODE_DOUBLE_TO_F32,
   ELK_VEC4_OPCODE_DOUBLE_TO_D32,
   ELK_VEC4_OPCODE_DOUBLE_TO_U32,
   ELK_VEC4_OPCODE_TO_DOUBLE,
   ELK_VEC4_OPCODE_PICK_LOW_32BIT,
   ELK_VEC4_OPCODE_PICK_HIGH_32BIT,
   ELK_VEC4_OPCODE_SET_LOW_32BIT,
   ELK_VEC4_OPCODE_SET_HIGH_32BIT,
   ELK_VEC4_OPCODE_MOV_FOR_SCRATCH,
   ELK_VEC4_OPCODE_ZERO_OOB_PUSH_REGS,

   ELK_FS_OPCODE_DDX_COARSE,
   ELK_FS_OPCODE_DDX_FINE,
   /**
    * Compute dFdy(), dFdyCoarse(), or dFdyFine().
    */
   ELK_FS_OPCODE_DDY_COARSE,
   ELK_FS_OPCODE_DDY_FINE,
   ELK_FS_OPCODE_LINTERP,
   ELK_FS_OPCODE_PIXEL_X,
   ELK_FS_OPCODE_PIXEL_Y,
   ELK_FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
   ELK_FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GFX4,
   ELK_FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL,
   ELK_FS_OPCODE_SET_SAMPLE_ID,
   ELK_FS_OPCODE_PACK_HALF_2x16_SPLIT,
   ELK_FS_OPCODE_INTERPOLATE_AT_SAMPLE,
   ELK_FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
   ELK_FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET,

   ELK_VEC4_VS_OPCODE_URB_WRITE,
   ELK_VS_OPCODE_PULL_CONSTANT_LOAD,
   ELK_VS_OPCODE_PULL_CONSTANT_LOAD_GFX7,

   ELK_VS_OPCODE_UNPACK_FLAGS_SIMD4X2,

   /**
    * Write geometry shader output data to the URB.
    *
    * Unlike ELK_VEC4_VS_OPCODE_URB_WRITE, this opcode doesn't do an implied
    * move from R0 to the first MRF.  This allows the geometry shader to
    * override the "Slot {0,1} Offset" fields in the message header.
    */
   ELK_VEC4_GS_OPCODE_URB_WRITE,

   /**
    * Write geometry shader output data to the URB and request a new URB
    * handle (gfx6).
    *
    * This opcode doesn't do an implied move from R0 to the first MRF.
    */
   ELK_VEC4_GS_OPCODE_URB_WRITE_ALLOCATE,

   /**
    * Terminate the geometry shader thread by doing an empty URB write.
    *
    * This opcode doesn't do an implied move from R0 to the first MRF.  This
    * allows the geometry shader to override the "GS Number of Output Vertices
    * for Slot {0,1}" fields in the message header.
    */
   ELK_GS_OPCODE_THREAD_END,

   /**
    * Set the "Slot {0,1} Offset" fields of a URB_WRITE message header.
    *
    * - dst is the MRF containing the message header.
    *
    * - src0.x indicates which portion of the URB should be written to (e.g. a
    *   vertex number)
    *
    * - src1 is an immediate multiplier which will be applied to src0
    *   (e.g. the size of a single vertex in the URB).
    *
    * Note: the hardware will apply this offset *in addition to* the offset in
    * vec4_instruction::offset.
    */
   ELK_GS_OPCODE_SET_WRITE_OFFSET,

   /**
    * Set the "GS Number of Output Vertices for Slot {0,1}" fields of a
    * URB_WRITE message header.
    *
    * - dst is the MRF containing the message header.
    *
    * - src0.x is the vertex count.  The upper 16 bits will be ignored.
    */
   ELK_GS_OPCODE_SET_VERTEX_COUNT,

   /**
    * Set DWORD 2 of dst to the value in src.
    */
   ELK_GS_OPCODE_SET_DWORD_2,

   /**
    * Prepare the dst register for storage in the "Channel Mask" fields of a
    * URB_WRITE message header.
    *
    * DWORD 4 of dst is shifted left by 4 bits, so that later,
    * ELK_GS_OPCODE_SET_CHANNEL_MASKS can OR DWORDs 0 and 4 together to form the
    * final channel mask.
    *
    * Note: since ELK_GS_OPCODE_SET_CHANNEL_MASKS ORs DWORDs 0 and 4 together to
    * form the final channel mask, DWORDs 0 and 4 of the dst register must not
    * have any extraneous bits set prior to execution of this opcode (that is,
    * they should be in the range 0x0 to 0xf).
    */
   ELK_GS_OPCODE_PREPARE_CHANNEL_MASKS,

   /**
    * Set the "Channel Mask" fields of a URB_WRITE message header.
    *
    * - dst is the MRF containing the message header.
    *
    * - src.x is the channel mask, as prepared by
    *   ELK_GS_OPCODE_PREPARE_CHANNEL_MASKS.  DWORDs 0 and 4 are OR'ed together
    *   to form the final channel mask.
    */
   ELK_GS_OPCODE_SET_CHANNEL_MASKS,

   /**
    * Get the "Instance ID" fields from the payload.
    *
    * - dst is the GRF for gl_InvocationID.
    */
   ELK_GS_OPCODE_GET_INSTANCE_ID,

   /**
    * Send a FF_SYNC message to allocate initial URB handles (gfx6).
    *
    * - dst will be used as the writeback register for the FF_SYNC operation.
    *
    * - src0 is the number of primitives written.
    *
    * - src1 is the value to hold in M0.0: number of SO vertices to write
    *   and number of SO primitives needed. Its value will be overwritten
    *   with the SVBI values if transform feedback is enabled.
    *
    * Note: This opcode uses an implicit MRF register for the ff_sync message
    * header, so the caller is expected to set inst->base_mrf and initialize
    * that MRF register to r0. This opcode will also write to this MRF register
    * to include the allocated URB handle so it can then be reused directly as
    * the header in the URB write operation we are allocating the handle for.
    */
   ELK_GS_OPCODE_FF_SYNC,

   /**
    * Move r0.1 (which holds PrimitiveID information in gfx6) to a separate
    * register.
    *
    * - dst is the GRF where PrimitiveID information will be moved.
    */
   ELK_GS_OPCODE_SET_PRIMITIVE_ID,

   /**
    * Write transform feedback data to the SVB by sending a SVB WRITE message.
    * Used in gfx6.
    *
    * - dst is the MRF register containing the message header.
    *
    * - src0 is the register where the vertex data is going to be copied from.
    *
    * - src1 is the destination register when write commit occurs.
    */
   ELK_GS_OPCODE_SVB_WRITE,

   /**
    * Set destination index in the SVB write message payload (M0.5). Used
    * in gfx6 for transform feedback.
    *
    * - dst is the header to save the destination indices for SVB WRITE.
    * - src is the register that holds the destination indices value.
    */
   ELK_GS_OPCODE_SVB_SET_DST_INDEX,

   /**
    * Prepare Mx.0 subregister for being used in the FF_SYNC message header.
    * Used in gfx6 for transform feedback.
    *
    * - dst will hold the register with the final Mx.0 value.
    *
    * - src0 has the number of vertices emitted in SO (NumSOVertsToWrite)
    *
    * - src1 has the number of needed primitives for SO (NumSOPrimsNeeded)
    *
    * - src2 is the value to hold in M0: number of SO vertices to write
    *   and number of SO primitives needed.
    */
   ELK_GS_OPCODE_FF_SYNC_SET_PRIMITIVES,

   /**
    * Terminate the compute shader.
    */
   ELK_CS_OPCODE_CS_TERMINATE,

   /**
    * GLSL barrier()
    */
   ELK_SHADER_OPCODE_BARRIER,

   /**
    * Calculate the high 32-bits of a 32x32 multiply.
    */
   ELK_SHADER_OPCODE_MULH,

   /** Signed subtraction with saturation. */
   ELK_SHADER_OPCODE_ISUB_SAT,

   /** Unsigned subtraction with saturation. */
   ELK_SHADER_OPCODE_USUB_SAT,

   /**
    * A MOV that uses VxH indirect addressing.
    *
    * Source 0: A register to start from (HW_REG).
    * Source 1: An indirect offset (in bytes, UD GRF).
    * Source 2: The length of the region that could be accessed (in bytes,
    *           UD immediate).
    */
   ELK_SHADER_OPCODE_MOV_INDIRECT,

   /** Fills out a relocatable immediate */
   ELK_SHADER_OPCODE_MOV_RELOC_IMM,

   ELK_VEC4_OPCODE_URB_READ,
   ELK_TCS_OPCODE_GET_INSTANCE_ID,
   ELK_VEC4_TCS_OPCODE_URB_WRITE,
   ELK_VEC4_TCS_OPCODE_SET_INPUT_URB_OFFSETS,
   ELK_VEC4_TCS_OPCODE_SET_OUTPUT_URB_OFFSETS,
   ELK_TCS_OPCODE_GET_PRIMITIVE_ID,
   ELK_TCS_OPCODE_CREATE_BARRIER_HEADER,
   ELK_TCS_OPCODE_SRC0_010_IS_ZERO,
   ELK_TCS_OPCODE_RELEASE_INPUT,
   ELK_TCS_OPCODE_THREAD_END,

   ELK_TES_OPCODE_GET_PRIMITIVE_ID,
   ELK_TES_OPCODE_CREATE_INPUT_READ_HEADER,
   ELK_TES_OPCODE_ADD_INDIRECT_URB_OFFSET,

   ELK_SHADER_OPCODE_READ_SR_REG,
};


#ifdef __cplusplus
}
#endif

#endif