/* xref: /aosp_15_r20/external/mesa3d/src/intel/compiler/elk/elk_eu_opcodes.h (revision 6104692788411f58d303aa86923a9ff6ecaded22) */
/*
 * Copyright © 2024 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

#ifndef ELK_EU_OPCODES_H
#define ELK_EU_OPCODES_H

#ifdef __cplusplus
extern "C" {
#endif

/**
 * Opcodes understood by the ELK compiler backend.
 *
 * The enum is split in two contiguous ranges:
 *
 * - [0, NUM_ELK_OPCODES): the actual hardware instructions.
 * - [NUM_ELK_OPCODES, ...): compiler backend opcodes that get translated
 *   into other instructions.
 *
 * Enumerator values are assigned implicitly, so the declaration order below
 * determines every value; do not reorder entries.
 */
enum elk_opcode {
   /* These are the actual hardware instructions. */
   ELK_OPCODE_ILLEGAL,
   ELK_OPCODE_MOV,
   ELK_OPCODE_SEL,
   ELK_OPCODE_MOVI, /**< G45+ */
   ELK_OPCODE_NOT,
   ELK_OPCODE_AND,
   ELK_OPCODE_OR,
   ELK_OPCODE_XOR,
   ELK_OPCODE_SHR,
   ELK_OPCODE_SHL,
   ELK_OPCODE_DIM, /**< Gfx7.5 only */
   ELK_OPCODE_SMOV, /**< Gfx8+ */
   ELK_OPCODE_ASR,
   ELK_OPCODE_CMP,
   ELK_OPCODE_CMPN,
   ELK_OPCODE_CSEL, /**< Gfx8+ */
   ELK_OPCODE_F32TO16, /**< Gfx7 only */
   ELK_OPCODE_F16TO32, /**< Gfx7 only */
   ELK_OPCODE_BFREV, /**< Gfx7+ */
   ELK_OPCODE_BFE, /**< Gfx7+ */
   ELK_OPCODE_BFI1, /**< Gfx7+ */
   ELK_OPCODE_BFI2, /**< Gfx7+ */
   ELK_OPCODE_JMPI,
   ELK_OPCODE_BRD, /**< Gfx7+ */
   ELK_OPCODE_IF,
   ELK_OPCODE_IFF, /**< Pre-Gfx6 */
   ELK_OPCODE_BRC, /**< Gfx7+ */
   ELK_OPCODE_ELSE,
   ELK_OPCODE_ENDIF,
   ELK_OPCODE_DO, /**< Pre-Gfx6 */
   ELK_OPCODE_CASE, /**< Gfx6 only */
   ELK_OPCODE_WHILE,
   ELK_OPCODE_BREAK,
   ELK_OPCODE_CONTINUE,
   ELK_OPCODE_HALT,
   ELK_OPCODE_CALLA, /**< Gfx7.5+ */
   ELK_OPCODE_MSAVE, /**< Pre-Gfx6 */
   ELK_OPCODE_CALL, /**< Gfx6+ */
   ELK_OPCODE_MREST, /**< Pre-Gfx6 */
   ELK_OPCODE_RET, /**< Gfx6+ */
   ELK_OPCODE_PUSH, /**< Pre-Gfx6 */
   ELK_OPCODE_FORK, /**< Gfx6 only */
   ELK_OPCODE_GOTO, /**< Gfx8+ */
   ELK_OPCODE_POP, /**< Pre-Gfx6 */
   ELK_OPCODE_WAIT,
   ELK_OPCODE_SEND,
   ELK_OPCODE_SENDC,
   ELK_OPCODE_MATH, /**< Gfx6+ */
   ELK_OPCODE_ADD,
   ELK_OPCODE_MUL,
   ELK_OPCODE_AVG,
   ELK_OPCODE_FRC,
   ELK_OPCODE_RNDU,
   ELK_OPCODE_RNDD,
   ELK_OPCODE_RNDE,
   ELK_OPCODE_RNDZ,
   ELK_OPCODE_MAC,
   ELK_OPCODE_MACH,
   ELK_OPCODE_LZD,
   ELK_OPCODE_FBH, /**< Gfx7+ */
   ELK_OPCODE_FBL, /**< Gfx7+ */
   ELK_OPCODE_CBIT, /**< Gfx7+ */
   ELK_OPCODE_ADDC, /**< Gfx7+ */
   ELK_OPCODE_SUBB, /**< Gfx7+ */
   ELK_OPCODE_SAD2,
   ELK_OPCODE_SADA2,
   ELK_OPCODE_DP4,
   ELK_OPCODE_DPH,
   ELK_OPCODE_DP3,
   ELK_OPCODE_DP2,
   ELK_OPCODE_LINE,
   ELK_OPCODE_PLN, /**< G45+ */
   ELK_OPCODE_MAD, /**< Gfx6+ */
   ELK_OPCODE_LRP, /**< Gfx6+ */
   ELK_OPCODE_MADM, /**< Gfx8+ */
   ELK_OPCODE_NENOP, /**< G45 only */
   ELK_OPCODE_NOP,

   /**
    * Number of hardware opcodes declared above.  The first backend opcode
    * below is explicitly assigned this same value.
    */
   NUM_ELK_OPCODES,

   /* These are compiler backend opcodes that get translated into other
    * instructions.
    */
   ELK_FS_OPCODE_FB_WRITE = NUM_ELK_OPCODES,

   /**
    * Same as ELK_FS_OPCODE_FB_WRITE but expects its arguments separately as
    * individual sources instead of as a single payload blob. The
    * position/ordering of the arguments are defined by the enum
    * fb_write_logical_srcs.
    */
   ELK_FS_OPCODE_FB_WRITE_LOGICAL,

   ELK_FS_OPCODE_REP_FB_WRITE,

   ELK_SHADER_OPCODE_RCP,
   ELK_SHADER_OPCODE_RSQ,
   ELK_SHADER_OPCODE_SQRT,
   ELK_SHADER_OPCODE_EXP2,
   ELK_SHADER_OPCODE_LOG2,
   ELK_SHADER_OPCODE_POW,
   ELK_SHADER_OPCODE_INT_QUOTIENT,
   ELK_SHADER_OPCODE_INT_REMAINDER,
   ELK_SHADER_OPCODE_SIN,
   ELK_SHADER_OPCODE_COS,

   /**
    * A generic "send" opcode.  The first source is the descriptor and
    * the second source is the message payload.
    */
   ELK_SHADER_OPCODE_SEND,

   /**
    * An "undefined" write which does nothing but indicates to liveness that
    * we don't care about any values in the register which predate this
    * instruction.  Used to prevent partial writes from causing issues with
    * live ranges.
    */
   ELK_SHADER_OPCODE_UNDEF,

   /**
    * Texture sampling opcodes.
    *
    * LOGICAL opcodes are eventually translated to the matching non-LOGICAL
    * opcode but instead of taking a single payload blob they expect their
    * arguments separately as individual sources. The position/ordering of the
    * arguments are defined by the enum tex_logical_srcs.
    */
   ELK_SHADER_OPCODE_TEX,
   ELK_SHADER_OPCODE_TEX_LOGICAL,
   ELK_SHADER_OPCODE_TXD,
   ELK_SHADER_OPCODE_TXD_LOGICAL,
   ELK_SHADER_OPCODE_TXF,
   ELK_SHADER_OPCODE_TXF_LOGICAL,
   ELK_SHADER_OPCODE_TXF_LZ,
   ELK_SHADER_OPCODE_TXL,
   ELK_SHADER_OPCODE_TXL_LOGICAL,
   ELK_SHADER_OPCODE_TXL_LZ,
   ELK_SHADER_OPCODE_TXS,
   ELK_SHADER_OPCODE_TXS_LOGICAL,
   ELK_FS_OPCODE_TXB,
   ELK_FS_OPCODE_TXB_LOGICAL,
   ELK_SHADER_OPCODE_TXF_CMS,
   ELK_SHADER_OPCODE_TXF_CMS_LOGICAL,
   ELK_SHADER_OPCODE_TXF_CMS_W,
   ELK_SHADER_OPCODE_TXF_CMS_W_LOGICAL,
   ELK_SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL,
   ELK_SHADER_OPCODE_TXF_UMS,
   ELK_SHADER_OPCODE_TXF_UMS_LOGICAL,
   ELK_SHADER_OPCODE_TXF_MCS,
   ELK_SHADER_OPCODE_TXF_MCS_LOGICAL,
   ELK_SHADER_OPCODE_LOD,
   ELK_SHADER_OPCODE_LOD_LOGICAL,
   ELK_SHADER_OPCODE_TG4,
   ELK_SHADER_OPCODE_TG4_LOGICAL,
   ELK_SHADER_OPCODE_TG4_OFFSET,
   ELK_SHADER_OPCODE_TG4_OFFSET_LOGICAL,
   ELK_SHADER_OPCODE_SAMPLEINFO,
   ELK_SHADER_OPCODE_SAMPLEINFO_LOGICAL,

   ELK_SHADER_OPCODE_IMAGE_SIZE_LOGICAL,

   /**
    * Combines multiple sources of size 1 into a larger virtual GRF.
    * For example, parameters for a send-from-GRF message.  Or, updating
    * channels of a size 4 VGRF used to store vec4s such as texturing results.
    *
    * This will be lowered into MOVs from each source to consecutive offsets
    * of the destination VGRF.
    *
    * src[0] may be BAD_FILE.  If so, the lowering pass skips emitting the MOV,
    * but still reserves the first channel of the destination VGRF.  This can be
    * used to reserve space for, say, a message header set up by the generators.
    */
   ELK_SHADER_OPCODE_LOAD_PAYLOAD,

   /**
    * Packs a number of sources into a single value. Unlike LOAD_PAYLOAD, this
    * acts intra-channel, obtaining the final value for each channel by
    * combining the sources values for the same channel, the first source
    * occupying the lowest bits and the last source occupying the highest
    * bits.
    */
   ELK_FS_OPCODE_PACK,

   /**
    * Typed and untyped surface access opcodes.
    *
    * LOGICAL opcodes are eventually translated to the matching non-LOGICAL
    * opcode but instead of taking a single payload blob they expect their
    * arguments separately as individual sources:
    *
    * Source 0: [required] Surface coordinates.
    * Source 1: [optional] Operation source.
    * Source 2: [required] Surface index.
    * Source 3: [required] Number of coordinate components (as UD immediate).
    * Source 4: [required] Opcode-specific control immediate, same as source 2
    *                      of the matching non-LOGICAL opcode.
    */
   ELK_VEC4_OPCODE_UNTYPED_ATOMIC,
   ELK_SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL,
   ELK_VEC4_OPCODE_UNTYPED_SURFACE_READ,
   ELK_SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL,
   ELK_VEC4_OPCODE_UNTYPED_SURFACE_WRITE,
   ELK_SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL,

   ELK_SHADER_OPCODE_UNALIGNED_OWORD_BLOCK_READ_LOGICAL,
   ELK_SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL,

   /**
    * Untyped A64 surface access opcodes.
    *
    * Source 0: 64-bit address
    * Source 1: Operational source
    * Source 2: [required] Opcode-specific control immediate, same as source 2
    *                      of the matching non-LOGICAL opcode.
    */
   ELK_SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL,
   ELK_SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL,
   ELK_SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL,
   ELK_SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL,
   ELK_SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL,
   ELK_SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL,
   ELK_SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL,
   ELK_SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL,

   ELK_SHADER_OPCODE_TYPED_ATOMIC_LOGICAL,
   ELK_SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL,
   ELK_SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL,

   ELK_SHADER_OPCODE_RND_MODE,
   ELK_SHADER_OPCODE_FLOAT_CONTROL_MODE,

   /**
    * Byte scattered write/read opcodes.
    *
    * LOGICAL opcodes are eventually translated to the matching non-LOGICAL
    * opcode, but instead of taking a single payload blob they expect their
    * arguments separately as individual sources, like untyped write/read.
    */
   ELK_SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL,
   ELK_SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL,
   ELK_SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL,
   ELK_SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL,

   /**
    * Memory fence messages.
    *
    * Source 0: Must be register g0, used as header.
    * Source 1: Immediate bool to indicate whether control is returned to the
    *           thread only after the fence has been honored.
    * Source 2: Immediate byte indicating which memory to fence.  Zero means
    *           global memory; GFX7_BTI_SLM means SLM (for Gfx11+ only).
    *
    * Vec4 backend only uses Source 0.
    */
   ELK_SHADER_OPCODE_MEMORY_FENCE,

   /**
    * Scheduling-only fence.
    *
    * Sources can be used to force a stall until the registers in those are
    * available.  This might generate MOVs or SYNC_NOPs (Gfx12+).
    */
   ELK_FS_OPCODE_SCHEDULING_FENCE,

   ELK_SHADER_OPCODE_GFX4_SCRATCH_READ,
   ELK_SHADER_OPCODE_GFX4_SCRATCH_WRITE,
   ELK_SHADER_OPCODE_GFX7_SCRATCH_READ,

   ELK_SHADER_OPCODE_SCRATCH_HEADER,

   /**
    * Gfx8+ SIMD8 URB messages.
    */
   ELK_SHADER_OPCODE_URB_READ_LOGICAL,
   ELK_SHADER_OPCODE_URB_WRITE_LOGICAL,

   /**
    * Return the index of the first enabled live channel and assign it to
    * the first component of the destination.  Frequently used as input
    * for the BROADCAST pseudo-opcode.
    */
   ELK_SHADER_OPCODE_FIND_LIVE_CHANNEL,

   /**
    * Return the index of the last enabled live channel and assign it to
    * the first component of the destination.
    */
   ELK_SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL,

   /**
    * Return the current execution mask in the specified flag subregister.
    * Can be CSE'ed more easily than a plain MOV from the ce0 ARF register.
    */
   ELK_FS_OPCODE_LOAD_LIVE_CHANNELS,

   /**
    * Pick the channel from its first source register given by the index
    * specified as second source.  Useful for variable indexing of surfaces.
    *
    * Note that because the result of this instruction is by definition
    * uniform and it can always be splatted to multiple channels using a
    * scalar regioning mode, only the first channel of the destination region
    * is guaranteed to be updated, which implies that BROADCAST instructions
    * should usually be marked force_writemask_all.
    */
   ELK_SHADER_OPCODE_BROADCAST,

   /**
    * Pick the channel from its first source register given by the index
    * specified as second source.
    *
    * This is similar to the BROADCAST instruction except that it takes a
    * dynamic index and potentially puts a different value in each output
    * channel.
    */
   ELK_SHADER_OPCODE_SHUFFLE,

   /**
    * Select between src0 and src1 based on channel enables.
    *
    * This instruction copies src0 into the enabled channels of the
    * destination and copies src1 into the disabled channels.
    */
   ELK_SHADER_OPCODE_SEL_EXEC,

   /**
    * This turns into an align16 mov from src0 to dst with a swizzle
    * provided as an immediate in src1.
    */
   ELK_SHADER_OPCODE_QUAD_SWIZZLE,

   /**
    * Take every Nth element in src0 and broadcast it to the group of N
    * channels in which it lives in the destination.  The offset within the
    * cluster is given by src1 and the cluster size is given by src2.
    */
   ELK_SHADER_OPCODE_CLUSTER_BROADCAST,

   ELK_SHADER_OPCODE_GET_BUFFER_SIZE,

   ELK_SHADER_OPCODE_INTERLOCK,

   /**
    * Target for a HALT
    *
    * All HALT instructions in a shader must target the same jump point and
    * that point is denoted by a HALT_TARGET instruction.
    */
   ELK_SHADER_OPCODE_HALT_TARGET,

   ELK_VEC4_OPCODE_MOV_BYTES,
   ELK_VEC4_OPCODE_PACK_BYTES,
   ELK_VEC4_OPCODE_UNPACK_UNIFORM,
   ELK_VEC4_OPCODE_DOUBLE_TO_F32,
   ELK_VEC4_OPCODE_DOUBLE_TO_D32,
   ELK_VEC4_OPCODE_DOUBLE_TO_U32,
   ELK_VEC4_OPCODE_TO_DOUBLE,
   ELK_VEC4_OPCODE_PICK_LOW_32BIT,
   ELK_VEC4_OPCODE_PICK_HIGH_32BIT,
   ELK_VEC4_OPCODE_SET_LOW_32BIT,
   ELK_VEC4_OPCODE_SET_HIGH_32BIT,
   ELK_VEC4_OPCODE_MOV_FOR_SCRATCH,
   ELK_VEC4_OPCODE_ZERO_OOB_PUSH_REGS,

   ELK_FS_OPCODE_DDX_COARSE,
   ELK_FS_OPCODE_DDX_FINE,
   /**
    * Compute dFdy(), dFdyCoarse(), or dFdyFine().
    */
   ELK_FS_OPCODE_DDY_COARSE,
   ELK_FS_OPCODE_DDY_FINE,
   ELK_FS_OPCODE_LINTERP,
   ELK_FS_OPCODE_PIXEL_X,
   ELK_FS_OPCODE_PIXEL_Y,
   ELK_FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
   ELK_FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GFX4,
   ELK_FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL,
   ELK_FS_OPCODE_SET_SAMPLE_ID,
   ELK_FS_OPCODE_PACK_HALF_2x16_SPLIT,
   ELK_FS_OPCODE_INTERPOLATE_AT_SAMPLE,
   ELK_FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
   ELK_FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET,

   ELK_VEC4_VS_OPCODE_URB_WRITE,
   ELK_VS_OPCODE_PULL_CONSTANT_LOAD,
   ELK_VS_OPCODE_PULL_CONSTANT_LOAD_GFX7,

   ELK_VS_OPCODE_UNPACK_FLAGS_SIMD4X2,

   /**
    * Write geometry shader output data to the URB.
    *
    * Unlike ELK_VEC4_VS_OPCODE_URB_WRITE, this opcode doesn't do an implied
    * move from R0 to the first MRF.  This allows the geometry shader to
    * override the "Slot {0,1} Offset" fields in the message header.
    */
   ELK_VEC4_GS_OPCODE_URB_WRITE,

   /**
    * Write geometry shader output data to the URB and request a new URB
    * handle (gfx6).
    *
    * This opcode doesn't do an implied move from R0 to the first MRF.
    */
   ELK_VEC4_GS_OPCODE_URB_WRITE_ALLOCATE,

   /**
    * Terminate the geometry shader thread by doing an empty URB write.
    *
    * This opcode doesn't do an implied move from R0 to the first MRF.  This
    * allows the geometry shader to override the "GS Number of Output Vertices
    * for Slot {0,1}" fields in the message header.
    */
   ELK_GS_OPCODE_THREAD_END,

   /**
    * Set the "Slot {0,1} Offset" fields of a URB_WRITE message header.
    *
    * - dst is the MRF containing the message header.
    *
    * - src0.x indicates which portion of the URB should be written to (e.g. a
    *   vertex number)
    *
    * - src1 is an immediate multiplier which will be applied to src0
    *   (e.g. the size of a single vertex in the URB).
    *
    * Note: the hardware will apply this offset *in addition to* the offset in
    * vec4_instruction::offset.
    */
   ELK_GS_OPCODE_SET_WRITE_OFFSET,

   /**
    * Set the "GS Number of Output Vertices for Slot {0,1}" fields of a
    * URB_WRITE message header.
    *
    * - dst is the MRF containing the message header.
    *
    * - src0.x is the vertex count.  The upper 16 bits will be ignored.
    */
   ELK_GS_OPCODE_SET_VERTEX_COUNT,

   /**
    * Set DWORD 2 of dst to the value in src.
    */
   ELK_GS_OPCODE_SET_DWORD_2,

   /**
    * Prepare the dst register for storage in the "Channel Mask" fields of a
    * URB_WRITE message header.
    *
    * DWORD 4 of dst is shifted left by 4 bits, so that later,
    * ELK_GS_OPCODE_SET_CHANNEL_MASKS can OR DWORDs 0 and 4 together to form the
    * final channel mask.
    *
    * Note: since ELK_GS_OPCODE_SET_CHANNEL_MASKS ORs DWORDs 0 and 4 together to
    * form the final channel mask, DWORDs 0 and 4 of the dst register must not
    * have any extraneous bits set prior to execution of this opcode (that is,
    * they should be in the range 0x0 to 0xf).
    */
   ELK_GS_OPCODE_PREPARE_CHANNEL_MASKS,

   /**
    * Set the "Channel Mask" fields of a URB_WRITE message header.
    *
    * - dst is the MRF containing the message header.
    *
    * - src.x is the channel mask, as prepared by
    *   ELK_GS_OPCODE_PREPARE_CHANNEL_MASKS.  DWORDs 0 and 4 are OR'ed together
    *   to form the final channel mask.
    */
   ELK_GS_OPCODE_SET_CHANNEL_MASKS,

   /**
    * Get the "Instance ID" fields from the payload.
    *
    * - dst is the GRF for gl_InvocationID.
    */
   ELK_GS_OPCODE_GET_INSTANCE_ID,

   /**
    * Send a FF_SYNC message to allocate initial URB handles (gfx6).
    *
    * - dst will be used as the writeback register for the FF_SYNC operation.
    *
    * - src0 is the number of primitives written.
    *
    * - src1 is the value to hold in M0.0: number of SO vertices to write
    *   and number of SO primitives needed. Its value will be overwritten
    *   with the SVBI values if transform feedback is enabled.
    *
    * Note: This opcode uses an implicit MRF register for the ff_sync message
    * header, so the caller is expected to set inst->base_mrf and initialize
    * that MRF register to r0. This opcode will also write to this MRF register
    * to include the allocated URB handle so it can then be reused directly as
    * the header in the URB write operation we are allocating the handle for.
    */
   ELK_GS_OPCODE_FF_SYNC,

   /**
    * Move r0.1 (which holds PrimitiveID information in gfx6) to a separate
    * register.
    *
    * - dst is the GRF where PrimitiveID information will be moved.
    */
   ELK_GS_OPCODE_SET_PRIMITIVE_ID,

   /**
    * Write transform feedback data to the SVB by sending a SVB WRITE message.
    * Used in gfx6.
    *
    * - dst is the MRF register containing the message header.
    *
    * - src0 is the register where the vertex data is going to be copied from.
    *
    * - src1 is the destination register when write commit occurs.
    */
   ELK_GS_OPCODE_SVB_WRITE,

   /**
    * Set destination index in the SVB write message payload (M0.5). Used
    * in gfx6 for transform feedback.
    *
    * - dst is the header to save the destination indices for SVB WRITE.
    * - src is the register that holds the destination indices value.
    */
   ELK_GS_OPCODE_SVB_SET_DST_INDEX,

   /**
    * Prepare Mx.0 subregister for being used in the FF_SYNC message header.
    * Used in gfx6 for transform feedback.
    *
    * - dst will hold the register with the final Mx.0 value.
    *
    * - src0 has the number of vertices emitted in SO (NumSOVertsToWrite)
    *
    * - src1 has the number of needed primitives for SO (NumSOPrimsNeeded)
    *
    * - src2 is the value to hold in M0: number of SO vertices to write
    *   and number of SO primitives needed.
    */
   ELK_GS_OPCODE_FF_SYNC_SET_PRIMITIVES,

   /**
    * Terminate the compute shader.
    */
   ELK_CS_OPCODE_CS_TERMINATE,

   /**
    * GLSL barrier()
    */
   ELK_SHADER_OPCODE_BARRIER,

   /**
    * Calculate the high 32-bits of a 32x32 multiply.
    */
   ELK_SHADER_OPCODE_MULH,

   /** Signed subtraction with saturation. */
   ELK_SHADER_OPCODE_ISUB_SAT,

   /** Unsigned subtraction with saturation. */
   ELK_SHADER_OPCODE_USUB_SAT,

   /**
    * A MOV that uses VxH indirect addressing.
    *
    * Source 0: A register to start from (HW_REG).
    * Source 1: An indirect offset (in bytes, UD GRF).
    * Source 2: The length of the region that could be accessed (in bytes,
    *           UD immediate).
    */
   ELK_SHADER_OPCODE_MOV_INDIRECT,

   /** Fills out a relocatable immediate */
   ELK_SHADER_OPCODE_MOV_RELOC_IMM,

   ELK_VEC4_OPCODE_URB_READ,
   ELK_TCS_OPCODE_GET_INSTANCE_ID,
   ELK_VEC4_TCS_OPCODE_URB_WRITE,
   ELK_VEC4_TCS_OPCODE_SET_INPUT_URB_OFFSETS,
   ELK_VEC4_TCS_OPCODE_SET_OUTPUT_URB_OFFSETS,
   ELK_TCS_OPCODE_GET_PRIMITIVE_ID,
   ELK_TCS_OPCODE_CREATE_BARRIER_HEADER,
   ELK_TCS_OPCODE_SRC0_010_IS_ZERO,
   ELK_TCS_OPCODE_RELEASE_INPUT,
   ELK_TCS_OPCODE_THREAD_END,

   ELK_TES_OPCODE_GET_PRIMITIVE_ID,
   ELK_TES_OPCODE_CREATE_INPUT_READ_HEADER,
   ELK_TES_OPCODE_ADD_INDIRECT_URB_OFFSET,

   ELK_SHADER_OPCODE_READ_SR_REG,
};


#ifdef __cplusplus
}
#endif

#endif
