xref: /aosp_15_r20/external/mesa3d/src/intel/compiler/elk/elk_shader.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "elk_cfg.h"
25 #include "elk_eu.h"
26 #include "elk_fs.h"
27 #include "elk_nir.h"
28 #include "elk_private.h"
29 #include "elk_vec4_tes.h"
30 #include "dev/intel_debug.h"
31 #include "util/macros.h"
32 #include "util/u_debug.h"
33 
34 enum elk_reg_type
elk_type_for_base_type(const struct glsl_type * type)35 elk_type_for_base_type(const struct glsl_type *type)
36 {
37    switch (type->base_type) {
38    case GLSL_TYPE_FLOAT16:
39       return ELK_REGISTER_TYPE_HF;
40    case GLSL_TYPE_FLOAT:
41       return ELK_REGISTER_TYPE_F;
42    case GLSL_TYPE_INT:
43    case GLSL_TYPE_BOOL:
44    case GLSL_TYPE_SUBROUTINE:
45       return ELK_REGISTER_TYPE_D;
46    case GLSL_TYPE_INT16:
47       return ELK_REGISTER_TYPE_W;
48    case GLSL_TYPE_INT8:
49       return ELK_REGISTER_TYPE_B;
50    case GLSL_TYPE_UINT:
51       return ELK_REGISTER_TYPE_UD;
52    case GLSL_TYPE_UINT16:
53       return ELK_REGISTER_TYPE_UW;
54    case GLSL_TYPE_UINT8:
55       return ELK_REGISTER_TYPE_UB;
56    case GLSL_TYPE_ARRAY:
57       return elk_type_for_base_type(type->fields.array);
58    case GLSL_TYPE_STRUCT:
59    case GLSL_TYPE_INTERFACE:
60    case GLSL_TYPE_SAMPLER:
61    case GLSL_TYPE_TEXTURE:
62    case GLSL_TYPE_ATOMIC_UINT:
63       /* These should be overridden with the type of the member when
64        * dereferenced into.  ELK_REGISTER_TYPE_UD seems like a likely
65        * way to trip up if we don't.
66        */
67       return ELK_REGISTER_TYPE_UD;
68    case GLSL_TYPE_IMAGE:
69       return ELK_REGISTER_TYPE_UD;
70    case GLSL_TYPE_DOUBLE:
71       return ELK_REGISTER_TYPE_DF;
72    case GLSL_TYPE_UINT64:
73       return ELK_REGISTER_TYPE_UQ;
74    case GLSL_TYPE_INT64:
75       return ELK_REGISTER_TYPE_Q;
76    case GLSL_TYPE_VOID:
77    case GLSL_TYPE_ERROR:
78    case GLSL_TYPE_COOPERATIVE_MATRIX:
79       unreachable("not reached");
80    }
81 
82    return ELK_REGISTER_TYPE_F;
83 }
84 
85 uint32_t
elk_math_function(enum elk_opcode op)86 elk_math_function(enum elk_opcode op)
87 {
88    switch (op) {
89    case ELK_SHADER_OPCODE_RCP:
90       return ELK_MATH_FUNCTION_INV;
91    case ELK_SHADER_OPCODE_RSQ:
92       return ELK_MATH_FUNCTION_RSQ;
93    case ELK_SHADER_OPCODE_SQRT:
94       return ELK_MATH_FUNCTION_SQRT;
95    case ELK_SHADER_OPCODE_EXP2:
96       return ELK_MATH_FUNCTION_EXP;
97    case ELK_SHADER_OPCODE_LOG2:
98       return ELK_MATH_FUNCTION_LOG;
99    case ELK_SHADER_OPCODE_POW:
100       return ELK_MATH_FUNCTION_POW;
101    case ELK_SHADER_OPCODE_SIN:
102       return ELK_MATH_FUNCTION_SIN;
103    case ELK_SHADER_OPCODE_COS:
104       return ELK_MATH_FUNCTION_COS;
105    case ELK_SHADER_OPCODE_INT_QUOTIENT:
106       return ELK_MATH_FUNCTION_INT_DIV_QUOTIENT;
107    case ELK_SHADER_OPCODE_INT_REMAINDER:
108       return ELK_MATH_FUNCTION_INT_DIV_REMAINDER;
109    default:
110       unreachable("not reached: unknown math function");
111    }
112 }
113 
114 bool
elk_texture_offset(const nir_tex_instr * tex,unsigned src,uint32_t * offset_bits_out)115 elk_texture_offset(const nir_tex_instr *tex, unsigned src,
116                    uint32_t *offset_bits_out)
117 {
118    if (!nir_src_is_const(tex->src[src].src))
119       return false;
120 
121    const unsigned num_components = nir_tex_instr_src_size(tex, src);
122 
123    /* Combine all three offsets into a single unsigned dword:
124     *
125     *    bits 11:8 - U Offset (X component)
126     *    bits  7:4 - V Offset (Y component)
127     *    bits  3:0 - R Offset (Z component)
128     */
129    uint32_t offset_bits = 0;
130    for (unsigned i = 0; i < num_components; i++) {
131       int offset = nir_src_comp_as_int(tex->src[src].src, i);
132 
133       /* offset out of bounds; caller will handle it. */
134       if (offset > 7 || offset < -8)
135          return false;
136 
137       const unsigned shift = 4 * (2 - i);
138       offset_bits |= (offset & 0xF) << shift;
139    }
140 
141    *offset_bits_out = offset_bits;
142 
143    return true;
144 }
145 
146 const char *
elk_instruction_name(const struct elk_isa_info * isa,enum elk_opcode op)147 elk_instruction_name(const struct elk_isa_info *isa, enum elk_opcode op)
148 {
149    const struct intel_device_info *devinfo = isa->devinfo;
150 
151    switch (op) {
152    case 0 ... NUM_ELK_OPCODES - 1:
153       /* The DO instruction doesn't exist on Gfx6+, but we use it to mark the
154        * start of a loop in the IR.
155        */
156       if (devinfo->ver >= 6 && op == ELK_OPCODE_DO)
157          return "do";
158 
159       /* The following conversion opcodes doesn't exist on Gfx8+, but we use
160        * then to mark that we want to do the conversion.
161        */
162       if (devinfo->ver > 7 && op == ELK_OPCODE_F32TO16)
163          return "f32to16";
164 
165       if (devinfo->ver > 7 && op == ELK_OPCODE_F16TO32)
166          return "f16to32";
167 
168       assert(elk_opcode_desc(isa, op)->name);
169       return elk_opcode_desc(isa, op)->name;
170    case ELK_FS_OPCODE_FB_WRITE:
171       return "fb_write";
172    case ELK_FS_OPCODE_FB_WRITE_LOGICAL:
173       return "fb_write_logical";
174    case ELK_FS_OPCODE_REP_FB_WRITE:
175       return "rep_fb_write";
176 
177    case ELK_SHADER_OPCODE_RCP:
178       return "rcp";
179    case ELK_SHADER_OPCODE_RSQ:
180       return "rsq";
181    case ELK_SHADER_OPCODE_SQRT:
182       return "sqrt";
183    case ELK_SHADER_OPCODE_EXP2:
184       return "exp2";
185    case ELK_SHADER_OPCODE_LOG2:
186       return "log2";
187    case ELK_SHADER_OPCODE_POW:
188       return "pow";
189    case ELK_SHADER_OPCODE_INT_QUOTIENT:
190       return "int_quot";
191    case ELK_SHADER_OPCODE_INT_REMAINDER:
192       return "int_rem";
193    case ELK_SHADER_OPCODE_SIN:
194       return "sin";
195    case ELK_SHADER_OPCODE_COS:
196       return "cos";
197 
198    case ELK_SHADER_OPCODE_SEND:
199       return "send";
200 
201    case ELK_SHADER_OPCODE_UNDEF:
202       return "undef";
203 
204    case ELK_SHADER_OPCODE_TEX:
205       return "tex";
206    case ELK_SHADER_OPCODE_TEX_LOGICAL:
207       return "tex_logical";
208    case ELK_SHADER_OPCODE_TXD:
209       return "txd";
210    case ELK_SHADER_OPCODE_TXD_LOGICAL:
211       return "txd_logical";
212    case ELK_SHADER_OPCODE_TXF:
213       return "txf";
214    case ELK_SHADER_OPCODE_TXF_LOGICAL:
215       return "txf_logical";
216    case ELK_SHADER_OPCODE_TXF_LZ:
217       return "txf_lz";
218    case ELK_SHADER_OPCODE_TXL:
219       return "txl";
220    case ELK_SHADER_OPCODE_TXL_LOGICAL:
221       return "txl_logical";
222    case ELK_SHADER_OPCODE_TXL_LZ:
223       return "txl_lz";
224    case ELK_SHADER_OPCODE_TXS:
225       return "txs";
226    case ELK_SHADER_OPCODE_TXS_LOGICAL:
227       return "txs_logical";
228    case ELK_FS_OPCODE_TXB:
229       return "txb";
230    case ELK_FS_OPCODE_TXB_LOGICAL:
231       return "txb_logical";
232    case ELK_SHADER_OPCODE_TXF_CMS:
233       return "txf_cms";
234    case ELK_SHADER_OPCODE_TXF_CMS_LOGICAL:
235       return "txf_cms_logical";
236    case ELK_SHADER_OPCODE_TXF_CMS_W:
237       return "txf_cms_w";
238    case ELK_SHADER_OPCODE_TXF_CMS_W_LOGICAL:
239       return "txf_cms_w_logical";
240    case ELK_SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL:
241       return "txf_cms_w_gfx12_logical";
242    case ELK_SHADER_OPCODE_TXF_UMS:
243       return "txf_ums";
244    case ELK_SHADER_OPCODE_TXF_UMS_LOGICAL:
245       return "txf_ums_logical";
246    case ELK_SHADER_OPCODE_TXF_MCS:
247       return "txf_mcs";
248    case ELK_SHADER_OPCODE_TXF_MCS_LOGICAL:
249       return "txf_mcs_logical";
250    case ELK_SHADER_OPCODE_LOD:
251       return "lod";
252    case ELK_SHADER_OPCODE_LOD_LOGICAL:
253       return "lod_logical";
254    case ELK_SHADER_OPCODE_TG4:
255       return "tg4";
256    case ELK_SHADER_OPCODE_TG4_LOGICAL:
257       return "tg4_logical";
258    case ELK_SHADER_OPCODE_TG4_OFFSET:
259       return "tg4_offset";
260    case ELK_SHADER_OPCODE_TG4_OFFSET_LOGICAL:
261       return "tg4_offset_logical";
262    case ELK_SHADER_OPCODE_SAMPLEINFO:
263       return "sampleinfo";
264    case ELK_SHADER_OPCODE_SAMPLEINFO_LOGICAL:
265       return "sampleinfo_logical";
266 
267    case ELK_SHADER_OPCODE_IMAGE_SIZE_LOGICAL:
268       return "image_size_logical";
269 
270    case ELK_VEC4_OPCODE_UNTYPED_ATOMIC:
271       return "untyped_atomic";
272    case ELK_SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
273       return "untyped_atomic_logical";
274    case ELK_VEC4_OPCODE_UNTYPED_SURFACE_READ:
275       return "untyped_surface_read";
276    case ELK_SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
277       return "untyped_surface_read_logical";
278    case ELK_VEC4_OPCODE_UNTYPED_SURFACE_WRITE:
279       return "untyped_surface_write";
280    case ELK_SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
281       return "untyped_surface_write_logical";
282    case ELK_SHADER_OPCODE_UNALIGNED_OWORD_BLOCK_READ_LOGICAL:
283       return "unaligned_oword_block_read_logical";
284    case ELK_SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL:
285       return "oword_block_write_logical";
286    case ELK_SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL:
287       return "a64_untyped_read_logical";
288    case ELK_SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL:
289       return "a64_oword_block_read_logical";
290    case ELK_SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL:
291       return "a64_unaligned_oword_block_read_logical";
292    case ELK_SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL:
293       return "a64_oword_block_write_logical";
294    case ELK_SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL:
295       return "a64_untyped_write_logical";
296    case ELK_SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL:
297       return "a64_byte_scattered_read_logical";
298    case ELK_SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
299       return "a64_byte_scattered_write_logical";
300    case ELK_SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
301       return "a64_untyped_atomic_logical";
302    case ELK_SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
303       return "typed_atomic_logical";
304    case ELK_SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
305       return "typed_surface_read_logical";
306    case ELK_SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
307       return "typed_surface_write_logical";
308    case ELK_SHADER_OPCODE_MEMORY_FENCE:
309       return "memory_fence";
310    case ELK_FS_OPCODE_SCHEDULING_FENCE:
311       return "scheduling_fence";
312    case ELK_SHADER_OPCODE_INTERLOCK:
313       /* For an interlock we actually issue a memory fence via sendc. */
314       return "interlock";
315 
316    case ELK_SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
317       return "byte_scattered_read_logical";
318    case ELK_SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
319       return "byte_scattered_write_logical";
320    case ELK_SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL:
321       return "dword_scattered_read_logical";
322    case ELK_SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL:
323       return "dword_scattered_write_logical";
324 
325    case ELK_SHADER_OPCODE_LOAD_PAYLOAD:
326       return "load_payload";
327    case ELK_FS_OPCODE_PACK:
328       return "pack";
329 
330    case ELK_SHADER_OPCODE_GFX4_SCRATCH_READ:
331       return "gfx4_scratch_read";
332    case ELK_SHADER_OPCODE_GFX4_SCRATCH_WRITE:
333       return "gfx4_scratch_write";
334    case ELK_SHADER_OPCODE_GFX7_SCRATCH_READ:
335       return "gfx7_scratch_read";
336    case ELK_SHADER_OPCODE_SCRATCH_HEADER:
337       return "scratch_header";
338 
339    case ELK_SHADER_OPCODE_URB_WRITE_LOGICAL:
340       return "urb_write_logical";
341    case ELK_SHADER_OPCODE_URB_READ_LOGICAL:
342       return "urb_read_logical";
343 
344    case ELK_SHADER_OPCODE_FIND_LIVE_CHANNEL:
345       return "find_live_channel";
346    case ELK_SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL:
347       return "find_last_live_channel";
348    case ELK_FS_OPCODE_LOAD_LIVE_CHANNELS:
349       return "load_live_channels";
350 
351    case ELK_SHADER_OPCODE_BROADCAST:
352       return "broadcast";
353    case ELK_SHADER_OPCODE_SHUFFLE:
354       return "shuffle";
355    case ELK_SHADER_OPCODE_SEL_EXEC:
356       return "sel_exec";
357    case ELK_SHADER_OPCODE_QUAD_SWIZZLE:
358       return "quad_swizzle";
359    case ELK_SHADER_OPCODE_CLUSTER_BROADCAST:
360       return "cluster_broadcast";
361 
362    case ELK_SHADER_OPCODE_GET_BUFFER_SIZE:
363       return "get_buffer_size";
364 
365    case ELK_VEC4_OPCODE_MOV_BYTES:
366       return "mov_bytes";
367    case ELK_VEC4_OPCODE_PACK_BYTES:
368       return "pack_bytes";
369    case ELK_VEC4_OPCODE_UNPACK_UNIFORM:
370       return "unpack_uniform";
371    case ELK_VEC4_OPCODE_DOUBLE_TO_F32:
372       return "double_to_f32";
373    case ELK_VEC4_OPCODE_DOUBLE_TO_D32:
374       return "double_to_d32";
375    case ELK_VEC4_OPCODE_DOUBLE_TO_U32:
376       return "double_to_u32";
377    case ELK_VEC4_OPCODE_TO_DOUBLE:
378       return "single_to_double";
379    case ELK_VEC4_OPCODE_PICK_LOW_32BIT:
380       return "pick_low_32bit";
381    case ELK_VEC4_OPCODE_PICK_HIGH_32BIT:
382       return "pick_high_32bit";
383    case ELK_VEC4_OPCODE_SET_LOW_32BIT:
384       return "set_low_32bit";
385    case ELK_VEC4_OPCODE_SET_HIGH_32BIT:
386       return "set_high_32bit";
387    case ELK_VEC4_OPCODE_MOV_FOR_SCRATCH:
388       return "mov_for_scratch";
389    case ELK_VEC4_OPCODE_ZERO_OOB_PUSH_REGS:
390       return "zero_oob_push_regs";
391 
392    case ELK_FS_OPCODE_DDX_COARSE:
393       return "ddx_coarse";
394    case ELK_FS_OPCODE_DDX_FINE:
395       return "ddx_fine";
396    case ELK_FS_OPCODE_DDY_COARSE:
397       return "ddy_coarse";
398    case ELK_FS_OPCODE_DDY_FINE:
399       return "ddy_fine";
400 
401    case ELK_FS_OPCODE_LINTERP:
402       return "linterp";
403 
404    case ELK_FS_OPCODE_PIXEL_X:
405       return "pixel_x";
406    case ELK_FS_OPCODE_PIXEL_Y:
407       return "pixel_y";
408 
409    case ELK_FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
410       return "uniform_pull_const";
411    case ELK_FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GFX4:
412       return "varying_pull_const_gfx4";
413    case ELK_FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
414       return "varying_pull_const_logical";
415 
416    case ELK_FS_OPCODE_SET_SAMPLE_ID:
417       return "set_sample_id";
418 
419    case ELK_FS_OPCODE_PACK_HALF_2x16_SPLIT:
420       return "pack_half_2x16_split";
421 
422    case ELK_SHADER_OPCODE_HALT_TARGET:
423       return "halt_target";
424 
425    case ELK_FS_OPCODE_INTERPOLATE_AT_SAMPLE:
426       return "interp_sample";
427    case ELK_FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
428       return "interp_shared_offset";
429    case ELK_FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
430       return "interp_per_slot_offset";
431 
432    case ELK_VEC4_VS_OPCODE_URB_WRITE:
433       return "vs_urb_write";
434    case ELK_VS_OPCODE_PULL_CONSTANT_LOAD:
435       return "pull_constant_load";
436    case ELK_VS_OPCODE_PULL_CONSTANT_LOAD_GFX7:
437       return "pull_constant_load_gfx7";
438 
439    case ELK_VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
440       return "unpack_flags_simd4x2";
441 
442    case ELK_VEC4_GS_OPCODE_URB_WRITE:
443       return "gs_urb_write";
444    case ELK_VEC4_GS_OPCODE_URB_WRITE_ALLOCATE:
445       return "gs_urb_write_allocate";
446    case ELK_GS_OPCODE_THREAD_END:
447       return "gs_thread_end";
448    case ELK_GS_OPCODE_SET_WRITE_OFFSET:
449       return "set_write_offset";
450    case ELK_GS_OPCODE_SET_VERTEX_COUNT:
451       return "set_vertex_count";
452    case ELK_GS_OPCODE_SET_DWORD_2:
453       return "set_dword_2";
454    case ELK_GS_OPCODE_PREPARE_CHANNEL_MASKS:
455       return "prepare_channel_masks";
456    case ELK_GS_OPCODE_SET_CHANNEL_MASKS:
457       return "set_channel_masks";
458    case ELK_GS_OPCODE_GET_INSTANCE_ID:
459       return "get_instance_id";
460    case ELK_GS_OPCODE_FF_SYNC:
461       return "ff_sync";
462    case ELK_GS_OPCODE_SET_PRIMITIVE_ID:
463       return "set_primitive_id";
464    case ELK_GS_OPCODE_SVB_WRITE:
465       return "gs_svb_write";
466    case ELK_GS_OPCODE_SVB_SET_DST_INDEX:
467       return "gs_svb_set_dst_index";
468    case ELK_GS_OPCODE_FF_SYNC_SET_PRIMITIVES:
469       return "gs_ff_sync_set_primitives";
470    case ELK_CS_OPCODE_CS_TERMINATE:
471       return "cs_terminate";
472    case ELK_SHADER_OPCODE_BARRIER:
473       return "barrier";
474    case ELK_SHADER_OPCODE_MULH:
475       return "mulh";
476    case ELK_SHADER_OPCODE_ISUB_SAT:
477       return "isub_sat";
478    case ELK_SHADER_OPCODE_USUB_SAT:
479       return "usub_sat";
480    case ELK_SHADER_OPCODE_MOV_INDIRECT:
481       return "mov_indirect";
482    case ELK_SHADER_OPCODE_MOV_RELOC_IMM:
483       return "mov_reloc_imm";
484 
485    case ELK_VEC4_OPCODE_URB_READ:
486       return "urb_read";
487    case ELK_TCS_OPCODE_GET_INSTANCE_ID:
488       return "tcs_get_instance_id";
489    case ELK_VEC4_TCS_OPCODE_URB_WRITE:
490       return "tcs_urb_write";
491    case ELK_VEC4_TCS_OPCODE_SET_INPUT_URB_OFFSETS:
492       return "tcs_set_input_urb_offsets";
493    case ELK_VEC4_TCS_OPCODE_SET_OUTPUT_URB_OFFSETS:
494       return "tcs_set_output_urb_offsets";
495    case ELK_TCS_OPCODE_GET_PRIMITIVE_ID:
496       return "tcs_get_primitive_id";
497    case ELK_TCS_OPCODE_CREATE_BARRIER_HEADER:
498       return "tcs_create_barrier_header";
499    case ELK_TCS_OPCODE_SRC0_010_IS_ZERO:
500       return "tcs_src0<0,1,0>_is_zero";
501    case ELK_TCS_OPCODE_RELEASE_INPUT:
502       return "tcs_release_input";
503    case ELK_TCS_OPCODE_THREAD_END:
504       return "tcs_thread_end";
505    case ELK_TES_OPCODE_CREATE_INPUT_READ_HEADER:
506       return "tes_create_input_read_header";
507    case ELK_TES_OPCODE_ADD_INDIRECT_URB_OFFSET:
508       return "tes_add_indirect_urb_offset";
509    case ELK_TES_OPCODE_GET_PRIMITIVE_ID:
510       return "tes_get_primitive_id";
511 
512    case ELK_SHADER_OPCODE_RND_MODE:
513       return "rnd_mode";
514    case ELK_SHADER_OPCODE_FLOAT_CONTROL_MODE:
515       return "float_control_mode";
516    case ELK_SHADER_OPCODE_READ_SR_REG:
517       return "read_sr_reg";
518    }
519 
520    unreachable("not reached");
521 }
522 
523 bool
elk_saturate_immediate(enum elk_reg_type type,struct elk_reg * reg)524 elk_saturate_immediate(enum elk_reg_type type, struct elk_reg *reg)
525 {
526    union {
527       unsigned ud;
528       int d;
529       float f;
530       double df;
531    } imm, sat_imm = { 0 };
532 
533    const unsigned size = type_sz(type);
534 
535    /* We want to either do a 32-bit or 64-bit data copy, the type is otherwise
536     * irrelevant, so just check the size of the type and copy from/to an
537     * appropriately sized field.
538     */
539    if (size < 8)
540       imm.ud = reg->ud;
541    else
542       imm.df = reg->df;
543 
544    switch (type) {
545    case ELK_REGISTER_TYPE_UD:
546    case ELK_REGISTER_TYPE_D:
547    case ELK_REGISTER_TYPE_UW:
548    case ELK_REGISTER_TYPE_W:
549    case ELK_REGISTER_TYPE_UQ:
550    case ELK_REGISTER_TYPE_Q:
551       /* Nothing to do. */
552       return false;
553    case ELK_REGISTER_TYPE_F:
554       sat_imm.f = SATURATE(imm.f);
555       break;
556    case ELK_REGISTER_TYPE_DF:
557       sat_imm.df = SATURATE(imm.df);
558       break;
559    case ELK_REGISTER_TYPE_UB:
560    case ELK_REGISTER_TYPE_B:
561       unreachable("no UB/B immediates");
562    case ELK_REGISTER_TYPE_V:
563    case ELK_REGISTER_TYPE_UV:
564    case ELK_REGISTER_TYPE_VF:
565       unreachable("unimplemented: saturate vector immediate");
566    case ELK_REGISTER_TYPE_HF:
567       unreachable("unimplemented: saturate HF immediate");
568    case ELK_REGISTER_TYPE_NF:
569       unreachable("no NF immediates");
570    }
571 
572    if (size < 8) {
573       if (imm.ud != sat_imm.ud) {
574          reg->ud = sat_imm.ud;
575          return true;
576       }
577    } else {
578       if (imm.df != sat_imm.df) {
579          reg->df = sat_imm.df;
580          return true;
581       }
582    }
583    return false;
584 }
585 
586 bool
elk_negate_immediate(enum elk_reg_type type,struct elk_reg * reg)587 elk_negate_immediate(enum elk_reg_type type, struct elk_reg *reg)
588 {
589    switch (type) {
590    case ELK_REGISTER_TYPE_D:
591    case ELK_REGISTER_TYPE_UD:
592       reg->d = -reg->d;
593       return true;
594    case ELK_REGISTER_TYPE_W:
595    case ELK_REGISTER_TYPE_UW: {
596       uint16_t value = -(int16_t)reg->ud;
597       reg->ud = value | (uint32_t)value << 16;
598       return true;
599    }
600    case ELK_REGISTER_TYPE_F:
601       reg->f = -reg->f;
602       return true;
603    case ELK_REGISTER_TYPE_VF:
604       reg->ud ^= 0x80808080;
605       return true;
606    case ELK_REGISTER_TYPE_DF:
607       reg->df = -reg->df;
608       return true;
609    case ELK_REGISTER_TYPE_UQ:
610    case ELK_REGISTER_TYPE_Q:
611       reg->d64 = -reg->d64;
612       return true;
613    case ELK_REGISTER_TYPE_UB:
614    case ELK_REGISTER_TYPE_B:
615       unreachable("no UB/B immediates");
616    case ELK_REGISTER_TYPE_UV:
617    case ELK_REGISTER_TYPE_V:
618       assert(!"unimplemented: negate UV/V immediate");
619    case ELK_REGISTER_TYPE_HF:
620       reg->ud ^= 0x80008000;
621       return true;
622    case ELK_REGISTER_TYPE_NF:
623       unreachable("no NF immediates");
624    }
625 
626    return false;
627 }
628 
629 bool
elk_abs_immediate(enum elk_reg_type type,struct elk_reg * reg)630 elk_abs_immediate(enum elk_reg_type type, struct elk_reg *reg)
631 {
632    switch (type) {
633    case ELK_REGISTER_TYPE_D:
634       reg->d = abs(reg->d);
635       return true;
636    case ELK_REGISTER_TYPE_W: {
637       uint16_t value = abs((int16_t)reg->ud);
638       reg->ud = value | (uint32_t)value << 16;
639       return true;
640    }
641    case ELK_REGISTER_TYPE_F:
642       reg->f = fabsf(reg->f);
643       return true;
644    case ELK_REGISTER_TYPE_DF:
645       reg->df = fabs(reg->df);
646       return true;
647    case ELK_REGISTER_TYPE_VF:
648       reg->ud &= ~0x80808080;
649       return true;
650    case ELK_REGISTER_TYPE_Q:
651       reg->d64 = imaxabs(reg->d64);
652       return true;
653    case ELK_REGISTER_TYPE_UB:
654    case ELK_REGISTER_TYPE_B:
655       unreachable("no UB/B immediates");
656    case ELK_REGISTER_TYPE_UQ:
657    case ELK_REGISTER_TYPE_UD:
658    case ELK_REGISTER_TYPE_UW:
659    case ELK_REGISTER_TYPE_UV:
660       /* Presumably the absolute value modifier on an unsigned source is a
661        * nop, but it would be nice to confirm.
662        */
663       assert(!"unimplemented: abs unsigned immediate");
664    case ELK_REGISTER_TYPE_V:
665       assert(!"unimplemented: abs V immediate");
666    case ELK_REGISTER_TYPE_HF:
667       reg->ud &= ~0x80008000;
668       return true;
669    case ELK_REGISTER_TYPE_NF:
670       unreachable("no NF immediates");
671    }
672 
673    return false;
674 }
675 
elk_backend_shader(const struct elk_compiler * compiler,const struct elk_compile_params * params,const nir_shader * shader,struct elk_stage_prog_data * stage_prog_data,bool debug_enabled)676 elk_backend_shader::elk_backend_shader(const struct elk_compiler *compiler,
677                                const struct elk_compile_params *params,
678                                const nir_shader *shader,
679                                struct elk_stage_prog_data *stage_prog_data,
680                                bool debug_enabled)
681    : compiler(compiler),
682      log_data(params->log_data),
683      devinfo(compiler->devinfo),
684      nir(shader),
685      stage_prog_data(stage_prog_data),
686      mem_ctx(params->mem_ctx),
687      cfg(NULL), idom_analysis(this),
688      stage(shader->info.stage),
689      debug_enabled(debug_enabled)
690 {
691 }
692 
~elk_backend_shader()693 elk_backend_shader::~elk_backend_shader()
694 {
695 }
696 
697 bool
equals(const elk_backend_reg & r) const698 elk_backend_reg::equals(const elk_backend_reg &r) const
699 {
700    return elk_regs_equal(this, &r) && offset == r.offset;
701 }
702 
703 bool
negative_equals(const elk_backend_reg & r) const704 elk_backend_reg::negative_equals(const elk_backend_reg &r) const
705 {
706    return elk_regs_negative_equal(this, &r) && offset == r.offset;
707 }
708 
709 bool
is_zero() const710 elk_backend_reg::is_zero() const
711 {
712    if (file != IMM)
713       return false;
714 
715    assert(type_sz(type) > 1);
716 
717    switch (type) {
718    case ELK_REGISTER_TYPE_HF:
719       assert((d & 0xffff) == ((d >> 16) & 0xffff));
720       return (d & 0xffff) == 0 || (d & 0xffff) == 0x8000;
721    case ELK_REGISTER_TYPE_F:
722       return f == 0;
723    case ELK_REGISTER_TYPE_DF:
724       return df == 0;
725    case ELK_REGISTER_TYPE_W:
726    case ELK_REGISTER_TYPE_UW:
727       assert((d & 0xffff) == ((d >> 16) & 0xffff));
728       return (d & 0xffff) == 0;
729    case ELK_REGISTER_TYPE_D:
730    case ELK_REGISTER_TYPE_UD:
731       return d == 0;
732    case ELK_REGISTER_TYPE_UQ:
733    case ELK_REGISTER_TYPE_Q:
734       return u64 == 0;
735    default:
736       return false;
737    }
738 }
739 
740 bool
is_one() const741 elk_backend_reg::is_one() const
742 {
743    if (file != IMM)
744       return false;
745 
746    assert(type_sz(type) > 1);
747 
748    switch (type) {
749    case ELK_REGISTER_TYPE_HF:
750       assert((d & 0xffff) == ((d >> 16) & 0xffff));
751       return (d & 0xffff) == 0x3c00;
752    case ELK_REGISTER_TYPE_F:
753       return f == 1.0f;
754    case ELK_REGISTER_TYPE_DF:
755       return df == 1.0;
756    case ELK_REGISTER_TYPE_W:
757    case ELK_REGISTER_TYPE_UW:
758       assert((d & 0xffff) == ((d >> 16) & 0xffff));
759       return (d & 0xffff) == 1;
760    case ELK_REGISTER_TYPE_D:
761    case ELK_REGISTER_TYPE_UD:
762       return d == 1;
763    case ELK_REGISTER_TYPE_UQ:
764    case ELK_REGISTER_TYPE_Q:
765       return u64 == 1;
766    default:
767       return false;
768    }
769 }
770 
771 bool
is_negative_one() const772 elk_backend_reg::is_negative_one() const
773 {
774    if (file != IMM)
775       return false;
776 
777    assert(type_sz(type) > 1);
778 
779    switch (type) {
780    case ELK_REGISTER_TYPE_HF:
781       assert((d & 0xffff) == ((d >> 16) & 0xffff));
782       return (d & 0xffff) == 0xbc00;
783    case ELK_REGISTER_TYPE_F:
784       return f == -1.0;
785    case ELK_REGISTER_TYPE_DF:
786       return df == -1.0;
787    case ELK_REGISTER_TYPE_W:
788       assert((d & 0xffff) == ((d >> 16) & 0xffff));
789       return (d & 0xffff) == 0xffff;
790    case ELK_REGISTER_TYPE_D:
791       return d == -1;
792    case ELK_REGISTER_TYPE_Q:
793       return d64 == -1;
794    default:
795       return false;
796    }
797 }
798 
799 bool
is_null() const800 elk_backend_reg::is_null() const
801 {
802    return file == ARF && nr == ELK_ARF_NULL;
803 }
804 
805 
806 bool
is_accumulator() const807 elk_backend_reg::is_accumulator() const
808 {
809    return file == ARF && nr == ELK_ARF_ACCUMULATOR;
810 }
811 
812 bool
is_commutative() const813 elk_backend_instruction::is_commutative() const
814 {
815    switch (opcode) {
816    case ELK_OPCODE_AND:
817    case ELK_OPCODE_OR:
818    case ELK_OPCODE_XOR:
819    case ELK_OPCODE_ADD:
820    case ELK_OPCODE_MUL:
821    case ELK_SHADER_OPCODE_MULH:
822       return true;
823    case ELK_OPCODE_SEL:
824       /* MIN and MAX are commutative. */
825       if (conditional_mod == ELK_CONDITIONAL_GE ||
826           conditional_mod == ELK_CONDITIONAL_L) {
827          return true;
828       }
829       FALLTHROUGH;
830    default:
831       return false;
832    }
833 }
834 
835 bool
elk_is_3src(const struct elk_compiler * compiler) const836 elk_backend_instruction::elk_is_3src(const struct elk_compiler *compiler) const
837 {
838    return ::elk_is_3src(&compiler->isa, opcode);
839 }
840 
841 bool
is_math() const842 elk_backend_instruction::is_math() const
843 {
844    return (opcode == ELK_SHADER_OPCODE_RCP ||
845            opcode == ELK_SHADER_OPCODE_RSQ ||
846            opcode == ELK_SHADER_OPCODE_SQRT ||
847            opcode == ELK_SHADER_OPCODE_EXP2 ||
848            opcode == ELK_SHADER_OPCODE_LOG2 ||
849            opcode == ELK_SHADER_OPCODE_SIN ||
850            opcode == ELK_SHADER_OPCODE_COS ||
851            opcode == ELK_SHADER_OPCODE_INT_QUOTIENT ||
852            opcode == ELK_SHADER_OPCODE_INT_REMAINDER ||
853            opcode == ELK_SHADER_OPCODE_POW);
854 }
855 
856 bool
is_control_flow_begin() const857 elk_backend_instruction::is_control_flow_begin() const
858 {
859    switch (opcode) {
860    case ELK_OPCODE_DO:
861    case ELK_OPCODE_IF:
862    case ELK_OPCODE_ELSE:
863       return true;
864    default:
865       return false;
866    }
867 }
868 
869 bool
is_control_flow_end() const870 elk_backend_instruction::is_control_flow_end() const
871 {
872    switch (opcode) {
873    case ELK_OPCODE_ELSE:
874    case ELK_OPCODE_WHILE:
875    case ELK_OPCODE_ENDIF:
876       return true;
877    default:
878       return false;
879    }
880 }
881 
882 bool
is_control_flow() const883 elk_backend_instruction::is_control_flow() const
884 {
885    switch (opcode) {
886    case ELK_OPCODE_DO:
887    case ELK_OPCODE_WHILE:
888    case ELK_OPCODE_IF:
889    case ELK_OPCODE_ELSE:
890    case ELK_OPCODE_ENDIF:
891    case ELK_OPCODE_BREAK:
892    case ELK_OPCODE_CONTINUE:
893       return true;
894    default:
895       return false;
896    }
897 }
898 
899 bool
uses_indirect_addressing() const900 elk_backend_instruction::uses_indirect_addressing() const
901 {
902    switch (opcode) {
903    case ELK_SHADER_OPCODE_BROADCAST:
904    case ELK_SHADER_OPCODE_CLUSTER_BROADCAST:
905    case ELK_SHADER_OPCODE_MOV_INDIRECT:
906       return true;
907    default:
908       return false;
909    }
910 }
911 
912 bool
can_do_source_mods() const913 elk_backend_instruction::can_do_source_mods() const
914 {
915    switch (opcode) {
916    case ELK_OPCODE_ADDC:
917    case ELK_OPCODE_BFE:
918    case ELK_OPCODE_BFI1:
919    case ELK_OPCODE_BFI2:
920    case ELK_OPCODE_BFREV:
921    case ELK_OPCODE_CBIT:
922    case ELK_OPCODE_FBH:
923    case ELK_OPCODE_FBL:
924    case ELK_OPCODE_SUBB:
925    case ELK_SHADER_OPCODE_BROADCAST:
926    case ELK_SHADER_OPCODE_CLUSTER_BROADCAST:
927    case ELK_SHADER_OPCODE_MOV_INDIRECT:
928    case ELK_SHADER_OPCODE_SHUFFLE:
929    case ELK_SHADER_OPCODE_INT_QUOTIENT:
930    case ELK_SHADER_OPCODE_INT_REMAINDER:
931       return false;
932    default:
933       return true;
934    }
935 }
936 
937 bool
can_do_saturate() const938 elk_backend_instruction::can_do_saturate() const
939 {
940    switch (opcode) {
941    case ELK_OPCODE_ADD:
942    case ELK_OPCODE_ASR:
943    case ELK_OPCODE_AVG:
944    case ELK_OPCODE_CSEL:
945    case ELK_OPCODE_DP2:
946    case ELK_OPCODE_DP3:
947    case ELK_OPCODE_DP4:
948    case ELK_OPCODE_DPH:
949    case ELK_OPCODE_F16TO32:
950    case ELK_OPCODE_F32TO16:
951    case ELK_OPCODE_LINE:
952    case ELK_OPCODE_LRP:
953    case ELK_OPCODE_MAC:
954    case ELK_OPCODE_MAD:
955    case ELK_OPCODE_MATH:
956    case ELK_OPCODE_MOV:
957    case ELK_OPCODE_MUL:
958    case ELK_SHADER_OPCODE_MULH:
959    case ELK_OPCODE_PLN:
960    case ELK_OPCODE_RNDD:
961    case ELK_OPCODE_RNDE:
962    case ELK_OPCODE_RNDU:
963    case ELK_OPCODE_RNDZ:
964    case ELK_OPCODE_SEL:
965    case ELK_OPCODE_SHL:
966    case ELK_OPCODE_SHR:
967    case ELK_FS_OPCODE_LINTERP:
968    case ELK_SHADER_OPCODE_COS:
969    case ELK_SHADER_OPCODE_EXP2:
970    case ELK_SHADER_OPCODE_LOG2:
971    case ELK_SHADER_OPCODE_POW:
972    case ELK_SHADER_OPCODE_RCP:
973    case ELK_SHADER_OPCODE_RSQ:
974    case ELK_SHADER_OPCODE_SIN:
975    case ELK_SHADER_OPCODE_SQRT:
976       return true;
977    default:
978       return false;
979    }
980 }
981 
982 bool
can_do_cmod() const983 elk_backend_instruction::can_do_cmod() const
984 {
985    switch (opcode) {
986    case ELK_OPCODE_ADD:
987    case ELK_OPCODE_ADDC:
988    case ELK_OPCODE_AND:
989    case ELK_OPCODE_ASR:
990    case ELK_OPCODE_AVG:
991    case ELK_OPCODE_CMP:
992    case ELK_OPCODE_CMPN:
993    case ELK_OPCODE_DP2:
994    case ELK_OPCODE_DP3:
995    case ELK_OPCODE_DP4:
996    case ELK_OPCODE_DPH:
997    case ELK_OPCODE_F16TO32:
998    case ELK_OPCODE_F32TO16:
999    case ELK_OPCODE_FRC:
1000    case ELK_OPCODE_LINE:
1001    case ELK_OPCODE_LRP:
1002    case ELK_OPCODE_LZD:
1003    case ELK_OPCODE_MAC:
1004    case ELK_OPCODE_MACH:
1005    case ELK_OPCODE_MAD:
1006    case ELK_OPCODE_MOV:
1007    case ELK_OPCODE_MUL:
1008    case ELK_OPCODE_NOT:
1009    case ELK_OPCODE_OR:
1010    case ELK_OPCODE_PLN:
1011    case ELK_OPCODE_RNDD:
1012    case ELK_OPCODE_RNDE:
1013    case ELK_OPCODE_RNDU:
1014    case ELK_OPCODE_RNDZ:
1015    case ELK_OPCODE_SAD2:
1016    case ELK_OPCODE_SADA2:
1017    case ELK_OPCODE_SHL:
1018    case ELK_OPCODE_SHR:
1019    case ELK_OPCODE_SUBB:
1020    case ELK_OPCODE_XOR:
1021    case ELK_FS_OPCODE_LINTERP:
1022       return true;
1023    default:
1024       return false;
1025    }
1026 }
1027 
1028 bool
reads_accumulator_implicitly() const1029 elk_backend_instruction::reads_accumulator_implicitly() const
1030 {
1031    switch (opcode) {
1032    case ELK_OPCODE_MAC:
1033    case ELK_OPCODE_MACH:
1034    case ELK_OPCODE_SADA2:
1035       return true;
1036    default:
1037       return false;
1038    }
1039 }
1040 
1041 bool
writes_accumulator_implicitly(const struct intel_device_info * devinfo) const1042 elk_backend_instruction::writes_accumulator_implicitly(const struct intel_device_info *devinfo) const
1043 {
1044    return writes_accumulator ||
1045           (devinfo->ver < 6 &&
1046            ((opcode >= ELK_OPCODE_ADD && opcode < ELK_OPCODE_NOP) ||
1047             (opcode >= ELK_FS_OPCODE_DDX_COARSE && opcode <= ELK_FS_OPCODE_LINTERP))) ||
1048           (opcode == ELK_FS_OPCODE_LINTERP &&
1049            (!devinfo->has_pln || devinfo->ver <= 6)) ||
1050           (eot && intel_needs_workaround(devinfo, 14010017096));
1051 }
1052 
1053 bool
has_side_effects() const1054 elk_backend_instruction::has_side_effects() const
1055 {
1056    switch (opcode) {
1057    case ELK_SHADER_OPCODE_SEND:
1058       return send_has_side_effects;
1059 
1060    case ELK_VEC4_OPCODE_UNTYPED_ATOMIC:
1061    case ELK_SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
1062    case ELK_SHADER_OPCODE_GFX4_SCRATCH_WRITE:
1063    case ELK_VEC4_OPCODE_UNTYPED_SURFACE_WRITE:
1064    case ELK_SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
1065    case ELK_SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL:
1066    case ELK_SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
1067    case ELK_SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
1068    case ELK_SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
1069    case ELK_SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL:
1070    case ELK_SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
1071    case ELK_SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
1072    case ELK_SHADER_OPCODE_MEMORY_FENCE:
1073    case ELK_SHADER_OPCODE_INTERLOCK:
1074    case ELK_SHADER_OPCODE_URB_WRITE_LOGICAL:
1075    case ELK_FS_OPCODE_FB_WRITE:
1076    case ELK_FS_OPCODE_FB_WRITE_LOGICAL:
1077    case ELK_FS_OPCODE_REP_FB_WRITE:
1078    case ELK_SHADER_OPCODE_BARRIER:
1079    case ELK_VEC4_TCS_OPCODE_URB_WRITE:
1080    case ELK_TCS_OPCODE_RELEASE_INPUT:
1081    case ELK_SHADER_OPCODE_RND_MODE:
1082    case ELK_SHADER_OPCODE_FLOAT_CONTROL_MODE:
1083    case ELK_FS_OPCODE_SCHEDULING_FENCE:
1084    case ELK_SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL:
1085    case ELK_SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL:
1086    case ELK_VEC4_OPCODE_ZERO_OOB_PUSH_REGS:
1087       return true;
1088    default:
1089       return eot;
1090    }
1091 }
1092 
1093 bool
is_volatile() const1094 elk_backend_instruction::is_volatile() const
1095 {
1096    switch (opcode) {
1097    case ELK_SHADER_OPCODE_SEND:
1098       return send_is_volatile;
1099 
1100    case ELK_VEC4_OPCODE_UNTYPED_SURFACE_READ:
1101    case ELK_SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
1102    case ELK_SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
1103    case ELK_SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
1104    case ELK_SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL:
1105    case ELK_SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL:
1106    case ELK_SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL:
1107    case ELK_VEC4_OPCODE_URB_READ:
1108       return true;
1109    default:
1110       return false;
1111    }
1112 }
1113 
1114 #ifndef NDEBUG
1115 static bool
inst_is_in_block(const elk_bblock_t * block,const elk_backend_instruction * inst)1116 inst_is_in_block(const elk_bblock_t *block, const elk_backend_instruction *inst)
1117 {
1118    const exec_node *n = inst;
1119 
1120    /* Find the tail sentinel. If the tail sentinel is the sentinel from the
1121     * list header in the elk_bblock_t, then this instruction is in that basic
1122     * block.
1123     */
1124    while (!n->is_tail_sentinel())
1125       n = n->get_next();
1126 
1127    return n == &block->instructions.tail_sentinel;
1128 }
1129 #endif
1130 
1131 static void
adjust_later_block_ips(elk_bblock_t * start_block,int ip_adjustment)1132 adjust_later_block_ips(elk_bblock_t *start_block, int ip_adjustment)
1133 {
1134    for (elk_bblock_t *block_iter = start_block->next();
1135         block_iter;
1136         block_iter = block_iter->next()) {
1137       block_iter->start_ip += ip_adjustment;
1138       block_iter->end_ip += ip_adjustment;
1139    }
1140 }
1141 
1142 void
insert_after(elk_bblock_t * block,elk_backend_instruction * inst)1143 elk_backend_instruction::insert_after(elk_bblock_t *block, elk_backend_instruction *inst)
1144 {
1145    assert(this != inst);
1146    assert(block->end_ip_delta == 0);
1147 
1148    if (!this->is_head_sentinel())
1149       assert(inst_is_in_block(block, this) || !"Instruction not in block");
1150 
1151    block->end_ip++;
1152 
1153    adjust_later_block_ips(block, 1);
1154 
1155    exec_node::insert_after(inst);
1156 }
1157 
1158 void
insert_before(elk_bblock_t * block,elk_backend_instruction * inst)1159 elk_backend_instruction::insert_before(elk_bblock_t *block, elk_backend_instruction *inst)
1160 {
1161    assert(this != inst);
1162    assert(block->end_ip_delta == 0);
1163 
1164    if (!this->is_tail_sentinel())
1165       assert(inst_is_in_block(block, this) || !"Instruction not in block");
1166 
1167    block->end_ip++;
1168 
1169    adjust_later_block_ips(block, 1);
1170 
1171    exec_node::insert_before(inst);
1172 }
1173 
1174 void
remove(elk_bblock_t * block,bool defer_later_block_ip_updates)1175 elk_backend_instruction::remove(elk_bblock_t *block, bool defer_later_block_ip_updates)
1176 {
1177    assert(inst_is_in_block(block, this) || !"Instruction not in block");
1178 
1179    if (defer_later_block_ip_updates) {
1180       block->end_ip_delta--;
1181    } else {
1182       assert(block->end_ip_delta == 0);
1183       adjust_later_block_ips(block, -1);
1184    }
1185 
1186    if (block->start_ip == block->end_ip) {
1187       if (block->end_ip_delta != 0) {
1188          adjust_later_block_ips(block, block->end_ip_delta);
1189          block->end_ip_delta = 0;
1190       }
1191 
1192       block->cfg->remove_block(block);
1193    } else {
1194       block->end_ip--;
1195    }
1196 
1197    exec_node::remove();
1198 }
1199 
1200 void
dump_instructions(const char * name) const1201 elk_backend_shader::dump_instructions(const char *name) const
1202 {
1203    FILE *file = stderr;
1204    if (name && __normal_user()) {
1205       file = fopen(name, "w");
1206       if (!file)
1207          file = stderr;
1208    }
1209 
1210    dump_instructions_to_file(file);
1211 
1212    if (file != stderr) {
1213       fclose(file);
1214    }
1215 }
1216 
1217 void
dump_instructions_to_file(FILE * file) const1218 elk_backend_shader::dump_instructions_to_file(FILE *file) const
1219 {
1220    if (cfg) {
1221       int ip = 0;
1222       foreach_block_and_inst(block, elk_backend_instruction, inst, cfg) {
1223          if (!INTEL_DEBUG(DEBUG_OPTIMIZER))
1224             fprintf(file, "%4d: ", ip++);
1225          dump_instruction(inst, file);
1226       }
1227    } else {
1228       int ip = 0;
1229       foreach_in_list(elk_backend_instruction, inst, &instructions) {
1230          if (!INTEL_DEBUG(DEBUG_OPTIMIZER))
1231             fprintf(file, "%4d: ", ip++);
1232          dump_instruction(inst, file);
1233       }
1234    }
1235 }
1236 
1237 void
calculate_cfg()1238 elk_backend_shader::calculate_cfg()
1239 {
1240    if (this->cfg)
1241       return;
1242    cfg = new(mem_ctx) elk_cfg_t(this, &this->instructions);
1243 }
1244 
1245 void
invalidate_analysis(elk::analysis_dependency_class c)1246 elk_backend_shader::invalidate_analysis(elk::analysis_dependency_class c)
1247 {
1248    idom_analysis.invalidate(c);
1249 }
1250 
1251 extern "C" const unsigned *
elk_compile_tes(const struct elk_compiler * compiler,elk_compile_tes_params * params)1252 elk_compile_tes(const struct elk_compiler *compiler,
1253                 elk_compile_tes_params *params)
1254 {
1255    const struct intel_device_info *devinfo = compiler->devinfo;
1256    nir_shader *nir = params->base.nir;
1257    const struct elk_tes_prog_key *key = params->key;
1258    const struct intel_vue_map *input_vue_map = params->input_vue_map;
1259    struct elk_tes_prog_data *prog_data = params->prog_data;
1260 
1261    const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_EVAL];
1262    const bool debug_enabled = elk_should_print_shader(nir, DEBUG_TES);
1263    const unsigned *assembly;
1264 
1265    prog_data->base.base.stage = MESA_SHADER_TESS_EVAL;
1266 
1267    nir->info.inputs_read = key->inputs_read;
1268    nir->info.patch_inputs_read = key->patch_inputs_read;
1269 
1270    elk_nir_apply_key(nir, compiler, &key->base, 8);
1271    elk_nir_lower_tes_inputs(nir, input_vue_map);
1272    elk_nir_lower_vue_outputs(nir);
1273    elk_postprocess_nir(nir, compiler, debug_enabled,
1274                        key->base.robust_flags);
1275 
1276    elk_compute_vue_map(devinfo, &prog_data->base.vue_map,
1277                        nir->info.outputs_written,
1278                        nir->info.separate_shader, 1);
1279 
1280    unsigned output_size_bytes = prog_data->base.vue_map.num_slots * 4 * 4;
1281 
1282    assert(output_size_bytes >= 1);
1283    if (output_size_bytes > GFX7_MAX_DS_URB_ENTRY_SIZE_BYTES) {
1284       params->base.error_str = ralloc_strdup(params->base.mem_ctx,
1285                                              "DS outputs exceed maximum size");
1286       return NULL;
1287    }
1288 
1289    prog_data->base.clip_distance_mask =
1290       ((1 << nir->info.clip_distance_array_size) - 1);
1291    prog_data->base.cull_distance_mask =
1292       ((1 << nir->info.cull_distance_array_size) - 1) <<
1293       nir->info.clip_distance_array_size;
1294 
1295    prog_data->include_primitive_id =
1296       BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID);
1297 
1298    /* URB entry sizes are stored as a multiple of 64 bytes. */
1299    prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
1300 
1301    prog_data->base.urb_read_length = 0;
1302 
1303    STATIC_ASSERT(INTEL_TESS_PARTITIONING_INTEGER == TESS_SPACING_EQUAL - 1);
1304    STATIC_ASSERT(INTEL_TESS_PARTITIONING_ODD_FRACTIONAL ==
1305                  TESS_SPACING_FRACTIONAL_ODD - 1);
1306    STATIC_ASSERT(INTEL_TESS_PARTITIONING_EVEN_FRACTIONAL ==
1307                  TESS_SPACING_FRACTIONAL_EVEN - 1);
1308 
1309    prog_data->partitioning =
1310       (enum intel_tess_partitioning) (nir->info.tess.spacing - 1);
1311 
1312    switch (nir->info.tess._primitive_mode) {
1313    case TESS_PRIMITIVE_QUADS:
1314       prog_data->domain = INTEL_TESS_DOMAIN_QUAD;
1315       break;
1316    case TESS_PRIMITIVE_TRIANGLES:
1317       prog_data->domain = INTEL_TESS_DOMAIN_TRI;
1318       break;
1319    case TESS_PRIMITIVE_ISOLINES:
1320       prog_data->domain = INTEL_TESS_DOMAIN_ISOLINE;
1321       break;
1322    default:
1323       unreachable("invalid domain shader primitive mode");
1324    }
1325 
1326    if (nir->info.tess.point_mode) {
1327       prog_data->output_topology = INTEL_TESS_OUTPUT_TOPOLOGY_POINT;
1328    } else if (nir->info.tess._primitive_mode == TESS_PRIMITIVE_ISOLINES) {
1329       prog_data->output_topology = INTEL_TESS_OUTPUT_TOPOLOGY_LINE;
1330    } else {
1331       /* Hardware winding order is backwards from OpenGL */
1332       prog_data->output_topology =
1333          nir->info.tess.ccw ? INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CW
1334                              : INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CCW;
1335    }
1336 
1337    if (unlikely(debug_enabled)) {
1338       fprintf(stderr, "TES Input ");
1339       elk_print_vue_map(stderr, input_vue_map, MESA_SHADER_TESS_EVAL);
1340       fprintf(stderr, "TES Output ");
1341       elk_print_vue_map(stderr, &prog_data->base.vue_map,
1342                         MESA_SHADER_TESS_EVAL);
1343    }
1344 
1345    if (is_scalar) {
1346       const unsigned dispatch_width = 8;
1347       elk_fs_visitor v(compiler, &params->base, &key->base,
1348                    &prog_data->base.base, nir, dispatch_width,
1349                    params->base.stats != NULL, debug_enabled);
1350       if (!v.run_tes()) {
1351          params->base.error_str =
1352             ralloc_strdup(params->base.mem_ctx, v.fail_msg);
1353          return NULL;
1354       }
1355 
1356       assert(v.payload().num_regs % reg_unit(devinfo) == 0);
1357       prog_data->base.base.dispatch_grf_start_reg = v.payload().num_regs / reg_unit(devinfo);
1358 
1359       prog_data->base.dispatch_mode = INTEL_DISPATCH_MODE_SIMD8;
1360 
1361       elk_fs_generator g(compiler, &params->base,
1362                      &prog_data->base.base, false, MESA_SHADER_TESS_EVAL);
1363       if (unlikely(debug_enabled)) {
1364          g.enable_debug(ralloc_asprintf(params->base.mem_ctx,
1365                                         "%s tessellation evaluation shader %s",
1366                                         nir->info.label ? nir->info.label
1367                                                         : "unnamed",
1368                                         nir->info.name));
1369       }
1370 
1371       g.generate_code(v.cfg, dispatch_width, v.shader_stats,
1372                       v.performance_analysis.require(), params->base.stats);
1373 
1374       g.add_const_data(nir->constant_data, nir->constant_data_size);
1375 
1376       assembly = g.get_assembly();
1377    } else {
1378       elk::vec4_tes_visitor v(compiler, &params->base, key, prog_data,
1379                               nir, debug_enabled);
1380       if (!v.run()) {
1381          params->base.error_str =
1382             ralloc_strdup(params->base.mem_ctx, v.fail_msg);
1383 	 return NULL;
1384       }
1385 
1386       if (unlikely(debug_enabled))
1387 	 v.dump_instructions();
1388 
1389       assembly = elk_vec4_generate_assembly(compiler, &params->base, nir,
1390                                             &prog_data->base, v.cfg,
1391                                             v.performance_analysis.require(),
1392                                             debug_enabled);
1393    }
1394 
1395    return assembly;
1396 }
1397