1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "elk_cfg.h"
25 #include "elk_eu.h"
26 #include "elk_fs.h"
27 #include "elk_nir.h"
28 #include "elk_private.h"
29 #include "elk_vec4_tes.h"
30 #include "dev/intel_debug.h"
31 #include "util/macros.h"
32 #include "util/u_debug.h"
33
34 enum elk_reg_type
elk_type_for_base_type(const struct glsl_type * type)35 elk_type_for_base_type(const struct glsl_type *type)
36 {
37 switch (type->base_type) {
38 case GLSL_TYPE_FLOAT16:
39 return ELK_REGISTER_TYPE_HF;
40 case GLSL_TYPE_FLOAT:
41 return ELK_REGISTER_TYPE_F;
42 case GLSL_TYPE_INT:
43 case GLSL_TYPE_BOOL:
44 case GLSL_TYPE_SUBROUTINE:
45 return ELK_REGISTER_TYPE_D;
46 case GLSL_TYPE_INT16:
47 return ELK_REGISTER_TYPE_W;
48 case GLSL_TYPE_INT8:
49 return ELK_REGISTER_TYPE_B;
50 case GLSL_TYPE_UINT:
51 return ELK_REGISTER_TYPE_UD;
52 case GLSL_TYPE_UINT16:
53 return ELK_REGISTER_TYPE_UW;
54 case GLSL_TYPE_UINT8:
55 return ELK_REGISTER_TYPE_UB;
56 case GLSL_TYPE_ARRAY:
57 return elk_type_for_base_type(type->fields.array);
58 case GLSL_TYPE_STRUCT:
59 case GLSL_TYPE_INTERFACE:
60 case GLSL_TYPE_SAMPLER:
61 case GLSL_TYPE_TEXTURE:
62 case GLSL_TYPE_ATOMIC_UINT:
63 /* These should be overridden with the type of the member when
64 * dereferenced into. ELK_REGISTER_TYPE_UD seems like a likely
65 * way to trip up if we don't.
66 */
67 return ELK_REGISTER_TYPE_UD;
68 case GLSL_TYPE_IMAGE:
69 return ELK_REGISTER_TYPE_UD;
70 case GLSL_TYPE_DOUBLE:
71 return ELK_REGISTER_TYPE_DF;
72 case GLSL_TYPE_UINT64:
73 return ELK_REGISTER_TYPE_UQ;
74 case GLSL_TYPE_INT64:
75 return ELK_REGISTER_TYPE_Q;
76 case GLSL_TYPE_VOID:
77 case GLSL_TYPE_ERROR:
78 case GLSL_TYPE_COOPERATIVE_MATRIX:
79 unreachable("not reached");
80 }
81
82 return ELK_REGISTER_TYPE_F;
83 }
84
85 uint32_t
elk_math_function(enum elk_opcode op)86 elk_math_function(enum elk_opcode op)
87 {
88 switch (op) {
89 case ELK_SHADER_OPCODE_RCP:
90 return ELK_MATH_FUNCTION_INV;
91 case ELK_SHADER_OPCODE_RSQ:
92 return ELK_MATH_FUNCTION_RSQ;
93 case ELK_SHADER_OPCODE_SQRT:
94 return ELK_MATH_FUNCTION_SQRT;
95 case ELK_SHADER_OPCODE_EXP2:
96 return ELK_MATH_FUNCTION_EXP;
97 case ELK_SHADER_OPCODE_LOG2:
98 return ELK_MATH_FUNCTION_LOG;
99 case ELK_SHADER_OPCODE_POW:
100 return ELK_MATH_FUNCTION_POW;
101 case ELK_SHADER_OPCODE_SIN:
102 return ELK_MATH_FUNCTION_SIN;
103 case ELK_SHADER_OPCODE_COS:
104 return ELK_MATH_FUNCTION_COS;
105 case ELK_SHADER_OPCODE_INT_QUOTIENT:
106 return ELK_MATH_FUNCTION_INT_DIV_QUOTIENT;
107 case ELK_SHADER_OPCODE_INT_REMAINDER:
108 return ELK_MATH_FUNCTION_INT_DIV_REMAINDER;
109 default:
110 unreachable("not reached: unknown math function");
111 }
112 }
113
114 bool
elk_texture_offset(const nir_tex_instr * tex,unsigned src,uint32_t * offset_bits_out)115 elk_texture_offset(const nir_tex_instr *tex, unsigned src,
116 uint32_t *offset_bits_out)
117 {
118 if (!nir_src_is_const(tex->src[src].src))
119 return false;
120
121 const unsigned num_components = nir_tex_instr_src_size(tex, src);
122
123 /* Combine all three offsets into a single unsigned dword:
124 *
125 * bits 11:8 - U Offset (X component)
126 * bits 7:4 - V Offset (Y component)
127 * bits 3:0 - R Offset (Z component)
128 */
129 uint32_t offset_bits = 0;
130 for (unsigned i = 0; i < num_components; i++) {
131 int offset = nir_src_comp_as_int(tex->src[src].src, i);
132
133 /* offset out of bounds; caller will handle it. */
134 if (offset > 7 || offset < -8)
135 return false;
136
137 const unsigned shift = 4 * (2 - i);
138 offset_bits |= (offset & 0xF) << shift;
139 }
140
141 *offset_bits_out = offset_bits;
142
143 return true;
144 }
145
146 const char *
elk_instruction_name(const struct elk_isa_info * isa,enum elk_opcode op)147 elk_instruction_name(const struct elk_isa_info *isa, enum elk_opcode op)
148 {
149 const struct intel_device_info *devinfo = isa->devinfo;
150
151 switch (op) {
152 case 0 ... NUM_ELK_OPCODES - 1:
153 /* The DO instruction doesn't exist on Gfx6+, but we use it to mark the
154 * start of a loop in the IR.
155 */
156 if (devinfo->ver >= 6 && op == ELK_OPCODE_DO)
157 return "do";
158
159 /* The following conversion opcodes doesn't exist on Gfx8+, but we use
160 * then to mark that we want to do the conversion.
161 */
162 if (devinfo->ver > 7 && op == ELK_OPCODE_F32TO16)
163 return "f32to16";
164
165 if (devinfo->ver > 7 && op == ELK_OPCODE_F16TO32)
166 return "f16to32";
167
168 assert(elk_opcode_desc(isa, op)->name);
169 return elk_opcode_desc(isa, op)->name;
170 case ELK_FS_OPCODE_FB_WRITE:
171 return "fb_write";
172 case ELK_FS_OPCODE_FB_WRITE_LOGICAL:
173 return "fb_write_logical";
174 case ELK_FS_OPCODE_REP_FB_WRITE:
175 return "rep_fb_write";
176
177 case ELK_SHADER_OPCODE_RCP:
178 return "rcp";
179 case ELK_SHADER_OPCODE_RSQ:
180 return "rsq";
181 case ELK_SHADER_OPCODE_SQRT:
182 return "sqrt";
183 case ELK_SHADER_OPCODE_EXP2:
184 return "exp2";
185 case ELK_SHADER_OPCODE_LOG2:
186 return "log2";
187 case ELK_SHADER_OPCODE_POW:
188 return "pow";
189 case ELK_SHADER_OPCODE_INT_QUOTIENT:
190 return "int_quot";
191 case ELK_SHADER_OPCODE_INT_REMAINDER:
192 return "int_rem";
193 case ELK_SHADER_OPCODE_SIN:
194 return "sin";
195 case ELK_SHADER_OPCODE_COS:
196 return "cos";
197
198 case ELK_SHADER_OPCODE_SEND:
199 return "send";
200
201 case ELK_SHADER_OPCODE_UNDEF:
202 return "undef";
203
204 case ELK_SHADER_OPCODE_TEX:
205 return "tex";
206 case ELK_SHADER_OPCODE_TEX_LOGICAL:
207 return "tex_logical";
208 case ELK_SHADER_OPCODE_TXD:
209 return "txd";
210 case ELK_SHADER_OPCODE_TXD_LOGICAL:
211 return "txd_logical";
212 case ELK_SHADER_OPCODE_TXF:
213 return "txf";
214 case ELK_SHADER_OPCODE_TXF_LOGICAL:
215 return "txf_logical";
216 case ELK_SHADER_OPCODE_TXF_LZ:
217 return "txf_lz";
218 case ELK_SHADER_OPCODE_TXL:
219 return "txl";
220 case ELK_SHADER_OPCODE_TXL_LOGICAL:
221 return "txl_logical";
222 case ELK_SHADER_OPCODE_TXL_LZ:
223 return "txl_lz";
224 case ELK_SHADER_OPCODE_TXS:
225 return "txs";
226 case ELK_SHADER_OPCODE_TXS_LOGICAL:
227 return "txs_logical";
228 case ELK_FS_OPCODE_TXB:
229 return "txb";
230 case ELK_FS_OPCODE_TXB_LOGICAL:
231 return "txb_logical";
232 case ELK_SHADER_OPCODE_TXF_CMS:
233 return "txf_cms";
234 case ELK_SHADER_OPCODE_TXF_CMS_LOGICAL:
235 return "txf_cms_logical";
236 case ELK_SHADER_OPCODE_TXF_CMS_W:
237 return "txf_cms_w";
238 case ELK_SHADER_OPCODE_TXF_CMS_W_LOGICAL:
239 return "txf_cms_w_logical";
240 case ELK_SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL:
241 return "txf_cms_w_gfx12_logical";
242 case ELK_SHADER_OPCODE_TXF_UMS:
243 return "txf_ums";
244 case ELK_SHADER_OPCODE_TXF_UMS_LOGICAL:
245 return "txf_ums_logical";
246 case ELK_SHADER_OPCODE_TXF_MCS:
247 return "txf_mcs";
248 case ELK_SHADER_OPCODE_TXF_MCS_LOGICAL:
249 return "txf_mcs_logical";
250 case ELK_SHADER_OPCODE_LOD:
251 return "lod";
252 case ELK_SHADER_OPCODE_LOD_LOGICAL:
253 return "lod_logical";
254 case ELK_SHADER_OPCODE_TG4:
255 return "tg4";
256 case ELK_SHADER_OPCODE_TG4_LOGICAL:
257 return "tg4_logical";
258 case ELK_SHADER_OPCODE_TG4_OFFSET:
259 return "tg4_offset";
260 case ELK_SHADER_OPCODE_TG4_OFFSET_LOGICAL:
261 return "tg4_offset_logical";
262 case ELK_SHADER_OPCODE_SAMPLEINFO:
263 return "sampleinfo";
264 case ELK_SHADER_OPCODE_SAMPLEINFO_LOGICAL:
265 return "sampleinfo_logical";
266
267 case ELK_SHADER_OPCODE_IMAGE_SIZE_LOGICAL:
268 return "image_size_logical";
269
270 case ELK_VEC4_OPCODE_UNTYPED_ATOMIC:
271 return "untyped_atomic";
272 case ELK_SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
273 return "untyped_atomic_logical";
274 case ELK_VEC4_OPCODE_UNTYPED_SURFACE_READ:
275 return "untyped_surface_read";
276 case ELK_SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
277 return "untyped_surface_read_logical";
278 case ELK_VEC4_OPCODE_UNTYPED_SURFACE_WRITE:
279 return "untyped_surface_write";
280 case ELK_SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
281 return "untyped_surface_write_logical";
282 case ELK_SHADER_OPCODE_UNALIGNED_OWORD_BLOCK_READ_LOGICAL:
283 return "unaligned_oword_block_read_logical";
284 case ELK_SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL:
285 return "oword_block_write_logical";
286 case ELK_SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL:
287 return "a64_untyped_read_logical";
288 case ELK_SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL:
289 return "a64_oword_block_read_logical";
290 case ELK_SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL:
291 return "a64_unaligned_oword_block_read_logical";
292 case ELK_SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL:
293 return "a64_oword_block_write_logical";
294 case ELK_SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL:
295 return "a64_untyped_write_logical";
296 case ELK_SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL:
297 return "a64_byte_scattered_read_logical";
298 case ELK_SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
299 return "a64_byte_scattered_write_logical";
300 case ELK_SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
301 return "a64_untyped_atomic_logical";
302 case ELK_SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
303 return "typed_atomic_logical";
304 case ELK_SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
305 return "typed_surface_read_logical";
306 case ELK_SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
307 return "typed_surface_write_logical";
308 case ELK_SHADER_OPCODE_MEMORY_FENCE:
309 return "memory_fence";
310 case ELK_FS_OPCODE_SCHEDULING_FENCE:
311 return "scheduling_fence";
312 case ELK_SHADER_OPCODE_INTERLOCK:
313 /* For an interlock we actually issue a memory fence via sendc. */
314 return "interlock";
315
316 case ELK_SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
317 return "byte_scattered_read_logical";
318 case ELK_SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
319 return "byte_scattered_write_logical";
320 case ELK_SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL:
321 return "dword_scattered_read_logical";
322 case ELK_SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL:
323 return "dword_scattered_write_logical";
324
325 case ELK_SHADER_OPCODE_LOAD_PAYLOAD:
326 return "load_payload";
327 case ELK_FS_OPCODE_PACK:
328 return "pack";
329
330 case ELK_SHADER_OPCODE_GFX4_SCRATCH_READ:
331 return "gfx4_scratch_read";
332 case ELK_SHADER_OPCODE_GFX4_SCRATCH_WRITE:
333 return "gfx4_scratch_write";
334 case ELK_SHADER_OPCODE_GFX7_SCRATCH_READ:
335 return "gfx7_scratch_read";
336 case ELK_SHADER_OPCODE_SCRATCH_HEADER:
337 return "scratch_header";
338
339 case ELK_SHADER_OPCODE_URB_WRITE_LOGICAL:
340 return "urb_write_logical";
341 case ELK_SHADER_OPCODE_URB_READ_LOGICAL:
342 return "urb_read_logical";
343
344 case ELK_SHADER_OPCODE_FIND_LIVE_CHANNEL:
345 return "find_live_channel";
346 case ELK_SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL:
347 return "find_last_live_channel";
348 case ELK_FS_OPCODE_LOAD_LIVE_CHANNELS:
349 return "load_live_channels";
350
351 case ELK_SHADER_OPCODE_BROADCAST:
352 return "broadcast";
353 case ELK_SHADER_OPCODE_SHUFFLE:
354 return "shuffle";
355 case ELK_SHADER_OPCODE_SEL_EXEC:
356 return "sel_exec";
357 case ELK_SHADER_OPCODE_QUAD_SWIZZLE:
358 return "quad_swizzle";
359 case ELK_SHADER_OPCODE_CLUSTER_BROADCAST:
360 return "cluster_broadcast";
361
362 case ELK_SHADER_OPCODE_GET_BUFFER_SIZE:
363 return "get_buffer_size";
364
365 case ELK_VEC4_OPCODE_MOV_BYTES:
366 return "mov_bytes";
367 case ELK_VEC4_OPCODE_PACK_BYTES:
368 return "pack_bytes";
369 case ELK_VEC4_OPCODE_UNPACK_UNIFORM:
370 return "unpack_uniform";
371 case ELK_VEC4_OPCODE_DOUBLE_TO_F32:
372 return "double_to_f32";
373 case ELK_VEC4_OPCODE_DOUBLE_TO_D32:
374 return "double_to_d32";
375 case ELK_VEC4_OPCODE_DOUBLE_TO_U32:
376 return "double_to_u32";
377 case ELK_VEC4_OPCODE_TO_DOUBLE:
378 return "single_to_double";
379 case ELK_VEC4_OPCODE_PICK_LOW_32BIT:
380 return "pick_low_32bit";
381 case ELK_VEC4_OPCODE_PICK_HIGH_32BIT:
382 return "pick_high_32bit";
383 case ELK_VEC4_OPCODE_SET_LOW_32BIT:
384 return "set_low_32bit";
385 case ELK_VEC4_OPCODE_SET_HIGH_32BIT:
386 return "set_high_32bit";
387 case ELK_VEC4_OPCODE_MOV_FOR_SCRATCH:
388 return "mov_for_scratch";
389 case ELK_VEC4_OPCODE_ZERO_OOB_PUSH_REGS:
390 return "zero_oob_push_regs";
391
392 case ELK_FS_OPCODE_DDX_COARSE:
393 return "ddx_coarse";
394 case ELK_FS_OPCODE_DDX_FINE:
395 return "ddx_fine";
396 case ELK_FS_OPCODE_DDY_COARSE:
397 return "ddy_coarse";
398 case ELK_FS_OPCODE_DDY_FINE:
399 return "ddy_fine";
400
401 case ELK_FS_OPCODE_LINTERP:
402 return "linterp";
403
404 case ELK_FS_OPCODE_PIXEL_X:
405 return "pixel_x";
406 case ELK_FS_OPCODE_PIXEL_Y:
407 return "pixel_y";
408
409 case ELK_FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
410 return "uniform_pull_const";
411 case ELK_FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GFX4:
412 return "varying_pull_const_gfx4";
413 case ELK_FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
414 return "varying_pull_const_logical";
415
416 case ELK_FS_OPCODE_SET_SAMPLE_ID:
417 return "set_sample_id";
418
419 case ELK_FS_OPCODE_PACK_HALF_2x16_SPLIT:
420 return "pack_half_2x16_split";
421
422 case ELK_SHADER_OPCODE_HALT_TARGET:
423 return "halt_target";
424
425 case ELK_FS_OPCODE_INTERPOLATE_AT_SAMPLE:
426 return "interp_sample";
427 case ELK_FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
428 return "interp_shared_offset";
429 case ELK_FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
430 return "interp_per_slot_offset";
431
432 case ELK_VEC4_VS_OPCODE_URB_WRITE:
433 return "vs_urb_write";
434 case ELK_VS_OPCODE_PULL_CONSTANT_LOAD:
435 return "pull_constant_load";
436 case ELK_VS_OPCODE_PULL_CONSTANT_LOAD_GFX7:
437 return "pull_constant_load_gfx7";
438
439 case ELK_VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
440 return "unpack_flags_simd4x2";
441
442 case ELK_VEC4_GS_OPCODE_URB_WRITE:
443 return "gs_urb_write";
444 case ELK_VEC4_GS_OPCODE_URB_WRITE_ALLOCATE:
445 return "gs_urb_write_allocate";
446 case ELK_GS_OPCODE_THREAD_END:
447 return "gs_thread_end";
448 case ELK_GS_OPCODE_SET_WRITE_OFFSET:
449 return "set_write_offset";
450 case ELK_GS_OPCODE_SET_VERTEX_COUNT:
451 return "set_vertex_count";
452 case ELK_GS_OPCODE_SET_DWORD_2:
453 return "set_dword_2";
454 case ELK_GS_OPCODE_PREPARE_CHANNEL_MASKS:
455 return "prepare_channel_masks";
456 case ELK_GS_OPCODE_SET_CHANNEL_MASKS:
457 return "set_channel_masks";
458 case ELK_GS_OPCODE_GET_INSTANCE_ID:
459 return "get_instance_id";
460 case ELK_GS_OPCODE_FF_SYNC:
461 return "ff_sync";
462 case ELK_GS_OPCODE_SET_PRIMITIVE_ID:
463 return "set_primitive_id";
464 case ELK_GS_OPCODE_SVB_WRITE:
465 return "gs_svb_write";
466 case ELK_GS_OPCODE_SVB_SET_DST_INDEX:
467 return "gs_svb_set_dst_index";
468 case ELK_GS_OPCODE_FF_SYNC_SET_PRIMITIVES:
469 return "gs_ff_sync_set_primitives";
470 case ELK_CS_OPCODE_CS_TERMINATE:
471 return "cs_terminate";
472 case ELK_SHADER_OPCODE_BARRIER:
473 return "barrier";
474 case ELK_SHADER_OPCODE_MULH:
475 return "mulh";
476 case ELK_SHADER_OPCODE_ISUB_SAT:
477 return "isub_sat";
478 case ELK_SHADER_OPCODE_USUB_SAT:
479 return "usub_sat";
480 case ELK_SHADER_OPCODE_MOV_INDIRECT:
481 return "mov_indirect";
482 case ELK_SHADER_OPCODE_MOV_RELOC_IMM:
483 return "mov_reloc_imm";
484
485 case ELK_VEC4_OPCODE_URB_READ:
486 return "urb_read";
487 case ELK_TCS_OPCODE_GET_INSTANCE_ID:
488 return "tcs_get_instance_id";
489 case ELK_VEC4_TCS_OPCODE_URB_WRITE:
490 return "tcs_urb_write";
491 case ELK_VEC4_TCS_OPCODE_SET_INPUT_URB_OFFSETS:
492 return "tcs_set_input_urb_offsets";
493 case ELK_VEC4_TCS_OPCODE_SET_OUTPUT_URB_OFFSETS:
494 return "tcs_set_output_urb_offsets";
495 case ELK_TCS_OPCODE_GET_PRIMITIVE_ID:
496 return "tcs_get_primitive_id";
497 case ELK_TCS_OPCODE_CREATE_BARRIER_HEADER:
498 return "tcs_create_barrier_header";
499 case ELK_TCS_OPCODE_SRC0_010_IS_ZERO:
500 return "tcs_src0<0,1,0>_is_zero";
501 case ELK_TCS_OPCODE_RELEASE_INPUT:
502 return "tcs_release_input";
503 case ELK_TCS_OPCODE_THREAD_END:
504 return "tcs_thread_end";
505 case ELK_TES_OPCODE_CREATE_INPUT_READ_HEADER:
506 return "tes_create_input_read_header";
507 case ELK_TES_OPCODE_ADD_INDIRECT_URB_OFFSET:
508 return "tes_add_indirect_urb_offset";
509 case ELK_TES_OPCODE_GET_PRIMITIVE_ID:
510 return "tes_get_primitive_id";
511
512 case ELK_SHADER_OPCODE_RND_MODE:
513 return "rnd_mode";
514 case ELK_SHADER_OPCODE_FLOAT_CONTROL_MODE:
515 return "float_control_mode";
516 case ELK_SHADER_OPCODE_READ_SR_REG:
517 return "read_sr_reg";
518 }
519
520 unreachable("not reached");
521 }
522
523 bool
elk_saturate_immediate(enum elk_reg_type type,struct elk_reg * reg)524 elk_saturate_immediate(enum elk_reg_type type, struct elk_reg *reg)
525 {
526 union {
527 unsigned ud;
528 int d;
529 float f;
530 double df;
531 } imm, sat_imm = { 0 };
532
533 const unsigned size = type_sz(type);
534
535 /* We want to either do a 32-bit or 64-bit data copy, the type is otherwise
536 * irrelevant, so just check the size of the type and copy from/to an
537 * appropriately sized field.
538 */
539 if (size < 8)
540 imm.ud = reg->ud;
541 else
542 imm.df = reg->df;
543
544 switch (type) {
545 case ELK_REGISTER_TYPE_UD:
546 case ELK_REGISTER_TYPE_D:
547 case ELK_REGISTER_TYPE_UW:
548 case ELK_REGISTER_TYPE_W:
549 case ELK_REGISTER_TYPE_UQ:
550 case ELK_REGISTER_TYPE_Q:
551 /* Nothing to do. */
552 return false;
553 case ELK_REGISTER_TYPE_F:
554 sat_imm.f = SATURATE(imm.f);
555 break;
556 case ELK_REGISTER_TYPE_DF:
557 sat_imm.df = SATURATE(imm.df);
558 break;
559 case ELK_REGISTER_TYPE_UB:
560 case ELK_REGISTER_TYPE_B:
561 unreachable("no UB/B immediates");
562 case ELK_REGISTER_TYPE_V:
563 case ELK_REGISTER_TYPE_UV:
564 case ELK_REGISTER_TYPE_VF:
565 unreachable("unimplemented: saturate vector immediate");
566 case ELK_REGISTER_TYPE_HF:
567 unreachable("unimplemented: saturate HF immediate");
568 case ELK_REGISTER_TYPE_NF:
569 unreachable("no NF immediates");
570 }
571
572 if (size < 8) {
573 if (imm.ud != sat_imm.ud) {
574 reg->ud = sat_imm.ud;
575 return true;
576 }
577 } else {
578 if (imm.df != sat_imm.df) {
579 reg->df = sat_imm.df;
580 return true;
581 }
582 }
583 return false;
584 }
585
586 bool
elk_negate_immediate(enum elk_reg_type type,struct elk_reg * reg)587 elk_negate_immediate(enum elk_reg_type type, struct elk_reg *reg)
588 {
589 switch (type) {
590 case ELK_REGISTER_TYPE_D:
591 case ELK_REGISTER_TYPE_UD:
592 reg->d = -reg->d;
593 return true;
594 case ELK_REGISTER_TYPE_W:
595 case ELK_REGISTER_TYPE_UW: {
596 uint16_t value = -(int16_t)reg->ud;
597 reg->ud = value | (uint32_t)value << 16;
598 return true;
599 }
600 case ELK_REGISTER_TYPE_F:
601 reg->f = -reg->f;
602 return true;
603 case ELK_REGISTER_TYPE_VF:
604 reg->ud ^= 0x80808080;
605 return true;
606 case ELK_REGISTER_TYPE_DF:
607 reg->df = -reg->df;
608 return true;
609 case ELK_REGISTER_TYPE_UQ:
610 case ELK_REGISTER_TYPE_Q:
611 reg->d64 = -reg->d64;
612 return true;
613 case ELK_REGISTER_TYPE_UB:
614 case ELK_REGISTER_TYPE_B:
615 unreachable("no UB/B immediates");
616 case ELK_REGISTER_TYPE_UV:
617 case ELK_REGISTER_TYPE_V:
618 assert(!"unimplemented: negate UV/V immediate");
619 case ELK_REGISTER_TYPE_HF:
620 reg->ud ^= 0x80008000;
621 return true;
622 case ELK_REGISTER_TYPE_NF:
623 unreachable("no NF immediates");
624 }
625
626 return false;
627 }
628
629 bool
elk_abs_immediate(enum elk_reg_type type,struct elk_reg * reg)630 elk_abs_immediate(enum elk_reg_type type, struct elk_reg *reg)
631 {
632 switch (type) {
633 case ELK_REGISTER_TYPE_D:
634 reg->d = abs(reg->d);
635 return true;
636 case ELK_REGISTER_TYPE_W: {
637 uint16_t value = abs((int16_t)reg->ud);
638 reg->ud = value | (uint32_t)value << 16;
639 return true;
640 }
641 case ELK_REGISTER_TYPE_F:
642 reg->f = fabsf(reg->f);
643 return true;
644 case ELK_REGISTER_TYPE_DF:
645 reg->df = fabs(reg->df);
646 return true;
647 case ELK_REGISTER_TYPE_VF:
648 reg->ud &= ~0x80808080;
649 return true;
650 case ELK_REGISTER_TYPE_Q:
651 reg->d64 = imaxabs(reg->d64);
652 return true;
653 case ELK_REGISTER_TYPE_UB:
654 case ELK_REGISTER_TYPE_B:
655 unreachable("no UB/B immediates");
656 case ELK_REGISTER_TYPE_UQ:
657 case ELK_REGISTER_TYPE_UD:
658 case ELK_REGISTER_TYPE_UW:
659 case ELK_REGISTER_TYPE_UV:
660 /* Presumably the absolute value modifier on an unsigned source is a
661 * nop, but it would be nice to confirm.
662 */
663 assert(!"unimplemented: abs unsigned immediate");
664 case ELK_REGISTER_TYPE_V:
665 assert(!"unimplemented: abs V immediate");
666 case ELK_REGISTER_TYPE_HF:
667 reg->ud &= ~0x80008000;
668 return true;
669 case ELK_REGISTER_TYPE_NF:
670 unreachable("no NF immediates");
671 }
672
673 return false;
674 }
675
elk_backend_shader(const struct elk_compiler * compiler,const struct elk_compile_params * params,const nir_shader * shader,struct elk_stage_prog_data * stage_prog_data,bool debug_enabled)676 elk_backend_shader::elk_backend_shader(const struct elk_compiler *compiler,
677 const struct elk_compile_params *params,
678 const nir_shader *shader,
679 struct elk_stage_prog_data *stage_prog_data,
680 bool debug_enabled)
681 : compiler(compiler),
682 log_data(params->log_data),
683 devinfo(compiler->devinfo),
684 nir(shader),
685 stage_prog_data(stage_prog_data),
686 mem_ctx(params->mem_ctx),
687 cfg(NULL), idom_analysis(this),
688 stage(shader->info.stage),
689 debug_enabled(debug_enabled)
690 {
691 }
692
~elk_backend_shader()693 elk_backend_shader::~elk_backend_shader()
694 {
695 }
696
697 bool
equals(const elk_backend_reg & r) const698 elk_backend_reg::equals(const elk_backend_reg &r) const
699 {
700 return elk_regs_equal(this, &r) && offset == r.offset;
701 }
702
703 bool
negative_equals(const elk_backend_reg & r) const704 elk_backend_reg::negative_equals(const elk_backend_reg &r) const
705 {
706 return elk_regs_negative_equal(this, &r) && offset == r.offset;
707 }
708
709 bool
is_zero() const710 elk_backend_reg::is_zero() const
711 {
712 if (file != IMM)
713 return false;
714
715 assert(type_sz(type) > 1);
716
717 switch (type) {
718 case ELK_REGISTER_TYPE_HF:
719 assert((d & 0xffff) == ((d >> 16) & 0xffff));
720 return (d & 0xffff) == 0 || (d & 0xffff) == 0x8000;
721 case ELK_REGISTER_TYPE_F:
722 return f == 0;
723 case ELK_REGISTER_TYPE_DF:
724 return df == 0;
725 case ELK_REGISTER_TYPE_W:
726 case ELK_REGISTER_TYPE_UW:
727 assert((d & 0xffff) == ((d >> 16) & 0xffff));
728 return (d & 0xffff) == 0;
729 case ELK_REGISTER_TYPE_D:
730 case ELK_REGISTER_TYPE_UD:
731 return d == 0;
732 case ELK_REGISTER_TYPE_UQ:
733 case ELK_REGISTER_TYPE_Q:
734 return u64 == 0;
735 default:
736 return false;
737 }
738 }
739
740 bool
is_one() const741 elk_backend_reg::is_one() const
742 {
743 if (file != IMM)
744 return false;
745
746 assert(type_sz(type) > 1);
747
748 switch (type) {
749 case ELK_REGISTER_TYPE_HF:
750 assert((d & 0xffff) == ((d >> 16) & 0xffff));
751 return (d & 0xffff) == 0x3c00;
752 case ELK_REGISTER_TYPE_F:
753 return f == 1.0f;
754 case ELK_REGISTER_TYPE_DF:
755 return df == 1.0;
756 case ELK_REGISTER_TYPE_W:
757 case ELK_REGISTER_TYPE_UW:
758 assert((d & 0xffff) == ((d >> 16) & 0xffff));
759 return (d & 0xffff) == 1;
760 case ELK_REGISTER_TYPE_D:
761 case ELK_REGISTER_TYPE_UD:
762 return d == 1;
763 case ELK_REGISTER_TYPE_UQ:
764 case ELK_REGISTER_TYPE_Q:
765 return u64 == 1;
766 default:
767 return false;
768 }
769 }
770
771 bool
is_negative_one() const772 elk_backend_reg::is_negative_one() const
773 {
774 if (file != IMM)
775 return false;
776
777 assert(type_sz(type) > 1);
778
779 switch (type) {
780 case ELK_REGISTER_TYPE_HF:
781 assert((d & 0xffff) == ((d >> 16) & 0xffff));
782 return (d & 0xffff) == 0xbc00;
783 case ELK_REGISTER_TYPE_F:
784 return f == -1.0;
785 case ELK_REGISTER_TYPE_DF:
786 return df == -1.0;
787 case ELK_REGISTER_TYPE_W:
788 assert((d & 0xffff) == ((d >> 16) & 0xffff));
789 return (d & 0xffff) == 0xffff;
790 case ELK_REGISTER_TYPE_D:
791 return d == -1;
792 case ELK_REGISTER_TYPE_Q:
793 return d64 == -1;
794 default:
795 return false;
796 }
797 }
798
799 bool
is_null() const800 elk_backend_reg::is_null() const
801 {
802 return file == ARF && nr == ELK_ARF_NULL;
803 }
804
805
806 bool
is_accumulator() const807 elk_backend_reg::is_accumulator() const
808 {
809 return file == ARF && nr == ELK_ARF_ACCUMULATOR;
810 }
811
812 bool
is_commutative() const813 elk_backend_instruction::is_commutative() const
814 {
815 switch (opcode) {
816 case ELK_OPCODE_AND:
817 case ELK_OPCODE_OR:
818 case ELK_OPCODE_XOR:
819 case ELK_OPCODE_ADD:
820 case ELK_OPCODE_MUL:
821 case ELK_SHADER_OPCODE_MULH:
822 return true;
823 case ELK_OPCODE_SEL:
824 /* MIN and MAX are commutative. */
825 if (conditional_mod == ELK_CONDITIONAL_GE ||
826 conditional_mod == ELK_CONDITIONAL_L) {
827 return true;
828 }
829 FALLTHROUGH;
830 default:
831 return false;
832 }
833 }
834
835 bool
elk_is_3src(const struct elk_compiler * compiler) const836 elk_backend_instruction::elk_is_3src(const struct elk_compiler *compiler) const
837 {
838 return ::elk_is_3src(&compiler->isa, opcode);
839 }
840
841 bool
is_math() const842 elk_backend_instruction::is_math() const
843 {
844 return (opcode == ELK_SHADER_OPCODE_RCP ||
845 opcode == ELK_SHADER_OPCODE_RSQ ||
846 opcode == ELK_SHADER_OPCODE_SQRT ||
847 opcode == ELK_SHADER_OPCODE_EXP2 ||
848 opcode == ELK_SHADER_OPCODE_LOG2 ||
849 opcode == ELK_SHADER_OPCODE_SIN ||
850 opcode == ELK_SHADER_OPCODE_COS ||
851 opcode == ELK_SHADER_OPCODE_INT_QUOTIENT ||
852 opcode == ELK_SHADER_OPCODE_INT_REMAINDER ||
853 opcode == ELK_SHADER_OPCODE_POW);
854 }
855
856 bool
is_control_flow_begin() const857 elk_backend_instruction::is_control_flow_begin() const
858 {
859 switch (opcode) {
860 case ELK_OPCODE_DO:
861 case ELK_OPCODE_IF:
862 case ELK_OPCODE_ELSE:
863 return true;
864 default:
865 return false;
866 }
867 }
868
869 bool
is_control_flow_end() const870 elk_backend_instruction::is_control_flow_end() const
871 {
872 switch (opcode) {
873 case ELK_OPCODE_ELSE:
874 case ELK_OPCODE_WHILE:
875 case ELK_OPCODE_ENDIF:
876 return true;
877 default:
878 return false;
879 }
880 }
881
882 bool
is_control_flow() const883 elk_backend_instruction::is_control_flow() const
884 {
885 switch (opcode) {
886 case ELK_OPCODE_DO:
887 case ELK_OPCODE_WHILE:
888 case ELK_OPCODE_IF:
889 case ELK_OPCODE_ELSE:
890 case ELK_OPCODE_ENDIF:
891 case ELK_OPCODE_BREAK:
892 case ELK_OPCODE_CONTINUE:
893 return true;
894 default:
895 return false;
896 }
897 }
898
899 bool
uses_indirect_addressing() const900 elk_backend_instruction::uses_indirect_addressing() const
901 {
902 switch (opcode) {
903 case ELK_SHADER_OPCODE_BROADCAST:
904 case ELK_SHADER_OPCODE_CLUSTER_BROADCAST:
905 case ELK_SHADER_OPCODE_MOV_INDIRECT:
906 return true;
907 default:
908 return false;
909 }
910 }
911
912 bool
can_do_source_mods() const913 elk_backend_instruction::can_do_source_mods() const
914 {
915 switch (opcode) {
916 case ELK_OPCODE_ADDC:
917 case ELK_OPCODE_BFE:
918 case ELK_OPCODE_BFI1:
919 case ELK_OPCODE_BFI2:
920 case ELK_OPCODE_BFREV:
921 case ELK_OPCODE_CBIT:
922 case ELK_OPCODE_FBH:
923 case ELK_OPCODE_FBL:
924 case ELK_OPCODE_SUBB:
925 case ELK_SHADER_OPCODE_BROADCAST:
926 case ELK_SHADER_OPCODE_CLUSTER_BROADCAST:
927 case ELK_SHADER_OPCODE_MOV_INDIRECT:
928 case ELK_SHADER_OPCODE_SHUFFLE:
929 case ELK_SHADER_OPCODE_INT_QUOTIENT:
930 case ELK_SHADER_OPCODE_INT_REMAINDER:
931 return false;
932 default:
933 return true;
934 }
935 }
936
937 bool
can_do_saturate() const938 elk_backend_instruction::can_do_saturate() const
939 {
940 switch (opcode) {
941 case ELK_OPCODE_ADD:
942 case ELK_OPCODE_ASR:
943 case ELK_OPCODE_AVG:
944 case ELK_OPCODE_CSEL:
945 case ELK_OPCODE_DP2:
946 case ELK_OPCODE_DP3:
947 case ELK_OPCODE_DP4:
948 case ELK_OPCODE_DPH:
949 case ELK_OPCODE_F16TO32:
950 case ELK_OPCODE_F32TO16:
951 case ELK_OPCODE_LINE:
952 case ELK_OPCODE_LRP:
953 case ELK_OPCODE_MAC:
954 case ELK_OPCODE_MAD:
955 case ELK_OPCODE_MATH:
956 case ELK_OPCODE_MOV:
957 case ELK_OPCODE_MUL:
958 case ELK_SHADER_OPCODE_MULH:
959 case ELK_OPCODE_PLN:
960 case ELK_OPCODE_RNDD:
961 case ELK_OPCODE_RNDE:
962 case ELK_OPCODE_RNDU:
963 case ELK_OPCODE_RNDZ:
964 case ELK_OPCODE_SEL:
965 case ELK_OPCODE_SHL:
966 case ELK_OPCODE_SHR:
967 case ELK_FS_OPCODE_LINTERP:
968 case ELK_SHADER_OPCODE_COS:
969 case ELK_SHADER_OPCODE_EXP2:
970 case ELK_SHADER_OPCODE_LOG2:
971 case ELK_SHADER_OPCODE_POW:
972 case ELK_SHADER_OPCODE_RCP:
973 case ELK_SHADER_OPCODE_RSQ:
974 case ELK_SHADER_OPCODE_SIN:
975 case ELK_SHADER_OPCODE_SQRT:
976 return true;
977 default:
978 return false;
979 }
980 }
981
982 bool
can_do_cmod() const983 elk_backend_instruction::can_do_cmod() const
984 {
985 switch (opcode) {
986 case ELK_OPCODE_ADD:
987 case ELK_OPCODE_ADDC:
988 case ELK_OPCODE_AND:
989 case ELK_OPCODE_ASR:
990 case ELK_OPCODE_AVG:
991 case ELK_OPCODE_CMP:
992 case ELK_OPCODE_CMPN:
993 case ELK_OPCODE_DP2:
994 case ELK_OPCODE_DP3:
995 case ELK_OPCODE_DP4:
996 case ELK_OPCODE_DPH:
997 case ELK_OPCODE_F16TO32:
998 case ELK_OPCODE_F32TO16:
999 case ELK_OPCODE_FRC:
1000 case ELK_OPCODE_LINE:
1001 case ELK_OPCODE_LRP:
1002 case ELK_OPCODE_LZD:
1003 case ELK_OPCODE_MAC:
1004 case ELK_OPCODE_MACH:
1005 case ELK_OPCODE_MAD:
1006 case ELK_OPCODE_MOV:
1007 case ELK_OPCODE_MUL:
1008 case ELK_OPCODE_NOT:
1009 case ELK_OPCODE_OR:
1010 case ELK_OPCODE_PLN:
1011 case ELK_OPCODE_RNDD:
1012 case ELK_OPCODE_RNDE:
1013 case ELK_OPCODE_RNDU:
1014 case ELK_OPCODE_RNDZ:
1015 case ELK_OPCODE_SAD2:
1016 case ELK_OPCODE_SADA2:
1017 case ELK_OPCODE_SHL:
1018 case ELK_OPCODE_SHR:
1019 case ELK_OPCODE_SUBB:
1020 case ELK_OPCODE_XOR:
1021 case ELK_FS_OPCODE_LINTERP:
1022 return true;
1023 default:
1024 return false;
1025 }
1026 }
1027
1028 bool
reads_accumulator_implicitly() const1029 elk_backend_instruction::reads_accumulator_implicitly() const
1030 {
1031 switch (opcode) {
1032 case ELK_OPCODE_MAC:
1033 case ELK_OPCODE_MACH:
1034 case ELK_OPCODE_SADA2:
1035 return true;
1036 default:
1037 return false;
1038 }
1039 }
1040
1041 bool
writes_accumulator_implicitly(const struct intel_device_info * devinfo) const1042 elk_backend_instruction::writes_accumulator_implicitly(const struct intel_device_info *devinfo) const
1043 {
1044 return writes_accumulator ||
1045 (devinfo->ver < 6 &&
1046 ((opcode >= ELK_OPCODE_ADD && opcode < ELK_OPCODE_NOP) ||
1047 (opcode >= ELK_FS_OPCODE_DDX_COARSE && opcode <= ELK_FS_OPCODE_LINTERP))) ||
1048 (opcode == ELK_FS_OPCODE_LINTERP &&
1049 (!devinfo->has_pln || devinfo->ver <= 6)) ||
1050 (eot && intel_needs_workaround(devinfo, 14010017096));
1051 }
1052
1053 bool
has_side_effects() const1054 elk_backend_instruction::has_side_effects() const
1055 {
1056 switch (opcode) {
1057 case ELK_SHADER_OPCODE_SEND:
1058 return send_has_side_effects;
1059
1060 case ELK_VEC4_OPCODE_UNTYPED_ATOMIC:
1061 case ELK_SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
1062 case ELK_SHADER_OPCODE_GFX4_SCRATCH_WRITE:
1063 case ELK_VEC4_OPCODE_UNTYPED_SURFACE_WRITE:
1064 case ELK_SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
1065 case ELK_SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL:
1066 case ELK_SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
1067 case ELK_SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
1068 case ELK_SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
1069 case ELK_SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL:
1070 case ELK_SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
1071 case ELK_SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
1072 case ELK_SHADER_OPCODE_MEMORY_FENCE:
1073 case ELK_SHADER_OPCODE_INTERLOCK:
1074 case ELK_SHADER_OPCODE_URB_WRITE_LOGICAL:
1075 case ELK_FS_OPCODE_FB_WRITE:
1076 case ELK_FS_OPCODE_FB_WRITE_LOGICAL:
1077 case ELK_FS_OPCODE_REP_FB_WRITE:
1078 case ELK_SHADER_OPCODE_BARRIER:
1079 case ELK_VEC4_TCS_OPCODE_URB_WRITE:
1080 case ELK_TCS_OPCODE_RELEASE_INPUT:
1081 case ELK_SHADER_OPCODE_RND_MODE:
1082 case ELK_SHADER_OPCODE_FLOAT_CONTROL_MODE:
1083 case ELK_FS_OPCODE_SCHEDULING_FENCE:
1084 case ELK_SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL:
1085 case ELK_SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL:
1086 case ELK_VEC4_OPCODE_ZERO_OOB_PUSH_REGS:
1087 return true;
1088 default:
1089 return eot;
1090 }
1091 }
1092
1093 bool
is_volatile() const1094 elk_backend_instruction::is_volatile() const
1095 {
1096 switch (opcode) {
1097 case ELK_SHADER_OPCODE_SEND:
1098 return send_is_volatile;
1099
1100 case ELK_VEC4_OPCODE_UNTYPED_SURFACE_READ:
1101 case ELK_SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
1102 case ELK_SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
1103 case ELK_SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
1104 case ELK_SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL:
1105 case ELK_SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL:
1106 case ELK_SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL:
1107 case ELK_VEC4_OPCODE_URB_READ:
1108 return true;
1109 default:
1110 return false;
1111 }
1112 }
1113
1114 #ifndef NDEBUG
1115 static bool
inst_is_in_block(const elk_bblock_t * block,const elk_backend_instruction * inst)1116 inst_is_in_block(const elk_bblock_t *block, const elk_backend_instruction *inst)
1117 {
1118 const exec_node *n = inst;
1119
1120 /* Find the tail sentinel. If the tail sentinel is the sentinel from the
1121 * list header in the elk_bblock_t, then this instruction is in that basic
1122 * block.
1123 */
1124 while (!n->is_tail_sentinel())
1125 n = n->get_next();
1126
1127 return n == &block->instructions.tail_sentinel;
1128 }
1129 #endif
1130
1131 static void
adjust_later_block_ips(elk_bblock_t * start_block,int ip_adjustment)1132 adjust_later_block_ips(elk_bblock_t *start_block, int ip_adjustment)
1133 {
1134 for (elk_bblock_t *block_iter = start_block->next();
1135 block_iter;
1136 block_iter = block_iter->next()) {
1137 block_iter->start_ip += ip_adjustment;
1138 block_iter->end_ip += ip_adjustment;
1139 }
1140 }
1141
1142 void
insert_after(elk_bblock_t * block,elk_backend_instruction * inst)1143 elk_backend_instruction::insert_after(elk_bblock_t *block, elk_backend_instruction *inst)
1144 {
1145 assert(this != inst);
1146 assert(block->end_ip_delta == 0);
1147
1148 if (!this->is_head_sentinel())
1149 assert(inst_is_in_block(block, this) || !"Instruction not in block");
1150
1151 block->end_ip++;
1152
1153 adjust_later_block_ips(block, 1);
1154
1155 exec_node::insert_after(inst);
1156 }
1157
1158 void
insert_before(elk_bblock_t * block,elk_backend_instruction * inst)1159 elk_backend_instruction::insert_before(elk_bblock_t *block, elk_backend_instruction *inst)
1160 {
1161 assert(this != inst);
1162 assert(block->end_ip_delta == 0);
1163
1164 if (!this->is_tail_sentinel())
1165 assert(inst_is_in_block(block, this) || !"Instruction not in block");
1166
1167 block->end_ip++;
1168
1169 adjust_later_block_ips(block, 1);
1170
1171 exec_node::insert_before(inst);
1172 }
1173
1174 void
remove(elk_bblock_t * block,bool defer_later_block_ip_updates)1175 elk_backend_instruction::remove(elk_bblock_t *block, bool defer_later_block_ip_updates)
1176 {
1177 assert(inst_is_in_block(block, this) || !"Instruction not in block");
1178
1179 if (defer_later_block_ip_updates) {
1180 block->end_ip_delta--;
1181 } else {
1182 assert(block->end_ip_delta == 0);
1183 adjust_later_block_ips(block, -1);
1184 }
1185
1186 if (block->start_ip == block->end_ip) {
1187 if (block->end_ip_delta != 0) {
1188 adjust_later_block_ips(block, block->end_ip_delta);
1189 block->end_ip_delta = 0;
1190 }
1191
1192 block->cfg->remove_block(block);
1193 } else {
1194 block->end_ip--;
1195 }
1196
1197 exec_node::remove();
1198 }
1199
1200 void
dump_instructions(const char * name) const1201 elk_backend_shader::dump_instructions(const char *name) const
1202 {
1203 FILE *file = stderr;
1204 if (name && __normal_user()) {
1205 file = fopen(name, "w");
1206 if (!file)
1207 file = stderr;
1208 }
1209
1210 dump_instructions_to_file(file);
1211
1212 if (file != stderr) {
1213 fclose(file);
1214 }
1215 }
1216
1217 void
dump_instructions_to_file(FILE * file) const1218 elk_backend_shader::dump_instructions_to_file(FILE *file) const
1219 {
1220 if (cfg) {
1221 int ip = 0;
1222 foreach_block_and_inst(block, elk_backend_instruction, inst, cfg) {
1223 if (!INTEL_DEBUG(DEBUG_OPTIMIZER))
1224 fprintf(file, "%4d: ", ip++);
1225 dump_instruction(inst, file);
1226 }
1227 } else {
1228 int ip = 0;
1229 foreach_in_list(elk_backend_instruction, inst, &instructions) {
1230 if (!INTEL_DEBUG(DEBUG_OPTIMIZER))
1231 fprintf(file, "%4d: ", ip++);
1232 dump_instruction(inst, file);
1233 }
1234 }
1235 }
1236
1237 void
calculate_cfg()1238 elk_backend_shader::calculate_cfg()
1239 {
1240 if (this->cfg)
1241 return;
1242 cfg = new(mem_ctx) elk_cfg_t(this, &this->instructions);
1243 }
1244
1245 void
invalidate_analysis(elk::analysis_dependency_class c)1246 elk_backend_shader::invalidate_analysis(elk::analysis_dependency_class c)
1247 {
1248 idom_analysis.invalidate(c);
1249 }
1250
1251 extern "C" const unsigned *
elk_compile_tes(const struct elk_compiler * compiler,elk_compile_tes_params * params)1252 elk_compile_tes(const struct elk_compiler *compiler,
1253 elk_compile_tes_params *params)
1254 {
1255 const struct intel_device_info *devinfo = compiler->devinfo;
1256 nir_shader *nir = params->base.nir;
1257 const struct elk_tes_prog_key *key = params->key;
1258 const struct intel_vue_map *input_vue_map = params->input_vue_map;
1259 struct elk_tes_prog_data *prog_data = params->prog_data;
1260
1261 const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_EVAL];
1262 const bool debug_enabled = elk_should_print_shader(nir, DEBUG_TES);
1263 const unsigned *assembly;
1264
1265 prog_data->base.base.stage = MESA_SHADER_TESS_EVAL;
1266
1267 nir->info.inputs_read = key->inputs_read;
1268 nir->info.patch_inputs_read = key->patch_inputs_read;
1269
1270 elk_nir_apply_key(nir, compiler, &key->base, 8);
1271 elk_nir_lower_tes_inputs(nir, input_vue_map);
1272 elk_nir_lower_vue_outputs(nir);
1273 elk_postprocess_nir(nir, compiler, debug_enabled,
1274 key->base.robust_flags);
1275
1276 elk_compute_vue_map(devinfo, &prog_data->base.vue_map,
1277 nir->info.outputs_written,
1278 nir->info.separate_shader, 1);
1279
1280 unsigned output_size_bytes = prog_data->base.vue_map.num_slots * 4 * 4;
1281
1282 assert(output_size_bytes >= 1);
1283 if (output_size_bytes > GFX7_MAX_DS_URB_ENTRY_SIZE_BYTES) {
1284 params->base.error_str = ralloc_strdup(params->base.mem_ctx,
1285 "DS outputs exceed maximum size");
1286 return NULL;
1287 }
1288
1289 prog_data->base.clip_distance_mask =
1290 ((1 << nir->info.clip_distance_array_size) - 1);
1291 prog_data->base.cull_distance_mask =
1292 ((1 << nir->info.cull_distance_array_size) - 1) <<
1293 nir->info.clip_distance_array_size;
1294
1295 prog_data->include_primitive_id =
1296 BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID);
1297
1298 /* URB entry sizes are stored as a multiple of 64 bytes. */
1299 prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
1300
1301 prog_data->base.urb_read_length = 0;
1302
1303 STATIC_ASSERT(INTEL_TESS_PARTITIONING_INTEGER == TESS_SPACING_EQUAL - 1);
1304 STATIC_ASSERT(INTEL_TESS_PARTITIONING_ODD_FRACTIONAL ==
1305 TESS_SPACING_FRACTIONAL_ODD - 1);
1306 STATIC_ASSERT(INTEL_TESS_PARTITIONING_EVEN_FRACTIONAL ==
1307 TESS_SPACING_FRACTIONAL_EVEN - 1);
1308
1309 prog_data->partitioning =
1310 (enum intel_tess_partitioning) (nir->info.tess.spacing - 1);
1311
1312 switch (nir->info.tess._primitive_mode) {
1313 case TESS_PRIMITIVE_QUADS:
1314 prog_data->domain = INTEL_TESS_DOMAIN_QUAD;
1315 break;
1316 case TESS_PRIMITIVE_TRIANGLES:
1317 prog_data->domain = INTEL_TESS_DOMAIN_TRI;
1318 break;
1319 case TESS_PRIMITIVE_ISOLINES:
1320 prog_data->domain = INTEL_TESS_DOMAIN_ISOLINE;
1321 break;
1322 default:
1323 unreachable("invalid domain shader primitive mode");
1324 }
1325
1326 if (nir->info.tess.point_mode) {
1327 prog_data->output_topology = INTEL_TESS_OUTPUT_TOPOLOGY_POINT;
1328 } else if (nir->info.tess._primitive_mode == TESS_PRIMITIVE_ISOLINES) {
1329 prog_data->output_topology = INTEL_TESS_OUTPUT_TOPOLOGY_LINE;
1330 } else {
1331 /* Hardware winding order is backwards from OpenGL */
1332 prog_data->output_topology =
1333 nir->info.tess.ccw ? INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CW
1334 : INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CCW;
1335 }
1336
1337 if (unlikely(debug_enabled)) {
1338 fprintf(stderr, "TES Input ");
1339 elk_print_vue_map(stderr, input_vue_map, MESA_SHADER_TESS_EVAL);
1340 fprintf(stderr, "TES Output ");
1341 elk_print_vue_map(stderr, &prog_data->base.vue_map,
1342 MESA_SHADER_TESS_EVAL);
1343 }
1344
1345 if (is_scalar) {
1346 const unsigned dispatch_width = 8;
1347 elk_fs_visitor v(compiler, ¶ms->base, &key->base,
1348 &prog_data->base.base, nir, dispatch_width,
1349 params->base.stats != NULL, debug_enabled);
1350 if (!v.run_tes()) {
1351 params->base.error_str =
1352 ralloc_strdup(params->base.mem_ctx, v.fail_msg);
1353 return NULL;
1354 }
1355
1356 assert(v.payload().num_regs % reg_unit(devinfo) == 0);
1357 prog_data->base.base.dispatch_grf_start_reg = v.payload().num_regs / reg_unit(devinfo);
1358
1359 prog_data->base.dispatch_mode = INTEL_DISPATCH_MODE_SIMD8;
1360
1361 elk_fs_generator g(compiler, ¶ms->base,
1362 &prog_data->base.base, false, MESA_SHADER_TESS_EVAL);
1363 if (unlikely(debug_enabled)) {
1364 g.enable_debug(ralloc_asprintf(params->base.mem_ctx,
1365 "%s tessellation evaluation shader %s",
1366 nir->info.label ? nir->info.label
1367 : "unnamed",
1368 nir->info.name));
1369 }
1370
1371 g.generate_code(v.cfg, dispatch_width, v.shader_stats,
1372 v.performance_analysis.require(), params->base.stats);
1373
1374 g.add_const_data(nir->constant_data, nir->constant_data_size);
1375
1376 assembly = g.get_assembly();
1377 } else {
1378 elk::vec4_tes_visitor v(compiler, ¶ms->base, key, prog_data,
1379 nir, debug_enabled);
1380 if (!v.run()) {
1381 params->base.error_str =
1382 ralloc_strdup(params->base.mem_ctx, v.fail_msg);
1383 return NULL;
1384 }
1385
1386 if (unlikely(debug_enabled))
1387 v.dump_instructions();
1388
1389 assembly = elk_vec4_generate_assembly(compiler, ¶ms->base, nir,
1390 &prog_data->base, v.cfg,
1391 v.performance_analysis.require(),
1392 debug_enabled);
1393 }
1394
1395 return assembly;
1396 }
1397