/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <[email protected]>
 *
 */

28 #ifndef ELK_FS_H
29 #define ELK_FS_H
30
31 #include "elk_shader.h"
32 #include "elk_ir_fs.h"
33 #include "elk_fs_live_variables.h"
34 #include "elk_ir_performance.h"
35 #include "compiler/nir/nir.h"
36
37 struct elk_bblock_t;
38 namespace {
39 struct acp_entry;
40 }
41
42 class elk_fs_visitor;
43
44 namespace elk {
45 /**
46 * Register pressure analysis of a shader. Estimates how many registers
47 * are live at any point of the program in GRF units.
48 */
49 struct register_pressure {
50 register_pressure(const elk_fs_visitor *v);
51 register_pressure(const register_pressure &) = delete;
52 ~register_pressure();
53 register_pressure & operator=(const register_pressure &) = delete;
54
55 analysis_dependency_class
dependency_classregister_pressure56 dependency_class() const
57 {
58 return (DEPENDENCY_INSTRUCTION_IDENTITY |
59 DEPENDENCY_INSTRUCTION_DATA_FLOW |
60 DEPENDENCY_VARIABLES);
61 }
62
63 bool
validateregister_pressure64 validate(const elk_fs_visitor *) const
65 {
66 /* FINISHME */
67 return true;
68 }
69
70 unsigned *regs_live_at_ip;
71 };
72 }
73
74 struct elk_gs_compile;
75
76 namespace elk {
77 class fs_builder;
78 }
79
/**
 * Statistics gathered while compiling a single shader variant, reported in
 * compiler debug output.
 */
struct shader_stats {
   /** Name of the instruction-scheduler mode that was used. */
   const char *scheduler_mode;
   /** Number of constants promoted (presumably to push constants — confirm
    * against the pass that fills this in).
    */
   unsigned promoted_constants;
   /** Number of register-spill writes emitted. */
   unsigned spill_count;
   /** Number of register-fill reads emitted. */
   unsigned fill_count;
   /** Peak register pressure observed, in GRF units. */
   unsigned max_register_pressure;
};
87
88 /** Register numbers for thread payload fields. */
/** Register numbers for thread payload fields. */
struct elk_elk_thread_payload {
   /** The number of thread payload registers the hardware will supply. */
   uint8_t num_regs;

   virtual ~elk_elk_thread_payload() = default;

protected:
   /* Constructible only through the per-stage subclasses below;
    * num_regs is value-initialized to 0.
    */
   elk_elk_thread_payload() : num_regs() {}
};
98
99 struct elk_vs_thread_payload : public elk_elk_thread_payload {
100 elk_vs_thread_payload(const elk_fs_visitor &v);
101
102 elk_fs_reg urb_handles;
103 };
104
105 struct elk_tcs_thread_payload : public elk_elk_thread_payload {
106 elk_tcs_thread_payload(const elk_fs_visitor &v);
107
108 elk_fs_reg patch_urb_output;
109 elk_fs_reg primitive_id;
110 elk_fs_reg icp_handle_start;
111 };
112
113 struct elk_tes_thread_payload : public elk_elk_thread_payload {
114 elk_tes_thread_payload(const elk_fs_visitor &v);
115
116 elk_fs_reg patch_urb_input;
117 elk_fs_reg primitive_id;
118 elk_fs_reg coords[3];
119 elk_fs_reg urb_output;
120 };
121
122 struct elk_gs_thread_payload : public elk_elk_thread_payload {
123 elk_gs_thread_payload(elk_fs_visitor &v);
124
125 elk_fs_reg urb_handles;
126 elk_fs_reg primitive_id;
127 elk_fs_reg instance_id;
128 elk_fs_reg icp_handle_start;
129 };
130
131 struct elk_fs_thread_payload : public elk_elk_thread_payload {
132 elk_fs_thread_payload(const elk_fs_visitor &v,
133 bool &source_depth_to_render_target,
134 bool &runtime_check_aads_emit);
135
136 uint8_t subspan_coord_reg[2];
137 uint8_t source_depth_reg[2];
138 uint8_t source_w_reg[2];
139 uint8_t aa_dest_stencil_reg[2];
140 uint8_t dest_depth_reg[2];
141 uint8_t sample_pos_reg[2];
142 uint8_t sample_mask_in_reg[2];
143 uint8_t depth_w_coef_reg;
144 uint8_t barycentric_coord_reg[ELK_BARYCENTRIC_MODE_COUNT][2];
145 };
146
147 struct elk_cs_thread_payload : public elk_elk_thread_payload {
148 elk_cs_thread_payload(const elk_fs_visitor &v);
149
150 void load_subgroup_id(const elk::fs_builder &bld, elk_fs_reg &dest) const;
151
152 elk_fs_reg local_invocation_id[3];
153
154 protected:
155 elk_fs_reg subgroup_id_;
156 };
157
158 class elk_fs_instruction_scheduler;
159
160 /**
161 * The fragment shader front-end.
162 *
163 * Translates either GLSL IR or Mesa IR (for ARB_fragment_program) into FS IR.
164 */
165 class elk_fs_visitor : public elk_backend_shader
166 {
167 public:
168 elk_fs_visitor(const struct elk_compiler *compiler,
169 const struct elk_compile_params *params,
170 const elk_base_prog_key *key,
171 struct elk_stage_prog_data *prog_data,
172 const nir_shader *shader,
173 unsigned dispatch_width,
174 bool needs_register_pressure,
175 bool debug_enabled);
176 elk_fs_visitor(const struct elk_compiler *compiler,
177 const struct elk_compile_params *params,
178 const elk_wm_prog_key *key,
179 struct elk_wm_prog_data *prog_data,
180 const nir_shader *shader,
181 unsigned dispatch_width,
182 bool needs_register_pressure,
183 bool debug_enabled);
184 elk_fs_visitor(const struct elk_compiler *compiler,
185 const struct elk_compile_params *params,
186 struct elk_gs_compile *gs_compile,
187 struct elk_gs_prog_data *prog_data,
188 const nir_shader *shader,
189 bool needs_register_pressure,
190 bool debug_enabled);
191 elk_fs_visitor(const elk_fs_visitor &) = delete;
192 void init();
193 ~elk_fs_visitor();
194
195 elk_fs_visitor & operator=(const elk_fs_visitor &) = delete;
196
197 elk_fs_reg vgrf(const glsl_type *const type);
198 void import_uniforms(elk_fs_visitor *v);
199
200 void VARYING_PULL_CONSTANT_LOAD(const elk::fs_builder &bld,
201 const elk_fs_reg &dst,
202 const elk_fs_reg &surface,
203 const elk_fs_reg &surface_handle,
204 const elk_fs_reg &varying_offset,
205 uint32_t const_offset,
206 uint8_t alignment,
207 unsigned components);
208 void DEP_RESOLVE_MOV(const elk::fs_builder &bld, int grf);
209
210 bool run_fs(bool allow_spilling, bool do_rep_send);
211 bool run_vs();
212 bool run_tcs();
213 bool run_tes();
214 bool run_gs();
215 bool run_cs(bool allow_spilling);
216 void optimize();
217 void allocate_registers(bool allow_spilling);
218 uint32_t compute_max_register_pressure();
219 void fixup_3src_null_dest();
220 void assign_curb_setup();
221 void assign_urb_setup();
222 void convert_attr_sources_to_hw_regs(elk_fs_inst *inst);
223 void assign_vs_urb_setup();
224 void assign_tcs_urb_setup();
225 void assign_tes_urb_setup();
226 void assign_gs_urb_setup();
227 bool assign_regs(bool allow_spilling, bool spill_all);
228 void assign_regs_trivial();
229 void calculate_payload_ranges(unsigned payload_node_count,
230 int *payload_last_use_ip) const;
231 bool split_virtual_grfs();
232 bool compact_virtual_grfs();
233 void assign_constant_locations();
234 bool get_pull_locs(const elk_fs_reg &src, unsigned *out_surf_index,
235 unsigned *out_pull_index);
236 bool lower_constant_loads();
237 virtual void invalidate_analysis(elk::analysis_dependency_class c);
238
239 #ifndef NDEBUG
240 void validate();
241 #else
validate()242 void validate() {}
243 #endif
244
245 bool opt_algebraic();
246 bool opt_redundant_halt();
247 bool opt_cse();
248 bool opt_cse_local(const elk::fs_live_variables &live, elk_bblock_t *block, int &ip);
249
250 bool opt_copy_propagation();
251 bool opt_bank_conflicts();
252 bool register_coalesce();
253 bool compute_to_mrf();
254 bool eliminate_find_live_channel();
255 bool dead_code_eliminate();
256 bool remove_duplicate_mrf_writes();
257 bool remove_extra_rounding_modes();
258
259 elk_fs_instruction_scheduler *prepare_scheduler(void *mem_ctx);
260 void schedule_instructions_pre_ra(elk_fs_instruction_scheduler *sched,
261 instruction_scheduler_mode mode);
262 void schedule_instructions_post_ra();
263
264 void insert_gfx4_send_dependency_workarounds();
265 void insert_gfx4_pre_send_dependency_workarounds(elk_bblock_t *block,
266 elk_fs_inst *inst);
267 void insert_gfx4_post_send_dependency_workarounds(elk_bblock_t *block,
268 elk_fs_inst *inst);
269 void vfail(const char *msg, va_list args);
270 void fail(const char *msg, ...);
271 void limit_dispatch_width(unsigned n, const char *msg);
272 bool lower_uniform_pull_constant_loads();
273 bool lower_load_payload();
274 bool lower_pack();
275 bool lower_regioning();
276 bool lower_logical_sends();
277 bool lower_integer_multiplication();
278 bool lower_minmax();
279 bool lower_simd_width();
280 bool lower_barycentrics();
281 bool lower_find_live_channel();
282 bool lower_scoreboard();
283 bool lower_sub_sat();
284 bool opt_combine_constants();
285
286 void emit_repclear_shader();
287 void emit_interpolation_setup_gfx4();
288 void emit_interpolation_setup_gfx6();
289 bool opt_peephole_sel();
290 bool opt_saturate_propagation();
291 bool opt_cmod_propagation();
292 bool opt_zero_samples();
293
294 void set_tcs_invocation_id();
295
296 void emit_alpha_test();
297 elk_fs_inst *emit_single_fb_write(const elk::fs_builder &bld,
298 elk_fs_reg color1, elk_fs_reg color2,
299 elk_fs_reg src0_alpha, unsigned components);
300 void do_emit_fb_writes(int nr_color_regions, bool replicate_alpha);
301 void emit_fb_writes();
302 void emit_urb_writes(const elk_fs_reg &gs_vertex_count = elk_fs_reg());
303 void emit_gs_control_data_bits(const elk_fs_reg &vertex_count);
304 void emit_gs_thread_end();
305 bool mark_last_urb_write_with_eot();
306 void emit_tcs_thread_end();
307 void emit_urb_fence();
308 void emit_cs_terminate();
309
310 elk_fs_reg interp_reg(const elk::fs_builder &bld, unsigned location,
311 unsigned channel, unsigned comp);
312 elk_fs_reg per_primitive_reg(const elk::fs_builder &bld,
313 int location, unsigned comp);
314
315 virtual void dump_instruction_to_file(const elk_backend_instruction *inst, FILE *file) const;
316 virtual void dump_instructions_to_file(FILE *file) const;
317
318 const elk_base_prog_key *const key;
319 const struct elk_sampler_prog_key_data *key_tex;
320
321 struct elk_gs_compile *gs_compile;
322
323 struct elk_stage_prog_data *prog_data;
324
325 elk_analysis<elk::fs_live_variables, elk_backend_shader> live_analysis;
326 elk_analysis<elk::register_pressure, elk_fs_visitor> regpressure_analysis;
327 elk_analysis<elk::performance, elk_fs_visitor> performance_analysis;
328
329 /** Number of uniform variable components visited. */
330 unsigned uniforms;
331
332 /** Byte-offset for the next available spot in the scratch space buffer. */
333 unsigned last_scratch;
334
335 /**
336 * Array mapping UNIFORM register numbers to the push parameter index,
337 * or -1 if this uniform register isn't being uploaded as a push constant.
338 */
339 int *push_constant_loc;
340
341 elk_fs_reg frag_depth;
342 elk_fs_reg frag_stencil;
343 elk_fs_reg sample_mask;
344 elk_fs_reg outputs[VARYING_SLOT_MAX];
345 elk_fs_reg dual_src_output;
346 int first_non_payload_grf;
347 /** Either ELK_MAX_GRF or GFX7_MRF_HACK_START */
348 unsigned max_grf;
349
350 bool failed;
351 char *fail_msg;
352
353 elk_elk_thread_payload *payload_;
354
payload()355 elk_elk_thread_payload &payload() {
356 return *this->payload_;
357 }
358
vs_payload()359 elk_vs_thread_payload &vs_payload() {
360 assert(stage == MESA_SHADER_VERTEX);
361 return *static_cast<elk_vs_thread_payload *>(this->payload_);
362 }
363
tcs_payload()364 elk_tcs_thread_payload &tcs_payload() {
365 assert(stage == MESA_SHADER_TESS_CTRL);
366 return *static_cast<elk_tcs_thread_payload *>(this->payload_);
367 }
368
tes_payload()369 elk_tes_thread_payload &tes_payload() {
370 assert(stage == MESA_SHADER_TESS_EVAL);
371 return *static_cast<elk_tes_thread_payload *>(this->payload_);
372 }
373
gs_payload()374 elk_gs_thread_payload &gs_payload() {
375 assert(stage == MESA_SHADER_GEOMETRY);
376 return *static_cast<elk_gs_thread_payload *>(this->payload_);
377 }
378
fs_payload()379 elk_fs_thread_payload &fs_payload() {
380 assert(stage == MESA_SHADER_FRAGMENT);
381 return *static_cast<elk_fs_thread_payload *>(this->payload_);
382 };
383
cs_payload()384 elk_cs_thread_payload &cs_payload() {
385 assert(gl_shader_stage_uses_workgroup(stage));
386 return *static_cast<elk_cs_thread_payload *>(this->payload_);
387 }
388
389 bool source_depth_to_render_target;
390 bool runtime_check_aads_emit;
391
392 elk_fs_reg pixel_x;
393 elk_fs_reg pixel_y;
394 elk_fs_reg pixel_z;
395 elk_fs_reg wpos_w;
396 elk_fs_reg pixel_w;
397 elk_fs_reg delta_xy[ELK_BARYCENTRIC_MODE_COUNT];
398 elk_fs_reg final_gs_vertex_count;
399 elk_fs_reg control_data_bits;
400 elk_fs_reg invocation_id;
401
402 unsigned grf_used;
403 bool spilled_any_registers;
404 bool needs_register_pressure;
405
406 const unsigned dispatch_width; /**< 8, 16 or 32 */
407 unsigned max_dispatch_width;
408
409 /* The API selected subgroup size */
410 unsigned api_subgroup_size; /**< 0, 8, 16, 32 */
411
412 struct shader_stats shader_stats;
413
414 void lower_mul_dword_inst(elk_fs_inst *inst, elk_bblock_t *block);
415 void lower_mul_qword_inst(elk_fs_inst *inst, elk_bblock_t *block);
416 void lower_mulh_inst(elk_fs_inst *inst, elk_bblock_t *block);
417
418 unsigned workgroup_size() const;
419
420 void debug_optimizer(const nir_shader *nir,
421 const char *pass_name,
422 int iteration, int pass_num) const;
423 };
424
425 /**
426 * Return the flag register used in fragment shaders to keep track of live
427 * samples. On Gfx7+ we use f1.0-f1.1 to allow discard jumps in SIMD32
428 * dispatch mode, while earlier generations are constrained to f0.1, which
429 * limits the dispatch width to SIMD16 for fragment shaders that use discard.
430 */
431 static inline unsigned
sample_mask_flag_subreg(const elk_fs_visitor & s)432 sample_mask_flag_subreg(const elk_fs_visitor &s)
433 {
434 assert(s.stage == MESA_SHADER_FRAGMENT);
435 return s.devinfo->ver >= 7 ? 2 : 1;
436 }
437
438 /**
439 * The fragment shader code generator.
440 *
441 * Translates FS IR to actual i965 assembly code.
442 */
443 class elk_fs_generator
444 {
445 public:
446 elk_fs_generator(const struct elk_compiler *compiler,
447 const struct elk_compile_params *params,
448 struct elk_stage_prog_data *prog_data,
449 bool runtime_check_aads_emit,
450 gl_shader_stage stage);
451 ~elk_fs_generator();
452
453 void enable_debug(const char *shader_name);
454 int generate_code(const elk_cfg_t *cfg, int dispatch_width,
455 struct shader_stats shader_stats,
456 const elk::performance &perf,
457 struct elk_compile_stats *stats);
458 void add_const_data(void *data, unsigned size);
459 const unsigned *get_assembly();
460
461 private:
462 void fire_fb_write(elk_fs_inst *inst,
463 struct elk_reg payload,
464 struct elk_reg implied_header,
465 GLuint nr);
466 void generate_send(elk_fs_inst *inst,
467 struct elk_reg dst,
468 struct elk_reg desc,
469 struct elk_reg payload);
470 void generate_fb_write(elk_fs_inst *inst, struct elk_reg payload);
471 void generate_cs_terminate(elk_fs_inst *inst, struct elk_reg payload);
472 void generate_barrier(elk_fs_inst *inst, struct elk_reg src);
473 bool generate_linterp(elk_fs_inst *inst, struct elk_reg dst,
474 struct elk_reg *src);
475 void generate_tex(elk_fs_inst *inst, struct elk_reg dst,
476 struct elk_reg surface_index,
477 struct elk_reg sampler_index);
478 void generate_ddx(const elk_fs_inst *inst,
479 struct elk_reg dst, struct elk_reg src);
480 void generate_ddy(const elk_fs_inst *inst,
481 struct elk_reg dst, struct elk_reg src);
482 void generate_scratch_write(elk_fs_inst *inst, struct elk_reg src);
483 void generate_scratch_read(elk_fs_inst *inst, struct elk_reg dst);
484 void generate_scratch_read_gfx7(elk_fs_inst *inst, struct elk_reg dst);
485 void generate_scratch_header(elk_fs_inst *inst, struct elk_reg dst);
486 void generate_uniform_pull_constant_load(elk_fs_inst *inst, struct elk_reg dst,
487 struct elk_reg index,
488 struct elk_reg offset);
489 void generate_varying_pull_constant_load_gfx4(elk_fs_inst *inst,
490 struct elk_reg dst,
491 struct elk_reg index);
492
493 void generate_set_sample_id(elk_fs_inst *inst,
494 struct elk_reg dst,
495 struct elk_reg src0,
496 struct elk_reg src1);
497
498 void generate_halt(elk_fs_inst *inst);
499
500 void generate_mov_indirect(elk_fs_inst *inst,
501 struct elk_reg dst,
502 struct elk_reg reg,
503 struct elk_reg indirect_byte_offset);
504
505 void generate_shuffle(elk_fs_inst *inst,
506 struct elk_reg dst,
507 struct elk_reg src,
508 struct elk_reg idx);
509
510 void generate_quad_swizzle(const elk_fs_inst *inst,
511 struct elk_reg dst, struct elk_reg src,
512 unsigned swiz);
513
514 bool patch_halt_jumps();
515
516 const struct elk_compiler *compiler;
517 const struct elk_compile_params *params;
518
519 const struct intel_device_info *devinfo;
520
521 struct elk_codegen *p;
522 struct elk_stage_prog_data * const prog_data;
523
524 unsigned dispatch_width; /**< 8, 16 or 32 */
525
526 exec_list discard_halt_patches;
527 bool runtime_check_aads_emit;
528 bool debug_flag;
529 const char *shader_name;
530 gl_shader_stage stage;
531 void *mem_ctx;
532 };
533
534 namespace elk {
535 elk_fs_reg
536 fetch_payload_reg(const elk::fs_builder &bld, uint8_t regs[2],
537 elk_reg_type type = ELK_REGISTER_TYPE_F,
538 unsigned n = 1);
539
540 elk_fs_reg
541 fetch_barycentric_reg(const elk::fs_builder &bld, uint8_t regs[2]);
542
543 inline elk_fs_reg
dynamic_msaa_flags(const struct elk_wm_prog_data * wm_prog_data)544 dynamic_msaa_flags(const struct elk_wm_prog_data *wm_prog_data)
545 {
546 return elk_fs_reg(UNIFORM, wm_prog_data->msaa_flags_param,
547 ELK_REGISTER_TYPE_UD);
548 }
549
550 void
551 check_dynamic_msaa_flag(const fs_builder &bld,
552 const struct elk_wm_prog_data *wm_prog_data,
553 enum intel_msaa_flags flag);
554
555 bool
556 lower_src_modifiers(elk_fs_visitor *v, elk_bblock_t *block, elk_fs_inst *inst, unsigned i);
557 }
558
559 void elk_shuffle_from_32bit_read(const elk::fs_builder &bld,
560 const elk_fs_reg &dst,
561 const elk_fs_reg &src,
562 uint32_t first_component,
563 uint32_t components);
564
565 elk_fs_reg elk_setup_imm_df(const elk::fs_builder &bld,
566 double v);
567
568 elk_fs_reg elk_setup_imm_b(const elk::fs_builder &bld,
569 int8_t v);
570
571 elk_fs_reg elk_setup_imm_ub(const elk::fs_builder &bld,
572 uint8_t v);
573
574 enum elk_barycentric_mode elk_barycentric_mode(nir_intrinsic_instr *intr);
575
576 uint32_t elk_fb_write_msg_control(const elk_fs_inst *inst,
577 const struct elk_wm_prog_data *prog_data);
578
579 void elk_compute_urb_setup_index(struct elk_wm_prog_data *wm_prog_data);
580
581 bool elk_nir_lower_simd(nir_shader *nir, unsigned dispatch_width);
582
583 elk_fs_reg elk_sample_mask_reg(const elk::fs_builder &bld);
584 void elk_emit_predicate_on_sample_mask(const elk::fs_builder &bld, elk_fs_inst *inst);
585
586 int elk_get_subgroup_id_param_index(const intel_device_info *devinfo,
587 const elk_stage_prog_data *prog_data);
588
589 void nir_to_elk(elk_fs_visitor *s);
590
591 #endif /* ELK_FS_H */
592