/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <[email protected]>
 *
 */

#ifndef ELK_FS_H
#define ELK_FS_H

#include "elk_shader.h"
#include "elk_ir_fs.h"
#include "elk_fs_live_variables.h"
#include "elk_ir_performance.h"
#include "compiler/nir/nir.h"

struct elk_bblock_t;
namespace {
   struct acp_entry;
}

class elk_fs_visitor;

namespace elk {
   /**
    * Register pressure analysis of a shader.  Estimates how many registers
    * are live at any point of the program in GRF units.
    */
   struct register_pressure {
      register_pressure(const elk_fs_visitor *v);
      register_pressure(const register_pressure &) = delete;
      ~register_pressure();
      register_pressure & operator=(const register_pressure &) = delete;

      analysis_dependency_class
      dependency_class() const
      {
         return (DEPENDENCY_INSTRUCTION_IDENTITY |
                 DEPENDENCY_INSTRUCTION_DATA_FLOW |
                 DEPENDENCY_VARIABLES);
      }

      bool
      validate(const elk_fs_visitor *) const
      {
         /* FINISHME */
         return true;
      }

      unsigned *regs_live_at_ip;
   };
}

struct elk_gs_compile;

namespace elk {
class fs_builder;
}

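/** Statistics about a single compiled shader, gathered during compilation. */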
struct shader_stats {
   const char *scheduler_mode;
   unsigned promoted_constants;
   unsigned spill_count;
   unsigned fill_count;
   unsigned max_register_pressure;
};

/** Register numbers for thread payload fields. */
struct elk_elk_thread_payload {
   /** The number of thread payload registers the hardware will supply. */
   uint8_t num_regs;

   virtual ~elk_elk_thread_payload() = default;

protected:
   elk_elk_thread_payload() : num_regs() {}
};

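/** Thread payload layout for vertex shader dispatches. */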
struct elk_vs_thread_payload : public elk_elk_thread_payload {
   elk_vs_thread_payload(const elk_fs_visitor &v);

   elk_fs_reg urb_handles;
};

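/** Thread payload layout for tessellation control shader dispatches. */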
struct elk_tcs_thread_payload : public elk_elk_thread_payload {
   elk_tcs_thread_payload(const elk_fs_visitor &v);

   elk_fs_reg patch_urb_output;
   elk_fs_reg primitive_id;
   elk_fs_reg icp_handle_start;
};

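/** Thread payload layout for tessellation evaluation shader dispatches. */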
struct elk_tes_thread_payload : public elk_elk_thread_payload {
   elk_tes_thread_payload(const elk_fs_visitor &v);

   elk_fs_reg patch_urb_input;
   elk_fs_reg primitive_id;
   elk_fs_reg coords[3];
   elk_fs_reg urb_output;
};

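/** Thread payload layout for geometry shader dispatches. */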
struct elk_gs_thread_payload : public elk_elk_thread_payload {
   elk_gs_thread_payload(elk_fs_visitor &v);

   elk_fs_reg urb_handles;
   elk_fs_reg primitive_id;
   elk_fs_reg instance_id;
   elk_fs_reg icp_handle_start;
};

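/**
 * Thread payload register layout for fragment shader dispatches.  The
 * two-element arrays hold one payload register number per SIMD16 half of
 * the dispatch; only the first entry is used below SIMD32.
 */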
struct elk_fs_thread_payload : public elk_elk_thread_payload {
   elk_fs_thread_payload(const elk_fs_visitor &v,
                         bool &source_depth_to_render_target,
                         bool &runtime_check_aads_emit);

   uint8_t subspan_coord_reg[2];
   uint8_t source_depth_reg[2];
   uint8_t source_w_reg[2];
   uint8_t aa_dest_stencil_reg[2];
   uint8_t dest_depth_reg[2];
   uint8_t sample_pos_reg[2];
   uint8_t sample_mask_in_reg[2];
   uint8_t depth_w_coef_reg;
   uint8_t barycentric_coord_reg[ELK_BARYCENTRIC_MODE_COUNT][2];
};

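/** Thread payload layout for compute-style dispatches (any stage that runs with a workgroup). */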
struct elk_cs_thread_payload : public elk_elk_thread_payload {
   elk_cs_thread_payload(const elk_fs_visitor &v);

   void load_subgroup_id(const elk::fs_builder &bld, elk_fs_reg &dest) const;

   elk_fs_reg local_invocation_id[3];

protected:
   elk_fs_reg subgroup_id_;
};

class elk_fs_instruction_scheduler;

/**
 * The scalar ("FS") shader front-end.
 *
 * Translates NIR into FS IR.  Despite the name, it is used for all scalar
 * shader stages handled by this backend, not just fragment shaders.
 */
class elk_fs_visitor : public elk_backend_shader
{
public:
   elk_fs_visitor(const struct elk_compiler *compiler,
                  const struct elk_compile_params *params,
                  const elk_base_prog_key *key,
                  struct elk_stage_prog_data *prog_data,
                  const nir_shader *shader,
                  unsigned dispatch_width,
                  bool needs_register_pressure,
                  bool debug_enabled);
   elk_fs_visitor(const struct elk_compiler *compiler,
                  const struct elk_compile_params *params,
                  const elk_wm_prog_key *key,
                  struct elk_wm_prog_data *prog_data,
                  const nir_shader *shader,
                  unsigned dispatch_width,
                  bool needs_register_pressure,
                  bool debug_enabled);
   elk_fs_visitor(const struct elk_compiler *compiler,
                  const struct elk_compile_params *params,
                  struct elk_gs_compile *gs_compile,
                  struct elk_gs_prog_data *prog_data,
                  const nir_shader *shader,
                  bool needs_register_pressure,
                  bool debug_enabled);
   elk_fs_visitor(const elk_fs_visitor &) = delete;
   void init();
   ~elk_fs_visitor();

   elk_fs_visitor & operator=(const elk_fs_visitor &) = delete;

   elk_fs_reg vgrf(const glsl_type *const type);
   void import_uniforms(elk_fs_visitor *v);

   void VARYING_PULL_CONSTANT_LOAD(const elk::fs_builder &bld,
                                   const elk_fs_reg &dst,
                                   const elk_fs_reg &surface,
                                   const elk_fs_reg &surface_handle,
                                   const elk_fs_reg &varying_offset,
                                   uint32_t const_offset,
                                   uint8_t alignment,
                                   unsigned components);
   void DEP_RESOLVE_MOV(const elk::fs_builder &bld, int grf);

   bool run_fs(bool allow_spilling, bool do_rep_send);
   bool run_vs();
   bool run_tcs();
   bool run_tes();
   bool run_gs();
   bool run_cs(bool allow_spilling);
   void optimize();
   void allocate_registers(bool allow_spilling);
   uint32_t compute_max_register_pressure();
   void fixup_3src_null_dest();
   void assign_curb_setup();
   void assign_urb_setup();
   void convert_attr_sources_to_hw_regs(elk_fs_inst *inst);
   void assign_vs_urb_setup();
   void assign_tcs_urb_setup();
   void assign_tes_urb_setup();
   void assign_gs_urb_setup();
   bool assign_regs(bool allow_spilling, bool spill_all);
   void assign_regs_trivial();
   void calculate_payload_ranges(unsigned payload_node_count,
                                 int *payload_last_use_ip) const;
   bool split_virtual_grfs();
   bool compact_virtual_grfs();
   void assign_constant_locations();
   bool get_pull_locs(const elk_fs_reg &src, unsigned *out_surf_index,
                      unsigned *out_pull_index);
   bool lower_constant_loads();
   virtual void invalidate_analysis(elk::analysis_dependency_class c);

#ifndef NDEBUG
   void validate();
#else
   void validate() {}
#endif

   bool opt_algebraic();
   bool opt_redundant_halt();
   bool opt_cse();
   bool opt_cse_local(const elk::fs_live_variables &live, elk_bblock_t *block, int &ip);

   bool opt_copy_propagation();
   bool opt_bank_conflicts();
   bool register_coalesce();
   bool compute_to_mrf();
   bool eliminate_find_live_channel();
   bool dead_code_eliminate();
   bool remove_duplicate_mrf_writes();
   bool remove_extra_rounding_modes();

   elk_fs_instruction_scheduler *prepare_scheduler(void *mem_ctx);
   void schedule_instructions_pre_ra(elk_fs_instruction_scheduler *sched,
                                     instruction_scheduler_mode mode);
   void schedule_instructions_post_ra();

   void insert_gfx4_send_dependency_workarounds();
   void insert_gfx4_pre_send_dependency_workarounds(elk_bblock_t *block,
                                                    elk_fs_inst *inst);
   void insert_gfx4_post_send_dependency_workarounds(elk_bblock_t *block,
                                                     elk_fs_inst *inst);
   void vfail(const char *msg, va_list args);
   void fail(const char *msg, ...);
   void limit_dispatch_width(unsigned n, const char *msg);
   bool lower_uniform_pull_constant_loads();
   bool lower_load_payload();
   bool lower_pack();
   bool lower_regioning();
   bool lower_logical_sends();
   bool lower_integer_multiplication();
   bool lower_minmax();
   bool lower_simd_width();
   bool lower_barycentrics();
   bool lower_find_live_channel();
   bool lower_scoreboard();
   bool lower_sub_sat();
   bool opt_combine_constants();

   void emit_repclear_shader();
   void emit_interpolation_setup_gfx4();
   void emit_interpolation_setup_gfx6();
   bool opt_peephole_sel();
   bool opt_saturate_propagation();
   bool opt_cmod_propagation();
   bool opt_zero_samples();

   void set_tcs_invocation_id();

   void emit_alpha_test();
   elk_fs_inst *emit_single_fb_write(const elk::fs_builder &bld,
                                     elk_fs_reg color1, elk_fs_reg color2,
                                     elk_fs_reg src0_alpha, unsigned components);
   void do_emit_fb_writes(int nr_color_regions, bool replicate_alpha);
   void emit_fb_writes();
   void emit_urb_writes(const elk_fs_reg &gs_vertex_count = elk_fs_reg());
   void emit_gs_control_data_bits(const elk_fs_reg &vertex_count);
   void emit_gs_thread_end();
   bool mark_last_urb_write_with_eot();
   void emit_tcs_thread_end();
   void emit_urb_fence();
   void emit_cs_terminate();

   elk_fs_reg interp_reg(const elk::fs_builder &bld, unsigned location,
                         unsigned channel, unsigned comp);
   elk_fs_reg per_primitive_reg(const elk::fs_builder &bld,
                                int location, unsigned comp);

   virtual void dump_instruction_to_file(const elk_backend_instruction *inst, FILE *file) const;
   virtual void dump_instructions_to_file(FILE *file) const;

   const elk_base_prog_key *const key;
   const struct elk_sampler_prog_key_data *key_tex;

   struct elk_gs_compile *gs_compile;

   struct elk_stage_prog_data *prog_data;

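   /* Lazily computed analyses of the program.  Passes that modify the IR
    * must report what they changed through invalidate_analysis() so that
    * stale results are recomputed before the next use.
    */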
   elk_analysis<elk::fs_live_variables, elk_backend_shader> live_analysis;
   elk_analysis<elk::register_pressure, elk_fs_visitor> regpressure_analysis;
   elk_analysis<elk::performance, elk_fs_visitor> performance_analysis;

   /** Number of uniform variable components visited. */
   unsigned uniforms;

   /** Byte-offset for the next available spot in the scratch space buffer. */
   unsigned last_scratch;

   /**
    * Array mapping UNIFORM register numbers to the push parameter index,
    * or -1 if this uniform register isn't being uploaded as a push constant.
    */
   int *push_constant_loc;

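   /* Staging registers for shader outputs: render-target color, depth,
    * stencil and sample mask for fragment shaders, and URB varyings for the
    * geometry stages.
    */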
   elk_fs_reg frag_depth;
   elk_fs_reg frag_stencil;
   elk_fs_reg sample_mask;
   elk_fs_reg outputs[VARYING_SLOT_MAX];
   elk_fs_reg dual_src_output;
   int first_non_payload_grf;
   /** Either ELK_MAX_GRF or GFX7_MRF_HACK_START */
   unsigned max_grf;

   bool failed;
   char *fail_msg;

   elk_elk_thread_payload *payload_;

   elk_elk_thread_payload &payload() {
      return *this->payload_;
   }

   elk_vs_thread_payload &vs_payload() {
      assert(stage == MESA_SHADER_VERTEX);
      return *static_cast<elk_vs_thread_payload *>(this->payload_);
   }

   elk_tcs_thread_payload &tcs_payload() {
      assert(stage == MESA_SHADER_TESS_CTRL);
      return *static_cast<elk_tcs_thread_payload *>(this->payload_);
   }

   elk_tes_thread_payload &tes_payload() {
      assert(stage == MESA_SHADER_TESS_EVAL);
      return *static_cast<elk_tes_thread_payload *>(this->payload_);
   }

   elk_gs_thread_payload &gs_payload() {
      assert(stage == MESA_SHADER_GEOMETRY);
      return *static_cast<elk_gs_thread_payload *>(this->payload_);
   }

   elk_fs_thread_payload &fs_payload() {
      assert(stage == MESA_SHADER_FRAGMENT);
      return *static_cast<elk_fs_thread_payload *>(this->payload_);
   }

   elk_cs_thread_payload &cs_payload() {
      assert(gl_shader_stage_uses_workgroup(stage));
      return *static_cast<elk_cs_thread_payload *>(this->payload_);
   }

   bool source_depth_to_render_target;
   bool runtime_check_aads_emit;

   elk_fs_reg pixel_x;
   elk_fs_reg pixel_y;
   elk_fs_reg pixel_z;
   elk_fs_reg wpos_w;
   elk_fs_reg pixel_w;
   elk_fs_reg delta_xy[ELK_BARYCENTRIC_MODE_COUNT];
   elk_fs_reg final_gs_vertex_count;
   elk_fs_reg control_data_bits;
   elk_fs_reg invocation_id;

   unsigned grf_used;
   bool spilled_any_registers;
   bool needs_register_pressure;

   const unsigned dispatch_width; /**< 8, 16 or 32 */
   unsigned max_dispatch_width;

   /* The API-selected subgroup size */
   unsigned api_subgroup_size; /**< 0, 8, 16, 32 */

   struct shader_stats shader_stats;

   void lower_mul_dword_inst(elk_fs_inst *inst, elk_bblock_t *block);
   void lower_mul_qword_inst(elk_fs_inst *inst, elk_bblock_t *block);
   void lower_mulh_inst(elk_fs_inst *inst, elk_bblock_t *block);

   unsigned workgroup_size() const;

   void debug_optimizer(const nir_shader *nir,
                        const char *pass_name,
                        int iteration, int pass_num) const;
};

/**
 * Return the flag register used in fragment shaders to keep track of live
 * samples.  On Gfx7+ we use f1.0-f1.1 to allow discard jumps in SIMD32
 * dispatch mode, while earlier generations are constrained to f0.1, which
 * limits the dispatch width to SIMD16 for fragment shaders that use discard.
 */
static inline unsigned
sample_mask_flag_subreg(const elk_fs_visitor &s)
{
   assert(s.stage == MESA_SHADER_FRAGMENT);
   return s.devinfo->ver >= 7 ? 2 : 1;
}

/**
 * The fragment shader code generator.
 *
 * Translates FS IR to actual i965 assembly code.
 */
class elk_fs_generator
{
public:
   elk_fs_generator(const struct elk_compiler *compiler,
                    const struct elk_compile_params *params,
                    struct elk_stage_prog_data *prog_data,
                    bool runtime_check_aads_emit,
                    gl_shader_stage stage);
   ~elk_fs_generator();

   void enable_debug(const char *shader_name);
   int generate_code(const elk_cfg_t *cfg, int dispatch_width,
                     struct shader_stats shader_stats,
                     const elk::performance &perf,
                     struct elk_compile_stats *stats);
   void add_const_data(void *data, unsigned size);
   const unsigned *get_assembly();

private:
   void fire_fb_write(elk_fs_inst *inst,
                      struct elk_reg payload,
                      struct elk_reg implied_header,
                      GLuint nr);
   void generate_send(elk_fs_inst *inst,
                      struct elk_reg dst,
                      struct elk_reg desc,
                      struct elk_reg payload);
   void generate_fb_write(elk_fs_inst *inst, struct elk_reg payload);
   void generate_cs_terminate(elk_fs_inst *inst, struct elk_reg payload);
   void generate_barrier(elk_fs_inst *inst, struct elk_reg src);
   bool generate_linterp(elk_fs_inst *inst, struct elk_reg dst,
                         struct elk_reg *src);
   void generate_tex(elk_fs_inst *inst, struct elk_reg dst,
                     struct elk_reg surface_index,
                     struct elk_reg sampler_index);
   void generate_ddx(const elk_fs_inst *inst,
                     struct elk_reg dst, struct elk_reg src);
   void generate_ddy(const elk_fs_inst *inst,
                     struct elk_reg dst, struct elk_reg src);
   void generate_scratch_write(elk_fs_inst *inst, struct elk_reg src);
   void generate_scratch_read(elk_fs_inst *inst, struct elk_reg dst);
   void generate_scratch_read_gfx7(elk_fs_inst *inst, struct elk_reg dst);
   void generate_scratch_header(elk_fs_inst *inst, struct elk_reg dst);
   void generate_uniform_pull_constant_load(elk_fs_inst *inst, struct elk_reg dst,
                                            struct elk_reg index,
                                            struct elk_reg offset);
   void generate_varying_pull_constant_load_gfx4(elk_fs_inst *inst,
                                                 struct elk_reg dst,
                                                 struct elk_reg index);

   void generate_set_sample_id(elk_fs_inst *inst,
                               struct elk_reg dst,
                               struct elk_reg src0,
                               struct elk_reg src1);

   void generate_halt(elk_fs_inst *inst);

   void generate_mov_indirect(elk_fs_inst *inst,
                              struct elk_reg dst,
                              struct elk_reg reg,
                              struct elk_reg indirect_byte_offset);

   void generate_shuffle(elk_fs_inst *inst,
                         struct elk_reg dst,
                         struct elk_reg src,
                         struct elk_reg idx);

   void generate_quad_swizzle(const elk_fs_inst *inst,
                              struct elk_reg dst, struct elk_reg src,
                              unsigned swiz);

   bool patch_halt_jumps();

   const struct elk_compiler *compiler;
   const struct elk_compile_params *params;

   const struct intel_device_info *devinfo;

   struct elk_codegen *p;
   struct elk_stage_prog_data * const prog_data;

   unsigned dispatch_width; /**< 8, 16 or 32 */

   exec_list discard_halt_patches;
   bool runtime_check_aads_emit;
   bool debug_flag;
   const char *shader_name;
   gl_shader_stage stage;
   void *mem_ctx;
};
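
// A minimal sketch of how the pieces above fit together, mirroring the elk
// compiler entry points.  The variables it uses (compiler, params, key,
// prog_data, nir, allow_spilling, debug_enabled, stats) are assumed to be
// set up by the caller; this is illustrative, not a definitive recipe.
//
//    elk_fs_visitor v(compiler, params, key, prog_data, nir,
//                     8 /* dispatch_width */,
//                     false /* needs_register_pressure */, debug_enabled);
//    if (v.run_fs(allow_spilling, false /* do_rep_send */)) {
//       elk_fs_generator g(compiler, params, &prog_data->base,
//                          v.runtime_check_aads_emit, MESA_SHADER_FRAGMENT);
//       g.generate_code(v.cfg, v.dispatch_width, v.shader_stats,
//                       v.performance_analysis.require(), stats);
//       const unsigned *assembly = g.get_assembly();
//    }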

namespace elk {
   elk_fs_reg
   fetch_payload_reg(const elk::fs_builder &bld, uint8_t regs[2],
                     elk_reg_type type = ELK_REGISTER_TYPE_F,
                     unsigned n = 1);

   elk_fs_reg
   fetch_barycentric_reg(const elk::fs_builder &bld, uint8_t regs[2]);

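   /* Push-constant uniform holding the dynamic intel_msaa_flags value for
    * this fragment shader. */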
   inline elk_fs_reg
   dynamic_msaa_flags(const struct elk_wm_prog_data *wm_prog_data)
   {
      return elk_fs_reg(UNIFORM, wm_prog_data->msaa_flags_param,
                        ELK_REGISTER_TYPE_UD);
   }

   void
   check_dynamic_msaa_flag(const fs_builder &bld,
                           const struct elk_wm_prog_data *wm_prog_data,
                           enum intel_msaa_flags flag);

   bool
   lower_src_modifiers(elk_fs_visitor *v, elk_bblock_t *block, elk_fs_inst *inst, unsigned i);
}

void elk_shuffle_from_32bit_read(const elk::fs_builder &bld,
                                 const elk_fs_reg &dst,
                                 const elk_fs_reg &src,
                                 uint32_t first_component,
                                 uint32_t components);

elk_fs_reg elk_setup_imm_df(const elk::fs_builder &bld,
                            double v);

elk_fs_reg elk_setup_imm_b(const elk::fs_builder &bld,
                           int8_t v);

elk_fs_reg elk_setup_imm_ub(const elk::fs_builder &bld,
                            uint8_t v);

enum elk_barycentric_mode elk_barycentric_mode(nir_intrinsic_instr *intr);

uint32_t elk_fb_write_msg_control(const elk_fs_inst *inst,
                                  const struct elk_wm_prog_data *prog_data);

void elk_compute_urb_setup_index(struct elk_wm_prog_data *wm_prog_data);

bool elk_nir_lower_simd(nir_shader *nir, unsigned dispatch_width);

elk_fs_reg elk_sample_mask_reg(const elk::fs_builder &bld);
void elk_emit_predicate_on_sample_mask(const elk::fs_builder &bld, elk_fs_inst *inst);

int elk_get_subgroup_id_param_index(const intel_device_info *devinfo,
                                    const elk_stage_prog_data *prog_data);

void nir_to_elk(elk_fs_visitor *s);

#endif /* ELK_FS_H */