xref: /aosp_15_r20/external/mesa3d/src/intel/compiler/elk/elk_vec4.h (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #ifndef ELK_VEC4_H
25 #define ELK_VEC4_H
26 
27 #include "elk_shader.h"
28 
29 #ifdef __cplusplus
30 #include "elk_ir_vec4.h"
31 #include "elk_ir_performance.h"
32 #include "elk_vec4_builder.h"
33 #include "elk_vec4_live_variables.h"
34 #endif
35 
36 #include "compiler/glsl/ir.h"
37 #include "compiler/nir/nir.h"
38 
39 
40 #ifdef __cplusplus
41 extern "C" {
42 #endif
43 
44 const unsigned *
45 elk_vec4_generate_assembly(const struct elk_compiler *compiler,
46                            const struct elk_compile_params *params,
47                            const nir_shader *nir,
48                            struct elk_vue_prog_data *prog_data,
49                            const struct elk_cfg_t *cfg,
50                            const elk::performance &perf,
51                            bool debug_enabled);
52 
53 #ifdef __cplusplus
54 } /* extern "C" */
55 
56 namespace elk {
57 /**
58  * The vertex shader front-end.
59  *
60  * Translates either GLSL IR or Mesa IR (for ARB_vertex_program and
61  * fixed-function) into VS IR.
62  */
63 class vec4_visitor : public elk_backend_shader
64 {
65 public:
66    vec4_visitor(const struct elk_compiler *compiler,
67                 const struct elk_compile_params *params,
68                 const struct elk_sampler_prog_key_data *key,
69                 struct elk_vue_prog_data *prog_data,
70                 const nir_shader *shader,
71                 bool no_spills,
72                 bool debug_enabled);
73 
dst_null_f()74    dst_reg dst_null_f()
75    {
76       return dst_reg(elk_null_reg());
77    }
78 
dst_null_df()79    dst_reg dst_null_df()
80    {
81       return dst_reg(retype(elk_null_reg(), ELK_REGISTER_TYPE_DF));
82    }
83 
dst_null_d()84    dst_reg dst_null_d()
85    {
86       return dst_reg(retype(elk_null_reg(), ELK_REGISTER_TYPE_D));
87    }
88 
dst_null_ud()89    dst_reg dst_null_ud()
90    {
91       return dst_reg(retype(elk_null_reg(), ELK_REGISTER_TYPE_UD));
92    }
93 
94    const struct elk_sampler_prog_key_data * const key_tex;
95    struct elk_vue_prog_data * const prog_data;
96    char *fail_msg;
97    bool failed;
98 
99    /**
100     * GLSL IR currently being processed, which is associated with our
101     * driver IR instructions for debugging purposes.
102     */
103    const void *base_ir;
104    const char *current_annotation;
105 
106    int first_non_payload_grf;
107    unsigned ubo_push_start[4];
108    unsigned push_length;
109    unsigned int max_grf;
110    elk_analysis<elk::vec4_live_variables, elk_backend_shader> live_analysis;
111    elk_analysis<elk::performance, vec4_visitor> performance_analysis;
112 
113    /* Regs for vertex results.  Generated at ir_variable visiting time
114     * for the ir->location's used.
115     */
116    dst_reg output_reg[VARYING_SLOT_TESS_MAX][4];
117    unsigned output_num_components[VARYING_SLOT_TESS_MAX][4];
118    const char *output_reg_annotation[VARYING_SLOT_TESS_MAX];
119    int uniforms;
120 
121    bool run();
122    void fail(const char *msg, ...);
123 
124    int setup_uniforms(int payload_reg);
125 
126    bool reg_allocate_trivial();
127    bool reg_allocate();
128    void evaluate_spill_costs(float *spill_costs, bool *no_spill);
129    int choose_spill_reg(struct ra_graph *g);
130    void spill_reg(unsigned spill_reg);
131    void move_grf_array_access_to_scratch();
132    void split_uniform_registers();
133    void setup_push_ranges();
134    virtual void invalidate_analysis(elk::analysis_dependency_class c);
135    void split_virtual_grfs();
136    bool opt_vector_float();
137    bool opt_reduce_swizzle();
138    bool dead_code_eliminate();
139    bool opt_cmod_propagation();
140    bool opt_copy_propagation(bool do_constant_prop = true);
141    bool opt_cse_local(elk_bblock_t *block, const vec4_live_variables &live);
142    bool opt_cse();
143    bool opt_algebraic();
144    bool opt_register_coalesce();
145    bool eliminate_find_live_channel();
146    bool is_dep_ctrl_unsafe(const vec4_instruction *inst);
147    void opt_set_dependency_control();
148    void opt_schedule_instructions();
149    void convert_to_hw_regs();
150    void fixup_3src_null_dest();
151 
152    bool is_supported_64bit_region(vec4_instruction *inst, unsigned arg);
153    bool lower_simd_width();
154    bool scalarize_df();
155    bool lower_64bit_mad_to_mul_add();
156    void apply_logical_swizzle(struct elk_reg *hw_reg,
157                               vec4_instruction *inst, int arg);
158 
159    vec4_instruction *emit(vec4_instruction *inst);
160 
161    vec4_instruction *emit(enum elk_opcode opcode);
162    vec4_instruction *emit(enum elk_opcode opcode, const dst_reg &dst);
163    vec4_instruction *emit(enum elk_opcode opcode, const dst_reg &dst,
164                           const src_reg &src0);
165    vec4_instruction *emit(enum elk_opcode opcode, const dst_reg &dst,
166                           const src_reg &src0, const src_reg &src1);
167    vec4_instruction *emit(enum elk_opcode opcode, const dst_reg &dst,
168                           const src_reg &src0, const src_reg &src1,
169                           const src_reg &src2);
170 
171    vec4_instruction *emit_before(elk_bblock_t *block,
172                                  vec4_instruction *inst,
173 				 vec4_instruction *new_inst);
174 
175 #define EMIT1(op) vec4_instruction *op(const dst_reg &, const src_reg &);
176 #define EMIT2(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &);
177 #define EMIT3(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &, const src_reg &);
178    EMIT1(MOV)
179    EMIT1(NOT)
180    EMIT1(RNDD)
181    EMIT1(RNDE)
182    EMIT1(RNDZ)
183    EMIT1(FRC)
184    EMIT1(F32TO16)
185    EMIT1(F16TO32)
186    EMIT2(ADD)
187    EMIT2(MUL)
188    EMIT2(MACH)
189    EMIT2(MAC)
190    EMIT2(AND)
191    EMIT2(OR)
192    EMIT2(XOR)
193    EMIT2(DP3)
194    EMIT2(DP4)
195    EMIT2(DPH)
196    EMIT2(SHL)
197    EMIT2(SHR)
198    EMIT2(ASR)
199    vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
200 			 enum elk_conditional_mod condition);
201    vec4_instruction *IF(src_reg src0, src_reg src1,
202                         enum elk_conditional_mod condition);
203    vec4_instruction *IF(enum elk_predicate predicate);
204    EMIT1(SCRATCH_READ)
205    EMIT2(SCRATCH_WRITE)
206    EMIT3(LRP)
207    EMIT1(BFREV)
208    EMIT3(BFE)
209    EMIT2(BFI1)
210    EMIT3(BFI2)
211    EMIT1(FBH)
212    EMIT1(FBL)
213    EMIT1(CBIT)
214    EMIT1(LZD)
215    EMIT3(MAD)
216    EMIT2(ADDC)
217    EMIT2(SUBB)
218    EMIT1(DIM)
219 
220 #undef EMIT1
221 #undef EMIT2
222 #undef EMIT3
223 
224    vec4_instruction *emit_minmax(enum elk_conditional_mod conditionalmod, dst_reg dst,
225                                  src_reg src0, src_reg src1);
226 
227    /**
228     * Copy any live channel from \p src to the first channel of the
229     * result.
230     */
231    src_reg emit_uniformize(const src_reg &src);
232 
233    /** Fix all float operands of a 3-source instruction. */
234    void fix_float_operands(src_reg op[3], nir_alu_instr *instr);
235 
236    src_reg fix_3src_operand(const src_reg &src);
237 
238    vec4_instruction *emit_math(enum elk_opcode opcode, const dst_reg &dst, const src_reg &src0,
239                                const src_reg &src1 = src_reg());
240 
241    src_reg fix_math_operand(const src_reg &src);
242 
243    void emit_pack_half_2x16(dst_reg dst, src_reg src0);
244    void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
245    void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0);
246    void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0);
247    void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0);
248    void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0);
249 
250    src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate,
251                           src_reg surface);
252 
253    void emit_ndc_computation();
254    void emit_psiz_and_flags(dst_reg reg);
255    vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying, int comp);
256    virtual void emit_urb_slot(dst_reg reg, int varying);
257 
258    src_reg get_scratch_offset(elk_bblock_t *block, vec4_instruction *inst,
259 			      src_reg *reladdr, int reg_offset);
260    void emit_scratch_read(elk_bblock_t *block, vec4_instruction *inst,
261 			  dst_reg dst,
262 			  src_reg orig_src,
263 			  int base_offset);
264    void emit_scratch_write(elk_bblock_t *block, vec4_instruction *inst,
265 			   int base_offset);
266    void emit_pull_constant_load_reg(dst_reg dst,
267                                     src_reg surf_index,
268                                     src_reg offset,
269                                     elk_bblock_t *before_block,
270                                     vec4_instruction *before_inst);
271    src_reg emit_resolve_reladdr(int scratch_loc[], elk_bblock_t *block,
272                                 vec4_instruction *inst, src_reg src);
273 
274    void resolve_ud_negate(src_reg *reg);
275 
276    void emit_shader_float_controls_execution_mode();
277 
278    bool lower_minmax();
279 
280    src_reg get_timestamp();
281 
282    virtual void dump_instruction_to_file(const elk_backend_instruction *inst, FILE *file) const;
283 
284    bool optimize_predicate(nir_alu_instr *instr, enum elk_predicate *predicate);
285 
286    void emit_conversion_from_double(dst_reg dst, src_reg src);
287    void emit_conversion_to_double(dst_reg dst, src_reg src);
288 
289    vec4_instruction *shuffle_64bit_data(dst_reg dst, src_reg src,
290                                         bool for_write,
291                                         bool for_scratch = false,
292                                         elk_bblock_t *block = NULL,
293                                         vec4_instruction *ref = NULL);
294 
295    virtual void emit_nir_code();
296    virtual void nir_setup_uniforms();
297    virtual void nir_emit_impl(nir_function_impl *impl);
298    virtual void nir_emit_cf_list(exec_list *list);
299    virtual void nir_emit_if(nir_if *if_stmt);
300    virtual void nir_emit_loop(nir_loop *loop);
301    virtual void nir_emit_block(nir_block *block);
302    virtual void nir_emit_instr(nir_instr *instr);
303    virtual void nir_emit_load_const(nir_load_const_instr *instr);
304    src_reg get_nir_ssbo_intrinsic_index(nir_intrinsic_instr *instr);
305    virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
306    virtual void nir_emit_alu(nir_alu_instr *instr);
307    virtual void nir_emit_jump(nir_jump_instr *instr);
308    virtual void nir_emit_texture(nir_tex_instr *instr);
309    virtual void nir_emit_undef(nir_undef_instr *instr);
310    virtual void nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr);
311 
312    dst_reg get_nir_def(const nir_def &def, enum elk_reg_type type);
313    dst_reg get_nir_def(const nir_def &def, nir_alu_type type);
314    dst_reg get_nir_def(const nir_def &def);
315    src_reg get_nir_src(const nir_src &src, enum elk_reg_type type,
316                        unsigned num_components = 4);
317    src_reg get_nir_src(const nir_src &src, nir_alu_type type,
318                        unsigned num_components = 4);
319    src_reg get_nir_src(const nir_src &src,
320                        unsigned num_components = 4);
321    src_reg get_nir_src_imm(const nir_src &src);
322    src_reg get_indirect_offset(nir_intrinsic_instr *instr);
323 
324    dst_reg *nir_ssa_values;
325 
326 protected:
327    void emit_vertex();
328    void setup_payload_interference(struct ra_graph *g, int first_payload_node,
329                                    int reg_node_count);
330    virtual void setup_payload() = 0;
331    virtual void emit_prolog() = 0;
332    virtual void emit_thread_end() = 0;
333    virtual void emit_urb_write_header(int mrf) = 0;
334    virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
335    virtual void gs_emit_vertex(int stream_id);
336    virtual void gs_end_primitive();
337 
338 private:
339    /**
340     * If true, then register allocation should fail instead of spilling.
341     */
342    const bool no_spills;
343 
344    unsigned last_scratch; /**< measured in 32-byte (register size) units */
345 };
346 
347 } /* namespace elk */
348 #endif /* __cplusplus */
349 
350 #endif /* ELK_VEC4_H */
351