/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef ELK_VEC4_H
#define ELK_VEC4_H

#include "elk_shader.h"

#ifdef __cplusplus
#include "elk_ir_vec4.h"
#include "elk_ir_performance.h"
#include "elk_vec4_builder.h"
#include "elk_vec4_live_variables.h"
#endif

#include "compiler/glsl/ir.h"
#include "compiler/nir/nir.h"


#ifdef __cplusplus
extern "C" {
#endif

const unsigned *
elk_vec4_generate_assembly(const struct elk_compiler *compiler,
                           const struct elk_compile_params *params,
                           const nir_shader *nir,
                           struct elk_vue_prog_data *prog_data,
                           const struct elk_cfg_t *cfg,
                           const elk::performance &perf,
                           bool debug_enabled);

#ifdef __cplusplus
} /* extern "C" */

namespace elk {
/**
 * The vertex shader front-end.
 *
 * Translates NIR (originating from either GLSL IR or Mesa IR, the latter
 * for ARB_vertex_program and fixed-function) into VS IR.
 */
class vec4_visitor : public elk_backend_shader
{
public:
   vec4_visitor(const struct elk_compiler *compiler,
                const struct elk_compile_params *params,
                const struct elk_sampler_prog_key_data *key,
                struct elk_vue_prog_data *prog_data,
                const nir_shader *shader,
                bool no_spills,
                bool debug_enabled);

   dst_reg dst_null_f()
   {
      return dst_reg(elk_null_reg());
   }

   dst_reg dst_null_df()
   {
      return dst_reg(retype(elk_null_reg(), ELK_REGISTER_TYPE_DF));
   }

   dst_reg dst_null_d()
   {
      return dst_reg(retype(elk_null_reg(), ELK_REGISTER_TYPE_D));
   }

   dst_reg dst_null_ud()
   {
      return dst_reg(retype(elk_null_reg(), ELK_REGISTER_TYPE_UD));
   }

   const struct elk_sampler_prog_key_data * const key_tex;
   struct elk_vue_prog_data * const prog_data;
   char *fail_msg;
   bool failed;

   /**
    * GLSL IR currently being processed, which is associated with our
    * driver IR instructions for debugging purposes.
    */
   const void *base_ir;
   const char *current_annotation;

   int first_non_payload_grf;
   unsigned ubo_push_start[4];
   unsigned push_length;
   unsigned int max_grf;
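   /* Cached analyses of the program.  A plausible reading of the
    * elk_analysis<> wrapper (assumption, not stated in this header): results
    * are computed lazily on first use and retained until a pass reports a
    * change via invalidate_analysis() below, at which point they are
    * recomputed on next use.
    */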
   elk_analysis<elk::vec4_live_variables, elk_backend_shader> live_analysis;
   elk_analysis<elk::performance, vec4_visitor> performance_analysis;

   /* Regs for vertex results.  Generated at ir_variable visiting time
    * for each ir->location used.
    */
   dst_reg output_reg[VARYING_SLOT_TESS_MAX][4];
   unsigned output_num_components[VARYING_SLOT_TESS_MAX][4];
   const char *output_reg_annotation[VARYING_SLOT_TESS_MAX];
   int uniforms;

   bool run();
   void fail(const char *msg, ...);

   int setup_uniforms(int payload_reg);

   bool reg_allocate_trivial();
   bool reg_allocate();
   void evaluate_spill_costs(float *spill_costs, bool *no_spill);
   int choose_spill_reg(struct ra_graph *g);
   void spill_reg(unsigned spill_reg);
   void move_grf_array_access_to_scratch();
   void split_uniform_registers();
   void setup_push_ranges();
   virtual void invalidate_analysis(elk::analysis_dependency_class c);
   void split_virtual_grfs();
   bool opt_vector_float();
   bool opt_reduce_swizzle();
   bool dead_code_eliminate();
   bool opt_cmod_propagation();
   bool opt_copy_propagation(bool do_constant_prop = true);
   bool opt_cse_local(elk_bblock_t *block, const vec4_live_variables &live);
   bool opt_cse();
   bool opt_algebraic();
   bool opt_register_coalesce();
   bool eliminate_find_live_channel();
   bool is_dep_ctrl_unsafe(const vec4_instruction *inst);
   void opt_set_dependency_control();
   void opt_schedule_instructions();
   void convert_to_hw_regs();
   void fixup_3src_null_dest();

   bool is_supported_64bit_region(vec4_instruction *inst, unsigned arg);
   bool lower_simd_width();
   bool scalarize_df();
   bool lower_64bit_mad_to_mul_add();
   void apply_logical_swizzle(struct elk_reg *hw_reg,
                              vec4_instruction *inst, int arg);

   vec4_instruction *emit(vec4_instruction *inst);

   vec4_instruction *emit(enum elk_opcode opcode);
   vec4_instruction *emit(enum elk_opcode opcode, const dst_reg &dst);
   vec4_instruction *emit(enum elk_opcode opcode, const dst_reg &dst,
                          const src_reg &src0);
   vec4_instruction *emit(enum elk_opcode opcode, const dst_reg &dst,
                          const src_reg &src0, const src_reg &src1);
   vec4_instruction *emit(enum elk_opcode opcode, const dst_reg &dst,
                          const src_reg &src0, const src_reg &src1,
                          const src_reg &src2);

   vec4_instruction *emit_before(elk_bblock_t *block,
                                 vec4_instruction *inst,
                                 vec4_instruction *new_inst);

#define EMIT1(op) vec4_instruction *op(const dst_reg &, const src_reg &);
#define EMIT2(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &);
#define EMIT3(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &, const src_reg &);
   EMIT1(MOV)
   EMIT1(NOT)
   EMIT1(RNDD)
   EMIT1(RNDE)
   EMIT1(RNDZ)
   EMIT1(FRC)
   EMIT1(F32TO16)
   EMIT1(F16TO32)
   EMIT2(ADD)
   EMIT2(MUL)
   EMIT2(MACH)
   EMIT2(MAC)
   EMIT2(AND)
   EMIT2(OR)
   EMIT2(XOR)
   EMIT2(DP3)
   EMIT2(DP4)
   EMIT2(DPH)
   EMIT2(SHL)
   EMIT2(SHR)
   EMIT2(ASR)
   vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
                         enum elk_conditional_mod condition);
   vec4_instruction *IF(src_reg src0, src_reg src1,
                        enum elk_conditional_mod condition);
   vec4_instruction *IF(enum elk_predicate predicate);
   EMIT1(SCRATCH_READ)
   EMIT2(SCRATCH_WRITE)
   EMIT3(LRP)
   EMIT1(BFREV)
   EMIT3(BFE)
   EMIT2(BFI1)
   EMIT3(BFI2)
   EMIT1(FBH)
   EMIT1(FBL)
   EMIT1(CBIT)
   EMIT1(LZD)
   EMIT3(MAD)
   EMIT2(ADDC)
   EMIT2(SUBB)
   EMIT1(DIM)

#undef EMIT1
#undef EMIT2
#undef EMIT3

   vec4_instruction *emit_minmax(enum elk_conditional_mod conditionalmod, dst_reg dst,
                                 src_reg src0, src_reg src1);
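   /* Illustrative sketch, not part of the interface: the EMIT*-generated
    * helpers above construct a vec4_instruction without appending it, so
    * translation code typically builds and emits in one step, e.g.
    *
    *    emit(ADD(dst, src0, src1));
    *    emit(CMP(dst_null_d(), src0, src1, ELK_CONDITIONAL_GE));
    */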
   /**
    * Copy any live channel from \p src to the first channel of the
    * result.
    */
   src_reg emit_uniformize(const src_reg &src);

   /** Fix all float operands of a 3-source instruction. */
   void fix_float_operands(src_reg op[3], nir_alu_instr *instr);

   src_reg fix_3src_operand(const src_reg &src);

   vec4_instruction *emit_math(enum elk_opcode opcode, const dst_reg &dst, const src_reg &src0,
                               const src_reg &src1 = src_reg());

   src_reg fix_math_operand(const src_reg &src);

   void emit_pack_half_2x16(dst_reg dst, src_reg src0);
   void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
   void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0);
   void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0);
   void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0);
   void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0);

   src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate,
                          src_reg surface);

   void emit_ndc_computation();
   void emit_psiz_and_flags(dst_reg reg);
   vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying, int comp);
   virtual void emit_urb_slot(dst_reg reg, int varying);

   src_reg get_scratch_offset(elk_bblock_t *block, vec4_instruction *inst,
                              src_reg *reladdr, int reg_offset);
   void emit_scratch_read(elk_bblock_t *block, vec4_instruction *inst,
                          dst_reg dst,
                          src_reg orig_src,
                          int base_offset);
   void emit_scratch_write(elk_bblock_t *block, vec4_instruction *inst,
                           int base_offset);
   void emit_pull_constant_load_reg(dst_reg dst,
                                    src_reg surf_index,
                                    src_reg offset,
                                    elk_bblock_t *before_block,
                                    vec4_instruction *before_inst);
   src_reg emit_resolve_reladdr(int scratch_loc[], elk_bblock_t *block,
                                vec4_instruction *inst, src_reg src);

   void resolve_ud_negate(src_reg *reg);

   void emit_shader_float_controls_execution_mode();

   bool lower_minmax();

   src_reg get_timestamp();

   virtual void dump_instruction_to_file(const elk_backend_instruction *inst, FILE *file) const;

   bool optimize_predicate(nir_alu_instr *instr, enum elk_predicate *predicate);

   void emit_conversion_from_double(dst_reg dst, src_reg src);
   void emit_conversion_to_double(dst_reg dst, src_reg src);

   vec4_instruction *shuffle_64bit_data(dst_reg dst, src_reg src,
                                        bool for_write,
                                        bool for_scratch = false,
                                        elk_bblock_t *block = NULL,
                                        vec4_instruction *ref = NULL);
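   /* NIR translation entry points.  The expected flow (an assumption based
    * on the signatures here, not spelled out in this header) is that
    * emit_nir_code() walks the nir_shader and dispatches through the
    * nir_emit_*() hooks below; stage-specific subclasses override
    * individual hooks where their behavior differs.
    */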
   virtual void emit_nir_code();
   virtual void nir_setup_uniforms();
   virtual void nir_emit_impl(nir_function_impl *impl);
   virtual void nir_emit_cf_list(exec_list *list);
   virtual void nir_emit_if(nir_if *if_stmt);
   virtual void nir_emit_loop(nir_loop *loop);
   virtual void nir_emit_block(nir_block *block);
   virtual void nir_emit_instr(nir_instr *instr);
   virtual void nir_emit_load_const(nir_load_const_instr *instr);
   src_reg get_nir_ssbo_intrinsic_index(nir_intrinsic_instr *instr);
   virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
   virtual void nir_emit_alu(nir_alu_instr *instr);
   virtual void nir_emit_jump(nir_jump_instr *instr);
   virtual void nir_emit_texture(nir_tex_instr *instr);
   virtual void nir_emit_undef(nir_undef_instr *instr);
   virtual void nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr);

   dst_reg get_nir_def(const nir_def &def, enum elk_reg_type type);
   dst_reg get_nir_def(const nir_def &def, nir_alu_type type);
   dst_reg get_nir_def(const nir_def &def);
   src_reg get_nir_src(const nir_src &src, enum elk_reg_type type,
                       unsigned num_components = 4);
   src_reg get_nir_src(const nir_src &src, nir_alu_type type,
                       unsigned num_components = 4);
   src_reg get_nir_src(const nir_src &src,
                       unsigned num_components = 4);
   src_reg get_nir_src_imm(const nir_src &src);
   src_reg get_indirect_offset(nir_intrinsic_instr *instr);

   dst_reg *nir_ssa_values;

protected:
   void emit_vertex();
   void setup_payload_interference(struct ra_graph *g, int first_payload_node,
                                   int reg_node_count);
   virtual void setup_payload() = 0;
   virtual void emit_prolog() = 0;
   virtual void emit_thread_end() = 0;
   virtual void emit_urb_write_header(int mrf) = 0;
   virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
   virtual void gs_emit_vertex(int stream_id);
   virtual void gs_end_primitive();

private:
   /**
    * If true, then register allocation should fail instead of spilling.
    */
   const bool no_spills;

   unsigned last_scratch; /**< measured in 32-byte (register size) units */
};

} /* namespace elk */
#endif /* __cplusplus */

#endif /* ELK_VEC4_H */