/*
 * Copyright © 2013 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file elk_vec4_tes.cpp
 *
 * Tessellation evaluation shader-specific code derived from the vec4_visitor class.
 */

#include "elk_vec4_tes.h"
#include "elk_cfg.h"
#include "dev/intel_debug.h"

namespace elk {

vec4_tes_visitor::vec4_tes_visitor(const struct elk_compiler *compiler,
                                   const struct elk_compile_params *params,
                                   const struct elk_tes_prog_key *key,
                                   struct elk_tes_prog_data *prog_data,
                                   const nir_shader *shader,
                                   bool debug_enabled)
   : vec4_visitor(compiler, params, &key->base.tex, &prog_data->base,
                  shader, false, debug_enabled)
{
}

void
vec4_tes_visitor::setup_payload()
{
   int reg = 0;

   /* The payload always contains important data in r0 and r1, which contain
    * the URB handles that are passed on to the URB write at the end
    * of the thread.
    */
   reg += 2;

   reg = setup_uniforms(reg);

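   /* Rewrite ATTR sources so they point at the pushed URB data in the
    * payload: each input slot is a vec4 and each GRF holds two slots, so
    * slot N lands in the low or high half of GRF (reg + N / 2).
    */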
   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
      for (int i = 0; i < 3; i++) {
         if (inst->src[i].file != ATTR)
            continue;

         unsigned slot = inst->src[i].nr + inst->src[i].offset / 16;
         struct elk_reg grf = elk_vec4_grf(reg + slot / 2, 4 * (slot % 2));
         grf = stride(grf, 0, 4, 1);
         grf.swizzle = inst->src[i].swizzle;
         grf.type = inst->src[i].type;
         grf.abs = inst->src[i].abs;
         grf.negate = inst->src[i].negate;
         inst->src[i] = grf;
      }
   }

   reg += 8 * prog_data->urb_read_length;

   this->first_non_payload_grf = reg;
}


void
vec4_tes_visitor::emit_prolog()
{
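   /* Build the message header that the URB read messages in
    * nir_emit_intrinsic() use to pull TES inputs.
    */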
   input_read_header = src_reg(this, glsl_uvec4_type());
   emit(ELK_TES_OPCODE_CREATE_INPUT_READ_HEADER, dst_reg(input_read_header));

   this->current_annotation = NULL;
}


void
vec4_tes_visitor::emit_urb_write_header(int mrf)
{
   /* No need to do anything for DS; an implied write to this MRF will be
    * performed by ELK_VEC4_VS_OPCODE_URB_WRITE.
    */
   (void) mrf;
}


vec4_instruction *
vec4_tes_visitor::emit_urb_write_opcode(bool complete)
{
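   /* When this is the final (complete) write, set EOT so the SEND also
    * terminates the thread (see emit_thread_end()).
    */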
   vec4_instruction *inst = emit(ELK_VEC4_VS_OPCODE_URB_WRITE);
   inst->urb_write_flags = complete ?
      ELK_URB_WRITE_EOT_COMPLETE : ELK_URB_WRITE_NO_FLAGS;

   return inst;
}

void
vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
{
   const struct elk_tes_prog_data *tes_prog_data =
      (const struct elk_tes_prog_data *) prog_data;

   switch (instr->intrinsic) {
   case nir_intrinsic_load_tess_coord:
      /* gl_TessCoord is part of the payload in g1 channels 0-2 and 4-6. */
      emit(MOV(get_nir_def(instr->def, ELK_REGISTER_TYPE_F),
               src_reg(elk_vec8_grf(1, 0))));
      break;
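   /* The outer and inner tessellation levels are delivered in the patch URB
    * header, pushed here as ATTR slots 0 and 1.  The factors come in reverse
    * component order, hence the reversed (WZYX) swizzles below.
    */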
   case nir_intrinsic_load_tess_level_outer:
      if (tes_prog_data->domain == INTEL_TESS_DOMAIN_ISOLINE) {
         emit(MOV(get_nir_def(instr->def, ELK_REGISTER_TYPE_F),
                  swizzle(src_reg(ATTR, 1, glsl_vec4_type()),
                          ELK_SWIZZLE_ZWZW)));
      } else {
         emit(MOV(get_nir_def(instr->def, ELK_REGISTER_TYPE_F),
                  swizzle(src_reg(ATTR, 1, glsl_vec4_type()),
                          ELK_SWIZZLE_WZYX)));
      }
      break;
   case nir_intrinsic_load_tess_level_inner:
      if (tes_prog_data->domain == INTEL_TESS_DOMAIN_QUAD) {
         emit(MOV(get_nir_def(instr->def, ELK_REGISTER_TYPE_F),
                  swizzle(src_reg(ATTR, 0, glsl_vec4_type()),
                          ELK_SWIZZLE_WZYX)));
      } else {
         emit(MOV(get_nir_def(instr->def, ELK_REGISTER_TYPE_F),
                  src_reg(ATTR, 1, glsl_float_type())));
      }
      break;
   case nir_intrinsic_load_primitive_id:
      emit(ELK_TES_OPCODE_GET_PRIMITIVE_ID,
           get_nir_def(instr->def, ELK_REGISTER_TYPE_UD));
      break;

   case nir_intrinsic_load_input:
   case nir_intrinsic_load_per_vertex_input: {
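      /* Inputs at a small constant offset are read directly from the pushed
       * payload (ATTR); indirect offsets, or offsets beyond the pushed
       * range, fall back to an explicit URB read message below.
       */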
      assert(instr->def.bit_size == 32);
      src_reg indirect_offset = get_indirect_offset(instr);
      unsigned imm_offset = instr->const_index[0];
      src_reg header = input_read_header;
      unsigned first_component = nir_intrinsic_component(instr);

      if (indirect_offset.file != BAD_FILE) {
         src_reg clamped_indirect_offset = src_reg(this, glsl_uvec4_type());

         /* Page 190 of "Volume 7: 3D Media GPGPU Engine (Haswell)" says the
          * valid range of the offset is [0, 0FFFFFFFh].
          */
         emit_minmax(ELK_CONDITIONAL_L,
                     dst_reg(clamped_indirect_offset),
                     retype(indirect_offset, ELK_REGISTER_TYPE_UD),
                     elk_imm_ud(0x0fffffffu));

         header = src_reg(this, glsl_uvec4_type());
         emit(ELK_TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header),
              input_read_header, clamped_indirect_offset);
      } else {
         /* Arbitrarily only push up to 24 vec4 slots worth of data,
          * which is 12 registers (since each holds 2 vec4 slots).
          */
         const unsigned max_push_slots = 24;
         if (imm_offset < max_push_slots) {
            src_reg src = src_reg(ATTR, imm_offset, glsl_ivec4_type());
            src.swizzle = ELK_SWZ_COMP_INPUT(first_component);

            emit(MOV(get_nir_def(instr->def, ELK_REGISTER_TYPE_D), src));

            prog_data->urb_read_length =
               MAX2(prog_data->urb_read_length,
                    DIV_ROUND_UP(imm_offset + 1, 2));
            break;
         }
      }

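      /* Pull path: read the input from the URB using the header built above
       * (with any indirect offset already folded in).
       */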
      dst_reg temp(this, glsl_ivec4_type());
      vec4_instruction *read =
         emit(ELK_VEC4_OPCODE_URB_READ, temp, src_reg(header));
      read->offset = imm_offset;
      read->urb_write_flags = ELK_URB_WRITE_PER_SLOT_OFFSET;

      src_reg src = src_reg(temp);
      src.swizzle = ELK_SWZ_COMP_INPUT(first_component);

      /* Copy to target.  We might end up with some funky writemasks landing
       * in here, but we really don't want them in the above pseudo-ops.
       */
      dst_reg dst = get_nir_def(instr->def, ELK_REGISTER_TYPE_D);
      dst.writemask = elk_writemask_for_size(instr->num_components);
      emit(MOV(dst, src));
      break;
   }
   default:
      vec4_visitor::nir_emit_intrinsic(instr);
   }
}


void
vec4_tes_visitor::emit_thread_end()
{
   /* For DS, we always end the thread by emitting a single vertex.
    * emit_urb_write_opcode() will take care of setting the eot flag on the
    * SEND instruction.
    */
   emit_vertex();
}

} /* namespace elk */