xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/r600/sfn/sfn_shader_gs.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /* -*- mesa-c++  -*-
2  * Copyright 2022 Collabora LTD
3  * Author: Gert Wollny <[email protected]>
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "sfn_shader_gs.h"
8 
9 #include "sfn_debug.h"
10 #include "sfn_instr_fetch.h"
11 
12 namespace r600 {
13 
GeometryShader(const r600_shader_key & key)14 GeometryShader::GeometryShader(const r600_shader_key& key):
15     Shader("GS", key.gs.first_atomic_counter),
16     m_tri_strip_adj_fix(key.gs.tri_strip_adj_fix)
17 {
18 }
19 
20 bool
do_scan_instruction(nir_instr * instr)21 GeometryShader::do_scan_instruction(nir_instr *instr)
22 {
23    if (instr->type != nir_instr_type_intrinsic)
24       return false;
25 
26    nir_intrinsic_instr *ii = nir_instr_as_intrinsic(instr);
27 
28    switch (ii->intrinsic) {
29    case nir_intrinsic_store_output:
30       return process_store_output(ii);
31    case nir_intrinsic_load_per_vertex_input:
32       return process_load_input(ii);
33    default:
34       return false;
35    }
36 }
37 
38 bool
process_store_output(nir_intrinsic_instr * instr)39 GeometryShader::process_store_output(nir_intrinsic_instr *instr)
40 {
41    auto location = static_cast<gl_varying_slot>(nir_intrinsic_io_semantics(instr).location);
42    auto index = nir_src_as_const_value(instr->src[1]);
43    assert(index);
44 
45    auto driver_location = nir_intrinsic_base(instr) + index->u32;
46 
47    if (location == VARYING_SLOT_COL0 || location == VARYING_SLOT_COL1 ||
48        (location >= VARYING_SLOT_VAR0 && location <= VARYING_SLOT_VAR31) ||
49        (location >= VARYING_SLOT_TEX0 && location <= VARYING_SLOT_TEX7) ||
50        location == VARYING_SLOT_BFC0 || location == VARYING_SLOT_BFC1 ||
51        location == VARYING_SLOT_PNTC || location == VARYING_SLOT_CLIP_VERTEX ||
52        location == VARYING_SLOT_CLIP_DIST0 || location == VARYING_SLOT_CLIP_DIST1 ||
53        location == VARYING_SLOT_PRIMITIVE_ID || location == VARYING_SLOT_POS ||
54        location == VARYING_SLOT_PSIZ || location == VARYING_SLOT_LAYER ||
55        location == VARYING_SLOT_VIEWPORT || location == VARYING_SLOT_FOGC) {
56 
57       auto write_mask = nir_intrinsic_write_mask(instr);
58       ShaderOutput output(driver_location, write_mask, location);
59 
60       if (nir_intrinsic_io_semantics(instr).no_varying)
61          output.set_no_varying(true);
62       if (nir_intrinsic_io_semantics(instr).location != VARYING_SLOT_CLIP_VERTEX)
63          add_output(output);
64 
65       if (location == VARYING_SLOT_VIEWPORT) {
66          m_out_viewport = true;
67          m_out_misc_write = true;
68       }
69 
70       if (location == VARYING_SLOT_CLIP_DIST0 || location == VARYING_SLOT_CLIP_DIST1) {
71          auto write_mask = nir_intrinsic_write_mask(instr);
72          m_cc_dist_mask |= write_mask << (4 * (location - VARYING_SLOT_CLIP_DIST0));
73          m_clip_dist_write |= write_mask << (4 * (location - VARYING_SLOT_CLIP_DIST0));
74       }
75 
76       if (m_noutputs <= driver_location &&
77           nir_intrinsic_io_semantics(instr).location != VARYING_SLOT_CLIP_VERTEX)
78          m_noutputs = driver_location + 1;
79 
80       return true;
81    }
82    return false;
83 }
84 
85 bool
process_load_input(nir_intrinsic_instr * instr)86 GeometryShader::process_load_input(nir_intrinsic_instr *instr)
87 {
88    auto location = static_cast<gl_varying_slot>(nir_intrinsic_io_semantics(instr).location);
89    auto index = nir_src_as_const_value(instr->src[1]);
90    assert(index);
91 
92    auto driver_location = nir_intrinsic_base(instr) + index->u32;
93 
94    if (location == VARYING_SLOT_POS || location == VARYING_SLOT_PSIZ ||
95        location == VARYING_SLOT_FOGC || location == VARYING_SLOT_CLIP_VERTEX ||
96        location == VARYING_SLOT_CLIP_DIST0 || location == VARYING_SLOT_CLIP_DIST1 ||
97        location == VARYING_SLOT_COL0 || location == VARYING_SLOT_COL1 ||
98        location == VARYING_SLOT_BFC0 || location == VARYING_SLOT_BFC1 ||
99        location == VARYING_SLOT_PNTC ||
100        (location >= VARYING_SLOT_VAR0 && location <= VARYING_SLOT_VAR31) ||
101        (location >= VARYING_SLOT_TEX0 && location <= VARYING_SLOT_TEX7)) {
102 
103       uint64_t bit = 1ull << location;
104       if (!(bit & m_input_mask)) {
105          ShaderInput input(driver_location, location);
106          input.set_ring_offset(16 * driver_location);
107          add_input(input);
108          m_next_input_ring_offset += 16;
109          m_input_mask |= bit;
110       }
111       return true;
112    }
113    return false;
114 }
115 
116 int
do_allocate_reserved_registers()117 GeometryShader::do_allocate_reserved_registers()
118 {
119    const int sel[6] = {0, 0, 0, 1, 1, 1};
120    const int chan[6] = {0, 1, 3, 0, 1, 2};
121 
122    /* Reserve registers used by the shaders (should check how many
123     * components are actually used */
124    for (int i = 0; i < 6; ++i) {
125       m_per_vertex_offsets[i] = value_factory().allocate_pinned_register(sel[i], chan[i]);
126    }
127 
128    m_primitive_id = value_factory().allocate_pinned_register(0, 2);
129    m_invocation_id = value_factory().allocate_pinned_register(1, 3);
130 
131    value_factory().set_virtual_register_base(2);
132 
133    auto zero = value_factory().inline_const(ALU_SRC_0, 0);
134 
135    for (int i = 0; i < 4; ++i) {
136       m_export_base[i] = value_factory().temp_register(0, false);
137       emit_instruction(
138          new AluInstr(op1_mov, m_export_base[i], zero, AluInstr::last_write));
139    }
140 
141    m_ring_item_sizes[0] = m_next_input_ring_offset;
142 
143    /* GS thread with no output workaround - emit a cut at start of GS */
144    if (chip_class() == ISA_CC_R600) {
145       emit_instruction(new EmitVertexInstr(0, true));
146       start_new_block(0);
147    }
148 
149    if (m_tri_strip_adj_fix)
150       emit_adj_fix();
151 
152    return value_factory().next_register_index();
153 }
154 
155 bool
process_stage_intrinsic(nir_intrinsic_instr * intr)156 GeometryShader::process_stage_intrinsic(nir_intrinsic_instr *intr)
157 {
158    switch (intr->intrinsic) {
159    case nir_intrinsic_emit_vertex:
160       return emit_vertex(intr, false);
161    case nir_intrinsic_end_primitive:
162       return emit_vertex(intr, true);
163    case nir_intrinsic_load_primitive_id:
164       return emit_simple_mov(intr->def, 0, m_primitive_id);
165    case nir_intrinsic_load_invocation_id:
166       return emit_simple_mov(intr->def, 0, m_invocation_id);
167    case nir_intrinsic_load_per_vertex_input:
168       return emit_load_per_vertex_input(intr);
169    default:;
170    }
171    return false;
172 }
173 
174 bool
emit_vertex(nir_intrinsic_instr * instr,bool cut)175 GeometryShader::emit_vertex(nir_intrinsic_instr *instr, bool cut)
176 {
177    int stream = nir_intrinsic_stream_id(instr);
178    assert(stream < 4);
179 
180    auto cut_instr = new EmitVertexInstr(stream, cut);
181 
182    for (auto v : m_streamout_data) {
183       if (stream == 0 || v.first != VARYING_SLOT_POS) {
184          v.second->patch_ring(stream, m_export_base[stream]);
185          cut_instr->add_required_instr(v.second);
186          emit_instruction(v.second);
187       } else
188          delete v.second;
189    }
190    m_streamout_data.clear();
191 
192    emit_instruction(cut_instr);
193    start_new_block(0);
194 
195    if (!cut) {
196       auto ir = new AluInstr(op2_add_int,
197                              m_export_base[stream],
198                              m_export_base[stream],
199                              value_factory().literal(m_noutputs),
200                              AluInstr::last_write);
201       emit_instruction(ir);
202    }
203 
204    return true;
205 }
206 
207 bool
store_output(nir_intrinsic_instr * instr)208 GeometryShader::store_output(nir_intrinsic_instr *instr)
209 {
210    if (nir_intrinsic_io_semantics(instr).location == VARYING_SLOT_CLIP_VERTEX)
211       return true;
212 
213    auto location = nir_intrinsic_io_semantics(instr).location;
214    auto index = nir_src_as_const_value(instr->src[1]);
215    assert(index);
216    auto driver_location = nir_intrinsic_base(instr) + index->u32;
217 
218    uint32_t write_mask = nir_intrinsic_write_mask(instr);
219    uint32_t shift = nir_intrinsic_component(instr);
220 
221    RegisterVec4::Swizzle src_swz{7, 7, 7, 7};
222    for (unsigned i = shift; i < 4; ++i) {
223       src_swz[i] = (1 << i) & (write_mask << shift) ? i - shift : 7;
224    }
225 
226    auto out_value = value_factory().src_vec4(instr->src[0], pin_free, src_swz);
227 
228    AluInstr *ir = nullptr;
229    if (m_streamout_data[location]) {
230       const auto& value = m_streamout_data[location]->value();
231       auto tmp = value_factory().temp_vec4(pin_chgr);
232       for (unsigned i = 0; i < 4 - shift; ++i) {
233          if (!(write_mask & (1 << i)))
234             continue;
235          if (out_value[i + shift]->chan() < 4) {
236             ir = new AluInstr(op1_mov,
237                               tmp[i + shift],
238                               out_value[i + shift],
239                               AluInstr::write);
240          } else if (value[i]->chan() < 4) {
241             ir = new AluInstr(op1_mov, tmp[i + shift], value[i], AluInstr::write);
242          } else
243             continue;
244          emit_instruction(ir);
245       }
246       ir->set_alu_flag(alu_last_instr);
247       m_streamout_data[location] = new MemRingOutInstr(cf_mem_ring,
248                                                        MemRingOutInstr::mem_write_ind,
249                                                        tmp,
250                                                        4 * driver_location,
251                                                        instr->num_components,
252                                                        m_export_base[0]);
253    } else {
254 
255       sfn_log << SfnLog::io << "None-streamout ";
256       bool need_copy = shift != 0;
257       if (!need_copy) {
258          for (int i = 0; i < 4; ++i) {
259             if ((write_mask & (1 << i)) && (out_value[i]->chan() != i)) {
260                need_copy = true;
261                break;
262             }
263          }
264       }
265 
266       if (need_copy) {
267          auto tmp = value_factory().temp_vec4(pin_chgr);
268          for (unsigned i = 0; i < 4 - shift; ++i) {
269             if (out_value[i]->chan() < 4) {
270                ir = new AluInstr(op1_mov, tmp[i], out_value[i], AluInstr::write);
271                emit_instruction(ir);
272             }
273          }
274          ir->set_alu_flag(alu_last_instr);
275          m_streamout_data[location] = new MemRingOutInstr(cf_mem_ring,
276                                                           MemRingOutInstr::mem_write_ind,
277                                                           tmp,
278                                                           4 * driver_location,
279                                                           instr->num_components,
280                                                           m_export_base[0]);
281       } else {
282          for (auto i = 0; i < 4; ++i)
283             out_value[i]->set_pin(pin_chgr);
284          m_streamout_data[location] = new MemRingOutInstr(cf_mem_ring,
285                                                           MemRingOutInstr::mem_write_ind,
286                                                           out_value,
287                                                           4 * driver_location,
288                                                           instr->num_components,
289                                                           m_export_base[0]);
290       }
291    }
292 
293    return true;
294 }
295 
296 bool
emit_load_per_vertex_input(nir_intrinsic_instr * instr)297 GeometryShader::emit_load_per_vertex_input(nir_intrinsic_instr *instr)
298 {
299    auto dest = value_factory().dest_vec4(instr->def, pin_group);
300 
301    RegisterVec4::Swizzle dest_swz{7, 7, 7, 7};
302    for (unsigned i = 0; i < instr->def.num_components; ++i) {
303       dest_swz[i] = i + nir_intrinsic_component(instr);
304    }
305 
306    auto literal_index = nir_src_as_const_value(instr->src[0]);
307 
308    if (!literal_index) {
309       sfn_log << SfnLog::err << "GS: Indirect input addressing not (yet) supported\n";
310       return false;
311    }
312    assert(literal_index->u32 < 6);
313    assert(nir_intrinsic_io_semantics(instr).num_slots == 1);
314 
315    EVTXDataFormat fmt =
316       chip_class() >= ISA_CC_EVERGREEN ? fmt_invalid : fmt_32_32_32_32_float;
317 
318    auto addr = m_per_vertex_offsets[literal_index->u32];
319    auto fetch = new LoadFromBuffer(dest,
320                                    dest_swz,
321                                    addr,
322                                    16 * nir_intrinsic_base(instr),
323                                    R600_GS_RING_CONST_BUFFER,
324                                    nullptr,
325                                    fmt);
326 
327    if (chip_class() >= ISA_CC_EVERGREEN)
328       fetch->set_fetch_flag(FetchInstr::use_const_field);
329 
330    fetch->set_num_format(vtx_nf_norm);
331    fetch->reset_fetch_flag(FetchInstr::format_comp_signed);
332 
333    emit_instruction(fetch);
334    return true;
335 }
336 
337 void
do_finalize()338 GeometryShader::do_finalize()
339 {
340 }
341 
342 void
do_get_shader_info(r600_shader * sh_info)343 GeometryShader::do_get_shader_info(r600_shader *sh_info)
344 {
345    sh_info->processor_type = PIPE_SHADER_GEOMETRY;
346    sh_info->ring_item_sizes[0] = m_ring_item_sizes[0];
347    sh_info->cc_dist_mask = m_cc_dist_mask;
348    sh_info->clip_dist_write = m_clip_dist_write;
349 }
350 
351 bool
read_prop(std::istream & is)352 GeometryShader::read_prop(std::istream& is)
353 {
354    (void)is;
355    return true;
356 }
357 
358 void
do_print_properties(std::ostream & os) const359 GeometryShader::do_print_properties(std::ostream& os) const
360 {
361    (void)os;
362 }
363 
364 void
emit_adj_fix()365 GeometryShader::emit_adj_fix()
366 {
367    auto adjhelp0 = value_factory().temp_register();
368 
369    emit_instruction(new AluInstr(op2_and_int,
370                                  adjhelp0,
371                                  m_primitive_id,
372                                  value_factory().one_i(),
373                                  AluInstr::last_write));
374 
375    int reg_indices[6];
376    int rotate_indices[6] = {4, 5, 0, 1, 2, 3};
377 
378    reg_indices[0] = reg_indices[1] = reg_indices[2] = m_export_base[1]->sel();
379    reg_indices[3] = reg_indices[4] = reg_indices[5] = m_export_base[2]->sel();
380 
381    std::array<PRegister, 6> adjhelp;
382 
383    AluInstr *ir = nullptr;
384    for (int i = 0; i < 6; i++) {
385       adjhelp[i] = value_factory().temp_register();
386       ir = new AluInstr(op3_cnde_int,
387                         adjhelp[i],
388                         adjhelp0,
389                         m_per_vertex_offsets[i],
390                         m_per_vertex_offsets[rotate_indices[i]],
391                         AluInstr::write);
392 
393       emit_instruction(ir);
394    }
395    ir->set_alu_flag(alu_last_instr);
396 
397    for (int i = 0; i < 6; i++)
398       m_per_vertex_offsets[i] = adjhelp[i];
399 }
400 
401 } // namespace r600
402