1 /* -*- mesa-c++ -*-
2 * Copyright 2022 Collabora LTD
3 * Author: Gert Wollny <[email protected]>
4 * SPDX-License-Identifier: MIT
5 */
6
7 #include "sfn_shader_gs.h"
8
9 #include "sfn_debug.h"
10 #include "sfn_instr_fetch.h"
11
12 namespace r600 {
13
/* Construct the GS wrapper: forward the index of the first atomic counter
 * to the common Shader base class and latch the triangle-strip-with-adjacency
 * workaround flag from the shader key (consumed in
 * do_allocate_reserved_registers / emit_adj_fix). */
GeometryShader::GeometryShader(const r600_shader_key& key):
    Shader("GS", key.gs.first_atomic_counter),
    m_tri_strip_adj_fix(key.gs.tri_strip_adj_fix)
{
}
19
20 bool
do_scan_instruction(nir_instr * instr)21 GeometryShader::do_scan_instruction(nir_instr *instr)
22 {
23 if (instr->type != nir_instr_type_intrinsic)
24 return false;
25
26 nir_intrinsic_instr *ii = nir_instr_as_intrinsic(instr);
27
28 switch (ii->intrinsic) {
29 case nir_intrinsic_store_output:
30 return process_store_output(ii);
31 case nir_intrinsic_load_per_vertex_input:
32 return process_load_input(ii);
33 default:
34 return false;
35 }
36 }
37
38 bool
process_store_output(nir_intrinsic_instr * instr)39 GeometryShader::process_store_output(nir_intrinsic_instr *instr)
40 {
41 auto location = static_cast<gl_varying_slot>(nir_intrinsic_io_semantics(instr).location);
42 auto index = nir_src_as_const_value(instr->src[1]);
43 assert(index);
44
45 auto driver_location = nir_intrinsic_base(instr) + index->u32;
46
47 if (location == VARYING_SLOT_COL0 || location == VARYING_SLOT_COL1 ||
48 (location >= VARYING_SLOT_VAR0 && location <= VARYING_SLOT_VAR31) ||
49 (location >= VARYING_SLOT_TEX0 && location <= VARYING_SLOT_TEX7) ||
50 location == VARYING_SLOT_BFC0 || location == VARYING_SLOT_BFC1 ||
51 location == VARYING_SLOT_PNTC || location == VARYING_SLOT_CLIP_VERTEX ||
52 location == VARYING_SLOT_CLIP_DIST0 || location == VARYING_SLOT_CLIP_DIST1 ||
53 location == VARYING_SLOT_PRIMITIVE_ID || location == VARYING_SLOT_POS ||
54 location == VARYING_SLOT_PSIZ || location == VARYING_SLOT_LAYER ||
55 location == VARYING_SLOT_VIEWPORT || location == VARYING_SLOT_FOGC) {
56
57 auto write_mask = nir_intrinsic_write_mask(instr);
58 ShaderOutput output(driver_location, write_mask, location);
59
60 if (nir_intrinsic_io_semantics(instr).no_varying)
61 output.set_no_varying(true);
62 if (nir_intrinsic_io_semantics(instr).location != VARYING_SLOT_CLIP_VERTEX)
63 add_output(output);
64
65 if (location == VARYING_SLOT_VIEWPORT) {
66 m_out_viewport = true;
67 m_out_misc_write = true;
68 }
69
70 if (location == VARYING_SLOT_CLIP_DIST0 || location == VARYING_SLOT_CLIP_DIST1) {
71 auto write_mask = nir_intrinsic_write_mask(instr);
72 m_cc_dist_mask |= write_mask << (4 * (location - VARYING_SLOT_CLIP_DIST0));
73 m_clip_dist_write |= write_mask << (4 * (location - VARYING_SLOT_CLIP_DIST0));
74 }
75
76 if (m_noutputs <= driver_location &&
77 nir_intrinsic_io_semantics(instr).location != VARYING_SLOT_CLIP_VERTEX)
78 m_noutputs = driver_location + 1;
79
80 return true;
81 }
82 return false;
83 }
84
85 bool
process_load_input(nir_intrinsic_instr * instr)86 GeometryShader::process_load_input(nir_intrinsic_instr *instr)
87 {
88 auto location = static_cast<gl_varying_slot>(nir_intrinsic_io_semantics(instr).location);
89 auto index = nir_src_as_const_value(instr->src[1]);
90 assert(index);
91
92 auto driver_location = nir_intrinsic_base(instr) + index->u32;
93
94 if (location == VARYING_SLOT_POS || location == VARYING_SLOT_PSIZ ||
95 location == VARYING_SLOT_FOGC || location == VARYING_SLOT_CLIP_VERTEX ||
96 location == VARYING_SLOT_CLIP_DIST0 || location == VARYING_SLOT_CLIP_DIST1 ||
97 location == VARYING_SLOT_COL0 || location == VARYING_SLOT_COL1 ||
98 location == VARYING_SLOT_BFC0 || location == VARYING_SLOT_BFC1 ||
99 location == VARYING_SLOT_PNTC ||
100 (location >= VARYING_SLOT_VAR0 && location <= VARYING_SLOT_VAR31) ||
101 (location >= VARYING_SLOT_TEX0 && location <= VARYING_SLOT_TEX7)) {
102
103 uint64_t bit = 1ull << location;
104 if (!(bit & m_input_mask)) {
105 ShaderInput input(driver_location, location);
106 input.set_ring_offset(16 * driver_location);
107 add_input(input);
108 m_next_input_ring_offset += 16;
109 m_input_mask |= bit;
110 }
111 return true;
112 }
113 return false;
114 }
115
int
GeometryShader::do_allocate_reserved_registers()
{
   /* Pin the six per-vertex ring offsets to R0.xy, R0.w, R1.xyz — six
    * vertices is the maximum input primitive size (triangle with
    * adjacency). NOTE(review): the sel/chan layout presumably mirrors what
    * the hardware preloads — confirm against the r600 ISA docs. */
   const int sel[6] = {0, 0, 0, 1, 1, 1};
   const int chan[6] = {0, 1, 3, 0, 1, 2};

   /* Reserve registers used by the shaders (should check how many
    * components are actually used */
   for (int i = 0; i < 6; ++i) {
      m_per_vertex_offsets[i] = value_factory().allocate_pinned_register(sel[i], chan[i]);
   }

   /* Primitive id is pinned to R0.z, invocation id to R1.w */
   m_primitive_id = value_factory().allocate_pinned_register(0, 2);
   m_invocation_id = value_factory().allocate_pinned_register(1, 3);

   /* R0 and R1 are reserved above, virtual registers start after them */
   value_factory().set_virtual_register_base(2);

   auto zero = value_factory().inline_const(ALU_SRC_0, 0);

   /* Zero-initialize the per-stream ring export base offsets that
    * emit_vertex() advances after each emitted vertex */
   for (int i = 0; i < 4; ++i) {
      m_export_base[i] = value_factory().temp_register(0, false);
      emit_instruction(
         new AluInstr(op1_mov, m_export_base[i], zero, AluInstr::last_write));
   }

   /* Ring 0 item size was accumulated while scanning the inputs */
   m_ring_item_sizes[0] = m_next_input_ring_offset;

   /* GS thread with no output workaround - emit a cut at start of GS */
   if (chip_class() == ISA_CC_R600) {
      emit_instruction(new EmitVertexInstr(0, true));
      start_new_block(0);
   }

   if (m_tri_strip_adj_fix)
      emit_adj_fix();

   return value_factory().next_register_index();
}
154
155 bool
process_stage_intrinsic(nir_intrinsic_instr * intr)156 GeometryShader::process_stage_intrinsic(nir_intrinsic_instr *intr)
157 {
158 switch (intr->intrinsic) {
159 case nir_intrinsic_emit_vertex:
160 return emit_vertex(intr, false);
161 case nir_intrinsic_end_primitive:
162 return emit_vertex(intr, true);
163 case nir_intrinsic_load_primitive_id:
164 return emit_simple_mov(intr->def, 0, m_primitive_id);
165 case nir_intrinsic_load_invocation_id:
166 return emit_simple_mov(intr->def, 0, m_invocation_id);
167 case nir_intrinsic_load_per_vertex_input:
168 return emit_load_per_vertex_input(intr);
169 default:;
170 }
171 return false;
172 }
173
bool
GeometryShader::emit_vertex(nir_intrinsic_instr *instr, bool cut)
{
   /* Finish one vertex on the given stream: flush the ring writes staged
    * by store_output(), then issue the EMIT (or CUT when 'cut' is set)
    * instruction and advance the per-stream export base pointer. */
   int stream = nir_intrinsic_stream_id(instr);
   assert(stream < 4);

   auto cut_instr = new EmitVertexInstr(stream, cut);

   for (auto v : m_streamout_data) {
      /* POS is only flushed on stream 0; on other streams the staged
       * POS write is dropped (and its instruction freed) —
       * NOTE(review): presumably because only stream 0 feeds the
       * rasterizer, confirm */
      if (stream == 0 || v.first != VARYING_SLOT_POS) {
         v.second->patch_ring(stream, m_export_base[stream]);
         /* keep scheduling order: the ring writes must land before the emit */
         cut_instr->add_required_instr(v.second);
         emit_instruction(v.second);
      } else
         delete v.second;
   }
   m_streamout_data.clear();

   emit_instruction(cut_instr);
   start_new_block(0);

   /* After a real emit, step the ring write base past this vertex
    * (m_noutputs vec4 slots per vertex) */
   if (!cut) {
      auto ir = new AluInstr(op2_add_int,
                             m_export_base[stream],
                             m_export_base[stream],
                             value_factory().literal(m_noutputs),
                             AluInstr::last_write);
      emit_instruction(ir);
   }

   return true;
}
206
207 bool
store_output(nir_intrinsic_instr * instr)208 GeometryShader::store_output(nir_intrinsic_instr *instr)
209 {
210 if (nir_intrinsic_io_semantics(instr).location == VARYING_SLOT_CLIP_VERTEX)
211 return true;
212
213 auto location = nir_intrinsic_io_semantics(instr).location;
214 auto index = nir_src_as_const_value(instr->src[1]);
215 assert(index);
216 auto driver_location = nir_intrinsic_base(instr) + index->u32;
217
218 uint32_t write_mask = nir_intrinsic_write_mask(instr);
219 uint32_t shift = nir_intrinsic_component(instr);
220
221 RegisterVec4::Swizzle src_swz{7, 7, 7, 7};
222 for (unsigned i = shift; i < 4; ++i) {
223 src_swz[i] = (1 << i) & (write_mask << shift) ? i - shift : 7;
224 }
225
226 auto out_value = value_factory().src_vec4(instr->src[0], pin_free, src_swz);
227
228 AluInstr *ir = nullptr;
229 if (m_streamout_data[location]) {
230 const auto& value = m_streamout_data[location]->value();
231 auto tmp = value_factory().temp_vec4(pin_chgr);
232 for (unsigned i = 0; i < 4 - shift; ++i) {
233 if (!(write_mask & (1 << i)))
234 continue;
235 if (out_value[i + shift]->chan() < 4) {
236 ir = new AluInstr(op1_mov,
237 tmp[i + shift],
238 out_value[i + shift],
239 AluInstr::write);
240 } else if (value[i]->chan() < 4) {
241 ir = new AluInstr(op1_mov, tmp[i + shift], value[i], AluInstr::write);
242 } else
243 continue;
244 emit_instruction(ir);
245 }
246 ir->set_alu_flag(alu_last_instr);
247 m_streamout_data[location] = new MemRingOutInstr(cf_mem_ring,
248 MemRingOutInstr::mem_write_ind,
249 tmp,
250 4 * driver_location,
251 instr->num_components,
252 m_export_base[0]);
253 } else {
254
255 sfn_log << SfnLog::io << "None-streamout ";
256 bool need_copy = shift != 0;
257 if (!need_copy) {
258 for (int i = 0; i < 4; ++i) {
259 if ((write_mask & (1 << i)) && (out_value[i]->chan() != i)) {
260 need_copy = true;
261 break;
262 }
263 }
264 }
265
266 if (need_copy) {
267 auto tmp = value_factory().temp_vec4(pin_chgr);
268 for (unsigned i = 0; i < 4 - shift; ++i) {
269 if (out_value[i]->chan() < 4) {
270 ir = new AluInstr(op1_mov, tmp[i], out_value[i], AluInstr::write);
271 emit_instruction(ir);
272 }
273 }
274 ir->set_alu_flag(alu_last_instr);
275 m_streamout_data[location] = new MemRingOutInstr(cf_mem_ring,
276 MemRingOutInstr::mem_write_ind,
277 tmp,
278 4 * driver_location,
279 instr->num_components,
280 m_export_base[0]);
281 } else {
282 for (auto i = 0; i < 4; ++i)
283 out_value[i]->set_pin(pin_chgr);
284 m_streamout_data[location] = new MemRingOutInstr(cf_mem_ring,
285 MemRingOutInstr::mem_write_ind,
286 out_value,
287 4 * driver_location,
288 instr->num_components,
289 m_export_base[0]);
290 }
291 }
292
293 return true;
294 }
295
bool
GeometryShader::emit_load_per_vertex_input(nir_intrinsic_instr *instr)
{
   /* Fetch one input of the addressed vertex from the GS input ring
    * (constant buffer R600_GS_RING_CONST_BUFFER), using the per-vertex
    * ring offset reserved in do_allocate_reserved_registers() as address. */
   auto dest = value_factory().dest_vec4(instr->def, pin_group);

   /* Place the fetched components starting at the intrinsic's component
    * offset; 7 marks unused channels. */
   RegisterVec4::Swizzle dest_swz{7, 7, 7, 7};
   for (unsigned i = 0; i < instr->def.num_components; ++i) {
      dest_swz[i] = i + nir_intrinsic_component(instr);
   }

   auto literal_index = nir_src_as_const_value(instr->src[0]);

   if (!literal_index) {
      sfn_log << SfnLog::err << "GS: Indirect input addressing not (yet) supported\n";
      return false;
   }
   /* At most six vertices per input primitive (triangle with adjacency) */
   assert(literal_index->u32 < 6);
   assert(nir_intrinsic_io_semantics(instr).num_slots == 1);

   /* On Evergreen+ the fetch takes its format from the constant field
    * (use_const_field below), so the instruction format stays invalid;
    * older chips encode the format in the instruction itself. */
   EVTXDataFormat fmt =
      chip_class() >= ISA_CC_EVERGREEN ? fmt_invalid : fmt_32_32_32_32_float;

   auto addr = m_per_vertex_offsets[literal_index->u32];

   /* 16 bytes (one vec4) per ring slot, see process_load_input() */
   auto fetch = new LoadFromBuffer(dest,
                                   dest_swz,
                                   addr,
                                   16 * nir_intrinsic_base(instr),
                                   R600_GS_RING_CONST_BUFFER,
                                   nullptr,
                                   fmt);

   if (chip_class() >= ISA_CC_EVERGREEN)
      fetch->set_fetch_flag(FetchInstr::use_const_field);

   fetch->set_num_format(vtx_nf_norm);
   fetch->reset_fetch_flag(FetchInstr::format_comp_signed);

   emit_instruction(fetch);
   return true;
}
336
void
GeometryShader::do_finalize()
{
   /* No GS specific finalization work is needed. */
}
341
342 void
do_get_shader_info(r600_shader * sh_info)343 GeometryShader::do_get_shader_info(r600_shader *sh_info)
344 {
345 sh_info->processor_type = PIPE_SHADER_GEOMETRY;
346 sh_info->ring_item_sizes[0] = m_ring_item_sizes[0];
347 sh_info->cc_dist_mask = m_cc_dist_mask;
348 sh_info->clip_dist_write = m_clip_dist_write;
349 }
350
bool
GeometryShader::read_prop(std::istream& is)
{
   /* The GS serializes no stage specific properties, so there is
    * nothing to read back. */
   (void)is;
   return true;
}
357
void
GeometryShader::do_print_properties(std::ostream& os) const
{
   /* No GS specific properties to print (counterpart of read_prop()). */
   (void)os;
}
363
364 void
emit_adj_fix()365 GeometryShader::emit_adj_fix()
366 {
367 auto adjhelp0 = value_factory().temp_register();
368
369 emit_instruction(new AluInstr(op2_and_int,
370 adjhelp0,
371 m_primitive_id,
372 value_factory().one_i(),
373 AluInstr::last_write));
374
375 int reg_indices[6];
376 int rotate_indices[6] = {4, 5, 0, 1, 2, 3};
377
378 reg_indices[0] = reg_indices[1] = reg_indices[2] = m_export_base[1]->sel();
379 reg_indices[3] = reg_indices[4] = reg_indices[5] = m_export_base[2]->sel();
380
381 std::array<PRegister, 6> adjhelp;
382
383 AluInstr *ir = nullptr;
384 for (int i = 0; i < 6; i++) {
385 adjhelp[i] = value_factory().temp_register();
386 ir = new AluInstr(op3_cnde_int,
387 adjhelp[i],
388 adjhelp0,
389 m_per_vertex_offsets[i],
390 m_per_vertex_offsets[rotate_indices[i]],
391 AluInstr::write);
392
393 emit_instruction(ir);
394 }
395 ir->set_alu_flag(alu_last_instr);
396
397 for (int i = 0; i < 6; i++)
398 m_per_vertex_offsets[i] = adjhelp[i];
399 }
400
401 } // namespace r600
402