1 /* -*- mesa-c++ -*-
2 * Copyright 2022 Collabora LTD
3 * Author: Gert Wollny <[email protected]>
4 * SPDX-License-Identifier: MIT
5 */
6
7 #include "sfn_shader.h"
8
9 #include "gallium/drivers/r600/r600_shader.h"
10 #include "nir.h"
11 #include "nir_intrinsics.h"
12 #include "nir_intrinsics_indices.h"
13 #include "sfn_debug.h"
14 #include "sfn_instr.h"
15 #include "sfn_instr_alu.h"
16 #include "sfn_instr_alugroup.h"
17 #include "sfn_instr_controlflow.h"
18 #include "sfn_instr_export.h"
19 #include "sfn_instr_fetch.h"
20 #include "sfn_instr_lds.h"
21 #include "sfn_instr_mem.h"
22 #include "sfn_instr_tex.h"
23 #include "sfn_liverangeevaluator.h"
24 #include "sfn_shader_cs.h"
25 #include "sfn_shader_fs.h"
26 #include "sfn_shader_gs.h"
27 #include "sfn_shader_tess.h"
28 #include "sfn_shader_vs.h"
29 #include "util/u_math.h"
30
31 #include <numeric>
32 #include <sstream>
33
34 namespace r600 {
35
36 using std::string;
37
38 void
print(std::ostream & os) const39 ShaderIO::print(std::ostream& os) const
40 {
41 os << m_type << " LOC:" << m_location;
42 if (m_varying_slot != NUM_TOTAL_VARYING_SLOTS)
43 os << " VARYING_SLOT:" << static_cast<int>(m_varying_slot);
44 if (m_no_varying)
45 os << " NO_VARYING";
46 do_print(os);
47 }
48
49 int
spi_sid() const50 ShaderIO::spi_sid() const
51 {
52 if (no_varying())
53 return 0;
54
55 switch (varying_slot()) {
56 case NUM_TOTAL_VARYING_SLOTS:
57 case VARYING_SLOT_POS:
58 case VARYING_SLOT_PSIZ:
59 case VARYING_SLOT_EDGE:
60 case VARYING_SLOT_FACE:
61 case VARYING_SLOT_CLIP_VERTEX:
62 return 0;
63 default:
64 static_assert(static_cast<int>(NUM_TOTAL_VARYING_SLOTS) <= 0x100 - 1,
65 "All varying slots plus 1 must be usable as 8-bit SPI semantic IDs");
66 return static_cast<int>(varying_slot()) + 1;
67 }
68 }
69
/* Base class constructor for shader inputs and outputs.
 * @param type          literal tag used in the string representation
 *                      ("INPUT" or "OUTPUT")
 * @param loc           driver location of the slot
 * @param varying_slot  gl varying slot, NUM_TOTAL_VARYING_SLOTS if unset */
ShaderIO::ShaderIO(const char *type, int loc, gl_varying_slot varying_slot):
    m_type(type),
    m_location(loc),
    m_varying_slot(varying_slot)
{
}
76
/* Construct an output slot.
 * @param writemask  mask of the components actually written */
ShaderOutput::ShaderOutput(int location, int writemask, gl_varying_slot varying_slot):
    ShaderIO("OUTPUT", location, varying_slot),
    m_writemask(writemask)
{
}
82
/* Default construct an output with an invalid location and an empty
 * writemask; the real values are filled in by the string parser. */
ShaderOutput::ShaderOutput():
    ShaderOutput(-1, 0)
{
}
87
88 void
do_print(std::ostream & os) const89 ShaderOutput::do_print(std::ostream& os) const
90 {
91 if (m_frag_result != static_cast<gl_frag_result>(FRAG_RESULT_MAX))
92 os << " FRAG_RESULT:" << static_cast<int>(m_frag_result);
93 os << " MASK:" << m_writemask;
94 }
95
/* Construct an input slot at the given driver location. */
ShaderInput::ShaderInput(int location, gl_varying_slot varying_slot):
    ShaderIO("INPUT", location, varying_slot)
{
}
100
/* Default construct an input with an invalid location; the real values
 * are filled in by the string parser. */
ShaderInput::ShaderInput():
    ShaderInput(-1)
{
}
105
106 void
do_print(std::ostream & os) const107 ShaderInput::do_print(std::ostream& os) const
108 {
109 if (m_system_value != SYSTEM_VALUE_MAX)
110 os << " SYSVALUE: " << static_cast<int>(m_system_value);
111 if (m_interpolator)
112 os << " INTERP:" << m_interpolator;
113 if (m_interpolate_loc)
114 os << " ILOC:" << m_interpolate_loc;
115 if (m_uses_interpolate_at_centroid)
116 os << " USE_CENTROID";
117 }
118
/* Record how this input is interpolated.
 * @param interp      interpolator index (0 means none)
 * @param interp_loc  interpolation location (0 means unset)
 * @param uses_interpolate_at_centroid  true if interpolateAtCentroid is
 *        used explicitly on this input */
void
ShaderInput::set_interpolator(int interp,
                              int interp_loc,
                              bool uses_interpolate_at_centroid)
{
   m_interpolator = interp;
   m_interpolate_loc = interp_loc;
   m_uses_interpolate_at_centroid = uses_interpolate_at_centroid;
}
128
/* Flag that this input is (also) read via interpolateAtCentroid. */
void
ShaderInput::set_uses_interpolate_at_centroid()
{
   m_uses_interpolate_at_centroid = true;
}
134
135 int64_t Shader::s_next_shader_id = 1;
136
/* Construct the base shader.
 * @param type_id      printable tag of the shader stage
 * @param atomic_base  first hw atomic counter index available to this stage */
Shader::Shader(const char *type_id, unsigned atomic_base):
    m_current_block(nullptr),
    m_type_id(type_id),
    m_chip_class(ISA_CC_R600),
    m_next_block(0),
    m_atomic_base(atomic_base),
    m_shader_id(s_next_shader_id++)
{
   m_instr_factory = new InstrFactory();
   m_chain_instr.this_shader = this;
   /* Make sure m_current_block is valid before any instruction is emitted. */
   start_new_block(0);
}
149
150 void
set_input_gpr(int driver_lcation,int gpr)151 Shader::set_input_gpr(int driver_lcation, int gpr)
152 {
153 auto i = m_inputs.find(driver_lcation);
154 assert(i != m_inputs.end());
155 i->second.set_gpr(gpr);
156 }
157
158 bool
add_info_from_string(std::istream & is)159 Shader::add_info_from_string(std::istream& is)
160 {
161 std::string type;
162 is >> type;
163
164 if (type == "CHIPCLASS")
165 return read_chipclass(is);
166 if (type == "FAMILY")
167 return read_family(is);
168 if (type == "OUTPUT")
169 return read_output(is);
170 if (type == "INPUT")
171 return read_input(is);
172 if (type == "PROP")
173 return read_prop(is);
174 if (type == "SYSVALUES")
175 return allocate_registers_from_string(is, pin_fully);
176 if (type == "REGISTERS")
177 return allocate_registers_from_string(is, pin_free);
178 if (type == "ARRAYS")
179 return allocate_arrays_from_string(is);
180
181 return false;
182 }
183
184 void
emit_instruction_from_string(const std::string & s)185 Shader::emit_instruction_from_string(const std::string& s)
186 {
187
188 sfn_log << SfnLog::instr << "Create Instr from '" << s << "'\n";
189 if (s == "BLOCK_START") {
190 if (!m_current_block->empty()) {
191 start_new_block(m_current_block->nesting_offset());
192 sfn_log << SfnLog::instr << " Emit start block\n";
193 }
194 return;
195 }
196
197 if (s == "BLOCK_END") {
198 return;
199 }
200
201 auto ir = m_instr_factory->from_string(s, m_current_block->nesting_depth(),
202 m_chip_class == ISA_CC_CAYMAN);
203 if (ir) {
204 emit_instruction(ir);
205 if (ir->end_block())
206 start_new_block(ir->nesting_offset());
207 sfn_log << SfnLog::instr << " " << *ir << "\n";
208 }
209 }
210
211 bool
read_output(std::istream & is)212 Shader::read_output(std::istream& is)
213 {
214 ShaderOutput output;
215
216 std::string token;
217 for (is >> token; !token.empty(); token.clear(), is >> token) {
218 int value;
219 if (int_from_string_with_prefix_optional(token, "LOC:", value))
220 output.set_location(value);
221 else if (int_from_string_with_prefix_optional(token, "VARYING_SLOT:", value))
222 output.set_varying_slot(static_cast<gl_varying_slot>(value));
223 else if (token == "NO_VARYING")
224 output.set_no_varying(true);
225 else if (int_from_string_with_prefix_optional(token, "FRAG_RESULT:", value))
226 output.set_frag_result(static_cast<gl_frag_result>(value));
227 else if (int_from_string_with_prefix_optional(token, "MASK:", value))
228 output.set_writemask(value);
229 else {
230 std::cerr << "Unknown parse value '" << token << "'";
231 assert(!"Unknown parse value in read_output");
232 }
233 }
234
235 add_output(output);
236 return true;
237 }
238
239 bool
read_input(std::istream & is)240 Shader::read_input(std::istream& is)
241 {
242 ShaderInput input;
243
244 int interp = 0;
245 int interp_loc = 0;
246 bool use_centroid = false;
247
248 std::string token;
249 for (is >> token; !token.empty(); token.clear(), is >> token) {
250 int value;
251 if (int_from_string_with_prefix_optional(token, "LOC:", value))
252 input.set_location(value);
253 else if (int_from_string_with_prefix_optional(token, "VARYING_SLOT:", value))
254 input.set_varying_slot(static_cast<gl_varying_slot>(value));
255 else if (token == "NO_VARYING")
256 input.set_no_varying(true);
257 else if (int_from_string_with_prefix_optional(token, "SYSVALUE:", value))
258 input.set_system_value(static_cast<gl_system_value>(value));
259 else if (int_from_string_with_prefix_optional(token, "INTERP:", interp))
260 ;
261 else if (int_from_string_with_prefix_optional(token, "ILOC:", interp_loc))
262 ;
263 else if (token == "USE_CENTROID")
264 use_centroid = true;
265 else {
266 std::cerr << "Unknown parse value '" << token << "'";
267 assert(!"Unknown parse value in read_input");
268 }
269 }
270
271 input.set_interpolator(interp, interp_loc, use_centroid);
272
273 add_input(input);
274 return true;
275 }
276
/* Pre-allocate registers from one line of the string representation.
 * Values containing '@', the address register "AR" and the index
 * registers ("IDX...") are allocated as single values, everything else
 * as a vec4.
 * @param pin  pin_fully for sys values, pin_free for plain registers
 * @return false if no line could be read */
bool
Shader::allocate_registers_from_string(std::istream& is, Pin pin)
{
   std::string line;
   if (!std::getline(is, line))
      return false;

   std::istringstream iline(line);

   while (!iline.eof()) {
      string reg_str;
      iline >> reg_str;

      if (reg_str.empty())
         break;

      if (strchr(reg_str.c_str(), '@') ||
          reg_str == "AR" ||
          reg_str.substr(0,3) == "IDX") {
         value_factory().dest_from_string(reg_str);
      } else {
         RegisterVec4::Swizzle swz = {0, 1, 2, 3};
         auto regs = value_factory().dest_vec4_from_string(reg_str, swz, pin);
         for (int i = 0; i < 4; ++i) {
            /* Fully pinned components additionally get the pin_start
             * flag (swz[i] >= 4 marks unused components). */
            if (swz[i] < 4 && pin == pin_fully) {
               regs[i]->set_flag(Register::pin_start);
            }
         }
      }
   }
   return true;
}
309
310 bool
allocate_arrays_from_string(std::istream & is)311 Shader::allocate_arrays_from_string(std::istream& is)
312 {
313 std::string line;
314 if (!std::getline(is, line))
315 return false;
316
317 std::istringstream iline(line);
318
319 while (!iline.eof()) {
320 string reg_str;
321 iline >> reg_str;
322
323 if (reg_str.empty())
324 break;
325
326 value_factory().array_from_string(reg_str);
327 }
328 return true;
329 }
330
331 bool
read_chipclass(std::istream & is)332 Shader::read_chipclass(std::istream& is)
333 {
334 string name;
335 is >> name;
336 if (name == "R600")
337 m_chip_class = ISA_CC_R600;
338 else if (name == "R700")
339 m_chip_class = ISA_CC_R700;
340 else if (name == "EVERGREEN")
341 m_chip_class = ISA_CC_EVERGREEN;
342 else if (name == "CAYMAN")
343 m_chip_class = ISA_CC_CAYMAN;
344 else
345 return false;
346 return true;
347 }
348
349 bool
read_family(std::istream & is)350 Shader::read_family(std::istream& is)
351 {
352 string name;
353 is >> name;
354 #define CHECK_FAMILY(F) if (name == #F) m_chip_family = CHIP_ ## F
355
356 CHECK_FAMILY(R600);
357 else CHECK_FAMILY(R600);
358 else CHECK_FAMILY(RV610);
359 else CHECK_FAMILY(RV630);
360 else CHECK_FAMILY(RV670);
361 else CHECK_FAMILY(RV620);
362 else CHECK_FAMILY(RV635);
363 else CHECK_FAMILY(RS780);
364 else CHECK_FAMILY(RS880);
365 /* GFX3 (R7xx) */
366 else CHECK_FAMILY(RV770);
367 else CHECK_FAMILY(RV730);
368 else CHECK_FAMILY(RV710);
369 else CHECK_FAMILY(RV740);
370 /* GFX4 (Evergreen) */
371 else CHECK_FAMILY(CEDAR);
372 else CHECK_FAMILY(REDWOOD);
373 else CHECK_FAMILY(JUNIPER);
374 else CHECK_FAMILY(CYPRESS);
375 else CHECK_FAMILY(HEMLOCK);
376 else CHECK_FAMILY(PALM);
377 else CHECK_FAMILY(SUMO);
378 else CHECK_FAMILY(SUMO2);
379 else CHECK_FAMILY(BARTS);
380 else CHECK_FAMILY(TURKS);
381 else CHECK_FAMILY(CAICOS);
382 /* GFX5 (Northern Islands) */
383 else CHECK_FAMILY(CAYMAN);
384 else CHECK_FAMILY(ARUBA);
385 else
386 return false;
387 return true;
388 }
389
/* Allocate the registers that must live at fixed locations and emit the
 * setup code depending on them. Called after scanning, before the NIR
 * instructions are translated. */
void
Shader::allocate_reserved_registers()
{
   m_instr_factory->value_factory().set_virtual_register_base(0);
   auto reserved_registers_end = do_allocate_reserved_registers();
   /* All virtual registers allocated from now on come after the
    * stage specific reserved range. */
   m_instr_factory->value_factory().set_virtual_register_base(reserved_registers_end);
   if (!m_atomics.empty()) {
      /* Atomic counters need a constant 1 as update value. */
      m_atomic_update = value_factory().temp_register();
      auto alu = new AluInstr(op1_mov,
                              m_atomic_update,
                              value_factory().one_i(),
                              AluInstr::last_write);
      alu->set_alu_flag(alu_no_schedule_bias);
      emit_instruction(alu);
   }

   if (m_flags.test(sh_needs_sbo_ret_address)) {
      m_rat_return_address = value_factory().temp_register(0);
      auto temp0 = value_factory().temp_register(0);
      auto temp1 = value_factory().temp_register(1);
      auto temp2 = value_factory().temp_register(2);

      auto group = new AluGroup();
      /* mbcnt with an all-ones mask: NOTE(review) presumably this
       * yields the lane index within the wavefront - confirm. */
      group->add_instruction(new AluInstr(
         op1_mbcnt_32lo_accum_prev_int, temp0, value_factory().literal(-1), {alu_write}));
      group->add_instruction(new AluInstr(
         op1_mbcnt_32hi_int, temp1, value_factory().literal(-1), {alu_write}));
      emit_instruction(group);
      /* temp2 = SE_ID * 256 + HW_WAVE_ID */
      emit_instruction(new AluInstr(op3_muladd_uint24,
                                    temp2,
                                    value_factory().inline_const(ALU_SRC_SE_ID, 0),
                                    value_factory().literal(256),
                                    value_factory().inline_const(ALU_SRC_HW_WAVE_ID, 0),
                                    {alu_write, alu_last_instr}));
      /* m_rat_return_address = temp2 * 0x40 + temp0 */
      emit_instruction(new AluInstr(op3_muladd_uint24,
                                    m_rat_return_address,
                                    temp2,
                                    value_factory().literal(0x40),
                                    temp0,
                                    {alu_write, alu_last_instr}));
   }
}
432
433 Shader *
translate_from_nir(nir_shader * nir,const pipe_stream_output_info * so_info,struct r600_shader * gs_shader,const r600_shader_key & key,r600_chip_class chip_class,radeon_family family)434 Shader::translate_from_nir(nir_shader *nir,
435 const pipe_stream_output_info *so_info,
436 struct r600_shader *gs_shader,
437 const r600_shader_key& key,
438 r600_chip_class chip_class,
439 radeon_family family)
440 {
441 Shader *shader = nullptr;
442
443 switch (nir->info.stage) {
444 case MESA_SHADER_FRAGMENT:
445 if (chip_class >= ISA_CC_EVERGREEN)
446 shader = new FragmentShaderEG(key);
447 else
448 shader = new FragmentShaderR600(key);
449 break;
450 case MESA_SHADER_VERTEX:
451 shader = new VertexShader(so_info, gs_shader, key);
452 break;
453 case MESA_SHADER_GEOMETRY:
454 shader = new GeometryShader(key);
455 break;
456 case MESA_SHADER_TESS_CTRL:
457 shader = new TCSShader(key);
458 break;
459 case MESA_SHADER_TESS_EVAL:
460 shader = new TESShader(so_info, gs_shader, key);
461 break;
462 case MESA_SHADER_KERNEL:
463 case MESA_SHADER_COMPUTE:
464 shader = new ComputeShader(key, BITSET_COUNT(nir->info.samplers_used));
465 break;
466 default:
467 return nullptr;
468 }
469
470 shader->set_info(nir);
471
472 shader->set_chip_class(chip_class);
473 shader->set_chip_family(family);
474
475 if (!shader->process(nir))
476 return nullptr;
477
478 return shader;
479 }
480
/* Copy shader level info from NIR; currently only the scratch size. */
void
Shader::set_info(nir_shader *nir)
{
   m_scratch_size = nir->scratch_size;
}
486
/* Accessor for the value factory owned by the instruction factory. */
ValueFactory&
Shader::value_factory()
{
   return m_instr_factory->value_factory();
}
492
/* Main entry point of the NIR -> r600 IR translation: scan uniforms and
 * instructions, allocate the reserved registers, then translate the
 * (single, fully inlined) function body.
 * @return false if scanning or translating any node failed */
bool
Shader::process(nir_shader *nir)
{
   /* SSBO bindings start after the image bindings. */
   m_ssbo_image_offset = nir->info.num_images;

   if (nir->info.use_legacy_math_rules)
      set_flag(sh_legacy_math_rules);

   nir_foreach_uniform_variable(var, nir) scan_uniforms(var);

   // at this point all functions should be inlined
   const nir_function *func =
      reinterpret_cast<const nir_function *>(exec_list_get_head_const(&nir->functions));

   if (!scan_shader(func))
      return false;

   allocate_reserved_registers();

   value_factory().allocate_registers(m_register_allocations);
   m_required_registers = value_factory().array_registers();

   sfn_log << SfnLog::trans << "Process shader \n";
   foreach_list_typed(nir_cf_node, node, node, &func->impl->body)
   {
      if (!process_cf_node(node))
         return false;
   }

   finalize();

   return true;
}
526
/* Pre-pass over all instructions to collect the required state, then
 * assign LDS positions to the inputs that need them and export parameter
 * slots to the outputs that have an SPI semantic ID.
 * @return false if an instruction could not be handled by the scanner */
bool
Shader::scan_shader(const nir_function *func)
{

   nir_foreach_block(block, func->impl)
   {
      nir_foreach_instr(instr, block)
      {
         if (!scan_instruction(instr)) {
            fprintf(stderr, "Unhandled sysvalue access ");
            nir_print_instr(instr, stderr);
            fprintf(stderr, "\n");
            return false;
         }
      }
   }

   int lds_pos = 0;
   for (auto& [index, input] : m_inputs) {
      if (input.need_lds_pos()) {
         /* Pre-Evergreen the input GPR mirrors the LDS position. */
         if (chip_class() < ISA_CC_EVERGREEN)
            input.set_gpr(lds_pos);
         input.set_lds_pos(lds_pos++);
      }
   }

   int export_param = 0;
   for (auto& [index, out] : m_outputs) {
      /* Only outputs with an SPI semantic ID become export parameters. */
      if (out.spi_sid())
         out.set_export_param(export_param++);
   }

   return true;
}
561
/* Collect per-uniform info: hardware atomic counter ranges and the
 * image/SSBO usage flags.
 * @return always true (bool kept for symmetry with the other scanners) */
bool
Shader::scan_uniforms(nir_variable *uniform)
{
   if (glsl_contains_atomic(uniform->type)) {
      int natomics = glsl_atomic_size(uniform->type) / 4; /* ATOMIC_COUNTER_SIZE */
      m_nhwatomic += natomics;

      if (glsl_type_is_array(uniform->type))
         m_indirect_files |= 1 << TGSI_FILE_HW_ATOMIC;

      m_flags.set(sh_uses_atomics);

      r600_shader_atomic atom = {0};

      atom.buffer_id = uniform->data.binding;
      atom.hw_idx = m_atomic_base + m_next_hwatomic_loc;

      /* Offsets are in bytes, counters are 4 bytes wide. */
      atom.start = uniform->data.offset >> 2;
      atom.end = atom.start + natomics - 1;

      /* Remember the first hw location used for each binding point. */
      if (m_atomic_base_map.find(uniform->data.binding) == m_atomic_base_map.end())
         m_atomic_base_map[uniform->data.binding] = m_next_hwatomic_loc;

      m_next_hwatomic_loc += natomics;

      m_atomic_file_count += atom.end - atom.start + 1;

      sfn_log << SfnLog::io << "HW_ATOMIC file count: " << m_atomic_file_count << "\n";

      m_atomics.push_back(atom);
   }

   auto type = glsl_without_array(uniform->type);
   if (glsl_type_is_image(type) || uniform->data.mode == nir_var_mem_ssbo) {
      m_flags.set(sh_uses_images);
      /* Only image arrays need indirect addressing; SSBOs are excluded. */
      if (glsl_type_is_array(uniform->type) && !(uniform->data.mode == nir_var_mem_ssbo))
         m_indirect_files |= 1 << TGSI_FILE_IMAGE;
   }

   return true;
}
603
/* Scan one instruction during the pre-pass. Stage specific handling is
 * delegated to do_scan_instruction first; the cases here set shader wide
 * flags for memory access and collect register declarations.
 * @return false only when a sysvalue access cannot be handled */
bool
Shader::scan_instruction(nir_instr *instr)
{
   if (do_scan_instruction(instr))
      return true;

   if (instr->type != nir_instr_type_intrinsic)
      return true;

   auto intr = nir_instr_as_intrinsic(instr);

   // handle unhandled instructions
   switch (intr->intrinsic) {
   case nir_intrinsic_ssbo_atomic:
   case nir_intrinsic_ssbo_atomic_swap:
   case nir_intrinsic_image_load:
   case nir_intrinsic_image_atomic:
   case nir_intrinsic_image_atomic_swap:
      /* These intrinsics read data back, so the RAT return address
       * setup is required (see allocate_reserved_registers). */
      m_flags.set(sh_needs_sbo_ret_address);
      FALLTHROUGH;
   case nir_intrinsic_image_store:
   case nir_intrinsic_store_ssbo:
      m_flags.set(sh_writes_memory);
      m_flags.set(sh_uses_images);
      break;
   case nir_intrinsic_barrier:
      /* Only barriers covering externally visible memory with a real
       * scope need the extra memory barrier preparation. */
      m_chain_instr.prepare_mem_barrier |=
         (nir_intrinsic_memory_modes(intr) &
             (nir_var_mem_ssbo | nir_var_mem_global | nir_var_image) &&
          nir_intrinsic_memory_scope(intr) != SCOPE_NONE);
      break;
   case nir_intrinsic_decl_reg:
      /* Collected here, allocated later in Shader::process. */
      m_register_allocations.push_back(intr);
      break;
   default:;
   }
   return true;
}
642
643 bool
process_cf_node(nir_cf_node * node)644 Shader::process_cf_node(nir_cf_node *node)
645 {
646 SFN_TRACE_FUNC(SfnLog::flow, "CF");
647
648 switch (node->type) {
649 case nir_cf_node_block:
650 return process_block(nir_cf_node_as_block(node));
651 case nir_cf_node_if:
652 return process_if(nir_cf_node_as_if(node));
653 case nir_cf_node_loop:
654 return process_loop(nir_cf_node_as_loop(node));
655 default:
656 return false;
657 }
658 }
659
660 static bool
child_block_empty(const exec_list & list)661 child_block_empty(const exec_list& list)
662 {
663 if (list.is_empty())
664 return true;
665
666 bool result = true;
667
668 foreach_list_typed(nir_cf_node, n, node, &list)
669 {
670
671 if (n->type == nir_cf_node_block) {
672 if (!nir_cf_node_as_block(n)->instr_list.is_empty())
673 return false;
674 }
675 if (n->type == nir_cf_node_if)
676 return false;
677 }
678 return result;
679 }
680
value_has_non_const_source(VirtualValue * value)681 static bool value_has_non_const_source(VirtualValue *value)
682 {
683 auto reg = value->as_register();
684 if (reg) {
685 // Non-ssa registers are probably the result of some control flow
686 // that makes the values non-uniform across the work group
687 if (!reg->has_flag(Register::ssa))
688 return true;
689
690 for (const auto& p : reg->parents()) {
691 auto alu = p->as_alu();
692 if (alu) {
693 for (auto& s : p->as_alu()->sources()) {
694 return value_has_non_const_source(s);
695 }
696 } else {
697 return true;
698 }
699 }
700 }
701 return false;
702 }
703
/* Translate a NIR if statement. If the then-branch is empty, the
 * predicate is emitted with op2_prede_int (compare equal zero) instead
 * of op2_pred_setne_int, so the else branch can be emitted directly
 * without an explicit CF else. The control flow depth is only counted
 * for conditions with non-constant sources. */
bool
Shader::process_if(nir_if *if_stmt)
{
   SFN_TRACE_FUNC(SfnLog::flow, "IF");

   auto value = value_factory().src(if_stmt->condition, 0);

   bool non_const_cond = value_has_non_const_source(value);

   EAluOp op = child_block_empty(if_stmt->then_list) ? op2_prede_int :
                                                       op2_pred_setne_int;

   AluInstr *pred = new AluInstr(op,
                                 value_factory().temp_register(),
                                 value,
                                 value_factory().zero(),
                                 AluInstr::last);
   pred->set_alu_flag(alu_update_exec);
   pred->set_alu_flag(alu_update_pred);
   pred->set_cf_type(cf_alu_push_before);

   IfInstr *ir = new IfInstr(pred);
   emit_instruction(ir);
   if (non_const_cond)
      ++m_control_flow_depth;
   start_new_block(1);

   if (!child_block_empty(if_stmt->then_list)) {
      foreach_list_typed(nir_cf_node, n, node, &if_stmt->then_list)
      {
         SFN_TRACE_FUNC(SfnLog::flow, "IF-then");
         if (!process_cf_node(n))
            return false;
      }
      if (!child_block_empty(if_stmt->else_list)) {
         if (!emit_control_flow(ControlFlowInstr::cf_else))
            return false;
         foreach_list_typed(nir_cf_node,
                            n,
                            node,
                            &if_stmt->else_list)
            if (!process_cf_node(n)) return false;
      }
   } else {
      /* Empty then-branch: the predicate was inverted above, so the
       * else nodes are emitted as the (only) conditional branch. */
      assert(!child_block_empty(if_stmt->else_list));
      foreach_list_typed(nir_cf_node,
                         n,
                         node,
                         &if_stmt->else_list)
         if (!process_cf_node(n)) return false;
   }

   if (!emit_control_flow(ControlFlowInstr::cf_endif))
      return false;

   if (non_const_cond)
      --m_control_flow_depth;

   return true;
}
764
765 bool
emit_control_flow(ControlFlowInstr::CFType type)766 Shader::emit_control_flow(ControlFlowInstr::CFType type)
767 {
768 auto ir = new ControlFlowInstr(type);
769 emit_instruction(ir);
770 int depth = 0;
771 switch (type) {
772 case ControlFlowInstr::cf_loop_begin:
773 m_loops.push_back(ir);
774 m_nloops++;
775 depth = 1;
776 break;
777 case ControlFlowInstr::cf_loop_end:
778 m_loops.pop_back();
779 FALLTHROUGH;
780 case ControlFlowInstr::cf_endif:
781 depth = -1;
782 break;
783 default:;
784 }
785
786 start_new_block(depth);
787 return true;
788 }
789
790 bool
process_loop(nir_loop * node)791 Shader::process_loop(nir_loop *node)
792 {
793 assert(!nir_loop_has_continue_construct(node));
794 SFN_TRACE_FUNC(SfnLog::flow, "LOOP");
795 if (!emit_control_flow(ControlFlowInstr::cf_loop_begin))
796 return false;
797
798 foreach_list_typed(nir_cf_node,
799 n,
800 node,
801 &node->body) if (!process_cf_node(n)) return false;
802
803 if (!emit_control_flow(ControlFlowInstr::cf_loop_end))
804 return false;
805
806 return true;
807 }
808
809 bool
process_block(nir_block * block)810 Shader::process_block(nir_block *block)
811 {
812 SFN_TRACE_FUNC(SfnLog::flow, "BLOCK");
813
814 nir_foreach_instr(instr, block)
815 {
816 sfn_log << SfnLog::instr << "FROM:" << *instr << "\n";
817 bool r = process_instr(instr);
818 if (!r) {
819 sfn_log << SfnLog::err << "R600: Unsupported instruction: " << *instr << "\n";
820 return false;
821 }
822 }
823 return true;
824 }
825
/* Translate a single NIR instruction via the instruction factory. */
bool
Shader::process_instr(nir_instr *instr)
{
   return m_instr_factory->from_nir(instr, *this);
}
831
/* Emit screen space derivatives (ddx/ddy), implemented with the texture
 * unit's get_gradient instructions.
 * @param opcode  TexInstr::get_gradient_h or TexInstr::get_gradient_v
 * @param fine    request fine (grad_fine) derivatives */
bool
Shader::emit_tex_fdd(const nir_intrinsic_instr* intr, int opcode, bool fine)
{
   auto& value_factory_ = value_factory();

   /* Swizzle component 7 marks unused channels. */
   int ncomp = intr->def.num_components;
   RegisterVec4::Swizzle src_swz = {7, 7, 7, 7};
   RegisterVec4::Swizzle tmp_swz = {7, 7, 7, 7};
   for (auto i = 0; i < ncomp; ++i) {
      src_swz[i] = i;
      tmp_swz[i] = i;
   }

   auto src = value_factory_.src_vec4(intr->src[0], pin_none, src_swz);

   /* Copy the source into a group pinned temp vec4 that feeds the TEX
    * instruction. */
   auto tmp = value_factory_.temp_vec4(pin_group, tmp_swz);
   AluInstr *mv = nullptr;
   for (int i = 0; i < ncomp; ++i) {
      mv = new AluInstr(op1_mov, tmp[i], src[i], AluInstr::write);
      emit_instruction(mv);
   }
   if (mv)
      mv->set_alu_flag(alu_last_instr);

   auto dst = value_factory_.dest_vec4(intr->def, pin_group);
   RegisterVec4::Swizzle dst_swz = {7, 7, 7, 7};
   for (auto i = 0; i < ncomp; ++i) {
      dst_swz[i] = i;
   }

   auto tex = new TexInstr((TexInstr::Opcode)opcode, dst, dst_swz, tmp, R600_MAX_CONST_BUFFERS, nullptr);

   if (fine)
      tex->set_tex_flag(TexInstr::grad_fine);

   emit_instruction(tex);

   return true;
}
871
/* Translate an intrinsic: first give the stage specific implementation a
 * shot, then the atomic counter (GDS) and RAT (image/SSBO) handlers,
 * finally handle the intrinsics common to all stages.
 * @return false if the intrinsic is not supported */
bool
Shader::process_intrinsic(nir_intrinsic_instr *intr)
{
   if (process_stage_intrinsic(intr))
      return true;

   if (GDSInstr::emit_atomic_counter(intr, *this)) {
      set_flag(sh_writes_memory);
      return true;
   }

   if (RatInstr::emit(intr, *this))
      return true;

   switch (intr->intrinsic) {
   case nir_intrinsic_store_output:
      return store_output(intr);
   case nir_intrinsic_load_input:
      return load_input(intr);
   case nir_intrinsic_load_ubo_vec4:
      return load_ubo(intr);
   case nir_intrinsic_store_scratch:
      return emit_store_scratch(intr);
   case nir_intrinsic_load_scratch:
      return emit_load_scratch(intr);
   case nir_intrinsic_store_local_shared_r600:
      return emit_local_store(intr);
   case nir_intrinsic_load_global:
   case nir_intrinsic_load_global_constant:
      return emit_load_global(intr);
   case nir_intrinsic_load_local_shared_r600:
      return emit_local_load(intr);
   case nir_intrinsic_load_tcs_in_param_base_r600:
      return emit_load_tcs_param_base(intr, 0);
   case nir_intrinsic_load_tcs_out_param_base_r600:
      return emit_load_tcs_param_base(intr, 16);
   case nir_intrinsic_barrier:
      return emit_barrier(intr);
   case nir_intrinsic_shared_atomic:
   case nir_intrinsic_shared_atomic_swap:
      return emit_atomic_local_shared(intr);
   case nir_intrinsic_shader_clock:
      return emit_shader_clock(intr);
   case nir_intrinsic_ddx:
   case nir_intrinsic_ddx_coarse:
      return emit_tex_fdd(intr, TexInstr::get_gradient_h, false);
   case nir_intrinsic_ddx_fine:
      return emit_tex_fdd(intr, TexInstr::get_gradient_h, true);
   case nir_intrinsic_ddy:
   case nir_intrinsic_ddy_coarse:
      return emit_tex_fdd(intr, TexInstr::get_gradient_v, false);
   case nir_intrinsic_ddy_fine:
      return emit_tex_fdd(intr, TexInstr::get_gradient_v, true);
   case nir_intrinsic_load_reg:
      return emit_load_reg(intr);
   case nir_intrinsic_load_reg_indirect:
      return emit_load_reg_indirect(intr);
   case nir_intrinsic_store_reg:
      return emit_store_reg(intr);
   case nir_intrinsic_store_reg_indirect:
      return emit_store_reg_indirect(intr);
   case nir_intrinsic_decl_reg:
      // Registers and arrays are allocated at
      // conversion startup time
      return true;
   default:
      return false;
   }
}
941
942 static ESDOp
lds_op_from_intrinsic(nir_atomic_op op,bool ret)943 lds_op_from_intrinsic(nir_atomic_op op, bool ret)
944 {
945 switch (op) {
946 case nir_atomic_op_iadd:
947 return ret ? LDS_ADD_RET : LDS_ADD;
948 case nir_atomic_op_iand:
949 return ret ? LDS_AND_RET : LDS_AND;
950 case nir_atomic_op_ior:
951 return ret ? LDS_OR_RET : LDS_OR;
952 case nir_atomic_op_imax:
953 return ret ? LDS_MAX_INT_RET : LDS_MAX_INT;
954 case nir_atomic_op_umax:
955 return ret ? LDS_MAX_UINT_RET : LDS_MAX_UINT;
956 case nir_atomic_op_imin:
957 return ret ? LDS_MIN_INT_RET : LDS_MIN_INT;
958 case nir_atomic_op_umin:
959 return ret ? LDS_MIN_UINT_RET : LDS_MIN_UINT;
960 case nir_atomic_op_ixor:
961 return ret ? LDS_XOR_RET : LDS_XOR;
962 case nir_atomic_op_xchg:
963 return LDS_XCHG_RET;
964 case nir_atomic_op_cmpxchg:
965 return LDS_CMP_XCHG_RET;
966 default:
967 unreachable("Unsupported shared atomic_op opcode");
968 }
969 }
970
971 PRegister
emit_load_to_register(PVirtualValue src,int chan)972 Shader::emit_load_to_register(PVirtualValue src, int chan)
973 {
974 assert(src);
975 PRegister dest = src->as_register();
976
977 if (!dest || chan >= 0) {
978 dest = value_factory().temp_register(chan);
979 dest->set_pin(pin_free);
980 emit_instruction(new AluInstr(op1_mov, dest, src, AluInstr::last_write));
981 }
982 return dest;
983 }
984
// add visitor to resolve array and register
/* Common state for the load/store register visitors below. Only local
 * arrays and plain registers are valid access targets; every other value
 * type asserts. */
class RegisterAccessHandler : public RegisterVisitor {

public:
   RegisterAccessHandler(Shader& shader, nir_intrinsic_instr *intr);

   void visit(LocalArrayValue& value) override {(void)value; assert(0);}
   void visit(UniformValue& value) override {(void)value; assert(0);}
   void visit(LiteralConstant& value) override {(void)value; assert(0);}
   void visit(InlineConstant& value) override {(void)value; assert(0);}

   Shader& sh;                  /* Shader the emitted moves go to. */
   nir_intrinsic_instr *ir;     /* The load/store_reg intrinsic. */
   PVirtualValue addr{nullptr}; /* Indirect address, set only for *_indirect. */
   bool success{true};
};
1001
/* Visitor emitting the moves for load_reg/load_reg_indirect. */
class RegisterReadHandler : public RegisterAccessHandler {

public:
   using RegisterAccessHandler::RegisterAccessHandler;
   using RegisterAccessHandler::visit;

   void visit(LocalArray& value) override;
   void visit(Register& value) override;
};
1011
emit_load_reg(nir_intrinsic_instr * intr)1012 bool Shader::emit_load_reg(nir_intrinsic_instr *intr)
1013 {
1014 RegisterReadHandler visitor(*this, intr);
1015 auto handle = value_factory().src(intr->src[0], 0);
1016 handle->accept(visitor);
1017 return visitor.success;
1018 }
1019
emit_load_reg_indirect(nir_intrinsic_instr * intr)1020 bool Shader::emit_load_reg_indirect(nir_intrinsic_instr *intr)
1021 {
1022 RegisterReadHandler visitor(*this, intr);
1023 visitor.addr = value_factory().src(intr->src[1], 0);
1024 auto handle = value_factory().src(intr->src[0], 0);
1025 handle->accept(visitor);
1026 return visitor.success;
1027 }
1028
/* Visitor emitting the moves for store_reg/store_reg_indirect. */
class RegisterWriteHandler : public RegisterAccessHandler {

public:
   using RegisterAccessHandler::RegisterAccessHandler;
   using RegisterAccessHandler::visit;

   void visit(LocalArray& value) override;
   void visit(Register& value) override;
};
1038
1039
emit_store_reg(nir_intrinsic_instr * intr)1040 bool Shader::emit_store_reg(nir_intrinsic_instr *intr)
1041 {
1042 RegisterWriteHandler visitor(*this, intr);
1043 auto handle = value_factory().src(intr->src[1], 0);
1044 handle->accept(visitor);
1045 return visitor.success;
1046 }
1047
emit_store_reg_indirect(nir_intrinsic_instr * intr)1048 bool Shader::emit_store_reg_indirect(nir_intrinsic_instr *intr)
1049 {
1050 RegisterWriteHandler visitor(*this, intr);
1051 visitor.addr = value_factory().src(intr->src[2], 0);
1052
1053 auto handle = value_factory().src(intr->src[1], 0);
1054 handle->accept(visitor);
1055 return visitor.success;
1056 }
1057
/* Store the shader and intrinsic; addr stays null for direct access. */
RegisterAccessHandler::RegisterAccessHandler(Shader& shader, nir_intrinsic_instr *intr):
    sh(shader),
    ir(intr)
{}
1062
/* Load from a local array: one mov per 32 bit channel; values wider
 * than 32 bit occupy several consecutive channels (slots). */
void RegisterReadHandler::visit(LocalArray& array)
{
   int slots = ir->def.bit_size / 32;
   auto pin = ir->def.num_components > 1 ? pin_none : pin_free;
   for (int i = 0; i < ir->def.num_components; ++i) {
      for (int s = 0; s < slots; ++s) {
         int chan = i * slots + s;
         auto dest = sh.value_factory().dest(ir->def, chan, pin);
         auto src = array.element(nir_intrinsic_base(ir), addr, chan);
         sh.emit_instruction(new AluInstr(op1_mov, dest, src, AluInstr::write));
      }
   }
}
1076
visit(Register & reg)1077 void RegisterReadHandler::visit(Register& reg)
1078 {
1079 auto dest = sh.value_factory().dest(ir->def, 0, pin_free);
1080 sh.emit_instruction(new AluInstr(op1_mov, dest, ®, AluInstr::write));
1081 }
1082
/* Store to a local array: one mov per 32 bit channel enabled in the
 * writemask; wider values occupy several consecutive channels (slots). */
void RegisterWriteHandler::visit(LocalArray& array)
{
   int writemask = nir_intrinsic_write_mask(ir);
   int slots = ir->src->ssa->bit_size / 32;

   for (int i = 0; i < ir->num_components; ++i) {
      if (!(writemask & (1 << i)))
         continue;
      for (int s = 0; s < slots; ++s) {
         int chan = i * slots + s;

         auto dest = array.element(nir_intrinsic_base(ir), addr, chan);
         auto src = sh.value_factory().src(ir->src[0], chan);
         sh.emit_instruction(new AluInstr(op1_mov, dest, src, AluInstr::write));
      }
   }
}
1100
/* Write to a NIR register that maps to a single scalar register; only a
 * single-component write (mask == 1) is expected on this path. */
void RegisterWriteHandler::visit(Register& dest)
{
   int writemask = nir_intrinsic_write_mask(ir);
   assert(writemask == 1);
   auto src = sh.value_factory().src(ir->src[0], 0);
   sh.emit_instruction(new AluInstr(op1_mov, &dest, src, AluInstr::write));
}
1108
/* Lower an atomic on LDS (workgroup shared memory) to an LDSAtomicInstr.
 * src[0]: address, src[1]: data, src[2]: swap value (compare-and-swap
 * only). */
bool
Shader::emit_atomic_local_shared(nir_intrinsic_instr *instr)
{
   /* Only allocate a result register when the atomic's value is consumed. */
   bool uses_retval = !list_is_empty(&instr->def.uses);

   auto& vf = value_factory();

   auto dest_value = uses_retval ? vf.dest(instr->def, 0, pin_free) : nullptr;

   auto op = lds_op_from_intrinsic(nir_intrinsic_atomic_op(instr), uses_retval);

   /* For these two instructions we don't have opcodes that don't read back
    * the result, so we have to add a dummy-readback to remove the return
    * value from the read queue. */
   if (!uses_retval &&
       (op == LDS_XCHG_RET || op == LDS_CMP_XCHG_RET)) {
      dest_value = vf.dest(instr->def, 0, pin_free);
   }

   auto address = vf.src(instr->src[0], 0);

   AluInstr::SrcValues src;
   src.push_back(vf.src(instr->src[1], 0));

   /* Compare-and-swap carries a second data operand. */
   if (unlikely(instr->intrinsic == nir_intrinsic_shared_atomic_swap))
      src.push_back(vf.src(instr->src[2], 0));
   emit_instruction(new LDSAtomicInstr(op, dest_value, address, src));
   return true;
}
1138
/* Split a resource (UAV) index into a constant offset plus an optional
 * dynamic register part. Returns {offset, reg}; reg is nullptr when the
 * index is fully constant. */
auto
Shader::evaluate_resource_offset(nir_intrinsic_instr *instr, int src_id)
   -> std::pair<int, PRegister>
{
   auto& vf = value_factory();

   PRegister uav_id{nullptr};
   /* Start from the intrinsic's range base, if it has one. */
   int offset = nir_intrinsic_has_range_base(instr) ?
                   nir_intrinsic_range_base(instr) : 0;

   auto uav_id_const = nir_src_as_const_value(instr->src[src_id]);
   if (uav_id_const) {
      /* Fully constant index: fold it into the offset. */
      offset += uav_id_const->u32;
   } else {
      auto uav_id_val = vf.src(instr->src[src_id], 0);
      if (uav_id_val->as_register()) {
         uav_id = uav_id_val->as_register();
      } else {
         /* Non-register values must be copied into a register so the
          * resource access can index with them. */
         uav_id = vf.temp_register();
         emit_instruction(new AluInstr(op1_mov, uav_id, uav_id_val, AluInstr::last_write));
      }
   }
   return std::make_pair(offset, uav_id);
}
1163
/* Store to private (scratch) memory. The masked components are first
 * gathered into a group-pinned temp vec4, then written out with a scratch
 * IO instruction; compile-time-constant addresses use the immediate-offset
 * form, dynamic addresses go through an address register. */
bool
Shader::emit_store_scratch(nir_intrinsic_instr *intr)
{
   auto& vf = m_instr_factory->value_factory();

   int writemask = nir_intrinsic_write_mask(intr);

   /* 7 marks an unused channel in the swizzle. */
   RegisterVec4::Swizzle swz = {7, 7, 7, 7};

   for (unsigned i = 0; i < intr->num_components; ++i)
      swz[i] = (1 << i) & writemask ? i : 7;

   auto value = vf.temp_vec4(pin_group, swz);
   AluInstr *ir = nullptr;
   for (unsigned i = 0; i < intr->num_components; ++i) {
      if (value[i]->chan() < 4) {
         ir = new AluInstr(op1_mov, value[i], vf.src(intr->src[0], i), AluInstr::write);
         ir->set_alu_flag(alu_no_schedule_bias);
         emit_instruction(ir);
      }
   }
   /* Nothing was copied, i.e. the write mask was effectively empty. */
   if (!ir)
      return true;

   ir->set_alu_flag(alu_last_instr);

   auto address = vf.src(intr->src[1], 0);

   int align = nir_intrinsic_align_mul(intr);
   int align_offset = nir_intrinsic_align_offset(intr);

   ScratchIOInstr *ws_ir = nullptr;

   /* Try to resolve the address to a compile-time constant offset
    * (literal, or the inline constants 0 and 1). */
   int offset = -1;
   if (address->as_literal()) {
      offset = address->as_literal()->value();
   } else if (address->as_inline_const()) {
      auto il = address->as_inline_const();
      if (il->sel() == ALU_SRC_0)
         offset = 0;
      else if (il->sel() == ALU_SRC_1_INT)
         offset = 1;
   }

   if (offset >= 0) {
      ws_ir = new ScratchIOInstr(value, offset, align, align_offset, writemask);
   } else {
      /* Dynamic address: copy it into a temp register first. */
      auto addr_temp = vf.temp_register(0);
      auto load_addr = new AluInstr(op1_mov, addr_temp, address, AluInstr::last_write);
      load_addr->set_alu_flag(alu_no_schedule_bias);
      emit_instruction(load_addr);

      ws_ir = new ScratchIOInstr(
         value, addr_temp, align, align_offset, writemask, m_scratch_size);
   }
   emit_instruction(ws_ir);

   m_flags.set(sh_needs_scratch_space);
   return true;
}
1224
/* Load from private (scratch) memory. R700 and newer have a dedicated
 * scratch-read instruction; R600 reuses the scratch IO path in read mode
 * with the same constant/dynamic address split as the store. */
bool
Shader::emit_load_scratch(nir_intrinsic_instr *intr)
{
   auto addr = value_factory().src(intr->src[0], 0);
   auto dest = value_factory().dest_vec4(intr->def, pin_group);

   if (chip_class() >= ISA_CC_R700) {
      RegisterVec4::Swizzle dest_swz = {7, 7, 7, 7};

      for (unsigned i = 0; i < intr->num_components; ++i)
         dest_swz[i] = i;

      auto *ir = new LoadFromScratch(dest, dest_swz, addr, m_scratch_size);
      emit_instruction(ir);
      /* Keep scratch reads ordered with other scratch accesses. */
      chain_scratch_read(ir);
   } else {
      int align = nir_intrinsic_align_mul(intr);
      int align_offset = nir_intrinsic_align_offset(intr);

      /* Try to fold the address into a constant offset. */
      int offset = -1;
      if (addr->as_literal()) {
         offset = addr->as_literal()->value();
      } else if (addr->as_inline_const()) {
         auto il = addr->as_inline_const();
         if (il->sel() == ALU_SRC_0)
            offset = 0;
         else if (il->sel() == ALU_SRC_1_INT)
            offset = 1;
      }

      ScratchIOInstr *ir = nullptr;
      if (offset >= 0) {
         ir = new ScratchIOInstr(dest, offset, align, align_offset, 0xf, true);
      } else {
         /* Dynamic address: copy it into a temp register first. */
         auto addr_temp = value_factory().temp_register(0);
         auto load_addr = new AluInstr(op1_mov, addr_temp, addr, AluInstr::last_write);
         load_addr->set_alu_flag(alu_no_schedule_bias);
         emit_instruction(load_addr);

         ir = new ScratchIOInstr(
            dest, addr_temp, align, align_offset, 0xf, m_scratch_size, true);
      }
      emit_instruction(ir);
   }

   m_flags.set(sh_needs_scratch_space);

   return true;
}
1274
/* Lower a global-memory load: fetch one 32-bit value through a
 * vertex-fetch style buffer read, using src[0] as the address register. */
bool Shader::emit_load_global(nir_intrinsic_instr *intr)
{
   auto dest = value_factory().dest_vec4(intr->def, pin_group);

   auto src_value = value_factory().src(intr->src[0], 0);
   auto src = src_value->as_register();
   if (!src) {
      /* The fetch needs the address in a register. */
      src = value_factory().temp_register();
      emit_instruction(new AluInstr(op1_mov, src, src_value, AluInstr::last_write));
   }
   /* Only the x component is populated ({0,7,7,7}); the data is fetched as
    * raw (non-normalized) 32-bit integer. set_mfc(4) presumably sets a
    * 4-byte mega-fetch count - TODO confirm against the fetch encoding. */
   auto load = new LoadFromBuffer(dest, {0,7,7,7}, src, 0, 1, NULL, fmt_32);
   load->set_mfc(4);
   load->set_num_format(vtx_nf_int);
   load->reset_fetch_flag(FetchInstr::format_comp_signed);

   emit_instruction(load);
   return true;
}
1293
/* Store to LDS (workgroup shared memory). src[0]: value, src[1]: address.
 * Leading unwritten components are skipped; then either one component is
 * written (LDS_WRITE) or two adjacent components (LDS_WRITE_REL). */
bool
Shader::emit_local_store(nir_intrinsic_instr *instr)
{
   unsigned write_mask = nir_intrinsic_write_mask(instr);

   auto address = value_factory().src(instr->src[1], 0);
   /* Shift away leading zero bits of the mask; the write starts at the
    * first set component. */
   int swizzle_base = 0;
   unsigned w = write_mask;
   while (!(w & 1)) {
      ++swizzle_base;
      w >>= 1;
   }
   write_mask = write_mask >> swizzle_base;

   if ((write_mask & 3) != 3) {
      /* Single component write. */
      auto value = value_factory().src(instr->src[0], swizzle_base);
      emit_instruction(new LDSAtomicInstr(LDS_WRITE, nullptr, address, {value}));
   } else {
      /* Two adjacent components in one WRITE_REL. NOTE(review): masks with
       * more than two set bits would drop the extra components - presumably
       * such stores are split by earlier lowering; confirm against the
       * callers. */
      auto value = value_factory().src(instr->src[0], swizzle_base);
      auto value1 = value_factory().src(instr->src[0], swizzle_base + 1);
      emit_instruction(
         new LDSAtomicInstr(LDS_WRITE_REL, nullptr, address, {value, value1}));
   }
   return true;
}
1319
1320 bool
emit_local_load(nir_intrinsic_instr * instr)1321 Shader::emit_local_load(nir_intrinsic_instr *instr)
1322 {
1323 auto address = value_factory().src_vec(instr->src[0], instr->num_components);
1324 auto dest_value = value_factory().dest_vec(instr->def, instr->num_components);
1325 emit_instruction(new LDSReadInstr(dest_value, address));
1326 return true;
1327 }
1328
1329 void
chain_scratch_read(Instr * instr)1330 Shader::chain_scratch_read(Instr *instr)
1331 {
1332 m_chain_instr.apply(instr, &m_chain_instr.last_scratch_instr);
1333 }
1334
1335 void
chain_ssbo_read(Instr * instr)1336 Shader::chain_ssbo_read(Instr *instr)
1337 {
1338 m_chain_instr.apply(instr, &m_chain_instr.last_ssbo_instr);
1339 }
1340
1341 bool
emit_wait_ack()1342 Shader::emit_wait_ack()
1343 {
1344 start_new_block(0);
1345 emit_instruction(new ControlFlowInstr(ControlFlowInstr::cf_wait_ack));
1346 start_new_block(0);
1347 return true;
1348 }
1349
get_array_hash(const VirtualValue & value)1350 static uint32_t get_array_hash(const VirtualValue& value)
1351 {
1352 assert (value.pin() == pin_array);
1353 const LocalArrayValue& av = static_cast<const LocalArrayValue&>(value);
1354 return av.chan() | (av.array().base_sel() << 2);
1355 }
1356
visit(AluInstr * instr)1357 void Shader::InstructionChain::visit(AluInstr *instr)
1358 {
1359 if (instr->is_kill()) {
1360 last_kill_instr = instr;
1361
1362 // these instructions have side effects, they should
1363 // not be re-order with kill
1364 if (last_gds_instr)
1365 instr->add_required_instr(last_gds_instr);
1366
1367 if (last_ssbo_instr)
1368 instr->add_required_instr(last_ssbo_instr);
1369 }
1370
1371 /* Make sure array reads and writes depends on the last indirect access
1372 * so that we don't overwrite array elements too early */
1373
1374 if (auto d = instr->dest()) {
1375 if (d->pin() == pin_array) {
1376 if (d->addr()) {
1377 last_alu_with_indirect_reg[get_array_hash(*d)] = instr;
1378 return;
1379 }
1380 auto pos = last_alu_with_indirect_reg.find(get_array_hash(*d));
1381 if (pos != last_alu_with_indirect_reg.end()) {
1382 instr->add_required_instr(pos->second);
1383 }
1384 }
1385 }
1386
1387 for (auto& s : instr->sources()) {
1388 if (s->pin() == pin_array) {
1389 if (s->get_addr()) {
1390 last_alu_with_indirect_reg[get_array_hash(*s)] = instr;
1391 return;
1392 }
1393 auto pos = last_alu_with_indirect_reg.find(get_array_hash(*s));
1394 if (pos != last_alu_with_indirect_reg.end()) {
1395 instr->add_required_instr(pos->second);
1396 }
1397 }
1398 }
1399
1400 if (instr->has_lds_access()) {
1401 last_lds_access = instr;
1402 if (last_group_barrier)
1403 instr->add_required_instr(last_group_barrier);
1404 }
1405
1406 if (!instr->has_alu_flag(alu_is_lds) &&
1407 instr->opcode() == op0_group_barrier) {
1408 last_group_barrier = instr;
1409 if (last_lds_access)
1410 instr->add_required_instr(last_group_barrier);
1411 if (last_ssbo_instr)
1412 instr->add_required_instr(last_ssbo_instr);
1413 }
1414
1415 }
1416
1417 void
visit(ScratchIOInstr * instr)1418 Shader::InstructionChain::visit(ScratchIOInstr *instr)
1419 {
1420 apply(instr, &last_scratch_instr);
1421 }
1422
/* Chain a GDS instruction: keep GDS accesses in program order, propagate
 * the helper/valid-pixel-mode flag to all enclosing loops, and never let a
 * GDS op move before a preceding kill (it has side effects). */
void
Shader::InstructionChain::visit(GDSInstr *instr)
{
   apply(instr, &last_gds_instr);
   Instr::Flags flag = instr->has_instr_flag(Instr::helper) ? Instr::helper : Instr::vpm;
   for (auto& loop : this_shader->m_loops) {
      loop->set_instr_flag(flag);
   }
   if (last_kill_instr)
      instr->add_required_instr(last_kill_instr);

}
1435
/* Chain a RAT (SSBO/image) instruction: keep RAT accesses in program
 * order, propagate loop flags, request an ACK when a memory barrier is
 * pending, and order against kills and group barriers. */
void
Shader::InstructionChain::visit(RatInstr *instr)
{
   apply(instr, &last_ssbo_instr);
   Instr::Flags flag = instr->has_instr_flag(Instr::helper) ? Instr::helper : Instr::vpm;
   for (auto& loop : this_shader->m_loops) {
      loop->set_instr_flag(flag);
   }

   /* A pending memory barrier needs the write acknowledged so a later
    * WAIT_ACK has something to wait on. */
   if (prepare_mem_barrier)
      instr->set_ack();

   /* Start a new CF block after 16 RAT instructions - presumably a
    * hardware clause limit; TODO confirm against the ISA docs. */
   if (this_shader->m_current_block->inc_rat_emitted() > 15)
      this_shader->start_new_block(0);

   if (last_kill_instr)
      instr->add_required_instr(last_kill_instr);

   if (last_group_barrier)
      instr->add_required_instr(last_group_barrier);
}
1457
1458 void
apply(Instr * current,Instr ** last)1459 Shader::InstructionChain::apply(Instr *current, Instr **last)
1460 {
1461 if (*last)
1462 current->add_required_instr(*last);
1463 *last = current;
1464 }
1465
1466 void
emit_instruction(PInst instr)1467 Shader::emit_instruction(PInst instr)
1468 {
1469 sfn_log << SfnLog::instr << " " << *instr << "\n";
1470 instr->accept(m_chain_instr);
1471 m_current_block->push_back(instr);
1472 }
1473
/* Load the tessellation parameter base values from the LDS info constant
 * buffer; 'offset' selects which vec4 of the buffer to fetch. */
bool
Shader::emit_load_tcs_param_base(nir_intrinsic_instr *instr, int offset)
{
   /* The buffer fetch needs an index register; use a zeroed temp. */
   auto src = value_factory().temp_register();
   emit_instruction(
      new AluInstr(op1_mov, src, value_factory().zero(), AluInstr::last_write));

   auto dest = value_factory().dest_vec4(instr->def, pin_group);
   auto fetch = new LoadFromBuffer(dest,
                                   {0, 1, 2, 3},
                                   src,
                                   offset,
                                   R600_LDS_INFO_CONST_BUFFER,
                                   nullptr,
                                   fmt_32_32_32_32);

   fetch->set_fetch_flag(LoadFromBuffer::srf_mode);
   emit_instruction(fetch);

   return true;
}
1495
/* Implement shader_clock: copy the TIME_LO/TIME_HI inline constants into
 * the two result channels. Both moves are placed in one ALU group so the
 * counter halves are read together. */
bool
Shader::emit_shader_clock(nir_intrinsic_instr *instr)
{
   auto& vf = value_factory();
   auto group = new AluGroup();
   group->add_instruction(new AluInstr(op1_mov,
                                       vf.dest(instr->def, 0, pin_chan),
                                       vf.inline_const(ALU_SRC_TIME_LO, 0),
                                       AluInstr::write));
   group->add_instruction(new AluInstr(op1_mov,
                                       vf.dest(instr->def, 1, pin_chan),
                                       vf.inline_const(ALU_SRC_TIME_HI, 0),
                                       AluInstr::last_write));
   emit_instruction(group);
   return true;
}
1512
1513 bool
emit_group_barrier(nir_intrinsic_instr * intr)1514 Shader::emit_group_barrier(nir_intrinsic_instr *intr)
1515 {
1516 assert(m_control_flow_depth == 0);
1517 (void)intr;
1518 auto op = new AluInstr(op0_group_barrier, 0);
1519 op->set_alu_flag(alu_last_instr);
1520 emit_instruction(op);
1521 return true;
1522 }
1523
/* Lower nir_intrinsic_barrier: a workgroup execution scope maps to
 * GROUP_BARRIER, and memory modes covering buffers/images/global map to a
 * WAIT_ACK on outstanding RAT writes. */
bool Shader::emit_barrier(nir_intrinsic_instr *intr)
{

   if ((nir_intrinsic_execution_scope(intr) == SCOPE_WORKGROUP)) {
      if (!emit_group_barrier(intr))
         return false;
   }

   /* We don't check nir_var_mem_shared because we don't emit a real barrier -
    * for this we need to implement GWS (Global Wave Sync).
    * Here we just emit a wait_ack - this is no real barrier,
    * it's just a wait for RAT writes to be finished (if they
    * are emitted with the _ACK opcode and the `mark` flag set - it
    * is very likely that WAIT_ACK is also only relevant for this
    * shader instance). */
   auto full_barrier_mem_modes = nir_var_mem_ssbo | nir_var_image | nir_var_mem_global;

   if ((nir_intrinsic_memory_scope(intr) != SCOPE_NONE) &&
       (nir_intrinsic_memory_modes(intr) & full_barrier_mem_modes)) {
      return emit_wait_ack();
   }

   return true;
}
1548
/* Lower a UBO load. Three paths:
 * - dynamic buffer offset: vertex-fetch style load through the buffer,
 * - constant buffer id and offset: read straight from the constant cache,
 * - dynamic buffer id with constant offset: constant cache indexed by a
 *   register (marks the constant file as indirectly addressed). */
bool
Shader::load_ubo(nir_intrinsic_instr *instr)
{
   auto bufid = nir_src_as_const_value(instr->src[0]);
   auto buf_offset = nir_src_as_const_value(instr->src[1]);
   auto base_id = nir_intrinsic_base(instr);

   if (!buf_offset) {
      /* TODO: if bufid is constant then this can also be solved by using the
       * CF index on the ALU block, and this would probably make sense when
       * there are more than one loads with the same buffer ID. */

      auto addr = value_factory().src(instr->src[1], 0)->as_register();
      RegisterVec4::Swizzle dest_swz{7, 7, 7, 7};
      auto dest = value_factory().dest_vec4(instr->def, pin_group);

      for (unsigned i = 0; i < instr->def.num_components; ++i) {
         dest_swz[i] = i + nir_intrinsic_component(instr);
      }

      LoadFromBuffer *ir;
      if (bufid) {
         ir = new LoadFromBuffer(
            dest, dest_swz, addr, 0, bufid->u32, nullptr, fmt_32_32_32_32_float);
      } else {
         /* The buffer id is dynamic too: load it into a register first. */
         auto buffer_id = emit_load_to_register(value_factory().src(instr->src[0], 0));
         ir = new LoadFromBuffer(
            dest, dest_swz, addr, 0, base_id, buffer_id, fmt_32_32_32_32_float);
      }
      emit_instruction(ir);
      return true;
   }

   /* direct load using the constant cache */
   if (bufid) {
      int buf_cmp = nir_intrinsic_component(instr);

      AluInstr *ir = nullptr;
      auto pin = instr->def.num_components == 1
                    ? pin_free
                    : pin_none;
      for (unsigned i = 0; i < instr->def.num_components; ++i) {

         sfn_log << SfnLog::io << "UBO[" << bufid << "] " << instr->def.index
                 << " const[" << i << "]: " << instr->const_index[i] << "\n";

         /* 512 is the base of the kcache constant range. */
         auto uniform =
            value_factory().uniform(512 + buf_offset->u32, i + buf_cmp, bufid->u32);
         ir = new AluInstr(op1_mov,
                           value_factory().dest(instr->def, i, pin),
                           uniform,
                           {alu_write});
         emit_instruction(ir);
      }
      if (ir)
         ir->set_alu_flag(alu_last_instr);
      return true;
   } else {
      int buf_cmp = nir_intrinsic_component(instr);
      AluInstr *ir = nullptr;
      auto kc_id = value_factory().src(instr->src[0], 0);

      for (unsigned i = 0; i < instr->def.num_components; ++i) {
         int cmp = buf_cmp + i;
         auto u =
            new UniformValue(512 + buf_offset->u32, cmp, kc_id, nir_intrinsic_base(instr));
         auto dest = value_factory().dest(instr->def, i, pin_none);
         ir = new AluInstr(op1_mov, dest, u, AluInstr::write);
         emit_instruction(ir);
      }
      if (ir)
         ir->set_alu_flag(alu_last_instr);
      /* The constant file is now accessed with a dynamic index. */
      m_indirect_files |= 1 << TGSI_FILE_CONSTANT;
      return true;
   }
}
1625
1626 void
start_new_block(int depth)1627 Shader::start_new_block(int depth)
1628 {
1629 int depth_offset = m_current_block ? m_current_block->nesting_depth() : 0;
1630 m_current_block = new Block(depth + depth_offset, m_next_block++);
1631 m_root.push_back(m_current_block);
1632 }
1633
1634 bool
emit_simple_mov(nir_def & def,int chan,PVirtualValue src,Pin pin)1635 Shader::emit_simple_mov(nir_def& def, int chan, PVirtualValue src, Pin pin)
1636 {
1637 auto dst = value_factory().dest(def, chan, pin);
1638 emit_instruction(new AluInstr(op1_mov, dst, src, AluInstr::last_write));
1639 return true;
1640 }
1641
1642 void
print(std::ostream & os) const1643 Shader::print(std::ostream& os) const
1644 {
1645 print_header(os);
1646
1647 for (auto& [dummy, i] : m_inputs) {
1648 i.print(os);
1649 os << "\n";
1650 }
1651
1652 for (auto& [dummy, o] : m_outputs) {
1653 o.print(os);
1654 os << "\n";
1655 }
1656
1657 os << "SHADER\n";
1658 for (auto& b : m_root)
1659 b->print(os);
1660 }
1661
/* Printable chip-class names, indexed by the ISA_CC_* enum values. */
const char *chip_class_names[] = {"R600", "R700", "EVERGREEN", "CAYMAN"};
1663
/* Print shader id, stage type and chip class, followed by the
 * stage-specific properties. */
void
Shader::print_header(std::ostream& os) const
{
   /* chip_class_names has exactly ISA_CC_CAYMAN + 1 entries. */
   assert(m_chip_class <= ISA_CC_CAYMAN);
   os << "Shader: " << m_shader_id << "\n";
   os << m_type_id << "\n";
   os << "CHIPCLASS " << chip_class_names[m_chip_class] << "\n";
   print_properties(os);
}
1673
1674 void
print_properties(std::ostream & os) const1675 Shader::print_properties(std::ostream& os) const
1676 {
1677 do_print_properties(os);
1678 }
1679
1680 bool
equal_to(const Shader & other) const1681 Shader::equal_to(const Shader& other) const
1682 {
1683 if (m_root.size() != other.m_root.size())
1684 return false;
1685 return std::inner_product(
1686 m_root.begin(),
1687 m_root.end(),
1688 other.m_root.begin(),
1689 true,
1690 [](bool lhs, bool rhs) { return lhs & rhs; },
1691 [](const Block::Pointer lhs, const Block::Pointer rhs) -> bool {
1692 return lhs->is_equal_to(*rhs);
1693 });
1694 }
1695
1696 void
get_shader_info(r600_shader * sh_info)1697 Shader::get_shader_info(r600_shader *sh_info)
1698 {
1699 sh_info->ninput = m_inputs.size();
1700 sh_info->nlds = 0;
1701 int input_array_array_loc = 0;
1702 for (auto& [index, info] : m_inputs) {
1703 r600_shader_io& io = sh_info->input[input_array_array_loc++];
1704
1705 io.varying_slot = info.varying_slot();
1706 io.system_value = info.system_value();
1707 io.gpr = info.gpr();
1708 io.spi_sid = info.spi_sid();
1709 io.ij_index = info.ij_index();
1710 io.interpolate = info.interpolator();
1711 io.interpolate_location = info.interpolate_loc();
1712 if (info.need_lds_pos()) {
1713 io.lds_pos = info.lds_pos();
1714 sh_info->nlds = MAX2(unsigned(info.lds_pos() + 1), sh_info->nlds);
1715 } else {
1716 io.lds_pos = 0;
1717 }
1718
1719 io.ring_offset = info.ring_offset();
1720 io.uses_interpolate_at_centroid = info.uses_interpolate_at_centroid();
1721
1722 sfn_log << SfnLog::io << "Emit input [" << index << "]";
1723 if (io.varying_slot != NUM_TOTAL_VARYING_SLOTS)
1724 sfn_log << " varying_slot:" << static_cast<int>(io.varying_slot);
1725 if (io.system_value != SYSTEM_VALUE_MAX)
1726 sfn_log << " system_value:" << static_cast<int>(io.system_value);
1727 sfn_log << " spi_sid:" << io.spi_sid << "\n";
1728 assert(io.spi_sid >= 0);
1729 }
1730
1731 sh_info->noutput = m_outputs.size();
1732 /* VS is required to export at least one parameter. */
1733 sh_info->highest_export_param = 0;
1734 sh_info->num_loops = m_nloops;
1735 int output_array_array_loc = 0;
1736
1737 for (auto& [index, info] : m_outputs) {
1738 r600_shader_io& io = sh_info->output[output_array_array_loc++];
1739 io.varying_slot = info.varying_slot();
1740 io.frag_result = info.frag_result();
1741 io.gpr = info.gpr();
1742 io.spi_sid = info.spi_sid();
1743 io.write_mask = info.writemask();
1744 io.export_param = info.export_param();
1745 if (info.export_param() >= 0)
1746 sh_info->highest_export_param = MAX2(unsigned(info.export_param()),
1747 sh_info->highest_export_param);
1748
1749 sfn_log << SfnLog::io << "Emit output[" << index << "]";
1750 if (io.varying_slot != NUM_TOTAL_VARYING_SLOTS)
1751 sfn_log << " varying_slot:" << static_cast<int>(io.varying_slot);
1752 if (io.frag_result != static_cast<gl_frag_result>(FRAG_RESULT_MAX))
1753 sfn_log << " frag_result:" << static_cast<int>(io.frag_result);
1754 sfn_log << " spi_sid:" << io.spi_sid << " write_mask:" << io.write_mask << "\n";
1755 assert(io.spi_sid >= 0);
1756 }
1757
1758 sh_info->nhwatomic = m_nhwatomic;
1759 sh_info->atomic_base = m_atomic_base;
1760 sh_info->nhwatomic_ranges = m_atomics.size();
1761 for (unsigned i = 0; i < m_atomics.size(); ++i)
1762 sh_info->atomics[i] = m_atomics[i];
1763
1764 if (m_flags.test(sh_indirect_const_file))
1765 sh_info->indirect_files |= 1 << TGSI_FILE_CONSTANT;
1766
1767 if (m_flags.test(sh_indirect_atomic))
1768 sh_info->indirect_files |= 1 << TGSI_FILE_HW_ATOMIC;
1769
1770 sh_info->uses_tex_buffers = m_flags.test(sh_uses_tex_buffer);
1771
1772 value_factory().get_shader_info(sh_info);
1773
1774 sh_info->needs_scratch_space = m_flags.test(sh_needs_scratch_space);
1775 sh_info->uses_images = m_flags.test(sh_uses_images);
1776 sh_info->uses_atomics = m_flags.test(sh_uses_atomics);
1777 sh_info->disable_sb = m_flags.test(sh_disble_sb);
1778 sh_info->has_txq_cube_array_z_comp = m_flags.test(sh_txs_cube_array_comp);
1779 sh_info->indirect_files = m_indirect_files;
1780 do_get_shader_info(sh_info);
1781 }
1782
/* Return the register that holds the hw-atomic update value; only valid
 * after it has been set. */
PRegister
Shader::atomic_update()
{
   assert(m_atomic_update);
   return m_atomic_update;
}
1789
/* Map an atomic-counter base binding to its remapped hardware slot.
 * NOTE(review): map::operator[] default-inserts 0 for an unknown base -
 * callers are presumably expected to pass only bases registered earlier;
 * confirm before relying on this for unvalidated input. */
int
Shader::remap_atomic_base(int base)
{
   return m_atomic_base_map[base];
}
1795
/* Default stage hook called at the end of get_shader_info(); stages
 * override this to add stage-specific data. */
void
Shader::do_get_shader_info(r600_shader *sh_info)
{
   sh_info->uses_atomics = m_nhwatomic > 0;
}
1801
1802 const ShaderInput&
input(int base) const1803 Shader::input(int base) const
1804 {
1805 auto io = m_inputs.find(base);
1806 assert(io != m_inputs.end());
1807 return io->second;
1808 }
1809
1810 const ShaderOutput&
output(int base) const1811 Shader::output(int base) const
1812 {
1813 auto io = m_outputs.find(base);
1814 assert(io != m_outputs.end());
1815 return io->second;
1816 }
1817
1818 LiveRangeMap
prepare_live_range_map()1819 Shader::prepare_live_range_map()
1820 {
1821 return m_instr_factory->value_factory().prepare_live_range_map();
1822 }
1823
/* Replace the shader's top-level blocks with 'new_root'; the previous
 * blocks are handed back to the caller through the swap. */
void
Shader::reset_function(ShaderBlocks& new_root)
{
   std::swap(m_root, new_root);
}
1829
/* Run the stage-specific finalization (do_finalize override). */
void
Shader::finalize()
{
   do_finalize();
}
1835
/* Default finalization hook: stages without extra lowering do nothing. */
void
Shader::do_finalize()
{
}
1840
1841 } // namespace r600
1842