xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/r600/sfn/sfn_shader.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /* -*- mesa-c++  -*-
2  * Copyright 2022 Collabora LTD
3  * Author: Gert Wollny <[email protected]>
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "sfn_shader.h"
8 
9 #include "gallium/drivers/r600/r600_shader.h"
10 #include "nir.h"
11 #include "nir_intrinsics.h"
12 #include "nir_intrinsics_indices.h"
13 #include "sfn_debug.h"
14 #include "sfn_instr.h"
15 #include "sfn_instr_alu.h"
16 #include "sfn_instr_alugroup.h"
17 #include "sfn_instr_controlflow.h"
18 #include "sfn_instr_export.h"
19 #include "sfn_instr_fetch.h"
20 #include "sfn_instr_lds.h"
21 #include "sfn_instr_mem.h"
22 #include "sfn_instr_tex.h"
23 #include "sfn_liverangeevaluator.h"
24 #include "sfn_shader_cs.h"
25 #include "sfn_shader_fs.h"
26 #include "sfn_shader_gs.h"
27 #include "sfn_shader_tess.h"
28 #include "sfn_shader_vs.h"
29 #include "util/u_math.h"
30 
31 #include <numeric>
32 #include <sstream>
33 
34 namespace r600 {
35 
36 using std::string;
37 
38 void
print(std::ostream & os) const39 ShaderIO::print(std::ostream& os) const
40 {
41    os << m_type << " LOC:" << m_location;
42    if (m_varying_slot != NUM_TOTAL_VARYING_SLOTS)
43       os << " VARYING_SLOT:" << static_cast<int>(m_varying_slot);
44    if (m_no_varying)
45       os << " NO_VARYING";
46    do_print(os);
47 }
48 
49 int
spi_sid() const50 ShaderIO::spi_sid() const
51 {
52    if (no_varying())
53       return 0;
54 
55    switch (varying_slot()) {
56    case NUM_TOTAL_VARYING_SLOTS:
57    case VARYING_SLOT_POS:
58    case VARYING_SLOT_PSIZ:
59    case VARYING_SLOT_EDGE:
60    case VARYING_SLOT_FACE:
61    case VARYING_SLOT_CLIP_VERTEX:
62       return 0;
63    default:
64       static_assert(static_cast<int>(NUM_TOTAL_VARYING_SLOTS) <= 0x100 - 1,
65                     "All varying slots plus 1 must be usable as 8-bit SPI semantic IDs");
66       return static_cast<int>(varying_slot()) + 1;
67    }
68 }
69 
ShaderIO(const char * type,int loc,gl_varying_slot varying_slot)70 ShaderIO::ShaderIO(const char *type, int loc, gl_varying_slot varying_slot):
71     m_type(type),
72     m_location(loc),
73     m_varying_slot(varying_slot)
74 {
75 }
76 
ShaderOutput(int location,int writemask,gl_varying_slot varying_slot)77 ShaderOutput::ShaderOutput(int location, int writemask, gl_varying_slot varying_slot):
78     ShaderIO("OUTPUT", location, varying_slot),
79     m_writemask(writemask)
80 {
81 }
82 
ShaderOutput()83 ShaderOutput::ShaderOutput():
84     ShaderOutput(-1, 0)
85 {
86 }
87 
88 void
do_print(std::ostream & os) const89 ShaderOutput::do_print(std::ostream& os) const
90 {
91    if (m_frag_result != static_cast<gl_frag_result>(FRAG_RESULT_MAX))
92       os << " FRAG_RESULT:" << static_cast<int>(m_frag_result);
93    os << " MASK:" << m_writemask;
94 }
95 
ShaderInput(int location,gl_varying_slot varying_slot)96 ShaderInput::ShaderInput(int location, gl_varying_slot varying_slot):
97     ShaderIO("INPUT", location, varying_slot)
98 {
99 }
100 
ShaderInput()101 ShaderInput::ShaderInput():
102     ShaderInput(-1)
103 {
104 }
105 
106 void
do_print(std::ostream & os) const107 ShaderInput::do_print(std::ostream& os) const
108 {
109    if (m_system_value != SYSTEM_VALUE_MAX)
110       os << " SYSVALUE: " << static_cast<int>(m_system_value);
111    if (m_interpolator)
112       os << " INTERP:" << m_interpolator;
113    if (m_interpolate_loc)
114       os << " ILOC:" << m_interpolate_loc;
115    if (m_uses_interpolate_at_centroid)
116       os << " USE_CENTROID";
117 }
118 
119 void
set_interpolator(int interp,int interp_loc,bool uses_interpolate_at_centroid)120 ShaderInput::set_interpolator(int interp,
121                               int interp_loc,
122                               bool uses_interpolate_at_centroid)
123 {
124    m_interpolator = interp;
125    m_interpolate_loc = interp_loc;
126    m_uses_interpolate_at_centroid = uses_interpolate_at_centroid;
127 }
128 
129 void
set_uses_interpolate_at_centroid()130 ShaderInput::set_uses_interpolate_at_centroid()
131 {
132    m_uses_interpolate_at_centroid = true;
133 }
134 
135 int64_t Shader::s_next_shader_id = 1;
136 
Shader(const char * type_id,unsigned atomic_base)137 Shader::Shader(const char *type_id, unsigned atomic_base):
138     m_current_block(nullptr),
139     m_type_id(type_id),
140     m_chip_class(ISA_CC_R600),
141     m_next_block(0),
142     m_atomic_base(atomic_base),
143     m_shader_id(s_next_shader_id++)
144 {
145    m_instr_factory = new InstrFactory();
146    m_chain_instr.this_shader = this;
147    start_new_block(0);
148 }
149 
150 void
set_input_gpr(int driver_lcation,int gpr)151 Shader::set_input_gpr(int driver_lcation, int gpr)
152 {
153    auto i = m_inputs.find(driver_lcation);
154    assert(i != m_inputs.end());
155    i->second.set_gpr(gpr);
156 }
157 
158 bool
add_info_from_string(std::istream & is)159 Shader::add_info_from_string(std::istream& is)
160 {
161    std::string type;
162    is >> type;
163 
164    if (type == "CHIPCLASS")
165       return read_chipclass(is);
166    if (type == "FAMILY")
167       return read_family(is);
168    if (type == "OUTPUT")
169       return read_output(is);
170    if (type == "INPUT")
171       return read_input(is);
172    if (type == "PROP")
173       return read_prop(is);
174    if (type == "SYSVALUES")
175       return allocate_registers_from_string(is, pin_fully);
176    if (type == "REGISTERS")
177       return allocate_registers_from_string(is, pin_free);
178    if (type == "ARRAYS")
179       return allocate_arrays_from_string(is);
180 
181    return false;
182 }
183 
184 void
emit_instruction_from_string(const std::string & s)185 Shader::emit_instruction_from_string(const std::string& s)
186 {
187 
188    sfn_log << SfnLog::instr << "Create Instr from '" << s << "'\n";
189    if (s == "BLOCK_START") {
190       if (!m_current_block->empty()) {
191          start_new_block(m_current_block->nesting_offset());
192          sfn_log << SfnLog::instr << "   Emit start block\n";
193       }
194       return;
195    }
196 
197    if (s == "BLOCK_END") {
198       return;
199    }
200 
201    auto ir = m_instr_factory->from_string(s, m_current_block->nesting_depth(),
202                                           m_chip_class == ISA_CC_CAYMAN);
203    if (ir) {
204       emit_instruction(ir);
205       if (ir->end_block())
206          start_new_block(ir->nesting_offset());
207       sfn_log << SfnLog::instr << "   " << *ir << "\n";
208    }
209 }
210 
211 bool
read_output(std::istream & is)212 Shader::read_output(std::istream& is)
213 {
214    ShaderOutput output;
215 
216    std::string token;
217    for (is >> token; !token.empty(); token.clear(), is >> token) {
218       int value;
219       if (int_from_string_with_prefix_optional(token, "LOC:", value))
220          output.set_location(value);
221       else if (int_from_string_with_prefix_optional(token, "VARYING_SLOT:", value))
222          output.set_varying_slot(static_cast<gl_varying_slot>(value));
223       else if (token == "NO_VARYING")
224          output.set_no_varying(true);
225       else if (int_from_string_with_prefix_optional(token, "FRAG_RESULT:", value))
226          output.set_frag_result(static_cast<gl_frag_result>(value));
227       else if (int_from_string_with_prefix_optional(token, "MASK:", value))
228          output.set_writemask(value);
229       else {
230          std::cerr << "Unknown parse value '" << token << "'";
231          assert(!"Unknown parse value in read_output");
232       }
233    }
234 
235    add_output(output);
236    return true;
237 }
238 
239 bool
read_input(std::istream & is)240 Shader::read_input(std::istream& is)
241 {
242    ShaderInput input;
243 
244    int interp = 0;
245    int interp_loc = 0;
246    bool use_centroid = false;
247 
248    std::string token;
249    for (is >> token; !token.empty(); token.clear(), is >> token) {
250       int value;
251       if (int_from_string_with_prefix_optional(token, "LOC:", value))
252          input.set_location(value);
253       else if (int_from_string_with_prefix_optional(token, "VARYING_SLOT:", value))
254          input.set_varying_slot(static_cast<gl_varying_slot>(value));
255       else if (token == "NO_VARYING")
256          input.set_no_varying(true);
257       else if (int_from_string_with_prefix_optional(token, "SYSVALUE:", value))
258          input.set_system_value(static_cast<gl_system_value>(value));
259       else if (int_from_string_with_prefix_optional(token, "INTERP:", interp))
260          ;
261       else if (int_from_string_with_prefix_optional(token, "ILOC:", interp_loc))
262          ;
263       else if (token == "USE_CENTROID")
264          use_centroid = true;
265       else {
266          std::cerr << "Unknown parse value '" << token << "'";
267          assert(!"Unknown parse value in read_input");
268       }
269    }
270 
271    input.set_interpolator(interp, interp_loc, use_centroid);
272 
273    add_input(input);
274    return true;
275 }
276 
277 bool
allocate_registers_from_string(std::istream & is,Pin pin)278 Shader::allocate_registers_from_string(std::istream& is, Pin pin)
279 {
280    std::string line;
281    if (!std::getline(is, line))
282       return false;
283 
284    std::istringstream iline(line);
285 
286    while (!iline.eof()) {
287       string reg_str;
288       iline >> reg_str;
289 
290       if (reg_str.empty())
291          break;
292 
293       if (strchr(reg_str.c_str(), '@') ||
294           reg_str == "AR" ||
295           reg_str.substr(0,3) == "IDX") {
296          value_factory().dest_from_string(reg_str);
297       } else {
298          RegisterVec4::Swizzle swz = {0, 1, 2, 3};
299          auto regs = value_factory().dest_vec4_from_string(reg_str, swz, pin);
300          for (int i = 0; i < 4; ++i) {
301             if (swz[i] < 4 && pin == pin_fully) {
302                regs[i]->set_flag(Register::pin_start);
303             }
304          }
305       }
306    }
307    return true;
308 }
309 
310 bool
allocate_arrays_from_string(std::istream & is)311 Shader::allocate_arrays_from_string(std::istream& is)
312 {
313    std::string line;
314    if (!std::getline(is, line))
315       return false;
316 
317    std::istringstream iline(line);
318 
319    while (!iline.eof()) {
320       string reg_str;
321       iline >> reg_str;
322 
323       if (reg_str.empty())
324          break;
325 
326       value_factory().array_from_string(reg_str);
327    }
328    return true;
329 }
330 
331 bool
read_chipclass(std::istream & is)332 Shader::read_chipclass(std::istream& is)
333 {
334    string name;
335    is >> name;
336    if (name == "R600")
337       m_chip_class = ISA_CC_R600;
338    else if (name == "R700")
339       m_chip_class = ISA_CC_R700;
340    else if (name == "EVERGREEN")
341       m_chip_class = ISA_CC_EVERGREEN;
342    else if (name == "CAYMAN")
343       m_chip_class = ISA_CC_CAYMAN;
344    else
345       return false;
346    return true;
347 }
348 
349 bool
read_family(std::istream & is)350 Shader::read_family(std::istream& is)
351 {
352    string name;
353    is >> name;
354 #define CHECK_FAMILY(F) if (name == #F) m_chip_family = CHIP_ ## F
355 
356    CHECK_FAMILY(R600);
357    else CHECK_FAMILY(R600);
358    else CHECK_FAMILY(RV610);
359    else CHECK_FAMILY(RV630);
360    else CHECK_FAMILY(RV670);
361    else CHECK_FAMILY(RV620);
362    else CHECK_FAMILY(RV635);
363    else CHECK_FAMILY(RS780);
364    else CHECK_FAMILY(RS880);
365    /* GFX3 (R7xx) */
366    else CHECK_FAMILY(RV770);
367    else CHECK_FAMILY(RV730);
368    else CHECK_FAMILY(RV710);
369    else CHECK_FAMILY(RV740);
370    /* GFX4 (Evergreen) */
371    else CHECK_FAMILY(CEDAR);
372    else CHECK_FAMILY(REDWOOD);
373    else CHECK_FAMILY(JUNIPER);
374    else CHECK_FAMILY(CYPRESS);
375    else CHECK_FAMILY(HEMLOCK);
376    else CHECK_FAMILY(PALM);
377    else CHECK_FAMILY(SUMO);
378    else CHECK_FAMILY(SUMO2);
379    else CHECK_FAMILY(BARTS);
380    else CHECK_FAMILY(TURKS);
381    else CHECK_FAMILY(CAICOS);
382    /* GFX5 (Northern Islands) */
383    else CHECK_FAMILY(CAYMAN);
384    else CHECK_FAMILY(ARUBA);
385    else
386       return false;
387    return true;
388 }
389 
390 void
allocate_reserved_registers()391 Shader::allocate_reserved_registers()
392 {
393    m_instr_factory->value_factory().set_virtual_register_base(0);
394    auto reserved_registers_end = do_allocate_reserved_registers();
395    m_instr_factory->value_factory().set_virtual_register_base(reserved_registers_end);
396    if (!m_atomics.empty()) {
397       m_atomic_update = value_factory().temp_register();
398       auto alu = new AluInstr(op1_mov,
399                               m_atomic_update,
400                               value_factory().one_i(),
401                               AluInstr::last_write);
402       alu->set_alu_flag(alu_no_schedule_bias);
403       emit_instruction(alu);
404    }
405 
406    if (m_flags.test(sh_needs_sbo_ret_address)) {
407       m_rat_return_address = value_factory().temp_register(0);
408       auto temp0 = value_factory().temp_register(0);
409       auto temp1 = value_factory().temp_register(1);
410       auto temp2 = value_factory().temp_register(2);
411 
412       auto group = new AluGroup();
413       group->add_instruction(new AluInstr(
414          op1_mbcnt_32lo_accum_prev_int, temp0, value_factory().literal(-1), {alu_write}));
415       group->add_instruction(new AluInstr(
416          op1_mbcnt_32hi_int, temp1, value_factory().literal(-1), {alu_write}));
417       emit_instruction(group);
418       emit_instruction(new AluInstr(op3_muladd_uint24,
419                                     temp2,
420                                     value_factory().inline_const(ALU_SRC_SE_ID, 0),
421                                     value_factory().literal(256),
422                                     value_factory().inline_const(ALU_SRC_HW_WAVE_ID, 0),
423                                     {alu_write, alu_last_instr}));
424       emit_instruction(new AluInstr(op3_muladd_uint24,
425                                     m_rat_return_address,
426                                     temp2,
427                                     value_factory().literal(0x40),
428                                     temp0,
429                                     {alu_write, alu_last_instr}));
430    }
431 }
432 
433 Shader *
translate_from_nir(nir_shader * nir,const pipe_stream_output_info * so_info,struct r600_shader * gs_shader,const r600_shader_key & key,r600_chip_class chip_class,radeon_family family)434 Shader::translate_from_nir(nir_shader *nir,
435                            const pipe_stream_output_info *so_info,
436                            struct r600_shader *gs_shader,
437                            const r600_shader_key& key,
438                            r600_chip_class chip_class,
439                            radeon_family family)
440 {
441    Shader *shader = nullptr;
442 
443    switch (nir->info.stage) {
444    case MESA_SHADER_FRAGMENT:
445       if (chip_class >= ISA_CC_EVERGREEN)
446          shader = new FragmentShaderEG(key);
447       else
448          shader = new FragmentShaderR600(key);
449       break;
450    case MESA_SHADER_VERTEX:
451       shader = new VertexShader(so_info, gs_shader, key);
452       break;
453    case MESA_SHADER_GEOMETRY:
454       shader = new GeometryShader(key);
455       break;
456    case MESA_SHADER_TESS_CTRL:
457       shader = new TCSShader(key);
458       break;
459    case MESA_SHADER_TESS_EVAL:
460       shader = new TESShader(so_info, gs_shader, key);
461       break;
462    case MESA_SHADER_KERNEL:
463    case MESA_SHADER_COMPUTE:
464       shader = new ComputeShader(key, BITSET_COUNT(nir->info.samplers_used));
465       break;
466    default:
467       return nullptr;
468    }
469 
470    shader->set_info(nir);
471 
472    shader->set_chip_class(chip_class);
473    shader->set_chip_family(family);
474 
475    if (!shader->process(nir))
476       return nullptr;
477 
478    return shader;
479 }
480 
481 void
set_info(nir_shader * nir)482 Shader::set_info(nir_shader *nir)
483 {
484    m_scratch_size = nir->scratch_size;
485 }
486 
487 ValueFactory&
value_factory()488 Shader::value_factory()
489 {
490    return m_instr_factory->value_factory();
491 }
492 
493 bool
process(nir_shader * nir)494 Shader::process(nir_shader *nir)
495 {
496    m_ssbo_image_offset = nir->info.num_images;
497 
498    if (nir->info.use_legacy_math_rules)
499       set_flag(sh_legacy_math_rules);
500 
501    nir_foreach_uniform_variable(var, nir) scan_uniforms(var);
502 
503    // at this point all functions should be inlined
504    const nir_function *func =
505       reinterpret_cast<const nir_function *>(exec_list_get_head_const(&nir->functions));
506 
507    if (!scan_shader(func))
508       return false;
509 
510    allocate_reserved_registers();
511 
512    value_factory().allocate_registers(m_register_allocations);
513    m_required_registers = value_factory().array_registers();
514 
515    sfn_log << SfnLog::trans << "Process shader \n";
516    foreach_list_typed(nir_cf_node, node, node, &func->impl->body)
517    {
518       if (!process_cf_node(node))
519          return false;
520    }
521 
522    finalize();
523 
524    return true;
525 }
526 
527 bool
scan_shader(const nir_function * func)528 Shader::scan_shader(const nir_function *func)
529 {
530 
531    nir_foreach_block(block, func->impl)
532    {
533       nir_foreach_instr(instr, block)
534       {
535          if (!scan_instruction(instr)) {
536             fprintf(stderr, "Unhandled sysvalue access ");
537             nir_print_instr(instr, stderr);
538             fprintf(stderr, "\n");
539             return false;
540          }
541       }
542    }
543 
544    int lds_pos = 0;
545    for (auto& [index, input] : m_inputs) {
546       if (input.need_lds_pos()) {
547          if (chip_class() < ISA_CC_EVERGREEN)
548             input.set_gpr(lds_pos);
549          input.set_lds_pos(lds_pos++);
550       }
551    }
552 
553    int export_param = 0;
554    for (auto& [index, out] : m_outputs) {
555       if (out.spi_sid())
556          out.set_export_param(export_param++);
557    }
558 
559    return true;
560 }
561 
562 bool
scan_uniforms(nir_variable * uniform)563 Shader::scan_uniforms(nir_variable *uniform)
564 {
565    if (glsl_contains_atomic(uniform->type)) {
566       int natomics = glsl_atomic_size(uniform->type) / 4; /* ATOMIC_COUNTER_SIZE */
567       m_nhwatomic += natomics;
568 
569       if (glsl_type_is_array(uniform->type))
570          m_indirect_files |= 1 << TGSI_FILE_HW_ATOMIC;
571 
572       m_flags.set(sh_uses_atomics);
573 
574       r600_shader_atomic atom = {0};
575 
576       atom.buffer_id = uniform->data.binding;
577       atom.hw_idx = m_atomic_base + m_next_hwatomic_loc;
578 
579       atom.start = uniform->data.offset >> 2;
580       atom.end = atom.start + natomics - 1;
581 
582       if (m_atomic_base_map.find(uniform->data.binding) == m_atomic_base_map.end())
583          m_atomic_base_map[uniform->data.binding] = m_next_hwatomic_loc;
584 
585       m_next_hwatomic_loc += natomics;
586 
587       m_atomic_file_count += atom.end - atom.start + 1;
588 
589       sfn_log << SfnLog::io << "HW_ATOMIC file count: " << m_atomic_file_count << "\n";
590 
591       m_atomics.push_back(atom);
592    }
593 
594    auto type = glsl_without_array(uniform->type);
595    if (glsl_type_is_image(type) || uniform->data.mode == nir_var_mem_ssbo) {
596       m_flags.set(sh_uses_images);
597       if (glsl_type_is_array(uniform->type) && !(uniform->data.mode == nir_var_mem_ssbo))
598          m_indirect_files |= 1 << TGSI_FILE_IMAGE;
599    }
600 
601    return true;
602 }
603 
604 bool
scan_instruction(nir_instr * instr)605 Shader::scan_instruction(nir_instr *instr)
606 {
607    if (do_scan_instruction(instr))
608       return true;
609 
610    if (instr->type != nir_instr_type_intrinsic)
611       return true;
612 
613    auto intr = nir_instr_as_intrinsic(instr);
614 
615    // handle unhandled instructions
616    switch (intr->intrinsic) {
617    case nir_intrinsic_ssbo_atomic:
618    case nir_intrinsic_ssbo_atomic_swap:
619    case nir_intrinsic_image_load:
620    case nir_intrinsic_image_atomic:
621    case nir_intrinsic_image_atomic_swap:
622       m_flags.set(sh_needs_sbo_ret_address);
623       FALLTHROUGH;
624    case nir_intrinsic_image_store:
625    case nir_intrinsic_store_ssbo:
626       m_flags.set(sh_writes_memory);
627       m_flags.set(sh_uses_images);
628       break;
629    case nir_intrinsic_barrier:
630       m_chain_instr.prepare_mem_barrier |=
631             (nir_intrinsic_memory_modes(intr) &
632              (nir_var_mem_ssbo | nir_var_mem_global | nir_var_image) &&
633              nir_intrinsic_memory_scope(intr) != SCOPE_NONE);
634       break;
635    case nir_intrinsic_decl_reg:
636       m_register_allocations.push_back(intr);
637       break;
638    default:;
639    }
640    return true;
641 }
642 
643 bool
process_cf_node(nir_cf_node * node)644 Shader::process_cf_node(nir_cf_node *node)
645 {
646    SFN_TRACE_FUNC(SfnLog::flow, "CF");
647 
648    switch (node->type) {
649    case nir_cf_node_block:
650       return process_block(nir_cf_node_as_block(node));
651    case nir_cf_node_if:
652       return process_if(nir_cf_node_as_if(node));
653    case nir_cf_node_loop:
654       return process_loop(nir_cf_node_as_loop(node));
655    default:
656       return false;
657    }
658 }
659 
660 static bool
child_block_empty(const exec_list & list)661 child_block_empty(const exec_list& list)
662 {
663    if (list.is_empty())
664       return true;
665 
666    bool result = true;
667 
668    foreach_list_typed(nir_cf_node, n, node, &list)
669    {
670 
671       if (n->type == nir_cf_node_block) {
672          if (!nir_cf_node_as_block(n)->instr_list.is_empty())
673             return false;
674       }
675       if (n->type == nir_cf_node_if)
676          return false;
677    }
678    return result;
679 }
680 
value_has_non_const_source(VirtualValue * value)681 static bool value_has_non_const_source(VirtualValue *value)
682 {
683    auto reg = value->as_register();
684    if (reg) {
685       // Non-ssa registers are probably the result of some control flow
686       // that makes the values non-uniform across the work group
687       if (!reg->has_flag(Register::ssa))
688          return true;
689 
690       for (const auto& p : reg->parents()) {
691          auto alu = p->as_alu();
692          if (alu) {
693             for (auto& s : p->as_alu()->sources()) {
694                return value_has_non_const_source(s);
695             }
696          } else {
697             return true;
698          }
699       }
700    }
701    return false;
702 }
703 
704 bool
process_if(nir_if * if_stmt)705 Shader::process_if(nir_if *if_stmt)
706 {
707    SFN_TRACE_FUNC(SfnLog::flow, "IF");
708 
709    auto value = value_factory().src(if_stmt->condition, 0);
710 
711    bool non_const_cond = value_has_non_const_source(value);
712 
713    EAluOp op = child_block_empty(if_stmt->then_list) ? op2_prede_int :
714                                                        op2_pred_setne_int;
715 
716    AluInstr *pred = new AluInstr(op,
717                                  value_factory().temp_register(),
718                                  value,
719                                  value_factory().zero(),
720                                  AluInstr::last);
721    pred->set_alu_flag(alu_update_exec);
722    pred->set_alu_flag(alu_update_pred);
723    pred->set_cf_type(cf_alu_push_before);
724 
725    IfInstr *ir = new IfInstr(pred);
726    emit_instruction(ir);
727    if (non_const_cond)
728       ++m_control_flow_depth;
729    start_new_block(1);
730 
731    if (!child_block_empty(if_stmt->then_list)) {
732       foreach_list_typed(nir_cf_node, n, node, &if_stmt->then_list)
733       {
734          SFN_TRACE_FUNC(SfnLog::flow, "IF-then");
735          if (!process_cf_node(n))
736             return false;
737       }
738       if (!child_block_empty(if_stmt->else_list)) {
739          if (!emit_control_flow(ControlFlowInstr::cf_else))
740             return false;
741          foreach_list_typed(nir_cf_node,
742                             n,
743                             node,
744                             &if_stmt->else_list)
745                if (!process_cf_node(n)) return false;
746       }
747    } else {
748       assert(!child_block_empty(if_stmt->else_list));
749       foreach_list_typed(nir_cf_node,
750                          n,
751                          node,
752                          &if_stmt->else_list)
753             if (!process_cf_node(n)) return false;
754    }
755 
756    if (!emit_control_flow(ControlFlowInstr::cf_endif))
757       return false;
758 
759    if (non_const_cond)
760       --m_control_flow_depth;
761 
762    return true;
763 }
764 
765 bool
emit_control_flow(ControlFlowInstr::CFType type)766 Shader::emit_control_flow(ControlFlowInstr::CFType type)
767 {
768    auto ir = new ControlFlowInstr(type);
769    emit_instruction(ir);
770    int depth = 0;
771    switch (type) {
772    case ControlFlowInstr::cf_loop_begin:
773       m_loops.push_back(ir);
774       m_nloops++;
775       depth = 1;
776       break;
777    case ControlFlowInstr::cf_loop_end:
778       m_loops.pop_back();
779       FALLTHROUGH;
780    case ControlFlowInstr::cf_endif:
781       depth = -1;
782       break;
783    default:;
784    }
785 
786    start_new_block(depth);
787    return true;
788 }
789 
790 bool
process_loop(nir_loop * node)791 Shader::process_loop(nir_loop *node)
792 {
793    assert(!nir_loop_has_continue_construct(node));
794    SFN_TRACE_FUNC(SfnLog::flow, "LOOP");
795    if (!emit_control_flow(ControlFlowInstr::cf_loop_begin))
796       return false;
797 
798    foreach_list_typed(nir_cf_node,
799                       n,
800                       node,
801                       &node->body) if (!process_cf_node(n)) return false;
802 
803    if (!emit_control_flow(ControlFlowInstr::cf_loop_end))
804       return false;
805 
806    return true;
807 }
808 
809 bool
process_block(nir_block * block)810 Shader::process_block(nir_block *block)
811 {
812    SFN_TRACE_FUNC(SfnLog::flow, "BLOCK");
813 
814    nir_foreach_instr(instr, block)
815    {
816       sfn_log << SfnLog::instr << "FROM:" << *instr << "\n";
817       bool r = process_instr(instr);
818       if (!r) {
819          sfn_log << SfnLog::err << "R600: Unsupported instruction: " << *instr << "\n";
820          return false;
821       }
822    }
823    return true;
824 }
825 
826 bool
process_instr(nir_instr * instr)827 Shader::process_instr(nir_instr *instr)
828 {
829    return m_instr_factory->from_nir(instr, *this);
830 }
831 
832 bool
emit_tex_fdd(const nir_intrinsic_instr * intr,int opcode,bool fine)833 Shader::emit_tex_fdd(const nir_intrinsic_instr* intr, int opcode, bool fine)
834 {
835    auto& value_factory_ = value_factory();
836 
837    int ncomp = intr->def.num_components;
838    RegisterVec4::Swizzle src_swz = {7, 7, 7, 7};
839    RegisterVec4::Swizzle tmp_swz = {7, 7, 7, 7};
840    for (auto i = 0; i < ncomp; ++i) {
841       src_swz[i] = i;
842       tmp_swz[i] = i;
843    }
844 
845    auto src = value_factory_.src_vec4(intr->src[0], pin_none, src_swz);
846 
847    auto tmp = value_factory_.temp_vec4(pin_group, tmp_swz);
848    AluInstr *mv = nullptr;
849    for (int i = 0; i < ncomp; ++i) {
850       mv = new AluInstr(op1_mov, tmp[i], src[i], AluInstr::write);
851       emit_instruction(mv);
852    }
853    if (mv)
854       mv->set_alu_flag(alu_last_instr);
855 
856    auto dst = value_factory_.dest_vec4(intr->def, pin_group);
857    RegisterVec4::Swizzle dst_swz = {7, 7, 7, 7};
858    for (auto i = 0; i < ncomp; ++i) {
859       dst_swz[i] = i;
860    }
861 
862    auto tex = new TexInstr((TexInstr::Opcode)opcode, dst, dst_swz, tmp, R600_MAX_CONST_BUFFERS, nullptr);
863 
864    if (fine)
865       tex->set_tex_flag(TexInstr::grad_fine);
866 
867    emit_instruction(tex);
868 
869    return true;
870 }
871 
872 bool
process_intrinsic(nir_intrinsic_instr * intr)873 Shader::process_intrinsic(nir_intrinsic_instr *intr)
874 {
875    if (process_stage_intrinsic(intr))
876       return true;
877 
878    if (GDSInstr::emit_atomic_counter(intr, *this)) {
879       set_flag(sh_writes_memory);
880       return true;
881    }
882 
883    if (RatInstr::emit(intr, *this))
884       return true;
885 
886    switch (intr->intrinsic) {
887    case nir_intrinsic_store_output:
888       return store_output(intr);
889    case nir_intrinsic_load_input:
890       return load_input(intr);
891    case nir_intrinsic_load_ubo_vec4:
892       return load_ubo(intr);
893    case nir_intrinsic_store_scratch:
894       return emit_store_scratch(intr);
895    case nir_intrinsic_load_scratch:
896       return emit_load_scratch(intr);
897    case nir_intrinsic_store_local_shared_r600:
898       return emit_local_store(intr);
899    case nir_intrinsic_load_global:
900    case nir_intrinsic_load_global_constant:
901       return emit_load_global(intr);
902    case nir_intrinsic_load_local_shared_r600:
903       return emit_local_load(intr);
904    case nir_intrinsic_load_tcs_in_param_base_r600:
905       return emit_load_tcs_param_base(intr, 0);
906    case nir_intrinsic_load_tcs_out_param_base_r600:
907       return emit_load_tcs_param_base(intr, 16);
908    case nir_intrinsic_barrier:
909       return emit_barrier(intr);
910    case nir_intrinsic_shared_atomic:
911    case nir_intrinsic_shared_atomic_swap:
912       return emit_atomic_local_shared(intr);
913    case nir_intrinsic_shader_clock:
914       return emit_shader_clock(intr);
915    case nir_intrinsic_ddx:
916    case nir_intrinsic_ddx_coarse:
917       return emit_tex_fdd(intr, TexInstr::get_gradient_h, false);
918    case nir_intrinsic_ddx_fine:
919       return emit_tex_fdd(intr, TexInstr::get_gradient_h, true);
920    case nir_intrinsic_ddy:
921    case nir_intrinsic_ddy_coarse:
922       return emit_tex_fdd(intr, TexInstr::get_gradient_v, false);
923    case nir_intrinsic_ddy_fine:
924       return emit_tex_fdd(intr, TexInstr::get_gradient_v, true);
925    case nir_intrinsic_load_reg:
926       return emit_load_reg(intr);
927    case nir_intrinsic_load_reg_indirect:
928       return emit_load_reg_indirect(intr);
929    case nir_intrinsic_store_reg:
930       return emit_store_reg(intr);
931    case nir_intrinsic_store_reg_indirect:
932       return emit_store_reg_indirect(intr);
933    case nir_intrinsic_decl_reg:
934       // Registers and arrays are allocated at
935       // conversion startup time
936       return true;
937    default:
938       return false;
939    }
940 }
941 
942 static ESDOp
lds_op_from_intrinsic(nir_atomic_op op,bool ret)943 lds_op_from_intrinsic(nir_atomic_op op, bool ret)
944 {
945    switch (op) {
946    case nir_atomic_op_iadd:
947       return ret ? LDS_ADD_RET : LDS_ADD;
948    case nir_atomic_op_iand:
949       return ret ? LDS_AND_RET : LDS_AND;
950    case nir_atomic_op_ior:
951       return ret ? LDS_OR_RET : LDS_OR;
952    case nir_atomic_op_imax:
953       return ret ? LDS_MAX_INT_RET : LDS_MAX_INT;
954    case nir_atomic_op_umax:
955       return ret ? LDS_MAX_UINT_RET : LDS_MAX_UINT;
956    case nir_atomic_op_imin:
957       return ret ? LDS_MIN_INT_RET : LDS_MIN_INT;
958    case nir_atomic_op_umin:
959       return ret ? LDS_MIN_UINT_RET : LDS_MIN_UINT;
960    case nir_atomic_op_ixor:
961       return ret ? LDS_XOR_RET : LDS_XOR;
962    case nir_atomic_op_xchg:
963       return LDS_XCHG_RET;
964    case nir_atomic_op_cmpxchg:
965       return LDS_CMP_XCHG_RET;
966    default:
967       unreachable("Unsupported shared atomic_op opcode");
968    }
969 }
970 
971 PRegister
emit_load_to_register(PVirtualValue src,int chan)972 Shader::emit_load_to_register(PVirtualValue src, int chan)
973 {
974    assert(src);
975    PRegister dest = src->as_register();
976 
977    if (!dest || chan >= 0) {
978       dest = value_factory().temp_register(chan);
979       dest->set_pin(pin_free);
980       emit_instruction(new AluInstr(op1_mov, dest, src, AluInstr::last_write));
981    }
982    return dest;
983 }
984 
985 // add visitor to resolve array and register
986 class RegisterAccessHandler : public RegisterVisitor {
987 
988 public:
989    RegisterAccessHandler(Shader& shader, nir_intrinsic_instr *intr);
990 
visit(LocalArrayValue & value)991    void visit(LocalArrayValue& value) override {(void)value; assert(0);}
visit(UniformValue & value)992    void visit(UniformValue& value) override {(void)value; assert(0);}
visit(LiteralConstant & value)993    void visit(LiteralConstant& value) override {(void)value; assert(0);}
visit(InlineConstant & value)994    void visit(InlineConstant& value) override {(void)value; assert(0);}
995 
996    Shader& sh;
997    nir_intrinsic_instr *ir;
998    PVirtualValue addr{nullptr};
999    bool success{true};
1000 };
1001 
1002 class RegisterReadHandler : public RegisterAccessHandler {
1003 
1004 public:
1005    using RegisterAccessHandler::RegisterAccessHandler;
1006    using RegisterAccessHandler::visit;
1007 
1008    void visit(LocalArray& value) override;
1009    void visit(Register& value) override;
1010 };
1011 
emit_load_reg(nir_intrinsic_instr * intr)1012 bool Shader::emit_load_reg(nir_intrinsic_instr *intr)
1013 {
1014    RegisterReadHandler visitor(*this, intr);
1015    auto handle = value_factory().src(intr->src[0], 0);
1016    handle->accept(visitor);
1017    return visitor.success;
1018 }
1019 
emit_load_reg_indirect(nir_intrinsic_instr * intr)1020 bool Shader::emit_load_reg_indirect(nir_intrinsic_instr *intr)
1021 {
1022    RegisterReadHandler visitor(*this, intr);
1023    visitor.addr =  value_factory().src(intr->src[1], 0);
1024    auto handle = value_factory().src(intr->src[0], 0);
1025    handle->accept(visitor);
1026    return visitor.success;
1027 }
1028 
1029 class RegisterWriteHandler : public RegisterAccessHandler {
1030 
1031 public:
1032    using RegisterAccessHandler::RegisterAccessHandler;
1033    using RegisterAccessHandler::visit;
1034 
1035    void visit(LocalArray& value) override;
1036    void visit(Register& value) override;
1037 };
1038 
1039 
emit_store_reg(nir_intrinsic_instr * intr)1040 bool Shader::emit_store_reg(nir_intrinsic_instr *intr)
1041 {
1042    RegisterWriteHandler visitor(*this, intr);
1043    auto handle = value_factory().src(intr->src[1], 0);
1044    handle->accept(visitor);
1045    return visitor.success;
1046 }
1047 
emit_store_reg_indirect(nir_intrinsic_instr * intr)1048 bool Shader::emit_store_reg_indirect(nir_intrinsic_instr *intr)
1049 {
1050    RegisterWriteHandler visitor(*this, intr);
1051    visitor.addr =  value_factory().src(intr->src[2], 0);
1052 
1053    auto handle = value_factory().src(intr->src[1], 0);
1054    handle->accept(visitor);
1055    return visitor.success;
1056 }
1057 
RegisterAccessHandler(Shader & shader,nir_intrinsic_instr * intr)1058 RegisterAccessHandler::RegisterAccessHandler(Shader& shader, nir_intrinsic_instr *intr):
1059    sh(shader),
1060    ir(intr)
1061 {}
1062 
visit(LocalArray & array)1063 void RegisterReadHandler::visit(LocalArray& array)
1064 {
1065    int slots =  ir->def.bit_size / 32;
1066    auto pin = ir->def.num_components > 1 ? pin_none : pin_free;
1067    for (int i = 0; i < ir->def.num_components; ++i) {
1068       for (int s = 0; s < slots; ++s) {
1069          int chan = i * slots + s;
1070          auto dest = sh.value_factory().dest(ir->def, chan, pin);
1071          auto src = array.element(nir_intrinsic_base(ir), addr, chan);
1072          sh.emit_instruction(new AluInstr(op1_mov, dest, src, AluInstr::write));
1073       }
1074    }
1075 }
1076 
visit(Register & reg)1077 void RegisterReadHandler::visit(Register& reg)
1078 {
1079    auto dest = sh.value_factory().dest(ir->def, 0, pin_free);
1080    sh.emit_instruction(new AluInstr(op1_mov, dest, &reg, AluInstr::write));
1081 }
1082 
visit(LocalArray & array)1083 void RegisterWriteHandler::visit(LocalArray& array)
1084 {
1085    int writemask = nir_intrinsic_write_mask(ir);
1086    int slots =  ir->src->ssa->bit_size / 32;
1087 
1088    for (int i = 0; i < ir->num_components; ++i) {
1089       if (!(writemask & (1 << i)))
1090          continue;
1091       for (int s = 0; s < slots; ++s) {
1092          int chan = i * slots + s;
1093 
1094          auto dest = array.element(nir_intrinsic_base(ir), addr, chan);
1095          auto src = sh.value_factory().src(ir->src[0], chan);
1096          sh.emit_instruction(new AluInstr(op1_mov, dest, src, AluInstr::write));
1097       }
1098    }
1099 }
1100 
visit(Register & dest)1101 void RegisterWriteHandler::visit(Register& dest)
1102 {
1103    int writemask = nir_intrinsic_write_mask(ir);
1104    assert(writemask == 1);
1105    auto src = sh.value_factory().src(ir->src[0], 0);
1106    sh.emit_instruction(new AluInstr(op1_mov, &dest, src, AluInstr::write));
1107 }
1108 
1109 bool
emit_atomic_local_shared(nir_intrinsic_instr * instr)1110 Shader::emit_atomic_local_shared(nir_intrinsic_instr *instr)
1111 {
1112    bool uses_retval = !list_is_empty(&instr->def.uses);
1113 
1114    auto& vf = value_factory();
1115 
1116    auto dest_value = uses_retval ? vf.dest(instr->def, 0, pin_free) : nullptr;
1117 
1118    auto op = lds_op_from_intrinsic(nir_intrinsic_atomic_op(instr), uses_retval);
1119 
1120    /* For these two instructions we don't have opcodes that don't read back
1121     * the result, so we have to add a dummy-readback to remove the the return
1122     * value from read queue. */
1123    if (!uses_retval &&
1124        (op == LDS_XCHG_RET || op == LDS_CMP_XCHG_RET)) {
1125       dest_value = vf.dest(instr->def, 0, pin_free);
1126    }
1127 
1128    auto address = vf.src(instr->src[0], 0);
1129 
1130    AluInstr::SrcValues src;
1131    src.push_back(vf.src(instr->src[1], 0));
1132 
1133    if (unlikely(instr->intrinsic == nir_intrinsic_shared_atomic_swap))
1134       src.push_back(vf.src(instr->src[2], 0));
1135    emit_instruction(new LDSAtomicInstr(op, dest_value, address, src));
1136    return true;
1137 }
1138 
1139 auto
evaluate_resource_offset(nir_intrinsic_instr * instr,int src_id)1140 Shader::evaluate_resource_offset(nir_intrinsic_instr *instr, int src_id)
1141    -> std::pair<int, PRegister>
1142 {
1143    auto& vf = value_factory();
1144 
1145    PRegister uav_id{nullptr};
1146    int offset = nir_intrinsic_has_range_base(instr) ?
1147                    nir_intrinsic_range_base(instr) : 0;
1148 
1149    auto uav_id_const = nir_src_as_const_value(instr->src[src_id]);
1150    if (uav_id_const) {
1151       offset += uav_id_const->u32;
1152    } else {
1153       auto uav_id_val = vf.src(instr->src[src_id], 0);
1154       if (uav_id_val->as_register()) {
1155          uav_id = uav_id_val->as_register();
1156       } else {
1157          uav_id = vf.temp_register();
1158          emit_instruction(new AluInstr(op1_mov, uav_id, uav_id_val, AluInstr::last_write));
1159       }
1160    }
1161    return std::make_pair(offset, uav_id);
1162 }
1163 
1164 bool
emit_store_scratch(nir_intrinsic_instr * intr)1165 Shader::emit_store_scratch(nir_intrinsic_instr *intr)
1166 {
1167    auto& vf = m_instr_factory->value_factory();
1168 
1169    int writemask = nir_intrinsic_write_mask(intr);
1170 
1171    RegisterVec4::Swizzle swz = {7, 7, 7, 7};
1172 
1173    for (unsigned i = 0; i < intr->num_components; ++i)
1174       swz[i] = (1 << i) & writemask ? i : 7;
1175 
1176    auto value = vf.temp_vec4(pin_group, swz);
1177    AluInstr *ir = nullptr;
1178    for (unsigned i = 0; i < intr->num_components; ++i) {
1179       if (value[i]->chan() < 4) {
1180          ir = new AluInstr(op1_mov, value[i], vf.src(intr->src[0], i), AluInstr::write);
1181          ir->set_alu_flag(alu_no_schedule_bias);
1182          emit_instruction(ir);
1183       }
1184    }
1185    if (!ir)
1186       return true;
1187 
1188    ir->set_alu_flag(alu_last_instr);
1189 
1190    auto address = vf.src(intr->src[1], 0);
1191 
1192    int align = nir_intrinsic_align_mul(intr);
1193    int align_offset = nir_intrinsic_align_offset(intr);
1194 
1195    ScratchIOInstr *ws_ir = nullptr;
1196 
1197    int offset = -1;
1198    if (address->as_literal()) {
1199       offset = address->as_literal()->value();
1200    } else if (address->as_inline_const()) {
1201       auto il = address->as_inline_const();
1202       if (il->sel() == ALU_SRC_0)
1203          offset = 0;
1204       else if (il->sel() == ALU_SRC_1_INT)
1205          offset = 1;
1206    }
1207 
1208    if (offset >= 0) {
1209       ws_ir = new ScratchIOInstr(value, offset, align, align_offset, writemask);
1210    } else {
1211       auto addr_temp = vf.temp_register(0);
1212       auto load_addr = new AluInstr(op1_mov, addr_temp, address, AluInstr::last_write);
1213       load_addr->set_alu_flag(alu_no_schedule_bias);
1214       emit_instruction(load_addr);
1215 
1216       ws_ir = new ScratchIOInstr(
1217          value, addr_temp, align, align_offset, writemask, m_scratch_size);
1218    }
1219    emit_instruction(ws_ir);
1220 
1221    m_flags.set(sh_needs_scratch_space);
1222    return true;
1223 }
1224 
1225 bool
emit_load_scratch(nir_intrinsic_instr * intr)1226 Shader::emit_load_scratch(nir_intrinsic_instr *intr)
1227 {
1228    auto addr = value_factory().src(intr->src[0], 0);
1229    auto dest = value_factory().dest_vec4(intr->def, pin_group);
1230 
1231    if (chip_class() >= ISA_CC_R700) {
1232       RegisterVec4::Swizzle dest_swz = {7, 7, 7, 7};
1233 
1234       for (unsigned i = 0; i < intr->num_components; ++i)
1235          dest_swz[i] = i;
1236 
1237       auto *ir = new LoadFromScratch(dest, dest_swz, addr, m_scratch_size);
1238       emit_instruction(ir);
1239       chain_scratch_read(ir);
1240    } else {
1241       int align = nir_intrinsic_align_mul(intr);
1242       int align_offset = nir_intrinsic_align_offset(intr);
1243 
1244       int offset = -1;
1245       if (addr->as_literal()) {
1246          offset = addr->as_literal()->value();
1247       } else if (addr->as_inline_const()) {
1248          auto il = addr->as_inline_const();
1249          if (il->sel() == ALU_SRC_0)
1250             offset = 0;
1251          else if (il->sel() == ALU_SRC_1_INT)
1252             offset = 1;
1253       }
1254 
1255       ScratchIOInstr *ir = nullptr;
1256       if (offset >= 0) {
1257          ir = new ScratchIOInstr(dest, offset, align, align_offset, 0xf, true);
1258       } else {
1259          auto addr_temp = value_factory().temp_register(0);
1260          auto load_addr = new AluInstr(op1_mov, addr_temp, addr, AluInstr::last_write);
1261          load_addr->set_alu_flag(alu_no_schedule_bias);
1262          emit_instruction(load_addr);
1263 
1264          ir = new ScratchIOInstr(
1265             dest, addr_temp, align, align_offset, 0xf, m_scratch_size, true);
1266       }
1267       emit_instruction(ir);
1268    }
1269 
1270    m_flags.set(sh_needs_scratch_space);
1271 
1272    return true;
1273 }
1274 
emit_load_global(nir_intrinsic_instr * intr)1275 bool Shader::emit_load_global(nir_intrinsic_instr *intr)
1276 {
1277    auto dest = value_factory().dest_vec4(intr->def, pin_group);
1278 
1279    auto src_value = value_factory().src(intr->src[0], 0);
1280    auto src = src_value->as_register();
1281    if (!src) {
1282       src = value_factory().temp_register();
1283       emit_instruction(new AluInstr(op1_mov, src, src_value, AluInstr::last_write));
1284    }
1285    auto load = new LoadFromBuffer(dest, {0,7,7,7}, src, 0, 1, NULL, fmt_32);
1286    load->set_mfc(4);
1287    load->set_num_format(vtx_nf_int);
1288    load->reset_fetch_flag(FetchInstr::format_comp_signed);
1289 
1290    emit_instruction(load);
1291    return true;
1292 }
1293 
1294 bool
emit_local_store(nir_intrinsic_instr * instr)1295 Shader::emit_local_store(nir_intrinsic_instr *instr)
1296 {
1297    unsigned write_mask = nir_intrinsic_write_mask(instr);
1298 
1299    auto address = value_factory().src(instr->src[1], 0);
1300    int swizzle_base = 0;
1301    unsigned w = write_mask;
1302    while (!(w & 1)) {
1303       ++swizzle_base;
1304       w >>= 1;
1305    }
1306    write_mask = write_mask >> swizzle_base;
1307 
1308    if ((write_mask & 3) != 3) {
1309       auto value = value_factory().src(instr->src[0], swizzle_base);
1310       emit_instruction(new LDSAtomicInstr(LDS_WRITE, nullptr, address, {value}));
1311    } else {
1312       auto value = value_factory().src(instr->src[0], swizzle_base);
1313       auto value1 = value_factory().src(instr->src[0], swizzle_base + 1);
1314       emit_instruction(
1315          new LDSAtomicInstr(LDS_WRITE_REL, nullptr, address, {value, value1}));
1316    }
1317    return true;
1318 }
1319 
1320 bool
emit_local_load(nir_intrinsic_instr * instr)1321 Shader::emit_local_load(nir_intrinsic_instr *instr)
1322 {
1323    auto address = value_factory().src_vec(instr->src[0], instr->num_components);
1324    auto dest_value = value_factory().dest_vec(instr->def, instr->num_components);
1325    emit_instruction(new LDSReadInstr(dest_value, address));
1326    return true;
1327 }
1328 
1329 void
chain_scratch_read(Instr * instr)1330 Shader::chain_scratch_read(Instr *instr)
1331 {
1332    m_chain_instr.apply(instr, &m_chain_instr.last_scratch_instr);
1333 }
1334 
1335 void
chain_ssbo_read(Instr * instr)1336 Shader::chain_ssbo_read(Instr *instr)
1337 {
1338    m_chain_instr.apply(instr, &m_chain_instr.last_ssbo_instr);
1339 }
1340 
1341 bool
emit_wait_ack()1342 Shader::emit_wait_ack()
1343 {
1344    start_new_block(0);
1345    emit_instruction(new ControlFlowInstr(ControlFlowInstr::cf_wait_ack));
1346    start_new_block(0);
1347    return true;
1348 }
1349 
get_array_hash(const VirtualValue & value)1350 static uint32_t get_array_hash(const VirtualValue& value)
1351 {
1352    assert (value.pin() == pin_array);
1353    const LocalArrayValue& av = static_cast<const LocalArrayValue&>(value);
1354    return av.chan() | (av.array().base_sel() << 2);
1355 }
1356 
visit(AluInstr * instr)1357 void Shader::InstructionChain::visit(AluInstr *instr)
1358 {
1359    if (instr->is_kill()) {
1360       last_kill_instr = instr;
1361 
1362       // these instructions have side effects, they should
1363       // not be re-order with kill
1364       if (last_gds_instr)
1365          instr->add_required_instr(last_gds_instr);
1366 
1367       if (last_ssbo_instr)
1368          instr->add_required_instr(last_ssbo_instr);
1369    }
1370 
1371    /* Make sure array reads and writes depends on the last indirect access
1372     * so that we don't overwrite array elements too early */
1373 
1374    if (auto d = instr->dest()) {
1375       if (d->pin() == pin_array) {
1376          if (d->addr()) {
1377             last_alu_with_indirect_reg[get_array_hash(*d)] = instr;
1378             return;
1379          }
1380          auto pos = last_alu_with_indirect_reg.find(get_array_hash(*d));
1381          if (pos != last_alu_with_indirect_reg.end()) {
1382             instr->add_required_instr(pos->second);
1383          }
1384       }
1385    }
1386 
1387    for (auto& s : instr->sources()) {
1388       if (s->pin() == pin_array) {
1389          if (s->get_addr()) {
1390             last_alu_with_indirect_reg[get_array_hash(*s)] = instr;
1391             return;
1392          }
1393          auto pos = last_alu_with_indirect_reg.find(get_array_hash(*s));
1394          if (pos != last_alu_with_indirect_reg.end()) {
1395             instr->add_required_instr(pos->second);
1396          }
1397       }
1398    }
1399 
1400    if (instr->has_lds_access()) {
1401       last_lds_access = instr;
1402       if (last_group_barrier)
1403          instr->add_required_instr(last_group_barrier);
1404    }
1405 
1406    if (!instr->has_alu_flag(alu_is_lds) &&
1407        instr->opcode() == op0_group_barrier) {
1408       last_group_barrier = instr;
1409       if (last_lds_access)
1410          instr->add_required_instr(last_group_barrier);
1411       if (last_ssbo_instr)
1412          instr->add_required_instr(last_ssbo_instr);
1413    }
1414 
1415 }
1416 
1417 void
visit(ScratchIOInstr * instr)1418 Shader::InstructionChain::visit(ScratchIOInstr *instr)
1419 {
1420    apply(instr, &last_scratch_instr);
1421 }
1422 
1423 void
visit(GDSInstr * instr)1424 Shader::InstructionChain::visit(GDSInstr *instr)
1425 {
1426    apply(instr, &last_gds_instr);
1427    Instr::Flags flag = instr->has_instr_flag(Instr::helper) ? Instr::helper : Instr::vpm;
1428    for (auto& loop : this_shader->m_loops) {
1429       loop->set_instr_flag(flag);
1430    }
1431    if (last_kill_instr)
1432       instr->add_required_instr(last_kill_instr);
1433 
1434 }
1435 
1436 void
visit(RatInstr * instr)1437 Shader::InstructionChain::visit(RatInstr *instr)
1438 {
1439    apply(instr, &last_ssbo_instr);
1440    Instr::Flags flag = instr->has_instr_flag(Instr::helper) ? Instr::helper : Instr::vpm;
1441    for (auto& loop : this_shader->m_loops) {
1442       loop->set_instr_flag(flag);
1443    }
1444 
1445    if (prepare_mem_barrier)
1446       instr->set_ack();
1447 
1448    if (this_shader->m_current_block->inc_rat_emitted() > 15)
1449       this_shader->start_new_block(0);
1450 
1451    if (last_kill_instr)
1452       instr->add_required_instr(last_kill_instr);
1453 
1454    if (last_group_barrier)
1455       instr->add_required_instr(last_group_barrier);
1456 }
1457 
1458 void
apply(Instr * current,Instr ** last)1459 Shader::InstructionChain::apply(Instr *current, Instr **last)
1460 {
1461    if (*last)
1462       current->add_required_instr(*last);
1463    *last = current;
1464 }
1465 
1466 void
emit_instruction(PInst instr)1467 Shader::emit_instruction(PInst instr)
1468 {
1469    sfn_log << SfnLog::instr << "   " << *instr << "\n";
1470    instr->accept(m_chain_instr);
1471    m_current_block->push_back(instr);
1472 }
1473 
1474 bool
emit_load_tcs_param_base(nir_intrinsic_instr * instr,int offset)1475 Shader::emit_load_tcs_param_base(nir_intrinsic_instr *instr, int offset)
1476 {
1477    auto src = value_factory().temp_register();
1478    emit_instruction(
1479       new AluInstr(op1_mov, src, value_factory().zero(), AluInstr::last_write));
1480 
1481    auto dest = value_factory().dest_vec4(instr->def, pin_group);
1482    auto fetch = new LoadFromBuffer(dest,
1483                                    {0, 1, 2, 3},
1484                                    src,
1485                                    offset,
1486                                    R600_LDS_INFO_CONST_BUFFER,
1487                                    nullptr,
1488                                    fmt_32_32_32_32);
1489 
1490    fetch->set_fetch_flag(LoadFromBuffer::srf_mode);
1491    emit_instruction(fetch);
1492 
1493    return true;
1494 }
1495 
1496 bool
emit_shader_clock(nir_intrinsic_instr * instr)1497 Shader::emit_shader_clock(nir_intrinsic_instr *instr)
1498 {
1499    auto& vf = value_factory();
1500    auto group = new AluGroup();
1501    group->add_instruction(new AluInstr(op1_mov,
1502                                        vf.dest(instr->def, 0, pin_chan),
1503                                        vf.inline_const(ALU_SRC_TIME_LO, 0),
1504                                        AluInstr::write));
1505    group->add_instruction(new AluInstr(op1_mov,
1506                                        vf.dest(instr->def, 1, pin_chan),
1507                                        vf.inline_const(ALU_SRC_TIME_HI, 0),
1508                                        AluInstr::last_write));
1509    emit_instruction(group);
1510    return true;
1511 }
1512 
1513 bool
emit_group_barrier(nir_intrinsic_instr * intr)1514 Shader::emit_group_barrier(nir_intrinsic_instr *intr)
1515 {
1516    assert(m_control_flow_depth == 0);
1517    (void)intr;
1518    auto op = new AluInstr(op0_group_barrier, 0);
1519    op->set_alu_flag(alu_last_instr);
1520    emit_instruction(op);
1521    return true;
1522 }
1523 
emit_barrier(nir_intrinsic_instr * intr)1524 bool Shader::emit_barrier(nir_intrinsic_instr *intr)
1525 {
1526 
1527    if ((nir_intrinsic_execution_scope(intr) == SCOPE_WORKGROUP)) {
1528       if (!emit_group_barrier(intr))
1529          return false;
1530    }
1531 
1532    /* We don't check nir_var_mem_shared because we don't emit a real barrier -
1533     * for this we need to implement GWS (Global Wave Sync).
1534     * Here we just emit a wait_ack - this is no real barrier,
1535     * it's just a wait for RAT writes to be finished (if they
1536     * are emitted with the _ACK opcode and the `mark` flag set - it
1537     * is very likely that WAIT_ACK is also only relevant for this
1538     * shader instance). */
1539    auto full_barrier_mem_modes = nir_var_mem_ssbo |  nir_var_image | nir_var_mem_global;
1540 
1541    if ((nir_intrinsic_memory_scope(intr) != SCOPE_NONE) &&
1542        (nir_intrinsic_memory_modes(intr) & full_barrier_mem_modes)) {
1543       return emit_wait_ack();
1544    }
1545 
1546    return true;
1547 }
1548 
1549 bool
load_ubo(nir_intrinsic_instr * instr)1550 Shader::load_ubo(nir_intrinsic_instr *instr)
1551 {
1552    auto bufid = nir_src_as_const_value(instr->src[0]);
1553    auto buf_offset = nir_src_as_const_value(instr->src[1]);
1554    auto base_id = nir_intrinsic_base(instr);
1555 
1556    if (!buf_offset) {
1557       /* TODO: if bufid is constant then this can also be solved by using the
1558        * CF index on the ALU block, and this would probably make sense when
1559        * there are more then one loads with the same buffer ID. */
1560 
1561       auto addr = value_factory().src(instr->src[1], 0)->as_register();
1562       RegisterVec4::Swizzle dest_swz{7, 7, 7, 7};
1563       auto dest = value_factory().dest_vec4(instr->def, pin_group);
1564 
1565       for (unsigned i = 0; i < instr->def.num_components; ++i) {
1566          dest_swz[i] = i + nir_intrinsic_component(instr);
1567       }
1568 
1569       LoadFromBuffer *ir;
1570       if (bufid) {
1571          ir = new LoadFromBuffer(
1572             dest, dest_swz, addr, 0, bufid->u32, nullptr, fmt_32_32_32_32_float);
1573       } else {
1574          auto buffer_id = emit_load_to_register(value_factory().src(instr->src[0], 0));
1575          ir = new LoadFromBuffer(
1576             dest, dest_swz, addr, 0, base_id, buffer_id, fmt_32_32_32_32_float);
1577       }
1578       emit_instruction(ir);
1579       return true;
1580    }
1581 
1582    /* direct load using the constant cache */
1583    if (bufid) {
1584       int buf_cmp = nir_intrinsic_component(instr);
1585 
1586       AluInstr *ir = nullptr;
1587       auto pin = instr->def.num_components == 1
1588                     ? pin_free
1589                     : pin_none;
1590       for (unsigned i = 0; i < instr->def.num_components; ++i) {
1591 
1592          sfn_log << SfnLog::io << "UBO[" << bufid << "] " << instr->def.index
1593                  << " const[" << i << "]: " << instr->const_index[i] << "\n";
1594 
1595          auto uniform =
1596             value_factory().uniform(512 + buf_offset->u32, i + buf_cmp, bufid->u32);
1597          ir = new AluInstr(op1_mov,
1598                            value_factory().dest(instr->def, i, pin),
1599                            uniform,
1600                            {alu_write});
1601          emit_instruction(ir);
1602       }
1603       if (ir)
1604          ir->set_alu_flag(alu_last_instr);
1605       return true;
1606    } else {
1607       int buf_cmp = nir_intrinsic_component(instr);
1608       AluInstr *ir = nullptr;
1609       auto kc_id = value_factory().src(instr->src[0], 0);
1610 
1611       for (unsigned i = 0; i < instr->def.num_components; ++i) {
1612          int cmp = buf_cmp + i;
1613          auto u =
1614             new UniformValue(512 + buf_offset->u32, cmp, kc_id, nir_intrinsic_base(instr));
1615          auto dest = value_factory().dest(instr->def, i, pin_none);
1616          ir = new AluInstr(op1_mov, dest, u, AluInstr::write);
1617          emit_instruction(ir);
1618       }
1619       if (ir)
1620          ir->set_alu_flag(alu_last_instr);
1621       m_indirect_files |= 1 << TGSI_FILE_CONSTANT;
1622       return true;
1623    }
1624 }
1625 
1626 void
start_new_block(int depth)1627 Shader::start_new_block(int depth)
1628 {
1629    int depth_offset = m_current_block ? m_current_block->nesting_depth() : 0;
1630    m_current_block = new Block(depth + depth_offset, m_next_block++);
1631    m_root.push_back(m_current_block);
1632 }
1633 
1634 bool
emit_simple_mov(nir_def & def,int chan,PVirtualValue src,Pin pin)1635 Shader::emit_simple_mov(nir_def& def, int chan, PVirtualValue src, Pin pin)
1636 {
1637    auto dst = value_factory().dest(def, chan, pin);
1638    emit_instruction(new AluInstr(op1_mov, dst, src, AluInstr::last_write));
1639    return true;
1640 }
1641 
1642 void
print(std::ostream & os) const1643 Shader::print(std::ostream& os) const
1644 {
1645    print_header(os);
1646 
1647    for (auto& [dummy, i] : m_inputs) {
1648       i.print(os);
1649       os << "\n";
1650    }
1651 
1652    for (auto& [dummy, o] : m_outputs) {
1653       o.print(os);
1654       os << "\n";
1655    }
1656 
1657    os << "SHADER\n";
1658    for (auto& b : m_root)
1659       b->print(os);
1660 }
1661 
/* Names printed by Shader::print_header, indexed by chip class. */
const char *chip_class_names[] = {
   "R600",
   "R700",
   "EVERGREEN",
   "CAYMAN",
};
1663 
1664 void
print_header(std::ostream & os) const1665 Shader::print_header(std::ostream& os) const
1666 {
1667    assert(m_chip_class <= ISA_CC_CAYMAN);
1668    os << "Shader: " << m_shader_id << "\n";
1669    os << m_type_id << "\n";
1670    os << "CHIPCLASS " << chip_class_names[m_chip_class] << "\n";
1671    print_properties(os);
1672 }
1673 
1674 void
print_properties(std::ostream & os) const1675 Shader::print_properties(std::ostream& os) const
1676 {
1677    do_print_properties(os);
1678 }
1679 
1680 bool
equal_to(const Shader & other) const1681 Shader::equal_to(const Shader& other) const
1682 {
1683    if (m_root.size() != other.m_root.size())
1684       return false;
1685    return std::inner_product(
1686       m_root.begin(),
1687       m_root.end(),
1688       other.m_root.begin(),
1689       true,
1690       [](bool lhs, bool rhs) { return lhs & rhs; },
1691       [](const Block::Pointer lhs, const Block::Pointer rhs) -> bool {
1692          return lhs->is_equal_to(*rhs);
1693       });
1694 }
1695 
1696 void
get_shader_info(r600_shader * sh_info)1697 Shader::get_shader_info(r600_shader *sh_info)
1698 {
1699    sh_info->ninput = m_inputs.size();
1700    sh_info->nlds = 0;
1701    int input_array_array_loc = 0;
1702    for (auto& [index, info] : m_inputs) {
1703       r600_shader_io& io = sh_info->input[input_array_array_loc++];
1704 
1705       io.varying_slot = info.varying_slot();
1706       io.system_value = info.system_value();
1707       io.gpr = info.gpr();
1708       io.spi_sid = info.spi_sid();
1709       io.ij_index = info.ij_index();
1710       io.interpolate = info.interpolator();
1711       io.interpolate_location = info.interpolate_loc();
1712       if (info.need_lds_pos()) {
1713          io.lds_pos = info.lds_pos();
1714          sh_info->nlds = MAX2(unsigned(info.lds_pos() + 1), sh_info->nlds);
1715       } else {
1716          io.lds_pos = 0;
1717       }
1718 
1719       io.ring_offset = info.ring_offset();
1720       io.uses_interpolate_at_centroid = info.uses_interpolate_at_centroid();
1721 
1722       sfn_log << SfnLog::io << "Emit input [" << index << "]";
1723       if (io.varying_slot != NUM_TOTAL_VARYING_SLOTS)
1724          sfn_log << " varying_slot:" << static_cast<int>(io.varying_slot);
1725       if (io.system_value != SYSTEM_VALUE_MAX)
1726          sfn_log << " system_value:" << static_cast<int>(io.system_value);
1727       sfn_log << " spi_sid:" << io.spi_sid << "\n";
1728       assert(io.spi_sid >= 0);
1729    }
1730 
1731    sh_info->noutput = m_outputs.size();
1732    /* VS is required to export at least one parameter. */
1733    sh_info->highest_export_param = 0;
1734    sh_info->num_loops = m_nloops;
1735    int output_array_array_loc = 0;
1736 
1737    for (auto& [index, info] : m_outputs) {
1738       r600_shader_io& io = sh_info->output[output_array_array_loc++];
1739       io.varying_slot = info.varying_slot();
1740       io.frag_result = info.frag_result();
1741       io.gpr = info.gpr();
1742       io.spi_sid = info.spi_sid();
1743       io.write_mask = info.writemask();
1744       io.export_param = info.export_param();
1745       if (info.export_param() >= 0)
1746          sh_info->highest_export_param = MAX2(unsigned(info.export_param()),
1747                                               sh_info->highest_export_param);
1748 
1749       sfn_log << SfnLog::io << "Emit output[" << index << "]";
1750       if (io.varying_slot != NUM_TOTAL_VARYING_SLOTS)
1751          sfn_log << " varying_slot:" << static_cast<int>(io.varying_slot);
1752       if (io.frag_result != static_cast<gl_frag_result>(FRAG_RESULT_MAX))
1753          sfn_log << " frag_result:" << static_cast<int>(io.frag_result);
1754       sfn_log << " spi_sid:" << io.spi_sid << " write_mask:" << io.write_mask << "\n";
1755       assert(io.spi_sid >= 0);
1756    }
1757 
1758    sh_info->nhwatomic = m_nhwatomic;
1759    sh_info->atomic_base = m_atomic_base;
1760    sh_info->nhwatomic_ranges = m_atomics.size();
1761    for (unsigned i = 0; i < m_atomics.size(); ++i)
1762       sh_info->atomics[i] = m_atomics[i];
1763 
1764    if (m_flags.test(sh_indirect_const_file))
1765       sh_info->indirect_files |= 1 << TGSI_FILE_CONSTANT;
1766 
1767    if (m_flags.test(sh_indirect_atomic))
1768       sh_info->indirect_files |= 1 << TGSI_FILE_HW_ATOMIC;
1769 
1770    sh_info->uses_tex_buffers = m_flags.test(sh_uses_tex_buffer);
1771 
1772    value_factory().get_shader_info(sh_info);
1773 
1774    sh_info->needs_scratch_space = m_flags.test(sh_needs_scratch_space);
1775    sh_info->uses_images = m_flags.test(sh_uses_images);
1776    sh_info->uses_atomics = m_flags.test(sh_uses_atomics);
1777    sh_info->disable_sb = m_flags.test(sh_disble_sb);
1778    sh_info->has_txq_cube_array_z_comp = m_flags.test(sh_txs_cube_array_comp);
1779    sh_info->indirect_files = m_indirect_files;
1780    do_get_shader_info(sh_info);
1781 }
1782 
1783 PRegister
atomic_update()1784 Shader::atomic_update()
1785 {
1786    assert(m_atomic_update);
1787    return m_atomic_update;
1788 }
1789 
1790 int
remap_atomic_base(int base)1791 Shader::remap_atomic_base(int base)
1792 {
1793    return m_atomic_base_map[base];
1794 }
1795 
1796 void
do_get_shader_info(r600_shader * sh_info)1797 Shader::do_get_shader_info(r600_shader *sh_info)
1798 {
1799    sh_info->uses_atomics = m_nhwatomic > 0;
1800 }
1801 
1802 const ShaderInput&
input(int base) const1803 Shader::input(int base) const
1804 {
1805    auto io = m_inputs.find(base);
1806    assert(io != m_inputs.end());
1807    return io->second;
1808 }
1809 
1810 const ShaderOutput&
output(int base) const1811 Shader::output(int base) const
1812 {
1813    auto io = m_outputs.find(base);
1814    assert(io != m_outputs.end());
1815    return io->second;
1816 }
1817 
1818 LiveRangeMap
prepare_live_range_map()1819 Shader::prepare_live_range_map()
1820 {
1821    return m_instr_factory->value_factory().prepare_live_range_map();
1822 }
1823 
1824 void
reset_function(ShaderBlocks & new_root)1825 Shader::reset_function(ShaderBlocks& new_root)
1826 {
1827    std::swap(m_root, new_root);
1828 }
1829 
1830 void
finalize()1831 Shader::finalize()
1832 {
1833    do_finalize();
1834 }
1835 
1836 void
do_finalize()1837 Shader::do_finalize()
1838 {
1839 }
1840 
1841 } // namespace r600
1842