1 /*
2 * Copyright © 2011 Intel Corporation
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "brw_fs.h"
7 #include "brw_eu.h"
8 #include "brw_nir.h"
9 #include "brw_private.h"
10 #include "dev/intel_debug.h"
11
12 using namespace brw;
13
14 static void
brw_assign_vs_urb_setup(fs_visitor & s)15 brw_assign_vs_urb_setup(fs_visitor &s)
16 {
17 struct brw_vs_prog_data *vs_prog_data = brw_vs_prog_data(s.prog_data);
18
19 assert(s.stage == MESA_SHADER_VERTEX);
20
21 /* Each attribute is 4 regs. */
22 s.first_non_payload_grf += 4 * vs_prog_data->nr_attribute_slots;
23
24 assert(vs_prog_data->base.urb_read_length <= 15);
25
26 /* Rewrite all ATTR file references to the hw grf that they land in. */
27 foreach_block_and_inst(block, fs_inst, inst, s.cfg) {
28 s.convert_attr_sources_to_hw_regs(inst);
29 }
30 }
31
32 static bool
run_vs(fs_visitor & s)33 run_vs(fs_visitor &s)
34 {
35 assert(s.stage == MESA_SHADER_VERTEX);
36
37 s.payload_ = new vs_thread_payload(s);
38
39 nir_to_brw(&s);
40
41 if (s.failed)
42 return false;
43
44 s.emit_urb_writes();
45
46 brw_calculate_cfg(s);
47
48 brw_fs_optimize(s);
49
50 s.assign_curb_setup();
51 brw_assign_vs_urb_setup(s);
52
53 brw_fs_lower_3src_null_dest(s);
54 brw_fs_workaround_memory_fence_before_eot(s);
55 brw_fs_workaround_emit_dummy_mov_instruction(s);
56
57 brw_allocate_registers(s, true /* allow_spilling */);
58
59 return !s.failed;
60 }
61
62 extern "C" const unsigned *
brw_compile_vs(const struct brw_compiler * compiler,struct brw_compile_vs_params * params)63 brw_compile_vs(const struct brw_compiler *compiler,
64 struct brw_compile_vs_params *params)
65 {
66 struct nir_shader *nir = params->base.nir;
67 const struct brw_vs_prog_key *key = params->key;
68 struct brw_vs_prog_data *prog_data = params->prog_data;
69 const bool debug_enabled =
70 brw_should_print_shader(nir, params->base.debug_flag ?
71 params->base.debug_flag : DEBUG_VS);
72
73 prog_data->base.base.stage = MESA_SHADER_VERTEX;
74 prog_data->base.base.ray_queries = nir->info.ray_queries;
75 prog_data->base.base.total_scratch = 0;
76
77 brw_nir_apply_key(nir, compiler, &key->base,
78 brw_geometry_stage_dispatch_width(compiler->devinfo));
79
80 prog_data->inputs_read = nir->info.inputs_read;
81 prog_data->double_inputs_read = nir->info.vs.double_inputs;
82
83 brw_nir_lower_vs_inputs(nir);
84 brw_nir_lower_vue_outputs(nir);
85 brw_postprocess_nir(nir, compiler, debug_enabled,
86 key->base.robust_flags);
87
88 prog_data->base.clip_distance_mask =
89 ((1 << nir->info.clip_distance_array_size) - 1);
90 prog_data->base.cull_distance_mask =
91 ((1 << nir->info.cull_distance_array_size) - 1) <<
92 nir->info.clip_distance_array_size;
93
94 unsigned nr_attribute_slots = util_bitcount64(prog_data->inputs_read);
95
96 /* gl_VertexID and gl_InstanceID are system values, but arrive via an
97 * incoming vertex attribute. So, add an extra slot.
98 */
99 if (BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FIRST_VERTEX) ||
100 BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_INSTANCE) ||
101 BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) ||
102 BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID)) {
103 nr_attribute_slots++;
104 }
105
106 /* gl_DrawID and IsIndexedDraw share its very own vec4 */
107 if (BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID) ||
108 BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_IS_INDEXED_DRAW)) {
109 nr_attribute_slots++;
110 }
111
112 if (BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_IS_INDEXED_DRAW))
113 prog_data->uses_is_indexed_draw = true;
114
115 if (BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FIRST_VERTEX))
116 prog_data->uses_firstvertex = true;
117
118 if (BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_INSTANCE))
119 prog_data->uses_baseinstance = true;
120
121 if (BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE))
122 prog_data->uses_vertexid = true;
123
124 if (BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID))
125 prog_data->uses_instanceid = true;
126
127 if (BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID))
128 prog_data->uses_drawid = true;
129
130 prog_data->base.urb_read_length = DIV_ROUND_UP(nr_attribute_slots, 2);
131 prog_data->nr_attribute_slots = nr_attribute_slots;
132
133 /* Since vertex shaders reuse the same VUE entry for inputs and outputs
134 * (overwriting the original contents), we need to make sure the size is
135 * the larger of the two.
136 */
137 const unsigned vue_entries =
138 MAX2(nr_attribute_slots, (unsigned)prog_data->base.vue_map.num_slots);
139
140 prog_data->base.urb_entry_size = DIV_ROUND_UP(vue_entries, 4);
141
142 if (unlikely(debug_enabled)) {
143 fprintf(stderr, "VS Output ");
144 brw_print_vue_map(stderr, &prog_data->base.vue_map, MESA_SHADER_VERTEX);
145 }
146
147 const unsigned dispatch_width = compiler->devinfo->ver >= 20 ? 16 : 8;
148 prog_data->base.dispatch_mode = INTEL_DISPATCH_MODE_SIMD8;
149
150 fs_visitor v(compiler, ¶ms->base, &key->base,
151 &prog_data->base.base, nir, dispatch_width,
152 params->base.stats != NULL, debug_enabled);
153 if (!run_vs(v)) {
154 params->base.error_str =
155 ralloc_strdup(params->base.mem_ctx, v.fail_msg);
156 return NULL;
157 }
158
159 assert(v.payload().num_regs % reg_unit(compiler->devinfo) == 0);
160 prog_data->base.base.dispatch_grf_start_reg =
161 v.payload().num_regs / reg_unit(compiler->devinfo);
162
163 fs_generator g(compiler, ¶ms->base,
164 &prog_data->base.base,
165 MESA_SHADER_VERTEX);
166 if (unlikely(debug_enabled)) {
167 const char *debug_name =
168 ralloc_asprintf(params->base.mem_ctx, "%s vertex shader %s",
169 nir->info.label ? nir->info.label :
170 "unnamed",
171 nir->info.name);
172
173 g.enable_debug(debug_name);
174 }
175 g.generate_code(v.cfg, dispatch_width, v.shader_stats,
176 v.performance_analysis.require(), params->base.stats);
177 g.add_const_data(nir->constant_data, nir->constant_data_size);
178
179 return g.get_assembly();
180 }
181