1 /*
2 * Copyright © 2010 Intel Corporation
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "brw_fs.h"
7 #include "brw_fs_live_variables.h"
8 #include "brw_nir.h"
9 #include "brw_cfg.h"
10 #include "brw_private.h"
11 #include "intel_nir.h"
12 #include "shader_enums.h"
13 #include "dev/intel_debug.h"
14 #include "dev/intel_wa.h"
15
16 #include <memory>
17
18 static uint64_t
brw_bsr(const struct intel_device_info * devinfo,uint32_t offset,uint8_t simd_size,uint8_t local_arg_offset)19 brw_bsr(const struct intel_device_info *devinfo,
20 uint32_t offset, uint8_t simd_size, uint8_t local_arg_offset)
21 {
22 assert(offset % 64 == 0);
23 assert(simd_size == 8 || simd_size == 16);
24 assert(local_arg_offset % 8 == 0);
25
26 return offset |
27 SET_BITS(simd_size == 8, 4, 4) |
28 SET_BITS(local_arg_offset / 8, 2, 0);
29 }
30
31 static bool
run_bs(fs_visitor & s,bool allow_spilling)32 run_bs(fs_visitor &s, bool allow_spilling)
33 {
34 assert(s.stage >= MESA_SHADER_RAYGEN && s.stage <= MESA_SHADER_CALLABLE);
35
36 s.payload_ = new bs_thread_payload(s);
37
38 nir_to_brw(&s);
39
40 if (s.failed)
41 return false;
42
43 /* TODO(RT): Perhaps rename this? */
44 s.emit_cs_terminate();
45
46 brw_calculate_cfg(s);
47
48 brw_fs_optimize(s);
49
50 s.assign_curb_setup();
51
52 brw_fs_lower_3src_null_dest(s);
53 brw_fs_workaround_memory_fence_before_eot(s);
54 brw_fs_workaround_emit_dummy_mov_instruction(s);
55
56 brw_allocate_registers(s, allow_spilling);
57
58 return !s.failed;
59 }
60
61 static uint8_t
compile_single_bs(const struct brw_compiler * compiler,struct brw_compile_bs_params * params,const struct brw_bs_prog_key * key,struct brw_bs_prog_data * prog_data,nir_shader * shader,fs_generator * g,struct brw_compile_stats * stats,int * prog_offset)62 compile_single_bs(const struct brw_compiler *compiler,
63 struct brw_compile_bs_params *params,
64 const struct brw_bs_prog_key *key,
65 struct brw_bs_prog_data *prog_data,
66 nir_shader *shader,
67 fs_generator *g,
68 struct brw_compile_stats *stats,
69 int *prog_offset)
70 {
71 const bool debug_enabled = brw_should_print_shader(shader, DEBUG_RT);
72
73 prog_data->base.stage = shader->info.stage;
74 prog_data->max_stack_size = MAX2(prog_data->max_stack_size,
75 shader->scratch_size);
76
77 const unsigned max_dispatch_width = 16;
78 brw_nir_apply_key(shader, compiler, &key->base, max_dispatch_width);
79 brw_postprocess_nir(shader, compiler, debug_enabled,
80 key->base.robust_flags);
81
82 brw_simd_selection_state simd_state{
83 .devinfo = compiler->devinfo,
84 .prog_data = prog_data,
85
86 /* Since divergence is a lot more likely in RT than compute, it makes
87 * sense to limit ourselves to the smallest available SIMD for now.
88 */
89 .required_width = compiler->devinfo->ver >= 20 ? 16u : 8u,
90 };
91
92 std::unique_ptr<fs_visitor> v[2];
93
94 for (unsigned simd = 0; simd < ARRAY_SIZE(v); simd++) {
95 if (!brw_simd_should_compile(simd_state, simd))
96 continue;
97
98 const unsigned dispatch_width = 8u << simd;
99
100 if (dispatch_width == 8 && compiler->devinfo->ver >= 20)
101 continue;
102
103 v[simd] = std::make_unique<fs_visitor>(compiler, ¶ms->base,
104 &key->base,
105 &prog_data->base, shader,
106 dispatch_width,
107 stats != NULL,
108 debug_enabled);
109
110 const bool allow_spilling = !brw_simd_any_compiled(simd_state);
111 if (run_bs(*v[simd], allow_spilling)) {
112 brw_simd_mark_compiled(simd_state, simd, v[simd]->spilled_any_registers);
113 } else {
114 simd_state.error[simd] = ralloc_strdup(params->base.mem_ctx,
115 v[simd]->fail_msg);
116 if (simd > 0) {
117 brw_shader_perf_log(compiler, params->base.log_data,
118 "SIMD%u shader failed to compile: %s",
119 dispatch_width, v[simd]->fail_msg);
120 }
121 }
122 }
123
124 const int selected_simd = brw_simd_select(simd_state);
125 if (selected_simd < 0) {
126 params->base.error_str =
127 ralloc_asprintf(params->base.mem_ctx,
128 "Can't compile shader: "
129 "SIMD8 '%s' and SIMD16 '%s'.\n",
130 simd_state.error[0], simd_state.error[1]);
131 return 0;
132 }
133
134 assert(selected_simd < int(ARRAY_SIZE(v)));
135 fs_visitor *selected = v[selected_simd].get();
136 assert(selected);
137
138 const unsigned dispatch_width = selected->dispatch_width;
139
140 int offset = g->generate_code(selected->cfg, dispatch_width, selected->shader_stats,
141 selected->performance_analysis.require(), stats);
142 if (prog_offset)
143 *prog_offset = offset;
144 else
145 assert(offset == 0);
146
147 return dispatch_width;
148 }
149
150 const unsigned *
brw_compile_bs(const struct brw_compiler * compiler,struct brw_compile_bs_params * params)151 brw_compile_bs(const struct brw_compiler *compiler,
152 struct brw_compile_bs_params *params)
153 {
154 nir_shader *shader = params->base.nir;
155 struct brw_bs_prog_data *prog_data = params->prog_data;
156 unsigned num_resume_shaders = params->num_resume_shaders;
157 nir_shader **resume_shaders = params->resume_shaders;
158 const bool debug_enabled = brw_should_print_shader(shader, DEBUG_RT);
159
160 prog_data->base.stage = shader->info.stage;
161 prog_data->base.ray_queries = shader->info.ray_queries;
162 prog_data->base.total_scratch = 0;
163
164 prog_data->max_stack_size = 0;
165 prog_data->num_resume_shaders = num_resume_shaders;
166
167 fs_generator g(compiler, ¶ms->base, &prog_data->base,
168 shader->info.stage);
169 if (unlikely(debug_enabled)) {
170 char *name = ralloc_asprintf(params->base.mem_ctx,
171 "%s %s shader %s",
172 shader->info.label ?
173 shader->info.label : "unnamed",
174 gl_shader_stage_name(shader->info.stage),
175 shader->info.name);
176 g.enable_debug(name);
177 }
178
179 prog_data->simd_size =
180 compile_single_bs(compiler, params, params->key, prog_data,
181 shader, &g, params->base.stats, NULL);
182 if (prog_data->simd_size == 0)
183 return NULL;
184
185 uint64_t *resume_sbt = ralloc_array(params->base.mem_ctx,
186 uint64_t, num_resume_shaders);
187 for (unsigned i = 0; i < num_resume_shaders; i++) {
188 if (INTEL_DEBUG(DEBUG_RT)) {
189 char *name = ralloc_asprintf(params->base.mem_ctx,
190 "%s %s resume(%u) shader %s",
191 shader->info.label ?
192 shader->info.label : "unnamed",
193 gl_shader_stage_name(shader->info.stage),
194 i, shader->info.name);
195 g.enable_debug(name);
196 }
197
198 /* TODO: Figure out shader stats etc. for resume shaders */
199 int offset = 0;
200 uint8_t simd_size =
201 compile_single_bs(compiler, params, params->key,
202 prog_data, resume_shaders[i], &g, NULL, &offset);
203 if (simd_size == 0)
204 return NULL;
205
206 assert(offset > 0);
207 resume_sbt[i] = brw_bsr(compiler->devinfo, offset, simd_size, 0);
208 }
209
210 /* We only have one constant data so we want to make sure they're all the
211 * same.
212 */
213 for (unsigned i = 0; i < num_resume_shaders; i++) {
214 assert(resume_shaders[i]->constant_data_size ==
215 shader->constant_data_size);
216 assert(memcmp(resume_shaders[i]->constant_data,
217 shader->constant_data,
218 shader->constant_data_size) == 0);
219 }
220
221 g.add_const_data(shader->constant_data, shader->constant_data_size);
222 g.add_resume_sbt(num_resume_shaders, resume_sbt);
223
224 return g.get_assembly();
225 }
226