1 /*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23 /**
24 * @file iris_program.c
25 *
26 * This file contains the driver interface for compiling shaders.
27 *
28 * See iris_program_cache.c for the in-memory program cache where the
29 * compiled shaders are stored.
30 */
31
32 #include <stdio.h>
33 #include <errno.h>
34 #include "pipe/p_defines.h"
35 #include "pipe/p_state.h"
36 #include "pipe/p_context.h"
37 #include "pipe/p_screen.h"
38 #include "util/u_atomic.h"
39 #include "util/u_upload_mgr.h"
40 #include "util/u_debug.h"
41 #include "util/u_async_debug.h"
42 #include "compiler/nir/nir.h"
43 #include "compiler/nir/nir_builder.h"
44 #include "compiler/nir/nir_serialize.h"
45 #include "intel/compiler/brw_compiler.h"
46 #include "intel/compiler/brw_nir.h"
47 #include "intel/compiler/intel_nir.h"
48 #include "intel/compiler/brw_prim.h"
49 #include "intel/compiler/elk/elk_compiler.h"
50 #include "intel/compiler/elk/elk_nir.h"
51 #include "intel/compiler/elk/elk_prim.h"
52 #include "iris_context.h"
53 #include "iris_pipe.h"
54 #include "nir/tgsi_to_nir.h"
55
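/* These macros initialize the fields common to every shader program key:
 * the program_string_id identifying the uncompiled shader and the
 * limit_trig_input_range driconf workaround.  KEY_INIT fills in an iris
 * key; BRW_KEY_INIT and ELK_KEY_INIT fill in the corresponding backend
 * compiler keys.  Callers elsewhere in this file use them roughly like
 * (illustrative sketch, not a verbatim quote):
 *
 *    struct iris_fs_prog_key key = { KEY_INIT(base) };
 */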
56 #define KEY_INIT(prefix) \
57 .prefix.program_string_id = ish->program_id, \
58 .prefix.limit_trig_input_range = screen->driconf.limit_trig_input_range
59 #define BRW_KEY_INIT(gen, prog_id, limit_trig_input) \
60 .base.program_string_id = prog_id, \
61 .base.limit_trig_input_range = limit_trig_input
62 #define ELK_KEY_INIT(gen, prog_id, limit_trig_input) \
63 .base.program_string_id = prog_id, \
64 .base.limit_trig_input_range = limit_trig_input
65
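/* State captured so a shader variant can be compiled away from the caller
 * (e.g. on a driver thread); the fields below are what the deferred
 * compile path needs.
 */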
66 struct iris_threaded_compile_job {
67 struct iris_screen *screen;
68 struct u_upload_mgr *uploader;
69 struct util_debug_callback *dbg;
70 struct iris_uncompiled_shader *ish;
71 struct iris_compiled_shader *shader;
72 };
73
74 static unsigned
75 get_new_program_id(struct iris_screen *screen)
76 {
77 return p_atomic_inc_return(&screen->program_id);
78 }
79
80 static void
81 iris_apply_brw_wm_prog_data(struct iris_compiled_shader *shader,
82 const struct brw_wm_prog_data *brw)
83 {
84 assert(shader->stage == MESA_SHADER_FRAGMENT);
85 struct iris_fs_data *iris = &shader->fs;
86
87 STATIC_ASSERT(ARRAY_SIZE(iris->urb_setup) == ARRAY_SIZE(brw->urb_setup));
88 STATIC_ASSERT(ARRAY_SIZE(iris->urb_setup_attribs) == ARRAY_SIZE(brw->urb_setup_attribs));
89 memcpy(iris->urb_setup, brw->urb_setup, sizeof(iris->urb_setup));
90 memcpy(iris->urb_setup_attribs, brw->urb_setup_attribs, brw->urb_setup_attribs_count);
91 iris->urb_setup_attribs_count = brw->urb_setup_attribs_count;
92
93 iris->num_varying_inputs = brw->num_varying_inputs;
94 iris->msaa_flags_param = brw->msaa_flags_param;
95 iris->flat_inputs = brw->flat_inputs;
96 iris->inputs = brw->inputs;
97 iris->computed_depth_mode = brw->computed_depth_mode;
98 iris->max_polygons = brw->max_polygons;
99 iris->dispatch_multi = brw->dispatch_multi;
100 iris->computed_stencil = brw->computed_stencil;
101 iris->early_fragment_tests = brw->early_fragment_tests;
102 iris->post_depth_coverage = brw->post_depth_coverage;
103 iris->inner_coverage = brw->inner_coverage;
104 iris->dispatch_8 = brw->dispatch_8;
105 iris->dispatch_16 = brw->dispatch_16;
106 iris->dispatch_32 = brw->dispatch_32;
107 iris->dual_src_blend = brw->dual_src_blend;
108 iris->uses_pos_offset = brw->uses_pos_offset;
109 iris->uses_omask = brw->uses_omask;
110 iris->uses_kill = brw->uses_kill;
111 iris->uses_src_depth = brw->uses_src_depth;
112 iris->uses_src_w = brw->uses_src_w;
113 iris->uses_sample_mask = brw->uses_sample_mask;
114 iris->uses_vmask = brw->uses_vmask;
115 iris->has_side_effects = brw->has_side_effects;
116 iris->pulls_bary = brw->pulls_bary;
117
118 iris->uses_sample_offsets = brw->uses_sample_offsets;
119 iris->uses_npc_bary_coefficients = brw->uses_npc_bary_coefficients;
120 iris->uses_pc_bary_coefficients = brw->uses_pc_bary_coefficients;
121 iris->uses_depth_w_coefficients = brw->uses_depth_w_coefficients;
122
123 iris->uses_nonperspective_interp_modes = brw->uses_nonperspective_interp_modes;
124
125 iris->is_per_sample = brw_wm_prog_data_is_persample(brw, 0);
126 }
127
128 static void
129 iris_apply_brw_cs_prog_data(struct iris_compiled_shader *shader,
130 const struct brw_cs_prog_data *brw)
131 {
132 assert(shader->stage == MESA_SHADER_COMPUTE);
133 struct iris_cs_data *iris = &shader->cs;
134
135 iris->push.cross_thread.dwords = brw->push.cross_thread.dwords;
136 iris->push.cross_thread.regs = brw->push.cross_thread.regs;
137 iris->push.cross_thread.size = brw->push.cross_thread.size;
138
139 iris->push.per_thread.dwords = brw->push.per_thread.dwords;
140 iris->push.per_thread.regs = brw->push.per_thread.regs;
141 iris->push.per_thread.size = brw->push.per_thread.size;
142
143 iris->local_size[0] = brw->local_size[0];
144 iris->local_size[1] = brw->local_size[1];
145 iris->local_size[2] = brw->local_size[2];
146 iris->prog_offset[0] = brw->prog_offset[0];
147 iris->prog_offset[1] = brw->prog_offset[1];
148 iris->prog_offset[2] = brw->prog_offset[2];
149
150 iris->generate_local_id = brw->generate_local_id;
151 iris->walk_order = brw->walk_order;
152 iris->uses_barrier = brw->uses_barrier;
153 iris->prog_mask = brw->prog_mask;
154
155 iris->first_param_is_builtin_subgroup_id =
156 brw->base.nr_params > 0 &&
157 brw->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID;
158 }
159
160 static void
161 iris_apply_brw_vue_prog_data(const struct brw_vue_prog_data *brw,
162 struct iris_vue_data *iris)
163 {
164 memcpy(&iris->vue_map, &brw->vue_map, sizeof(struct intel_vue_map));
165
166 iris->urb_read_length = brw->urb_read_length;
167 iris->cull_distance_mask = brw->cull_distance_mask;
168 iris->urb_entry_size = brw->urb_entry_size;
169 iris->dispatch_mode = brw->dispatch_mode;
170 iris->include_vue_handles = brw->include_vue_handles;
171 }
172
173 static void
174 iris_apply_brw_vs_prog_data(struct iris_compiled_shader *shader,
175 const struct brw_vs_prog_data *brw)
176 {
177 assert(shader->stage == MESA_SHADER_VERTEX);
178 struct iris_vs_data *iris = &shader->vs;
179
180 iris_apply_brw_vue_prog_data(&brw->base, &iris->base);
181
182 iris->uses_vertexid = brw->uses_vertexid;
183 iris->uses_instanceid = brw->uses_instanceid;
184 iris->uses_firstvertex = brw->uses_firstvertex;
185 iris->uses_baseinstance = brw->uses_baseinstance;
186 iris->uses_drawid = brw->uses_drawid;
187 }
188
189 static void
190 iris_apply_brw_tcs_prog_data(struct iris_compiled_shader *shader,
191 const struct brw_tcs_prog_data *brw)
192 {
193 assert(shader->stage == MESA_SHADER_TESS_CTRL);
194 struct iris_tcs_data *iris = &shader->tcs;
195
196 iris_apply_brw_vue_prog_data(&brw->base, &iris->base);
197
198 iris->instances = brw->instances;
199 iris->patch_count_threshold = brw->patch_count_threshold;
200 iris->include_primitive_id = brw->include_primitive_id;
201 }
202
203 static void
204 iris_apply_brw_tes_prog_data(struct iris_compiled_shader *shader,
205 const struct brw_tes_prog_data *brw)
206 {
207 assert(shader->stage == MESA_SHADER_TESS_EVAL);
208 struct iris_tes_data *iris = &shader->tes;
209
210 iris_apply_brw_vue_prog_data(&brw->base, &iris->base);
211
212 iris->partitioning = brw->partitioning;
213 iris->output_topology = brw->output_topology;
214 iris->domain = brw->domain;
215 iris->include_primitive_id = brw->include_primitive_id;
216 }
217
218 static void
219 iris_apply_brw_gs_prog_data(struct iris_compiled_shader *shader,
220 const struct brw_gs_prog_data *brw)
221 {
222 assert(shader->stage == MESA_SHADER_GEOMETRY);
223 struct iris_gs_data *iris = &shader->gs;
224
225 iris_apply_brw_vue_prog_data(&brw->base, &iris->base);
226
227 iris->vertices_in = brw->vertices_in;
228 iris->output_vertex_size_hwords = brw->output_vertex_size_hwords;
229 iris->output_topology = brw->output_topology;
230 iris->control_data_header_size_hwords = brw->control_data_header_size_hwords;
231 iris->control_data_format = brw->control_data_format;
232 iris->static_vertex_count = brw->static_vertex_count;
233 iris->invocations = brw->invocations;
234 iris->include_primitive_id = brw->include_primitive_id;
235 }
236
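/**
 * Copy the stage-independent fields of brw_stage_prog_data into the
 * iris_compiled_shader, then fan out to the per-stage helpers above.
 * The shader takes ownership of the prog_data (and its relocs and param
 * arrays) via ralloc_steal.
 */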
237 void
238 iris_apply_brw_prog_data(struct iris_compiled_shader *shader,
239 struct brw_stage_prog_data *brw)
240 {
241 STATIC_ASSERT(ARRAY_SIZE(brw->ubo_ranges) == ARRAY_SIZE(shader->ubo_ranges));
242 for (int i = 0; i < ARRAY_SIZE(shader->ubo_ranges); i++) {
243 shader->ubo_ranges[i].block = brw->ubo_ranges[i].block;
244 shader->ubo_ranges[i].start = brw->ubo_ranges[i].start;
245 shader->ubo_ranges[i].length = brw->ubo_ranges[i].length;
246 }
247
248 shader->nr_params = brw->nr_params;
249 shader->total_scratch = brw->total_scratch;
250 shader->total_shared = brw->total_shared;
251 shader->program_size = brw->program_size;
252 shader->const_data_offset = brw->const_data_offset;
253 shader->dispatch_grf_start_reg = brw->dispatch_grf_start_reg;
254 shader->has_ubo_pull = brw->has_ubo_pull;
255 shader->use_alt_mode = brw->use_alt_mode;
256
257 switch (shader->stage) {
258 case MESA_SHADER_FRAGMENT:
259 iris_apply_brw_wm_prog_data(shader, brw_wm_prog_data_const(brw));
260 break;
261 case MESA_SHADER_COMPUTE:
262 iris_apply_brw_cs_prog_data(shader, brw_cs_prog_data_const(brw));
263 break;
264 case MESA_SHADER_VERTEX:
265 iris_apply_brw_vs_prog_data(shader, brw_vs_prog_data_const(brw));
266 break;
267 case MESA_SHADER_TESS_CTRL:
268 iris_apply_brw_tcs_prog_data(shader, brw_tcs_prog_data_const(brw));
269 break;
270 case MESA_SHADER_TESS_EVAL:
271 iris_apply_brw_tes_prog_data(shader, brw_tes_prog_data_const(brw));
272 break;
273 case MESA_SHADER_GEOMETRY:
274 iris_apply_brw_gs_prog_data(shader, brw_gs_prog_data_const(brw));
275 break;
276 default:
277 unreachable("invalid shader stage");
278 }
279
280 shader->brw_prog_data = brw;
281
282 ralloc_steal(shader, shader->brw_prog_data);
283 ralloc_steal(shader->brw_prog_data, (void *)brw->relocs);
284 ralloc_steal(shader->brw_prog_data, brw->param);
285 }
286
287 static void
288 iris_apply_elk_wm_prog_data(struct iris_compiled_shader *shader,
289 const struct elk_wm_prog_data *elk)
290 {
291 assert(shader->stage == MESA_SHADER_FRAGMENT);
292 struct iris_fs_data *iris = &shader->fs;
293
294 STATIC_ASSERT(ARRAY_SIZE(iris->urb_setup) == ARRAY_SIZE(elk->urb_setup));
295 STATIC_ASSERT(ARRAY_SIZE(iris->urb_setup_attribs) == ARRAY_SIZE(elk->urb_setup_attribs));
296 memcpy(iris->urb_setup, elk->urb_setup, sizeof(iris->urb_setup));
297 memcpy(iris->urb_setup_attribs, elk->urb_setup_attribs, elk->urb_setup_attribs_count);
298 iris->urb_setup_attribs_count = elk->urb_setup_attribs_count;
299
300 iris->num_varying_inputs = elk->num_varying_inputs;
301 iris->msaa_flags_param = elk->msaa_flags_param;
302 iris->flat_inputs = elk->flat_inputs;
303 iris->inputs = elk->inputs;
304 iris->computed_depth_mode = elk->computed_depth_mode;
305 iris->max_polygons = 1;
306 iris->dispatch_multi = 0;
307 iris->computed_stencil = elk->computed_stencil;
308 iris->early_fragment_tests = elk->early_fragment_tests;
309 iris->post_depth_coverage = elk->post_depth_coverage;
310 iris->inner_coverage = elk->inner_coverage;
311 iris->dispatch_8 = elk->dispatch_8;
312 iris->dispatch_16 = elk->dispatch_16;
313 iris->dispatch_32 = elk->dispatch_32;
314 iris->dual_src_blend = elk->dual_src_blend;
315 iris->uses_pos_offset = elk->uses_pos_offset;
316 iris->uses_omask = elk->uses_omask;
317 iris->uses_kill = elk->uses_kill;
318 iris->uses_src_depth = elk->uses_src_depth;
319 iris->uses_src_w = elk->uses_src_w;
320 iris->uses_sample_mask = elk->uses_sample_mask;
321 iris->uses_vmask = elk->uses_vmask;
322 iris->pulls_bary = elk->pulls_bary;
323 iris->has_side_effects = elk->has_side_effects;
324
325 iris->uses_nonperspective_interp_modes = elk->uses_nonperspective_interp_modes;
326
327 iris->is_per_sample = elk_wm_prog_data_is_persample(elk, 0);
328 }
329
330 static void
331 iris_apply_elk_cs_prog_data(struct iris_compiled_shader *shader,
332 const struct elk_cs_prog_data *elk)
333 {
334 assert(shader->stage == MESA_SHADER_COMPUTE);
335 struct iris_cs_data *iris = &shader->cs;
336
337 iris->push.cross_thread.dwords = elk->push.cross_thread.dwords;
338 iris->push.cross_thread.regs = elk->push.cross_thread.regs;
339 iris->push.cross_thread.size = elk->push.cross_thread.size;
340
341 iris->push.per_thread.dwords = elk->push.per_thread.dwords;
342 iris->push.per_thread.regs = elk->push.per_thread.regs;
343 iris->push.per_thread.size = elk->push.per_thread.size;
344
345 iris->local_size[0] = elk->local_size[0];
346 iris->local_size[1] = elk->local_size[1];
347 iris->local_size[2] = elk->local_size[2];
348 iris->prog_offset[0] = elk->prog_offset[0];
349 iris->prog_offset[1] = elk->prog_offset[1];
350 iris->prog_offset[2] = elk->prog_offset[2];
351
352 iris->uses_barrier = elk->uses_barrier;
353 iris->prog_mask = elk->prog_mask;
354
355 iris->first_param_is_builtin_subgroup_id =
356 elk->base.nr_params > 0 &&
357 elk->base.param[0] == ELK_PARAM_BUILTIN_SUBGROUP_ID;
358 }
359
360 static void
361 iris_apply_elk_vue_prog_data(const struct elk_vue_prog_data *elk,
362 struct iris_vue_data *iris)
363 {
364 memcpy(&iris->vue_map, &elk->vue_map, sizeof(struct intel_vue_map));
365
366 iris->urb_read_length = elk->urb_read_length;
367 iris->cull_distance_mask = elk->cull_distance_mask;
368 iris->urb_entry_size = elk->urb_entry_size;
369 iris->dispatch_mode = elk->dispatch_mode;
370 iris->include_vue_handles = elk->include_vue_handles;
371 }
372
373 static void
374 iris_apply_elk_vs_prog_data(struct iris_compiled_shader *shader,
375 const struct elk_vs_prog_data *elk)
376 {
377 assert(shader->stage == MESA_SHADER_VERTEX);
378 struct iris_vs_data *iris = &shader->vs;
379
380 iris_apply_elk_vue_prog_data(&elk->base, &iris->base);
381
382 iris->uses_vertexid = elk->uses_vertexid;
383 iris->uses_instanceid = elk->uses_instanceid;
384 iris->uses_firstvertex = elk->uses_firstvertex;
385 iris->uses_baseinstance = elk->uses_baseinstance;
386 iris->uses_drawid = elk->uses_drawid;
387 }
388
389 static void
390 iris_apply_elk_tcs_prog_data(struct iris_compiled_shader *shader,
391 const struct elk_tcs_prog_data *elk)
392 {
393 assert(shader->stage == MESA_SHADER_TESS_CTRL);
394 struct iris_tcs_data *iris = &shader->tcs;
395
396 iris_apply_elk_vue_prog_data(&elk->base, &iris->base);
397
398 iris->instances = elk->instances;
399 iris->patch_count_threshold = elk->patch_count_threshold;
400 iris->include_primitive_id = elk->include_primitive_id;
401 }
402
403 static void
404 iris_apply_elk_tes_prog_data(struct iris_compiled_shader *shader,
405 const struct elk_tes_prog_data *elk)
406 {
407 assert(shader->stage == MESA_SHADER_TESS_EVAL);
408 struct iris_tes_data *iris = &shader->tes;
409
410 iris_apply_elk_vue_prog_data(&elk->base, &iris->base);
411
412 iris->partitioning = elk->partitioning;
413 iris->output_topology = elk->output_topology;
414 iris->domain = elk->domain;
415 iris->include_primitive_id = elk->include_primitive_id;
416 }
417
418 static void
419 iris_apply_elk_gs_prog_data(struct iris_compiled_shader *shader,
420 const struct elk_gs_prog_data *elk)
421 {
422 assert(shader->stage == MESA_SHADER_GEOMETRY);
423 struct iris_gs_data *iris = &shader->gs;
424
425 iris_apply_elk_vue_prog_data(&elk->base, &iris->base);
426
427 iris->vertices_in = elk->vertices_in;
428 iris->output_vertex_size_hwords = elk->output_vertex_size_hwords;
429 iris->output_topology = elk->output_topology;
430 iris->control_data_header_size_hwords = elk->control_data_header_size_hwords;
431 iris->control_data_format = elk->control_data_format;
432 iris->static_vertex_count = elk->static_vertex_count;
433 iris->invocations = elk->invocations;
434 iris->include_primitive_id = elk->include_primitive_id;
435 }
436
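/**
 * elk counterpart of iris_apply_brw_prog_data(), used for the older
 * (Gfx8 and earlier) compiler backend.
 */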
437 void
438 iris_apply_elk_prog_data(struct iris_compiled_shader *shader,
439 struct elk_stage_prog_data *elk)
440 {
441 STATIC_ASSERT(ARRAY_SIZE(elk->ubo_ranges) == ARRAY_SIZE(shader->ubo_ranges));
442 for (int i = 0; i < ARRAY_SIZE(shader->ubo_ranges); i++) {
443 shader->ubo_ranges[i].block = elk->ubo_ranges[i].block;
444 shader->ubo_ranges[i].start = elk->ubo_ranges[i].start;
445 shader->ubo_ranges[i].length = elk->ubo_ranges[i].length;
446 }
447
448 shader->nr_params = elk->nr_params;
449 shader->total_scratch = elk->total_scratch;
450 shader->total_shared = elk->total_shared;
451 shader->program_size = elk->program_size;
452 shader->const_data_offset = elk->const_data_offset;
453 shader->dispatch_grf_start_reg = elk->dispatch_grf_start_reg;
454 shader->has_ubo_pull = elk->has_ubo_pull;
455 shader->use_alt_mode = elk->use_alt_mode;
456
457 switch (shader->stage) {
458 case MESA_SHADER_FRAGMENT:
459 iris_apply_elk_wm_prog_data(shader, elk_wm_prog_data_const(elk));
460 break;
461 case MESA_SHADER_COMPUTE:
462 iris_apply_elk_cs_prog_data(shader, elk_cs_prog_data_const(elk));
463 break;
464 case MESA_SHADER_VERTEX:
465 iris_apply_elk_vs_prog_data(shader, elk_vs_prog_data_const(elk));
466 break;
467 case MESA_SHADER_TESS_CTRL:
468 iris_apply_elk_tcs_prog_data(shader, elk_tcs_prog_data_const(elk));
469 break;
470 case MESA_SHADER_TESS_EVAL:
471 iris_apply_elk_tes_prog_data(shader, elk_tes_prog_data_const(elk));
472 break;
473 case MESA_SHADER_GEOMETRY:
474 iris_apply_elk_gs_prog_data(shader, elk_gs_prog_data_const(elk));
475 break;
476 default:
477 unreachable("invalid shader stage");
478 }
479
480 shader->elk_prog_data = elk;
481
482 ralloc_steal(shader, shader->elk_prog_data);
483 ralloc_steal(shader->elk_prog_data, (void *)elk->relocs);
484 ralloc_steal(shader->elk_prog_data, elk->param);
485 }
486
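/**
 * Attach the remaining variant state (streamout packets, system values,
 * and the binding table) to a compiled shader, taking ownership of the
 * ralloc'd arrays.
 */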
487 void
488 iris_finalize_program(struct iris_compiled_shader *shader,
489 uint32_t *streamout,
490 uint32_t *system_values,
491 unsigned num_system_values,
492 unsigned kernel_input_size,
493 unsigned num_cbufs,
494 const struct iris_binding_table *bt)
495 {
496 /* There can be only one. */
497 assert((shader->brw_prog_data == NULL) != (shader->elk_prog_data == NULL));
498
499 shader->streamout = streamout;
500 shader->system_values = system_values;
501 shader->num_system_values = num_system_values;
502 shader->kernel_input_size = kernel_input_size;
503 shader->num_cbufs = num_cbufs;
504 shader->bt = *bt;
505
506 ralloc_steal(shader, shader->streamout);
507 ralloc_steal(shader, shader->system_values);
508 }
509
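/* The functions below translate the driver's iris_*_prog_key structures
 * into the brw_*_prog_key forms consumed by the backend compiler.
 */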
510 static struct brw_vs_prog_key
511 iris_to_brw_vs_key(const struct iris_screen *screen,
512 const struct iris_vs_prog_key *key)
513 {
514 return (struct brw_vs_prog_key) {
515 BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
516 key->vue.base.limit_trig_input_range),
517 };
518 }
519
520 static struct brw_tcs_prog_key
521 iris_to_brw_tcs_key(const struct iris_screen *screen,
522 const struct iris_tcs_prog_key *key)
523 {
524 return (struct brw_tcs_prog_key) {
525 BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
526 key->vue.base.limit_trig_input_range),
527 ._tes_primitive_mode = key->_tes_primitive_mode,
528 .input_vertices = key->input_vertices,
529 .patch_outputs_written = key->patch_outputs_written,
530 .outputs_written = key->outputs_written,
531 };
532 }
533
534 static struct brw_tes_prog_key
535 iris_to_brw_tes_key(const struct iris_screen *screen,
536 const struct iris_tes_prog_key *key)
537 {
538 return (struct brw_tes_prog_key) {
539 BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
540 key->vue.base.limit_trig_input_range),
541 .patch_inputs_read = key->patch_inputs_read,
542 .inputs_read = key->inputs_read,
543 };
544 }
545
546 static struct brw_gs_prog_key
547 iris_to_brw_gs_key(const struct iris_screen *screen,
548 const struct iris_gs_prog_key *key)
549 {
550 return (struct brw_gs_prog_key) {
551 BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
552 key->vue.base.limit_trig_input_range),
553 };
554 }
555
556 static struct brw_wm_prog_key
557 iris_to_brw_fs_key(const struct iris_screen *screen,
558 const struct iris_fs_prog_key *key)
559 {
560 return (struct brw_wm_prog_key) {
561 BRW_KEY_INIT(screen->devinfo->ver, key->base.program_string_id,
562 key->base.limit_trig_input_range),
563 .nr_color_regions = key->nr_color_regions,
564 .flat_shade = key->flat_shade,
565 .alpha_test_replicate_alpha = key->alpha_test_replicate_alpha,
566 .alpha_to_coverage = key->alpha_to_coverage ? BRW_ALWAYS : BRW_NEVER,
567 .clamp_fragment_color = key->clamp_fragment_color,
568 .persample_interp = key->persample_interp ? BRW_ALWAYS : BRW_NEVER,
569 .multisample_fbo = key->multisample_fbo ? BRW_ALWAYS : BRW_NEVER,
570 .force_dual_color_blend = key->force_dual_color_blend,
571 .coherent_fb_fetch = key->coherent_fb_fetch,
572 .color_outputs_valid = key->color_outputs_valid,
573 .input_slots_valid = key->input_slots_valid,
574 .ignore_sample_mask_out = !key->multisample_fbo,
575 .null_push_constant_tbimr_workaround =
576 screen->devinfo->needs_null_push_constant_tbimr_workaround,
577 };
578 }
579
580 static struct brw_cs_prog_key
581 iris_to_brw_cs_key(const struct iris_screen *screen,
582 const struct iris_cs_prog_key *key)
583 {
584 return (struct brw_cs_prog_key) {
585 BRW_KEY_INIT(screen->devinfo->ver, key->base.program_string_id,
586 key->base.limit_trig_input_range),
587 };
588 }
589
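/* Same translation as above, but targeting the elk compiler's key
 * structures for Gfx8 and older platforms.
 */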
590 static struct elk_vs_prog_key
591 iris_to_elk_vs_key(const struct iris_screen *screen,
592 const struct iris_vs_prog_key *key)
593 {
594 return (struct elk_vs_prog_key) {
595 ELK_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
596 key->vue.base.limit_trig_input_range),
597
598 /* Don't tell the backend about our clip plane constants; we've
599 * already lowered them in NIR and don't want it doing it again.
600 */
601 .nr_userclip_plane_consts = 0,
602 };
603 }
604
605 static struct elk_tcs_prog_key
606 iris_to_elk_tcs_key(const struct iris_screen *screen,
607 const struct iris_tcs_prog_key *key)
608 {
609 return (struct elk_tcs_prog_key) {
610 ELK_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
611 key->vue.base.limit_trig_input_range),
612 ._tes_primitive_mode = key->_tes_primitive_mode,
613 .input_vertices = key->input_vertices,
614 .patch_outputs_written = key->patch_outputs_written,
615 .outputs_written = key->outputs_written,
616 .quads_workaround = key->quads_workaround,
617 };
618 }
619
620 static struct elk_tes_prog_key
621 iris_to_elk_tes_key(const struct iris_screen *screen,
622 const struct iris_tes_prog_key *key)
623 {
624 return (struct elk_tes_prog_key) {
625 ELK_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
626 key->vue.base.limit_trig_input_range),
627 .patch_inputs_read = key->patch_inputs_read,
628 .inputs_read = key->inputs_read,
629 };
630 }
631
632 static struct elk_gs_prog_key
633 iris_to_elk_gs_key(const struct iris_screen *screen,
634 const struct iris_gs_prog_key *key)
635 {
636 return (struct elk_gs_prog_key) {
637 ELK_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
638 key->vue.base.limit_trig_input_range),
639 };
640 }
641
642 static struct elk_wm_prog_key
643 iris_to_elk_fs_key(const struct iris_screen *screen,
644 const struct iris_fs_prog_key *key)
645 {
646 return (struct elk_wm_prog_key) {
647 ELK_KEY_INIT(screen->devinfo->ver, key->base.program_string_id,
648 key->base.limit_trig_input_range),
649 .nr_color_regions = key->nr_color_regions,
650 .flat_shade = key->flat_shade,
651 .alpha_test_replicate_alpha = key->alpha_test_replicate_alpha,
652 .alpha_to_coverage = key->alpha_to_coverage ? ELK_ALWAYS : ELK_NEVER,
653 .clamp_fragment_color = key->clamp_fragment_color,
654 .persample_interp = key->persample_interp ? ELK_ALWAYS : ELK_NEVER,
655 .multisample_fbo = key->multisample_fbo ? ELK_ALWAYS : ELK_NEVER,
656 .force_dual_color_blend = key->force_dual_color_blend,
657 .coherent_fb_fetch = key->coherent_fb_fetch,
658 .color_outputs_valid = key->color_outputs_valid,
659 .input_slots_valid = key->input_slots_valid,
660 .ignore_sample_mask_out = !key->multisample_fbo,
661 };
662 }
663
664 static struct elk_cs_prog_key
665 iris_to_elk_cs_key(const struct iris_screen *screen,
666 const struct iris_cs_prog_key *key)
667 {
668 return (struct elk_cs_prog_key) {
669 ELK_KEY_INIT(screen->devinfo->ver, key->base.program_string_id,
670 key->base.limit_trig_input_range),
671 };
672 }
673
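/**
 * Allocate `size` bytes from an upload manager, record the resulting
 * resource and offset in `ref`, and return a CPU-writable mapping
 * (NULL on failure).
 */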
674 static void *
675 upload_state(struct u_upload_mgr *uploader,
676 struct iris_state_ref *ref,
677 unsigned size,
678 unsigned alignment)
679 {
680 void *p = NULL;
681 u_upload_alloc(uploader, 0, size, alignment, &ref->offset, &ref->res, &p);
682 return p;
683 }
684
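/**
 * Fill out and upload a SURFACE_STATE for a UBO or SSBO range, recording
 * it in `surf_state`.  SSBOs, and UBOs accessed through the dataport
 * rather than the sampler, use a RAW-format buffer surface.
 */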
685 void
686 iris_upload_ubo_ssbo_surf_state(struct iris_context *ice,
687 struct pipe_shader_buffer *buf,
688 struct iris_state_ref *surf_state,
689 isl_surf_usage_flags_t usage)
690 {
691 struct pipe_context *ctx = &ice->ctx;
692 struct iris_screen *screen = (struct iris_screen *) ctx->screen;
693 bool ssbo = usage & ISL_SURF_USAGE_STORAGE_BIT;
694
695 void *map =
696 upload_state(ice->state.surface_uploader, surf_state,
697 screen->isl_dev.ss.size, 64);
698 if (unlikely(!map)) {
699 surf_state->res = NULL;
700 return;
701 }
702
703 struct iris_resource *res = (void *) buf->buffer;
704 struct iris_bo *surf_bo = iris_resource_bo(surf_state->res);
705 surf_state->offset += iris_bo_offset_from_base_address(surf_bo);
706
707 const bool dataport = ssbo || !iris_indirect_ubos_use_sampler(screen);
708
709 isl_buffer_fill_state(&screen->isl_dev, map,
710 .address = res->bo->address + res->offset +
711 buf->buffer_offset,
712 .size_B = buf->buffer_size - res->offset,
713 .format = dataport ? ISL_FORMAT_RAW
714 : ISL_FORMAT_R32G32B32A32_FLOAT,
715 .swizzle = ISL_SWIZZLE_IDENTITY,
716 .stride_B = 1,
717 .mocs = iris_mocs(res->bo, &screen->isl_dev, usage));
718 }
719
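/**
 * Walk an array-of-arrays deref chain and build the flattened element
 * offset as a NIR value, clamping it to the array bounds (see the comment
 * at the end of the function for why the clamp matters).
 */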
720 static nir_def *
721 get_aoa_deref_offset(nir_builder *b,
722 nir_deref_instr *deref,
723 unsigned elem_size)
724 {
725 unsigned array_size = elem_size;
726 nir_def *offset = nir_imm_int(b, 0);
727
728 while (deref->deref_type != nir_deref_type_var) {
729 assert(deref->deref_type == nir_deref_type_array);
730
731 /* This level's element size is the previous level's array size */
732 nir_def *index = deref->arr.index.ssa;
733 assert(deref->arr.index.ssa);
734 offset = nir_iadd(b, offset,
735 nir_imul_imm(b, index, array_size));
736
737 deref = nir_deref_instr_parent(deref);
738 assert(glsl_type_is_array(deref->type));
739 array_size *= glsl_get_length(deref->type);
740 }
741
742 /* Accessing an invalid surface index with the dataport can result in a
743 * hang. According to the spec "if the index used to select an individual
744 * element is negative or greater than or equal to the size of the array,
745 * the results of the operation are undefined but may not lead to
746 * termination" -- which is one of the possible outcomes of the hang.
747 * Clamp the index to prevent access outside of the array bounds.
748 */
749 return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size));
750 }
751
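/**
 * Rewrite storage image deref intrinsics into their flat-index forms,
 * using var->data.driver_location plus the array-of-arrays offset as the
 * image index.
 */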
752 static void
753 iris_lower_storage_image_derefs(nir_shader *nir)
754 {
755 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
756
757 nir_builder b = nir_builder_create(impl);
758
759 nir_foreach_block(block, impl) {
760 nir_foreach_instr_safe(instr, block) {
761 if (instr->type != nir_instr_type_intrinsic)
762 continue;
763
764 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
765 switch (intrin->intrinsic) {
766 case nir_intrinsic_image_deref_load:
767 case nir_intrinsic_image_deref_store:
768 case nir_intrinsic_image_deref_atomic:
769 case nir_intrinsic_image_deref_atomic_swap:
770 case nir_intrinsic_image_deref_size:
771 case nir_intrinsic_image_deref_samples:
772 case nir_intrinsic_image_deref_load_raw_intel:
773 case nir_intrinsic_image_deref_store_raw_intel: {
774 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
775 nir_variable *var = nir_deref_instr_get_variable(deref);
776
777 b.cursor = nir_before_instr(&intrin->instr);
778 nir_def *index =
779 nir_iadd_imm(&b, get_aoa_deref_offset(&b, deref, 1),
780 var->data.driver_location);
781 nir_rewrite_image_intrinsic(intrin, index, false);
782 break;
783 }
784
785 default:
786 break;
787 }
788 }
789 }
790 }
791
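/**
 * Return true if the shader performs any storage image atomic operation.
 */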
792 static bool
793 iris_uses_image_atomic(const nir_shader *shader)
794 {
795 nir_foreach_function_impl(impl, shader) {
796 nir_foreach_block(block, impl) {
797 nir_foreach_instr(instr, block) {
798 if (instr->type != nir_instr_type_intrinsic)
799 continue;
800
801 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
802 switch (intrin->intrinsic) {
803 case nir_intrinsic_image_deref_atomic:
804 case nir_intrinsic_image_deref_atomic_swap:
805 unreachable("Should have been lowered in "
806 "iris_lower_storage_image_derefs");
807
808 case nir_intrinsic_image_atomic:
809 case nir_intrinsic_image_atomic_swap:
810 return true;
811
812 default:
813 break;
814 }
815 }
816 }
817 }
818
819 return false;
820 }
821
822 /**
823 * Undo nir_lower_passthrough_edgeflags but keep the inputs_read flag.
824 */
825 static bool
826 iris_fix_edge_flags(nir_shader *nir)
827 {
828 if (nir->info.stage != MESA_SHADER_VERTEX) {
829 nir_shader_preserve_all_metadata(nir);
830 return false;
831 }
832
833 nir_variable *var = nir_find_variable_with_location(nir, nir_var_shader_out,
834 VARYING_SLOT_EDGE);
835 if (!var) {
836 nir_shader_preserve_all_metadata(nir);
837 return false;
838 }
839
840 var->data.mode = nir_var_shader_temp;
841 nir->info.outputs_written &= ~VARYING_BIT_EDGE;
842 nir->info.inputs_read &= ~VERT_BIT_EDGEFLAG;
843 nir_fixup_deref_modes(nir);
844
845 nir_foreach_function_impl(impl, nir) {
846 nir_metadata_preserve(impl, nir_metadata_control_flow |
847 nir_metadata_live_defs |
848 nir_metadata_loop_analysis);
849 }
850
851 return true;
852 }
853
854 /**
855 * Fix an uncompiled shader's stream output info.
856 *
857 * Core Gallium stores output->register_index as a "slot" number, where
858 * slots are assigned consecutively to all outputs in info->outputs_written.
859 * This naive packing of outputs doesn't work for us - we too have slots,
860 * but the layout is defined by the VUE map, which we won't have until we
861 * compile a specific shader variant. So, we remap these and simply store
862 * VARYING_SLOT_* in our copy's output->register_index fields.
863 *
864 * We also fix up VARYING_SLOT_{LAYER,VIEWPORT,PSIZ} to select the Y/Z/W
865 * components of our VUE header. See brw_vue_map.c for the layout.
866 */
867 static void
868 update_so_info(struct pipe_stream_output_info *so_info,
869 uint64_t outputs_written)
870 {
871 uint8_t reverse_map[64] = {};
872 unsigned slot = 0;
873 while (outputs_written) {
874 reverse_map[slot++] = u_bit_scan64(&outputs_written);
875 }
876
877 for (unsigned i = 0; i < so_info->num_outputs; i++) {
878 struct pipe_stream_output *output = &so_info->output[i];
879
880 /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
881 output->register_index = reverse_map[output->register_index];
882
883 /* The VUE header contains three scalar fields packed together:
884 * - gl_PointSize is stored in VARYING_SLOT_PSIZ.w
885 * - gl_Layer is stored in VARYING_SLOT_PSIZ.y
886 * - gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
887 */
888 switch (output->register_index) {
889 case VARYING_SLOT_LAYER:
890 assert(output->num_components == 1);
891 output->register_index = VARYING_SLOT_PSIZ;
892 output->start_component = 1;
893 break;
894 case VARYING_SLOT_VIEWPORT:
895 assert(output->num_components == 1);
896 output->register_index = VARYING_SLOT_PSIZ;
897 output->start_component = 2;
898 break;
899 case VARYING_SLOT_PSIZ:
900 assert(output->num_components == 1);
901 output->start_component = 3;
902 break;
903 }
904
905 //info->outputs_written |= 1ull << output->register_index;
906 }
907 }
908
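/* Fill four consecutive system-value slots with ELK_PARAM_IMAGE entries
 * covering `n` dwords of an isl_image_param field, zero-padding the rest
 * of the vec4.
 */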
909 static void
910 setup_vec4_image_sysval(uint32_t *sysvals, uint32_t idx,
911 unsigned offset, unsigned n)
912 {
913 assert(offset % sizeof(uint32_t) == 0);
914
915 for (unsigned i = 0; i < n; ++i)
916 sysvals[i] = ELK_PARAM_IMAGE(idx, offset / sizeof(uint32_t) + i);
917
918 for (unsigned i = n; i < 4; ++i)
919 sysvals[i] = ELK_PARAM_BUILTIN_ZERO;
920 }
921
922 /**
923 * Associate NIR uniform variables with the prog_data->param[] mechanism
924 * used by the backend. Also, decide which UBOs we'd like to push in an
925 * ideal situation (though the backend can reduce this).
926 */
927 static void
928 iris_setup_uniforms(ASSERTED const struct intel_device_info *devinfo,
929 void *mem_ctx,
930 nir_shader *nir,
931 unsigned kernel_input_size,
932 uint32_t **out_system_values,
933 unsigned *out_num_system_values,
934 unsigned *out_num_cbufs)
935 {
936 unsigned system_values_start = ALIGN(kernel_input_size, sizeof(uint32_t));
937
938 const unsigned IRIS_MAX_SYSTEM_VALUES =
939 PIPE_MAX_SHADER_IMAGES * ISL_IMAGE_PARAM_SIZE;
940 unsigned *system_values =
941 rzalloc_array(mem_ctx, unsigned, IRIS_MAX_SYSTEM_VALUES);
942 unsigned num_system_values = 0;
943
944 unsigned patch_vert_idx = -1;
945 unsigned tess_outer_default_idx = -1;
946 unsigned tess_inner_default_idx = -1;
947 unsigned ucp_idx[IRIS_MAX_CLIP_PLANES];
948 unsigned img_idx[PIPE_MAX_SHADER_IMAGES];
949 unsigned variable_group_size_idx = -1;
950 unsigned work_dim_idx = -1;
951 memset(ucp_idx, -1, sizeof(ucp_idx));
952 memset(img_idx, -1, sizeof(img_idx));
953
954 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
955
956 nir_builder b = nir_builder_at(nir_before_impl(impl));
957
958 nir_def *temp_ubo_name = nir_undef(&b, 1, 32);
959
960 /* Turn system value intrinsics into uniforms */
961 nir_foreach_block(block, impl) {
962 nir_foreach_instr_safe(instr, block) {
963 if (instr->type != nir_instr_type_intrinsic)
964 continue;
965
966 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
967 nir_def *offset;
968
969 switch (intrin->intrinsic) {
970 case nir_intrinsic_load_base_workgroup_id: {
971 /* GL doesn't have a concept of base workgroup */
972 b.cursor = nir_instr_remove(&intrin->instr);
973 nir_def_rewrite_uses(&intrin->def,
974 nir_imm_zero(&b, 3, 32));
975 continue;
976 }
977 case nir_intrinsic_load_constant: {
978 unsigned load_size = intrin->def.num_components *
979 intrin->def.bit_size / 8;
980 unsigned load_align = intrin->def.bit_size / 8;
981
982 /* This one is special because it reads from the shader constant
983 * data and not cbuf0 which gallium uploads for us.
984 */
985 b.cursor = nir_instr_remove(&intrin->instr);
986
987 nir_def *offset =
988 nir_iadd_imm(&b, intrin->src[0].ssa,
989 nir_intrinsic_base(intrin));
990
991 assert(load_size < b.shader->constant_data_size);
992 unsigned max_offset = b.shader->constant_data_size - load_size;
993 offset = nir_umin(&b, offset, nir_imm_int(&b, max_offset));
994
995 /* Constant data lives in buffers within IRIS_MEMZONE_SHADER
996 * and cannot cross that 4GB boundary, so we can do the address
997 * calculation with 32-bit adds. Also, we can ignore the high
998 * bits because IRIS_MEMZONE_SHADER is in the [0, 4GB) range.
999 */
1000 assert(IRIS_MEMZONE_SHADER_START >> 32 == 0ull);
1001
1002 nir_def *const_data_addr =
1003 nir_iadd(&b, nir_load_reloc_const_intel(&b, BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW), offset);
1004
1005 nir_def *data =
1006 nir_load_global_constant(&b, nir_u2u64(&b, const_data_addr),
1007 load_align,
1008 intrin->def.num_components,
1009 intrin->def.bit_size);
1010
1011 nir_def_rewrite_uses(&intrin->def,
1012 data);
1013 continue;
1014 }
1015 case nir_intrinsic_load_user_clip_plane: {
1016 unsigned ucp = nir_intrinsic_ucp_id(intrin);
1017
1018 if (ucp_idx[ucp] == -1) {
1019 ucp_idx[ucp] = num_system_values;
1020 num_system_values += 4;
1021 }
1022
1023 for (int i = 0; i < 4; i++) {
1024 system_values[ucp_idx[ucp] + i] =
1025 BRW_PARAM_BUILTIN_CLIP_PLANE(ucp, i);
1026 }
1027
1028 b.cursor = nir_before_instr(instr);
1029 offset = nir_imm_int(&b, system_values_start +
1030 ucp_idx[ucp] * sizeof(uint32_t));
1031 break;
1032 }
1033 case nir_intrinsic_load_patch_vertices_in:
1034 if (patch_vert_idx == -1)
1035 patch_vert_idx = num_system_values++;
1036
1037 system_values[patch_vert_idx] =
1038 BRW_PARAM_BUILTIN_PATCH_VERTICES_IN;
1039
1040 b.cursor = nir_before_instr(instr);
1041 offset = nir_imm_int(&b, system_values_start +
1042 patch_vert_idx * sizeof(uint32_t));
1043 break;
1044 case nir_intrinsic_load_tess_level_outer_default:
1045 if (tess_outer_default_idx == -1) {
1046 tess_outer_default_idx = num_system_values;
1047 num_system_values += 4;
1048 }
1049
1050 for (int i = 0; i < 4; i++) {
1051 system_values[tess_outer_default_idx + i] =
1052 BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;
1053 }
1054
1055 b.cursor = nir_before_instr(instr);
1056 offset = nir_imm_int(&b, system_values_start +
1057 tess_outer_default_idx * sizeof(uint32_t));
1058 break;
1059 case nir_intrinsic_load_tess_level_inner_default:
1060 if (tess_inner_default_idx == -1) {
1061 tess_inner_default_idx = num_system_values;
1062 num_system_values += 2;
1063 }
1064
1065 for (int i = 0; i < 2; i++) {
1066 system_values[tess_inner_default_idx + i] =
1067 BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X + i;
1068 }
1069
1070 b.cursor = nir_before_instr(instr);
1071 offset = nir_imm_int(&b, system_values_start +
1072 tess_inner_default_idx * sizeof(uint32_t));
1073 break;
1074 case nir_intrinsic_image_deref_load_param_intel: {
1075 assert(devinfo->ver < 9);
1076 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1077 nir_variable *var = nir_deref_instr_get_variable(deref);
1078
1079 if (img_idx[var->data.binding] == -1) {
1080 /* GL only allows arrays of arrays of images. */
1081 assert(glsl_type_is_image(glsl_without_array(var->type)));
1082 unsigned num_images = MAX2(1, glsl_get_aoa_size(var->type));
1083
1084 for (int i = 0; i < num_images; i++) {
1085 const unsigned img = var->data.binding + i;
1086
1087 img_idx[img] = num_system_values;
1088 num_system_values += ISL_IMAGE_PARAM_SIZE;
1089
1090 uint32_t *img_sv = &system_values[img_idx[img]];
1091
1092 setup_vec4_image_sysval(
1093 img_sv + ISL_IMAGE_PARAM_OFFSET_OFFSET, img,
1094 offsetof(struct isl_image_param, offset), 2);
1095 setup_vec4_image_sysval(
1096 img_sv + ISL_IMAGE_PARAM_SIZE_OFFSET, img,
1097 offsetof(struct isl_image_param, size), 3);
1098 setup_vec4_image_sysval(
1099 img_sv + ISL_IMAGE_PARAM_STRIDE_OFFSET, img,
1100 offsetof(struct isl_image_param, stride), 4);
1101 setup_vec4_image_sysval(
1102 img_sv + ISL_IMAGE_PARAM_TILING_OFFSET, img,
1103 offsetof(struct isl_image_param, tiling), 3);
1104 setup_vec4_image_sysval(
1105 img_sv + ISL_IMAGE_PARAM_SWIZZLING_OFFSET, img,
1106 offsetof(struct isl_image_param, swizzling), 2);
1107 }
1108 }
1109
1110 b.cursor = nir_before_instr(instr);
1111 offset = nir_iadd_imm(&b,
1112 get_aoa_deref_offset(&b, deref, ISL_IMAGE_PARAM_SIZE * 4),
1113 system_values_start +
1114 img_idx[var->data.binding] * 4 +
1115 nir_intrinsic_base(intrin) * 16);
1116 break;
1117 }
1118 case nir_intrinsic_load_workgroup_size: {
1119 assert(nir->info.workgroup_size_variable);
1120 if (variable_group_size_idx == -1) {
1121 variable_group_size_idx = num_system_values;
1122 num_system_values += 3;
1123 for (int i = 0; i < 3; i++) {
1124 system_values[variable_group_size_idx + i] =
1125 BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X + i;
1126 }
1127 }
1128
1129 b.cursor = nir_before_instr(instr);
1130 offset = nir_imm_int(&b, system_values_start +
1131 variable_group_size_idx * sizeof(uint32_t));
1132 break;
1133 }
1134 case nir_intrinsic_load_work_dim: {
1135 if (work_dim_idx == -1) {
1136 work_dim_idx = num_system_values++;
1137 system_values[work_dim_idx] = BRW_PARAM_BUILTIN_WORK_DIM;
1138 }
1139 b.cursor = nir_before_instr(instr);
1140 offset = nir_imm_int(&b, system_values_start +
1141 work_dim_idx * sizeof(uint32_t));
1142 break;
1143 }
1144 case nir_intrinsic_load_kernel_input: {
1145 assert(nir_intrinsic_base(intrin) +
1146 nir_intrinsic_range(intrin) <= kernel_input_size);
1147 b.cursor = nir_before_instr(instr);
1148 offset = nir_iadd_imm(&b, intrin->src[0].ssa,
1149 nir_intrinsic_base(intrin));
1150 break;
1151 }
1152 default:
1153 continue;
1154 }
1155
1156 nir_def *load =
1157 nir_load_ubo(&b, intrin->def.num_components, intrin->def.bit_size,
1158 temp_ubo_name, offset,
1159 .align_mul = 4,
1160 .align_offset = 0,
1161 .range_base = 0,
1162 .range = ~0);
1163
1164 nir_def_rewrite_uses(&intrin->def,
1165 load);
1166 nir_instr_remove(instr);
1167 }
1168 }
1169
1170 nir_validate_shader(nir, "before remapping");
1171
1172 /* Uniforms are stored in constant buffer 0, so the user-facing UBOs
1173 * are shifted up by one. If any constant buffer is needed, constant
1174 * buffer 0 will be needed as well, so account for it.
1175 */
1176 unsigned num_cbufs = nir->info.num_ubos;
1177 if (num_cbufs || nir->num_uniforms)
1178 num_cbufs++;
1179
1180 /* Place the new params in a new cbuf. */
1181 if (num_system_values > 0 || kernel_input_size > 0) {
1182 unsigned sysval_cbuf_index = num_cbufs;
1183 num_cbufs++;
1184
1185 system_values = reralloc(mem_ctx, system_values, unsigned,
1186 num_system_values);
1187
1188 nir_foreach_block(block, impl) {
1189 nir_foreach_instr_safe(instr, block) {
1190 if (instr->type != nir_instr_type_intrinsic)
1191 continue;
1192
1193 nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
1194
1195 if (load->intrinsic != nir_intrinsic_load_ubo)
1196 continue;
1197
1198 b.cursor = nir_before_instr(instr);
1199
1200 if (load->src[0].ssa == temp_ubo_name) {
1201 nir_def *imm = nir_imm_int(&b, sysval_cbuf_index);
1202 nir_src_rewrite(&load->src[0], imm);
1203 }
1204 }
1205 }
1206
1207 /* We need to fold the new iadds for brw_nir_analyze_ubo_ranges */
1208 nir_opt_constant_folding(nir);
1209 } else {
1210 ralloc_free(system_values);
1211 system_values = NULL;
1212 }
1213
1214 assert(num_cbufs < PIPE_MAX_CONSTANT_BUFFERS);
1215 nir_validate_shader(nir, "after remap");
1216
1217 /* We don't use params[], but gallium leaves num_uniforms set. We use it
1218 * to detect when cbuf0 exists, but by this point we no longer need it.
1219 * Zero it out so that the back-end doesn't get confused when
1220 * num_uniforms doesn't match nr_params * 4.
1221 */
1222 nir->num_uniforms = 0;
1223
1224 *out_system_values = system_values;
1225 *out_num_system_values = num_system_values;
1226 *out_num_cbufs = num_cbufs;
1227 }
1228
1229 static const char *surface_group_names[] = {
1230 [IRIS_SURFACE_GROUP_RENDER_TARGET] = "render target",
1231 [IRIS_SURFACE_GROUP_RENDER_TARGET_READ] = "non-coherent render target read",
1232 [IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = "CS work groups",
1233 [IRIS_SURFACE_GROUP_TEXTURE_LOW64] = "texture",
1234 [IRIS_SURFACE_GROUP_TEXTURE_HIGH64] = "texture",
1235 [IRIS_SURFACE_GROUP_UBO] = "ubo",
1236 [IRIS_SURFACE_GROUP_SSBO] = "ssbo",
1237 [IRIS_SURFACE_GROUP_IMAGE] = "image",
1238 };
1239
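/**
 * Print a human-readable summary of a binding table's surface groups and
 * entries (used when the DEBUG_BT flag is set; see
 * iris_setup_binding_table() below).
 */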
1240 static void
1241 iris_print_binding_table(FILE *fp, const char *name,
1242 const struct iris_binding_table *bt)
1243 {
1244 STATIC_ASSERT(ARRAY_SIZE(surface_group_names) == IRIS_SURFACE_GROUP_COUNT);
1245
1246 uint32_t total = 0;
1247 uint32_t compacted = 0;
1248
1249 for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
1250 uint32_t size = bt->sizes[i];
1251 total += size;
1252 if (size)
1253 compacted += util_bitcount64(bt->used_mask[i]);
1254 }
1255
1256 if (total == 0) {
1257 fprintf(fp, "Binding table for %s is empty\n\n", name);
1258 return;
1259 }
1260
1261 if (total != compacted) {
1262 fprintf(fp, "Binding table for %s "
1263 "(compacted to %u entries from %u entries)\n",
1264 name, compacted, total);
1265 } else {
1266 fprintf(fp, "Binding table for %s (%u entries)\n", name, total);
1267 }
1268
1269 uint32_t entry = 0;
1270 for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
1271 uint64_t mask = bt->used_mask[i];
1272 while (mask) {
1273 int index = u_bit_scan64(&mask);
1274 fprintf(fp, " [%u] %s #%d\n", entry++, surface_group_names[i], index);
1275 }
1276 }
1277 fprintf(fp, "\n");
1278 }
1279
1280 enum {
1281 /* Max elements in a surface group. */
1282 SURFACE_GROUP_MAX_ELEMENTS = 64,
1283 };
1284
1285 /**
1286 * Map a <group, index> pair to a binding table index.
1287 *
1288 * For example: <UBO, 5> => binding table index 12
1289 */
1290 uint32_t
1291 iris_group_index_to_bti(const struct iris_binding_table *bt,
1292 enum iris_surface_group group, uint32_t index)
1293 {
1294 assert(index < bt->sizes[group]);
1295 uint64_t mask = bt->used_mask[group];
1296 uint64_t bit = 1ull << index;
1297 if (bit & mask) {
1298 return bt->offsets[group] + util_bitcount64((bit - 1) & mask);
1299 } else {
1300 return IRIS_SURFACE_NOT_USED;
1301 }
1302 }
1303
1304 /**
1305 * Map a binding table index back to a <group, index> pair.
1306 *
1307 * For example: binding table index 12 => <UBO, 5>
1308 */
1309 uint32_t
1310 iris_bti_to_group_index(const struct iris_binding_table *bt,
1311 enum iris_surface_group group, uint32_t bti)
1312 {
1313 uint64_t used_mask = bt->used_mask[group];
1314 assert(bti >= bt->offsets[group]);
1315
1316 uint32_t c = bti - bt->offsets[group];
1317 while (used_mask) {
1318 int i = u_bit_scan64(&used_mask);
1319 if (c == 0)
1320 return i;
1321 c--;
1322 }
1323
1324 return IRIS_SURFACE_NOT_USED;
1325 }
1326
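/* Replace a group-relative surface index source with the final binding
 * table index computed from the compacted layout.
 */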
1327 static void
1328 rewrite_src_with_bti(nir_builder *b, struct iris_binding_table *bt,
1329 nir_instr *instr, nir_src *src,
1330 enum iris_surface_group group)
1331 {
1332 assert(bt->sizes[group] > 0);
1333
1334 b->cursor = nir_before_instr(instr);
1335 nir_def *bti;
1336 if (nir_src_is_const(*src)) {
1337 uint32_t index = nir_src_as_uint(*src);
1338 bti = nir_imm_intN_t(b, iris_group_index_to_bti(bt, group, index),
1339 src->ssa->bit_size);
1340 } else {
1341 /* Indirect usage makes all the surfaces of the group available,
1342 * so we can just add the base.
1343 */
1344 assert(bt->used_mask[group] == BITFIELD64_MASK(bt->sizes[group]));
1345 bti = nir_iadd_imm(b, src->ssa, bt->offsets[group]);
1346 }
1347 nir_src_rewrite(src, bti);
1348 }
1349
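/* Record which surfaces of a group a given source can address; indirect
 * indexing marks the entire group as used.
 */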
1350 static void
1351 mark_used_with_src(struct iris_binding_table *bt, nir_src *src,
1352 enum iris_surface_group group)
1353 {
1354 assert(bt->sizes[group] > 0);
1355
1356 if (nir_src_is_const(*src)) {
1357 uint64_t index = nir_src_as_uint(*src);
1358 assert(index < bt->sizes[group]);
1359 bt->used_mask[group] |= 1ull << index;
1360 } else {
1361 /* There's an indirect usage, so we need all the surfaces. */
1362 bt->used_mask[group] = BITFIELD64_MASK(bt->sizes[group]);
1363 }
1364 }
1365
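/* One-time check of the INTEL_DISABLE_COMPACT_BINDING_TABLE debug option. */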
1366 static bool
1367 skip_compacting_binding_tables(void)
1368 {
1369 static int skip = -1;
1370 if (skip < 0)
1371 skip = debug_get_bool_option("INTEL_DISABLE_COMPACT_BINDING_TABLE", false);
1372 return skip;
1373 }
1374
1375 /**
1376 * Set up the binding table indices and apply to the shader.
1377 */
1378 static void
1379 iris_setup_binding_table(const struct intel_device_info *devinfo,
1380 struct nir_shader *nir,
1381 struct iris_binding_table *bt,
1382 unsigned num_render_targets,
1383 unsigned num_system_values,
1384 unsigned num_cbufs)
1385 {
1386 const struct shader_info *info = &nir->info;
1387
1388 memset(bt, 0, sizeof(*bt));
1389
1390 /* Set the sizes for each surface group. For some groups, we already know
1391 * upfront how many will be used, so mark them.
1392 */
1393 if (info->stage == MESA_SHADER_FRAGMENT) {
1394 bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET] = num_render_targets;
1395 /* All render targets used. */
1396 bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET] =
1397 BITFIELD64_MASK(num_render_targets);
1398
1399 /* Set up the render target read surface group in order to support
1400 * non-coherent framebuffer fetch on Gfx8.
1401 */
1402 if (devinfo->ver == 8 && info->outputs_read) {
1403 bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET_READ] = num_render_targets;
1404 bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET_READ] =
1405 BITFIELD64_MASK(num_render_targets);
1406 }
1407 } else if (info->stage == MESA_SHADER_COMPUTE) {
1408 bt->sizes[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
1409 }
1410
1411 assert(ARRAY_SIZE(info->textures_used) >= 4);
1412 int max_tex = BITSET_LAST_BIT(info->textures_used);
1413 assert(max_tex <= 128);
1414 bt->sizes[IRIS_SURFACE_GROUP_TEXTURE_LOW64] = MIN2(64, max_tex);
1415 bt->sizes[IRIS_SURFACE_GROUP_TEXTURE_HIGH64] = MAX2(0, max_tex - 64);
1416 bt->used_mask[IRIS_SURFACE_GROUP_TEXTURE_LOW64] =
1417 info->textures_used[0] | ((uint64_t)info->textures_used[1]) << 32;
1418 bt->used_mask[IRIS_SURFACE_GROUP_TEXTURE_HIGH64] =
1419 info->textures_used[2] | ((uint64_t)info->textures_used[3]) << 32;
1420 bt->samplers_used_mask = info->samplers_used[0];
1421
1422 bt->sizes[IRIS_SURFACE_GROUP_IMAGE] = BITSET_LAST_BIT(info->images_used);
1423
1424 /* Allocate an extra slot in the UBO section for NIR constants.
1425 * Binding table compaction will remove it if unnecessary.
1426 *
1427 * We don't include them in iris_compiled_shader::num_cbufs because
1428 * they are uploaded separately from shs->constbuf[], but from a shader
1429 * point of view, they're another UBO (at the end of the section).
1430 */
1431 bt->sizes[IRIS_SURFACE_GROUP_UBO] = num_cbufs + 1;
1432
1433 bt->sizes[IRIS_SURFACE_GROUP_SSBO] = info->num_ssbos;
1434
1435 for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
1436 assert(bt->sizes[i] <= SURFACE_GROUP_MAX_ELEMENTS);
1437
1438 /* Mark surfaces used for the cases where we don't have the information
1439 * available upfront.
1440 */
1441 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1442 nir_foreach_block (block, impl) {
1443 nir_foreach_instr (instr, block) {
1444 if (instr->type != nir_instr_type_intrinsic)
1445 continue;
1446
1447 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1448 switch (intrin->intrinsic) {
1449 case nir_intrinsic_load_num_workgroups:
1450 bt->used_mask[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
1451 break;
1452
1453 case nir_intrinsic_load_output:
1454 if (devinfo->ver == 8) {
1455 mark_used_with_src(bt, &intrin->src[0],
1456 IRIS_SURFACE_GROUP_RENDER_TARGET_READ);
1457 }
1458 break;
1459
1460 case nir_intrinsic_image_size:
1461 case nir_intrinsic_image_load:
1462 case nir_intrinsic_image_store:
1463 case nir_intrinsic_image_atomic:
1464 case nir_intrinsic_image_atomic_swap:
1465 case nir_intrinsic_image_load_raw_intel:
1466 case nir_intrinsic_image_store_raw_intel:
1467 mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_IMAGE);
1468 break;
1469
1470 case nir_intrinsic_load_ubo:
1471 mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_UBO);
1472 break;
1473
1474 case nir_intrinsic_store_ssbo:
1475 mark_used_with_src(bt, &intrin->src[1], IRIS_SURFACE_GROUP_SSBO);
1476 break;
1477
1478 case nir_intrinsic_get_ssbo_size:
1479 case nir_intrinsic_ssbo_atomic:
1480 case nir_intrinsic_ssbo_atomic_swap:
1481 case nir_intrinsic_load_ssbo:
1482 mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_SSBO);
1483 break;
1484
1485 default:
1486 break;
1487 }
1488 }
1489 }
1490
1491 /* When disabled, we just mark everything as used. */
1492 if (unlikely(skip_compacting_binding_tables())) {
1493 for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
1494 bt->used_mask[i] = BITFIELD64_MASK(bt->sizes[i]);
1495 }
1496
1497 /* Calculate the offsets and the binding table size based on the used
1498 * surfaces. After this point, the functions to go between "group indices"
1499 * and binding table indices can be used.
1500 */
1501 uint32_t next = 0;
1502 for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
1503 if (bt->used_mask[i] != 0) {
1504 bt->offsets[i] = next;
1505 next += util_bitcount64(bt->used_mask[i]);
1506 }
1507 }
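/* Each binding table entry is a 32-bit surface state offset, so the table
 * occupies one DWord per used surface.
 */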
1508 bt->size_bytes = next * 4;
1509
1510 if (INTEL_DEBUG(DEBUG_BT)) {
1511 iris_print_binding_table(stderr, gl_shader_stage_name(info->stage), bt);
1512 }
1513
1514 /* Apply the binding table indices. The backend compiler is not expected
1515 * to change those, as we haven't set any of the *_start entries in brw
1516 * binding_table.
1517 */
1518 nir_builder b = nir_builder_create(impl);
1519
1520 nir_foreach_block (block, impl) {
1521 nir_foreach_instr (instr, block) {
1522 if (instr->type == nir_instr_type_tex) {
1523 nir_tex_instr *tex = nir_instr_as_tex(instr);
1524 if (tex->texture_index < 64) {
1525 tex->texture_index =
1526 iris_group_index_to_bti(bt, IRIS_SURFACE_GROUP_TEXTURE_LOW64,
1527 tex->texture_index);
1528 } else {
1529 tex->texture_index =
1530 iris_group_index_to_bti(bt, IRIS_SURFACE_GROUP_TEXTURE_HIGH64,
1531 tex->texture_index - 64);
1532 }
1533 continue;
1534 }
1535
1536 if (instr->type != nir_instr_type_intrinsic)
1537 continue;
1538
1539 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1540 switch (intrin->intrinsic) {
1541 case nir_intrinsic_image_size:
1542 case nir_intrinsic_image_load:
1543 case nir_intrinsic_image_store:
1544 case nir_intrinsic_image_atomic:
1545 case nir_intrinsic_image_atomic_swap:
1546 case nir_intrinsic_image_load_raw_intel:
1547 case nir_intrinsic_image_store_raw_intel:
1548 rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1549 IRIS_SURFACE_GROUP_IMAGE);
1550 break;
1551
1552 case nir_intrinsic_load_ubo:
1553 rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1554 IRIS_SURFACE_GROUP_UBO);
1555 break;
1556
1557 case nir_intrinsic_store_ssbo:
1558 rewrite_src_with_bti(&b, bt, instr, &intrin->src[1],
1559 IRIS_SURFACE_GROUP_SSBO);
1560 break;
1561
1562 case nir_intrinsic_load_output:
1563 if (devinfo->ver == 8) {
1564 rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1565 IRIS_SURFACE_GROUP_RENDER_TARGET_READ);
1566 }
1567 break;
1568
1569 case nir_intrinsic_get_ssbo_size:
1570 case nir_intrinsic_ssbo_atomic:
1571 case nir_intrinsic_ssbo_atomic_swap:
1572 case nir_intrinsic_load_ssbo:
1573 rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1574 IRIS_SURFACE_GROUP_SSBO);
1575 break;
1576
1577 default:
1578 break;
1579 }
1580 }
1581 }
1582 }
1583
1584 static void
1585 iris_debug_recompile_brw(struct iris_screen *screen,
1586 struct util_debug_callback *dbg,
1587 struct iris_uncompiled_shader *ish,
1588 const struct brw_base_prog_key *key)
1589 {
1590 if (!ish || list_is_empty(&ish->variants)
1591 || list_is_singular(&ish->variants))
1592 return;
1593
1594 const struct brw_compiler *c = screen->brw;
1595 const struct shader_info *info = &ish->nir->info;
1596
1597 brw_shader_perf_log(c, dbg, "Recompiling %s shader for program %s: %s\n",
1598 _mesa_shader_stage_to_string(info->stage),
1599 info->name ? info->name : "(no identifier)",
1600 info->label ? info->label : "");
1601
1602 struct iris_compiled_shader *shader =
1603 list_first_entry(&ish->variants, struct iris_compiled_shader, link);
1604 const void *old_iris_key = &shader->key;
1605
1606 union brw_any_prog_key old_key;
1607
1608 switch (info->stage) {
1609 case MESA_SHADER_VERTEX:
1610 old_key.vs = iris_to_brw_vs_key(screen, old_iris_key);
1611 break;
1612 case MESA_SHADER_TESS_CTRL:
1613 old_key.tcs = iris_to_brw_tcs_key(screen, old_iris_key);
1614 break;
1615 case MESA_SHADER_TESS_EVAL:
1616 old_key.tes = iris_to_brw_tes_key(screen, old_iris_key);
1617 break;
1618 case MESA_SHADER_GEOMETRY:
1619 old_key.gs = iris_to_brw_gs_key(screen, old_iris_key);
1620 break;
1621 case MESA_SHADER_FRAGMENT:
1622 old_key.wm = iris_to_brw_fs_key(screen, old_iris_key);
1623 break;
1624 case MESA_SHADER_COMPUTE:
1625 old_key.cs = iris_to_brw_cs_key(screen, old_iris_key);
1626 break;
1627 default:
1628 unreachable("invalid shader stage");
1629 }
1630
1631 brw_debug_key_recompile(c, dbg, info->stage, &old_key.base, key);
1632 }
1633
1634 static void
1635 iris_debug_recompile_elk(struct iris_screen *screen,
1636 struct util_debug_callback *dbg,
1637 struct iris_uncompiled_shader *ish,
1638 const struct elk_base_prog_key *key)
1639 {
1640 if (!ish || list_is_empty(&ish->variants)
1641 || list_is_singular(&ish->variants))
1642 return;
1643
1644 const struct elk_compiler *c = screen->elk;
1645 const struct shader_info *info = &ish->nir->info;
1646
1647 elk_shader_perf_log(c, dbg, "Recompiling %s shader for program %s: %s\n",
1648 _mesa_shader_stage_to_string(info->stage),
1649 info->name ? info->name : "(no identifier)",
1650 info->label ? info->label : "");
1651
1652 struct iris_compiled_shader *shader =
1653 list_first_entry(&ish->variants, struct iris_compiled_shader, link);
1654 const void *old_iris_key = &shader->key;
1655
1656 union elk_any_prog_key old_key;
1657
1658 switch (info->stage) {
1659 case MESA_SHADER_VERTEX:
1660 old_key.vs = iris_to_elk_vs_key(screen, old_iris_key);
1661 break;
1662 case MESA_SHADER_TESS_CTRL:
1663 old_key.tcs = iris_to_elk_tcs_key(screen, old_iris_key);
1664 break;
1665 case MESA_SHADER_TESS_EVAL:
1666 old_key.tes = iris_to_elk_tes_key(screen, old_iris_key);
1667 break;
1668 case MESA_SHADER_GEOMETRY:
1669 old_key.gs = iris_to_elk_gs_key(screen, old_iris_key);
1670 break;
1671 case MESA_SHADER_FRAGMENT:
1672 old_key.wm = iris_to_elk_fs_key(screen, old_iris_key);
1673 break;
1674 case MESA_SHADER_COMPUTE:
1675 old_key.cs = iris_to_elk_cs_key(screen, old_iris_key);
1676 break;
1677 default:
1678 unreachable("invalid shader stage");
1679 }
1680
1681 elk_debug_key_recompile(c, dbg, info->stage, &old_key.base, key);
1682 }
1683
1684
1685 static void
1686 check_urb_size(struct iris_context *ice,
1687 unsigned needed_size,
1688 gl_shader_stage stage)
1689 {
1690 unsigned last_allocated_size = ice->shaders.urb.cfg.size[stage];
1691
1692 /* If the last URB allocation wasn't large enough for our needs,
1693 * flag it as needing to be reconfigured. Otherwise, we can use
1694 * the existing config. However, if the URB is constrained, and
1695 * we can shrink our size for this stage, we may be able to gain
1696 * extra concurrency by reconfiguring it to be smaller. Do so.
1697 */
1698 if (last_allocated_size < needed_size ||
1699 (ice->shaders.urb.constrained && last_allocated_size > needed_size)) {
1700 ice->state.dirty |= IRIS_DIRTY_URB;
1701 }
1702 }
1703
1704 /**
1705 * Get the shader for the last enabled geometry stage.
1706 *
1707 * This stage is the one which will feed stream output and the rasterizer.
1708 */
1709 static gl_shader_stage
1710 last_vue_stage(struct iris_context *ice)
1711 {
1712 if (ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
1713 return MESA_SHADER_GEOMETRY;
1714
1715 if (ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
1716 return MESA_SHADER_TESS_EVAL;
1717
1718 return MESA_SHADER_VERTEX;
1719 }
1720
1721 /**
1722 * \param added Set to \c true if the variant was added to the list (i.e., a
1723 * variant matching \c key was not found). Set to \c false
1724 * otherwise.
1725 */
1726 static inline struct iris_compiled_shader *
1727 find_or_add_variant(const struct iris_screen *screen,
1728 struct iris_uncompiled_shader *ish,
1729 enum iris_program_cache_id cache_id,
1730 const void *key, unsigned key_size,
1731 bool *added)
1732 {
1733 struct list_head *start = ish->variants.next;
1734
1735 *added = false;
1736
1737 if (screen->precompile) {
1738 /* Check the first list entry. There will always be at least one
1739 * variant in the list (most likely the precompile variant), and
1740 * other contexts only append new variants, so we can safely check
1741 * it without locking, saving that cost in the common case.
1742 */
1743 struct iris_compiled_shader *first =
1744 list_first_entry(&ish->variants, struct iris_compiled_shader, link);
1745
1746 if (memcmp(&first->key, key, key_size) == 0) {
1747 util_queue_fence_wait(&first->ready);
1748 return first;
1749 }
1750
1751 /* Skip this one in the loop below */
1752 start = first->link.next;
1753 }
1754
1755 struct iris_compiled_shader *variant = NULL;
1756
1757 /* If it doesn't match, we have to walk the list; other contexts may be
1758 * concurrently appending shaders to it, so we need to lock here.
1759 */
1760 simple_mtx_lock(&ish->lock);
1761
1762 list_for_each_entry_from(struct iris_compiled_shader, v, start,
1763 &ish->variants, link) {
1764 if (memcmp(&v->key, key, key_size) == 0) {
1765 variant = v;
1766 break;
1767 }
1768 }
1769
1770 gl_shader_stage stage = ish->nir->info.stage;
1771
1772 if (variant == NULL) {
1773 variant = iris_create_shader_variant(screen, NULL, stage, cache_id,
1774 key_size, key);
1775
1776 /* Append our new variant to the shader's variant list. */
1777 list_addtail(&variant->link, &ish->variants);
1778 *added = true;
1779
1780 simple_mtx_unlock(&ish->lock);
1781 } else {
1782 simple_mtx_unlock(&ish->lock);
1783
1784 util_queue_fence_wait(&variant->ready);
1785 }
1786
1787 assert(stage == variant->stage);
1788 return variant;
1789 }
1790
1791 static void
1792 iris_threaded_compile_job_delete(void *_job, UNUSED void *_gdata,
1793 UNUSED int thread_index)
1794 {
1795 free(_job);
1796 }
1797
1798 static void
1799 iris_schedule_compile(struct iris_screen *screen,
1800 struct util_queue_fence *ready_fence,
1801 struct util_debug_callback *dbg,
1802 struct iris_threaded_compile_job *job,
1803 util_queue_execute_func execute)
1804
1805 {
1806 struct util_async_debug_callback async_debug;
1807
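/* If the caller provided a debug callback, wrap it in an async-safe
 * version for the worker thread, and drain any accumulated messages back
 * into the original callback once the job has finished.
 */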
1808 if (dbg) {
1809 u_async_debug_init(&async_debug);
1810 job->dbg = &async_debug.base;
1811 }
1812
1813 util_queue_add_job(&screen->shader_compiler_queue, job, ready_fence, execute,
1814 iris_threaded_compile_job_delete, 0);
1815
1816 if (screen->driconf.sync_compile || dbg)
1817 util_queue_fence_wait(ready_fence);
1818
1819 if (dbg) {
1820 u_async_debug_drain(&async_debug, dbg);
1821 u_async_debug_cleanup(&async_debug);
1822 }
1823 }
1824
1825 /**
1826 * Compile a vertex shader, and upload the assembly.
1827 */
1828 static void
1829 iris_compile_vs(struct iris_screen *screen,
1830 struct u_upload_mgr *uploader,
1831 struct util_debug_callback *dbg,
1832 struct iris_uncompiled_shader *ish,
1833 struct iris_compiled_shader *shader)
1834 {
1835 const struct intel_device_info *devinfo = screen->devinfo;
1836 void *mem_ctx = ralloc_context(NULL);
1837 uint32_t *system_values;
1838 unsigned num_system_values;
1839 unsigned num_cbufs;
1840
1841 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1842 const struct iris_vs_prog_key *const key = &shader->key.vs;
1843
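/* If the key enables user clip planes, lower them to clip-distance writes
 * and re-run the usual cleanup passes so shader_info reflects the newly
 * added outputs.
 */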
1844 if (key->vue.nr_userclip_plane_consts) {
1845 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1846 /* Only run the cleanup passes if nir_lower_clip_vs actually lowered anything. */
1847 if (nir_lower_clip_vs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
1848 true, false, NULL)) {
1849 nir_lower_io_to_temporaries(nir, impl, true, false);
1850 nir_lower_global_vars_to_local(nir);
1851 nir_lower_vars_to_ssa(nir);
1852 nir_shader_gather_info(nir, impl);
1853 }
1854 }
1855
1856 iris_setup_uniforms(devinfo, mem_ctx, nir, 0, &system_values,
1857 &num_system_values, &num_cbufs);
1858
1859 struct iris_binding_table bt;
1860 iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1861 num_system_values, num_cbufs);
1862
1863 const char *error;
1864 const unsigned *program;
1865 if (screen->brw) {
1866 struct brw_vs_prog_data *brw_prog_data =
1867 rzalloc(mem_ctx, struct brw_vs_prog_data);
1868
1869 brw_prog_data->base.base.use_alt_mode = nir->info.use_legacy_math_rules;
1870
1871 brw_nir_analyze_ubo_ranges(screen->brw, nir, brw_prog_data->base.base.ubo_ranges);
1872
1873 brw_compute_vue_map(devinfo,
1874 &brw_prog_data->base.vue_map, nir->info.outputs_written,
1875 nir->info.separate_shader, /* pos_slots */ 1);
1876
1877 struct brw_vs_prog_key brw_key = iris_to_brw_vs_key(screen, key);
1878
1879 struct brw_compile_vs_params params = {
1880 .base = {
1881 .mem_ctx = mem_ctx,
1882 .nir = nir,
1883 .log_data = dbg,
1884 .source_hash = ish->source_hash,
1885 },
1886 .key = &brw_key,
1887 .prog_data = brw_prog_data,
1888 };
1889
1890 program = brw_compile_vs(screen->brw, &params);
1891 error = params.base.error_str;
1892 if (program) {
1893 iris_apply_brw_prog_data(shader, &brw_prog_data->base.base);
1894 iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
1895 }
1896 } else {
1897 struct elk_vs_prog_data *elk_prog_data =
1898 rzalloc(mem_ctx, struct elk_vs_prog_data);
1899
1900 elk_prog_data->base.base.use_alt_mode = nir->info.use_legacy_math_rules;
1901
1902 elk_nir_analyze_ubo_ranges(screen->elk, nir, elk_prog_data->base.base.ubo_ranges);
1903
1904 elk_compute_vue_map(devinfo,
1905 &elk_prog_data->base.vue_map, nir->info.outputs_written,
1906 nir->info.separate_shader, /* pos_slots */ 1);
1907
1908 struct elk_vs_prog_key elk_key = iris_to_elk_vs_key(screen, key);
1909
1910 struct elk_compile_vs_params params = {
1911 .base = {
1912 .mem_ctx = mem_ctx,
1913 .nir = nir,
1914 .log_data = dbg,
1915 .source_hash = ish->source_hash,
1916 },
1917 .key = &elk_key,
1918 .prog_data = elk_prog_data,
1919 };
1920
1921 program = elk_compile_vs(screen->elk, &params);
1922 error = params.base.error_str;
1923 if (program) {
1924 iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
1925 iris_apply_elk_prog_data(shader, &elk_prog_data->base.base);
1926 }
1927 }
1928
1929 if (program == NULL) {
1930 dbg_printf("Failed to compile vertex shader: %s\n", error);
1931 ralloc_free(mem_ctx);
1932
1933 shader->compilation_failed = true;
1934 util_queue_fence_signal(&shader->ready);
1935
1936 return;
1937 }
1938
1939 shader->compilation_failed = false;
1940
1941 uint32_t *so_decls =
1942 screen->vtbl.create_so_decl_list(&ish->stream_output,
1943 &iris_vue_data(shader)->vue_map);
1944
1945 iris_finalize_program(shader, so_decls,
1946 system_values, num_system_values, 0, num_cbufs, &bt);
1947
1948 iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_VS,
1949 sizeof(*key), key, program);
1950
1951 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1952
1953 ralloc_free(mem_ctx);
1954 }
1955
1956 /**
1957 * Update the current vertex shader variant.
1958 *
1959 * Fill out the key, look in the cache, compile and bind if needed.
1960 */
1961 static void
1962 iris_update_compiled_vs(struct iris_context *ice)
1963 {
1964 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1965 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_VERTEX];
1966 struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
1967 struct iris_uncompiled_shader *ish =
1968 ice->shaders.uncompiled[MESA_SHADER_VERTEX];
1969
1970 struct iris_vs_prog_key key = { KEY_INIT(vue.base) };
1971 screen->vtbl.populate_vs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
1972
1973 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_VS];
1974 bool added;
1975 struct iris_compiled_shader *shader =
1976 find_or_add_variant(screen, ish, IRIS_CACHE_VS, &key, sizeof(key), &added);
1977
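/* Only newly added variants need compiling; try the disk cache first and
 * fall back to a fresh compile on a miss.
 */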
1978 if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
1979 &key, sizeof(key))) {
1980 iris_compile_vs(screen, uploader, &ice->dbg, ish, shader);
1981 }
1982
1983 if (shader->compilation_failed)
1984 shader = NULL;
1985
1986 if (old != shader) {
1987 iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_VERTEX],
1988 shader);
1989 ice->state.dirty |= IRIS_DIRTY_VF_SGVS;
1990 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_VS |
1991 IRIS_STAGE_DIRTY_BINDINGS_VS |
1992 IRIS_STAGE_DIRTY_CONSTANTS_VS;
1993 shs->sysvals_need_upload = true;
1994
1995 unsigned urb_entry_size = shader ?
1996 iris_vue_data(shader)->urb_entry_size : 0;
1997 check_urb_size(ice, urb_entry_size, MESA_SHADER_VERTEX);
1998 }
1999 }
2000
2001 /**
2002 * Get the shader_info for a given stage, or NULL if the stage is disabled.
2003 */
2004 const struct shader_info *
2005 iris_get_shader_info(const struct iris_context *ice, gl_shader_stage stage)
2006 {
2007 const struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[stage];
2008
2009 if (!ish)
2010 return NULL;
2011
2012 const nir_shader *nir = ish->nir;
2013 return &nir->info;
2014 }
2015
2016 /**
2017 * Get the union of TCS output and TES input slots.
2018 *
2019 * TCS and TES need to agree on a common URB entry layout. In particular,
2020 * the data for all patch vertices is stored in a single URB entry (unlike
2021 * GS which has one entry per input vertex). This means that per-vertex
2022 * array indexing needs a stride.
2023 *
2024 * SSO requires locations to match, but doesn't require the number of
2025 * outputs/inputs to match (in fact, the TCS often has extra outputs).
2026 * So, we need to take the extra step of unifying these on the fly.
2027 */
2028 static void
2029 get_unified_tess_slots(const struct iris_context *ice,
2030 uint64_t *per_vertex_slots,
2031 uint32_t *per_patch_slots)
2032 {
2033 const struct shader_info *tcs =
2034 iris_get_shader_info(ice, MESA_SHADER_TESS_CTRL);
2035 const struct shader_info *tes =
2036 iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
2037
2038 *per_vertex_slots = tes->inputs_read;
2039 *per_patch_slots = tes->patch_inputs_read;
2040
2041 if (tcs) {
2042 *per_vertex_slots |= tcs->outputs_written;
2043 *per_patch_slots |= tcs->patch_outputs_written;
2044 }
2045 }
2046
2047 /**
2048 * Compile a tessellation control shader, and upload the assembly.
2049 */
2050 static void
2051 iris_compile_tcs(struct iris_screen *screen,
2052 struct hash_table *passthrough_ht,
2053 struct u_upload_mgr *uploader,
2054 struct util_debug_callback *dbg,
2055 struct iris_uncompiled_shader *ish,
2056 struct iris_compiled_shader *shader)
2057 {
2058 void *mem_ctx = ralloc_context(NULL);
2059 const struct intel_device_info *devinfo = screen->devinfo;
2060 uint32_t *system_values = NULL;
2061 unsigned num_system_values = 0;
2062 unsigned num_cbufs = 0;
2063
2064 nir_shader *nir;
2065
2066 struct iris_binding_table bt;
2067
2068 const struct iris_tcs_prog_key *const key = &shader->key.tcs;
2069 struct brw_tcs_prog_key brw_key = iris_to_brw_tcs_key(screen, key);
2070 struct elk_tcs_prog_key elk_key = iris_to_elk_tcs_key(screen, key);
2071 uint32_t source_hash;
2072
2073 if (ish) {
2074 nir = nir_shader_clone(mem_ctx, ish->nir);
2075 source_hash = ish->source_hash;
2076 } else {
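/* No TCS is bound, so generate a passthrough TCS from the key; this lets
 * tessellation run when the application supplies only a TES.
 */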
2077 if (screen->brw) {
2078 nir = brw_nir_create_passthrough_tcs(mem_ctx, screen->brw, &brw_key);
2079 } else {
2080 assert(screen->elk);
2081 nir = elk_nir_create_passthrough_tcs(mem_ctx, screen->elk, &elk_key);
2082 }
2083 source_hash = *(uint32_t*)nir->info.source_blake3;
2084 }
2085
2086 iris_setup_uniforms(devinfo, mem_ctx, nir, 0, &system_values,
2087 &num_system_values, &num_cbufs);
2088 iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
2089 num_system_values, num_cbufs);
2090
2091 const char *error = NULL;
2092 const unsigned *program;
2093 if (screen->brw) {
2094 struct brw_tcs_prog_data *brw_prog_data =
2095 rzalloc(mem_ctx, struct brw_tcs_prog_data);
2096 brw_nir_analyze_ubo_ranges(screen->brw, nir, brw_prog_data->base.base.ubo_ranges);
2097
2098 struct brw_compile_tcs_params params = {
2099 .base = {
2100 .mem_ctx = mem_ctx,
2101 .nir = nir,
2102 .log_data = dbg,
2103 .source_hash = source_hash,
2104 },
2105 .key = &brw_key,
2106 .prog_data = brw_prog_data,
2107 };
2108
2109 program = brw_compile_tcs(screen->brw, &params);
2110 error = params.base.error_str;
2111
2112 if (program) {
2113 iris_apply_brw_prog_data(shader, &brw_prog_data->base.base);
2114 iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
2115 }
2116 } else {
2117 assert(screen->elk);
2118 struct elk_tcs_prog_data *elk_prog_data =
2119 rzalloc(mem_ctx, struct elk_tcs_prog_data);
2120 elk_nir_analyze_ubo_ranges(screen->elk, nir, elk_prog_data->base.base.ubo_ranges);
2121
2122 struct elk_compile_tcs_params params = {
2123 .base = {
2124 .mem_ctx = mem_ctx,
2125 .nir = nir,
2126 .log_data = dbg,
2127 .source_hash = source_hash,
2128 },
2129 .key = &elk_key,
2130 .prog_data = elk_prog_data,
2131 };
2132
2133 program = elk_compile_tcs(screen->elk, &params);
2134 error = params.base.error_str;
2135
2136 if (program) {
2137 iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
2138 iris_apply_elk_prog_data(shader, &elk_prog_data->base.base);
2139 }
2140 }
2141
2142 if (program == NULL) {
2143 dbg_printf("Failed to compile control shader: %s\n", error);
2144 ralloc_free(mem_ctx);
2145
2146 shader->compilation_failed = true;
2147 util_queue_fence_signal(&shader->ready);
2148
2149 return;
2150 }
2151
2152 shader->compilation_failed = false;
2153
2154 iris_finalize_program(shader, NULL, system_values,
2155 num_system_values, 0, num_cbufs, &bt);
2156
2157 iris_upload_shader(screen, ish, shader, passthrough_ht, uploader,
2158 IRIS_CACHE_TCS, sizeof(*key), key, program);
2159
2160 if (ish)
2161 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
2162
2163 ralloc_free(mem_ctx);
2164 }
2165
2166 /**
2167 * Update the current tessellation control shader variant.
2168 *
2169 * Fill out the key, look in the cache, compile and bind if needed.
2170 */
2171 static void
2172 iris_update_compiled_tcs(struct iris_context *ice)
2173 {
2174 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_CTRL];
2175 struct iris_uncompiled_shader *tcs =
2176 ice->shaders.uncompiled[MESA_SHADER_TESS_CTRL];
2177 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2178 struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
2179 const struct intel_device_info *devinfo = screen->devinfo;
2180
2181 const struct shader_info *tes_info =
2182 iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
2183 struct iris_tcs_prog_key key = {
2184 .vue.base.program_string_id = tcs ? tcs->program_id : 0,
2185 ._tes_primitive_mode = tes_info->tess._primitive_mode,
2186 .input_vertices =
2187 !tcs || iris_use_tcs_multi_patch(screen) ? ice->state.vertices_per_patch : 0,
2188 .quads_workaround = devinfo->ver < 9 &&
2189 tes_info->tess._primitive_mode == TESS_PRIMITIVE_QUADS &&
2190 tes_info->tess.spacing == TESS_SPACING_EQUAL,
2191 };
2192 get_unified_tess_slots(ice, &key.outputs_written,
2193 &key.patch_outputs_written);
2194 screen->vtbl.populate_tcs_key(ice, &key);
2195
2196 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TCS];
2197 struct iris_compiled_shader *shader;
2198 bool added = false;
2199
2200 if (tcs != NULL) {
2201 shader = find_or_add_variant(screen, tcs, IRIS_CACHE_TCS, &key,
2202 sizeof(key), &added);
2203 } else {
2204 /* Look for and possibly create a passthrough TCS */
2205 shader = iris_find_cached_shader(ice, IRIS_CACHE_TCS, sizeof(key), &key);
2206
2207
2208 if (shader == NULL) {
2209 shader = iris_create_shader_variant(screen, ice->shaders.cache,
2210 MESA_SHADER_TESS_CTRL,
2211 IRIS_CACHE_TCS, sizeof(key), &key);
2212 added = true;
2213 }
2214
2215 }
2216
2217 /* If the variant was newly added (not found in whichever cache we
2218  * searched), compile it now, unless a real TCS is bound and a matching
2219  * binary can be retrieved from the disk cache.
2220  */
2221 if (added &&
2222 (tcs == NULL || !iris_disk_cache_retrieve(screen, uploader, tcs, shader,
2223 &key, sizeof(key)))) {
2224 iris_compile_tcs(screen, ice->shaders.cache, uploader, &ice->dbg, tcs,
2225 shader);
2226 }
2227
2228 if (shader->compilation_failed)
2229 shader = NULL;
2230
2231 if (old != shader) {
2232 iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_CTRL],
2233 shader);
2234 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_TCS |
2235 IRIS_STAGE_DIRTY_BINDINGS_TCS |
2236 IRIS_STAGE_DIRTY_CONSTANTS_TCS;
2237 shs->sysvals_need_upload = true;
2238
2239 unsigned urb_entry_size = shader ?
2240 iris_vue_data(shader)->urb_entry_size : 0;
2241 check_urb_size(ice, urb_entry_size, MESA_SHADER_TESS_CTRL);
2242 }
2243 }
2244
2245 /**
2246 * Compile a tessellation evaluation shader, and upload the assembly.
2247 */
2248 static void
2249 iris_compile_tes(struct iris_screen *screen,
2250 struct u_upload_mgr *uploader,
2251 struct util_debug_callback *dbg,
2252 struct iris_uncompiled_shader *ish,
2253 struct iris_compiled_shader *shader)
2254 {
2255 void *mem_ctx = ralloc_context(NULL);
2256 uint32_t *system_values;
2257 const struct intel_device_info *devinfo = screen->devinfo;
2258 unsigned num_system_values;
2259 unsigned num_cbufs;
2260
2261 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
2262 const struct iris_tes_prog_key *const key = &shader->key.tes;
2263
2264 if (key->vue.nr_userclip_plane_consts) {
2265 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
2266 nir_lower_clip_vs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
2267 true, false, NULL);
2268 nir_lower_io_to_temporaries(nir, impl, true, false);
2269 nir_lower_global_vars_to_local(nir);
2270 nir_lower_vars_to_ssa(nir);
2271 nir_shader_gather_info(nir, impl);
2272 }
2273
2274 iris_setup_uniforms(devinfo, mem_ctx, nir, 0, &system_values,
2275 &num_system_values, &num_cbufs);
2276
2277 struct iris_binding_table bt;
2278 iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
2279 num_system_values, num_cbufs);
2280
2281 const char *error;
2282 const unsigned *program;
2283
2284 if (screen->brw) {
2285 struct brw_tes_prog_data *brw_prog_data =
2286 rzalloc(mem_ctx, struct brw_tes_prog_data);
2287
2288 brw_nir_analyze_ubo_ranges(screen->brw, nir, brw_prog_data->base.base.ubo_ranges);
2289
2290 struct intel_vue_map input_vue_map;
2291 brw_compute_tess_vue_map(&input_vue_map, key->inputs_read,
2292 key->patch_inputs_read);
2293
2294 struct brw_tes_prog_key brw_key = iris_to_brw_tes_key(screen, key);
2295
2296 struct brw_compile_tes_params params = {
2297 .base = {
2298 .mem_ctx = mem_ctx,
2299 .nir = nir,
2300 .log_data = dbg,
2301 .source_hash = ish->source_hash,
2302 },
2303 .key = &brw_key,
2304 .prog_data = brw_prog_data,
2305 .input_vue_map = &input_vue_map,
2306 };
2307
2308 program = brw_compile_tes(screen->brw, &params);
2309 error = params.base.error_str;
2310
2311 if (program) {
2312 iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
2313 iris_apply_brw_prog_data(shader, &brw_prog_data->base.base);
2314 }
2315 } else {
2316 struct elk_tes_prog_data *elk_prog_data =
2317 rzalloc(mem_ctx, struct elk_tes_prog_data);
2318
2319 elk_nir_analyze_ubo_ranges(screen->elk, nir, elk_prog_data->base.base.ubo_ranges);
2320
2321 struct intel_vue_map input_vue_map;
2322 elk_compute_tess_vue_map(&input_vue_map, key->inputs_read,
2323 key->patch_inputs_read);
2324
2325 struct elk_tes_prog_key elk_key = iris_to_elk_tes_key(screen, key);
2326
2327 struct elk_compile_tes_params params = {
2328 .base = {
2329 .mem_ctx = mem_ctx,
2330 .nir = nir,
2331 .log_data = dbg,
2332 .source_hash = ish->source_hash,
2333 },
2334 .key = &elk_key,
2335 .prog_data = elk_prog_data,
2336 .input_vue_map = &input_vue_map,
2337 };
2338
2339 program = elk_compile_tes(screen->elk, &params);
2340 error = params.base.error_str;
2341
2342 if (program) {
2343 iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
2344 iris_apply_elk_prog_data(shader, &elk_prog_data->base.base);
2345 }
2346 }
2347
2348 if (program == NULL) {
2349 dbg_printf("Failed to compile evaluation shader: %s\n", error);
2350 ralloc_free(mem_ctx);
2351
2352 shader->compilation_failed = true;
2353 util_queue_fence_signal(&shader->ready);
2354
2355 return;
2356 }
2357
2358 shader->compilation_failed = false;
2359
2360 uint32_t *so_decls =
2361 screen->vtbl.create_so_decl_list(&ish->stream_output,
2362 &iris_vue_data(shader)->vue_map);
2363
2364 iris_finalize_program(shader, so_decls, system_values,
2365 num_system_values, 0, num_cbufs, &bt);
2366
2367 iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_TES,
2368 sizeof(*key), key, program);
2369
2370 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
2371
2372 ralloc_free(mem_ctx);
2373 }
2374
2375 /**
2376 * Update the current tessellation evaluation shader variant.
2377 *
2378 * Fill out the key, look in the cache, compile and bind if needed.
2379 */
2380 static void
2381 iris_update_compiled_tes(struct iris_context *ice)
2382 {
2383 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2384 struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
2385 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_EVAL];
2386 struct iris_uncompiled_shader *ish =
2387 ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
2388
2389 struct iris_tes_prog_key key = { KEY_INIT(vue.base) };
2390 get_unified_tess_slots(ice, &key.inputs_read, &key.patch_inputs_read);
2391 screen->vtbl.populate_tes_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
2392
2393 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TES];
2394 bool added;
2395 struct iris_compiled_shader *shader =
2396 find_or_add_variant(screen, ish, IRIS_CACHE_TES, &key, sizeof(key), &added);
2397
2398 if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
2399 &key, sizeof(key))) {
2400 iris_compile_tes(screen, uploader, &ice->dbg, ish, shader);
2401 }
2402
2403 if (shader->compilation_failed)
2404 shader = NULL;
2405
2406 if (old != shader) {
2407 iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_EVAL],
2408 shader);
2409 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_TES |
2410 IRIS_STAGE_DIRTY_BINDINGS_TES |
2411 IRIS_STAGE_DIRTY_CONSTANTS_TES;
2412 shs->sysvals_need_upload = true;
2413
2414 unsigned urb_entry_size = shader ?
2415 iris_vue_data(shader)->urb_entry_size : 0;
2416 check_urb_size(ice, urb_entry_size, MESA_SHADER_TESS_EVAL);
2417 }
2418
2419 /* TODO: Could compare and avoid flagging this. */
2420 const struct shader_info *tes_info = &ish->nir->info;
2421 if (BITSET_TEST(tes_info->system_values_read, SYSTEM_VALUE_VERTICES_IN)) {
2422 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CONSTANTS_TES;
2423 ice->state.shaders[MESA_SHADER_TESS_EVAL].sysvals_need_upload = true;
2424 }
2425 }
2426
2427 /**
2428 * Compile a geometry shader, and upload the assembly.
2429 */
2430 static void
2431 iris_compile_gs(struct iris_screen *screen,
2432 struct u_upload_mgr *uploader,
2433 struct util_debug_callback *dbg,
2434 struct iris_uncompiled_shader *ish,
2435 struct iris_compiled_shader *shader)
2436 {
2437 const struct intel_device_info *devinfo = screen->devinfo;
2438 void *mem_ctx = ralloc_context(NULL);
2439 uint32_t *system_values;
2440 unsigned num_system_values;
2441 unsigned num_cbufs;
2442
2443 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
2444 const struct iris_gs_prog_key *const key = &shader->key.gs;
2445
2446 if (key->vue.nr_userclip_plane_consts) {
2447 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
2448 nir_lower_clip_gs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
2449 false, NULL);
2450 nir_lower_io_to_temporaries(nir, impl, true, false);
2451 nir_lower_global_vars_to_local(nir);
2452 nir_lower_vars_to_ssa(nir);
2453 nir_shader_gather_info(nir, impl);
2454 }
2455
2456 iris_setup_uniforms(devinfo, mem_ctx, nir, 0, &system_values,
2457 &num_system_values, &num_cbufs);
2458
2459 struct iris_binding_table bt;
2460 iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
2461 num_system_values, num_cbufs);
2462
2463 const char *error;
2464 const unsigned *program;
2465 if (screen->brw) {
2466 struct brw_gs_prog_data *brw_prog_data =
2467 rzalloc(mem_ctx, struct brw_gs_prog_data);
2468
2469 brw_nir_analyze_ubo_ranges(screen->brw, nir, brw_prog_data->base.base.ubo_ranges);
2470
2471 brw_compute_vue_map(devinfo,
2472 &brw_prog_data->base.vue_map, nir->info.outputs_written,
2473 nir->info.separate_shader, /* pos_slots */ 1);
2474
2475 struct brw_gs_prog_key brw_key = iris_to_brw_gs_key(screen, key);
2476
2477 struct brw_compile_gs_params params = {
2478 .base = {
2479 .mem_ctx = mem_ctx,
2480 .nir = nir,
2481 .log_data = dbg,
2482 .source_hash = ish->source_hash,
2483 },
2484 .key = &brw_key,
2485 .prog_data = brw_prog_data,
2486 };
2487
2488 program = brw_compile_gs(screen->brw, &params);
2489 error = params.base.error_str;
2490 if (program) {
2491 iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
2492 iris_apply_brw_prog_data(shader, &brw_prog_data->base.base);
2493 }
2494 } else {
2495 struct elk_gs_prog_data *elk_prog_data =
2496 rzalloc(mem_ctx, struct elk_gs_prog_data);
2497
2498 elk_nir_analyze_ubo_ranges(screen->elk, nir, elk_prog_data->base.base.ubo_ranges);
2499
2500 elk_compute_vue_map(devinfo,
2501 &elk_prog_data->base.vue_map, nir->info.outputs_written,
2502 nir->info.separate_shader, /* pos_slots */ 1);
2503
2504 struct elk_gs_prog_key elk_key = iris_to_elk_gs_key(screen, key);
2505
2506 struct elk_compile_gs_params params = {
2507 .base = {
2508 .mem_ctx = mem_ctx,
2509 .nir = nir,
2510 .log_data = dbg,
2511 .source_hash = ish->source_hash,
2512 },
2513 .key = &elk_key,
2514 .prog_data = elk_prog_data,
2515 };
2516
2517 program = elk_compile_gs(screen->elk, &params);
2518 error = params.base.error_str;
2519 if (program) {
2520 iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
2521 iris_apply_elk_prog_data(shader, &elk_prog_data->base.base);
2522 }
2523 }
2524
2525 if (program == NULL) {
2526 dbg_printf("Failed to compile geometry shader: %s\n", error);
2527 ralloc_free(mem_ctx);
2528
2529 shader->compilation_failed = true;
2530 util_queue_fence_signal(&shader->ready);
2531
2532 return;
2533 }
2534
2535 shader->compilation_failed = false;
2536
2537 uint32_t *so_decls =
2538 screen->vtbl.create_so_decl_list(&ish->stream_output,
2539 &iris_vue_data(shader)->vue_map);
2540
2541 iris_finalize_program(shader, so_decls, system_values,
2542 num_system_values, 0, num_cbufs, &bt);
2543
2544 iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_GS,
2545 sizeof(*key), key, program);
2546
2547 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
2548
2549 ralloc_free(mem_ctx);
2550 }
2551
2552 /**
2553 * Update the current geometry shader variant.
2554 *
2555 * Fill out the key, look in the cache, compile and bind if needed.
2556 */
2557 static void
2558 iris_update_compiled_gs(struct iris_context *ice)
2559 {
2560 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_GEOMETRY];
2561 struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
2562 struct iris_uncompiled_shader *ish =
2563 ice->shaders.uncompiled[MESA_SHADER_GEOMETRY];
2564 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_GS];
2565 struct iris_compiled_shader *shader = NULL;
2566 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2567
2568 if (ish) {
2569 struct iris_gs_prog_key key = { KEY_INIT(vue.base) };
2570 screen->vtbl.populate_gs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
2571
2572 bool added;
2573
2574 shader = find_or_add_variant(screen, ish, IRIS_CACHE_GS, &key,
2575 sizeof(key), &added);
2576
2577 if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
2578 &key, sizeof(key))) {
2579 iris_compile_gs(screen, uploader, &ice->dbg, ish, shader);
2580 }
2581
2582 if (shader->compilation_failed)
2583 shader = NULL;
2584 }
2585
2586 if (old != shader) {
2587 iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_GEOMETRY],
2588 shader);
2589 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_GS |
2590 IRIS_STAGE_DIRTY_BINDINGS_GS |
2591 IRIS_STAGE_DIRTY_CONSTANTS_GS;
2592 shs->sysvals_need_upload = true;
2593
2594 unsigned urb_entry_size = shader ?
2595 iris_vue_data(shader)->urb_entry_size : 0;
2596 check_urb_size(ice, urb_entry_size, MESA_SHADER_GEOMETRY);
2597 }
2598 }
2599
2600 /**
2601 * Compile a fragment (pixel) shader, and upload the assembly.
2602 */
2603 static void
2604 iris_compile_fs(struct iris_screen *screen,
2605 struct u_upload_mgr *uploader,
2606 struct util_debug_callback *dbg,
2607 struct iris_uncompiled_shader *ish,
2608 struct iris_compiled_shader *shader,
2609 struct intel_vue_map *vue_map)
2610 {
2611 void *mem_ctx = ralloc_context(NULL);
2612 uint32_t *system_values;
2613 const struct intel_device_info *devinfo = screen->devinfo;
2614 unsigned num_system_values;
2615 unsigned num_cbufs;
2616
2617 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
2618 const struct iris_fs_prog_key *const key = &shader->key.fs;
2619
2620 iris_setup_uniforms(devinfo, mem_ctx, nir, 0, &system_values,
2621 &num_system_values, &num_cbufs);
2622
2623 /* Lower output variables to load_output intrinsics before setting up
2624 * binding tables, so iris_setup_binding_table can map any load_output
2625 * intrinsics to IRIS_SURFACE_GROUP_RENDER_TARGET_READ on Gfx8 for
2626 * non-coherent framebuffer fetches.
2627 */
2628 brw_nir_lower_fs_outputs(nir);
2629
2630 /* On Gfx11+, shader RT write messages have a "Null Render Target" bit
2631 * and do not need a binding table entry with a null surface. Earlier
2632 * generations need an entry for a null surface.
2633 */
2634 int null_rts = devinfo->ver < 11 ? 1 : 0;
2635
2636 struct iris_binding_table bt;
2637 iris_setup_binding_table(devinfo, nir, &bt,
2638 MAX2(key->nr_color_regions, null_rts),
2639 num_system_values, num_cbufs);
2640
2641 const char *error;
2642 const unsigned *program;
2643
2644 if (screen->brw) {
2645 struct brw_wm_prog_data *brw_prog_data =
2646 rzalloc(mem_ctx, struct brw_wm_prog_data);
2647
2648 brw_prog_data->base.use_alt_mode = nir->info.use_legacy_math_rules;
2649
2650 brw_nir_analyze_ubo_ranges(screen->brw, nir, brw_prog_data->base.ubo_ranges);
2651
2652 struct brw_wm_prog_key brw_key = iris_to_brw_fs_key(screen, key);
2653
2654 struct brw_compile_fs_params params = {
2655 .base = {
2656 .mem_ctx = mem_ctx,
2657 .nir = nir,
2658 .log_data = dbg,
2659 .source_hash = ish->source_hash,
2660 },
2661 .key = &brw_key,
2662 .prog_data = brw_prog_data,
2663
2664 .allow_spilling = true,
2665 .max_polygons = UCHAR_MAX,
2666 .vue_map = vue_map,
2667 };
2668
2669 program = brw_compile_fs(screen->brw, &params);
2670 error = params.base.error_str;
2671 if (program) {
2672 iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
2673 iris_apply_brw_prog_data(shader, &brw_prog_data->base);
2674 }
2675 } else {
2676 struct elk_wm_prog_data *elk_prog_data =
2677 rzalloc(mem_ctx, struct elk_wm_prog_data);
2678
2679 elk_prog_data->base.use_alt_mode = nir->info.use_legacy_math_rules;
2680
2681 elk_nir_analyze_ubo_ranges(screen->elk, nir, elk_prog_data->base.ubo_ranges);
2682
2683 struct elk_wm_prog_key elk_key = iris_to_elk_fs_key(screen, key);
2684
2685 struct elk_compile_fs_params params = {
2686 .base = {
2687 .mem_ctx = mem_ctx,
2688 .nir = nir,
2689 .log_data = dbg,
2690 .source_hash = ish->source_hash,
2691 },
2692 .key = &elk_key,
2693 .prog_data = elk_prog_data,
2694
2695 .allow_spilling = true,
2696 .max_polygons = UCHAR_MAX,
2697 .vue_map = vue_map,
2698 };
2699
2700 program = elk_compile_fs(screen->elk, &params);
2701 error = params.base.error_str;
2702 if (program) {
2703 iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
2704 iris_apply_elk_prog_data(shader, &elk_prog_data->base);
2705 }
2706 }
2707
2708 if (program == NULL) {
2709 dbg_printf("Failed to compile fragment shader: %s\n", error);
2710 ralloc_free(mem_ctx);
2711
2712 shader->compilation_failed = true;
2713 util_queue_fence_signal(&shader->ready);
2714
2715 return;
2716 }
2717
2718 shader->compilation_failed = false;
2719
2720 iris_finalize_program(shader, NULL, system_values,
2721 num_system_values, 0, num_cbufs, &bt);
2722
2723 iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_FS,
2724 sizeof(*key), key, program);
2725
2726 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
2727
2728 ralloc_free(mem_ctx);
2729 }
2730
2731 /**
2732 * Update the current fragment shader variant.
2733 *
2734 * Fill out the key, look in the cache, compile and bind if needed.
2735 */
2736 static void
2737 iris_update_compiled_fs(struct iris_context *ice)
2738 {
2739 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_FRAGMENT];
2740 struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
2741 struct iris_uncompiled_shader *ish =
2742 ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
2743 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2744 struct iris_fs_prog_key key = { KEY_INIT(base) };
2745 screen->vtbl.populate_fs_key(ice, &ish->nir->info, &key);
2746
2747 struct intel_vue_map *last_vue_map =
2748 &iris_vue_data(ice->shaders.last_vue_shader)->vue_map;
2749
2750 if (ish->nos & (1ull << IRIS_NOS_LAST_VUE_MAP))
2751 key.input_slots_valid = last_vue_map->slots_valid;
2752
2753 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_FS];
2754 bool added;
2755 struct iris_compiled_shader *shader =
2756 find_or_add_variant(screen, ish, IRIS_CACHE_FS, &key,
2757 sizeof(key), &added);
2758
2759 if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
2760 &key, sizeof(key))) {
2761 iris_compile_fs(screen, uploader, &ice->dbg, ish, shader, last_vue_map);
2762 }
2763
2764 if (shader->compilation_failed)
2765 shader = NULL;
2766
2767 if (old != shader) {
2768 // XXX: We only need to flag CLIP if the barycentric modes' NONPERSPECTIVE
2769 // bit toggles; we might be able to avoid flagging SBE too.
2770 iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_FRAGMENT],
2771 shader);
2772 ice->state.dirty |= IRIS_DIRTY_WM |
2773 IRIS_DIRTY_CLIP |
2774 IRIS_DIRTY_SBE;
2775 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_FS |
2776 IRIS_STAGE_DIRTY_BINDINGS_FS |
2777 IRIS_STAGE_DIRTY_CONSTANTS_FS;
2778 shs->sysvals_need_upload = true;
2779 }
2780 }
2781
2782 /**
2783 * Update the last enabled stage's VUE map.
2784 *
2785 * When the shader feeding the rasterizer's output interface changes, we
2786 * need to re-emit various packets.
2787 */
2788 static void
2789 update_last_vue_map(struct iris_context *ice,
2790 struct iris_compiled_shader *shader)
2791 {
2792 const struct intel_vue_map *vue_map = &iris_vue_data(shader)->vue_map;
2793 const struct intel_vue_map *old_map =
2794 !ice->shaders.last_vue_shader ? NULL :
2795 &iris_vue_data(ice->shaders.last_vue_shader)->vue_map;
2796 const uint64_t changed_slots =
2797 (old_map ? old_map->slots_valid : 0ull) ^ vue_map->slots_valid;
2798
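/* Whether the shader writes gl_ViewportIndex determines if one or all
 * viewports are in use, which in turn affects viewport, scissor, and clip
 * state.
 */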
2799 if (changed_slots & VARYING_BIT_VIEWPORT) {
2800 ice->state.num_viewports =
2801 (vue_map->slots_valid & VARYING_BIT_VIEWPORT) ? IRIS_MAX_VIEWPORTS : 1;
2802 ice->state.dirty |= IRIS_DIRTY_CLIP |
2803 IRIS_DIRTY_SF_CL_VIEWPORT |
2804 IRIS_DIRTY_CC_VIEWPORT |
2805 IRIS_DIRTY_SCISSOR_RECT;
2806 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_UNCOMPILED_FS |
2807 ice->state.stage_dirty_for_nos[IRIS_NOS_LAST_VUE_MAP];
2808 }
2809
2810 if (changed_slots & VARYING_BIT_LAYER) {
2811 ice->state.dirty |= IRIS_DIRTY_CLIP;
2812 }
2813
2814 if (changed_slots || (old_map && old_map->separate != vue_map->separate)) {
2815 ice->state.dirty |= IRIS_DIRTY_SBE;
2816 }
2817
2818 iris_shader_variant_reference(&ice->shaders.last_vue_shader, shader);
2819 }
2820
2821 static void
2822 iris_update_pull_constant_descriptors(struct iris_context *ice,
2823 gl_shader_stage stage)
2824 {
2825 struct iris_compiled_shader *shader = ice->shaders.prog[stage];
2826
2827 if (!shader || !shader->has_ubo_pull)
2828 return;
2829
2830 struct iris_shader_state *shs = &ice->state.shaders[stage];
2831 bool any_new_descriptors =
2832 shader->num_system_values > 0 && shs->sysvals_need_upload;
2833
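/* Create surface states for any bound constant buffers that don't have
 * one yet; pull constant loads go through the binding table and need them.
 */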
2834 unsigned bound_cbufs = shs->bound_cbufs;
2835
2836 while (bound_cbufs) {
2837 const int i = u_bit_scan(&bound_cbufs);
2838 struct pipe_shader_buffer *cbuf = &shs->constbuf[i];
2839 struct iris_state_ref *surf_state = &shs->constbuf_surf_state[i];
2840 if (!surf_state->res && cbuf->buffer) {
2841 iris_upload_ubo_ssbo_surf_state(ice, cbuf, surf_state,
2842 ISL_SURF_USAGE_CONSTANT_BUFFER_BIT);
2843 any_new_descriptors = true;
2844 }
2845 }
2846
2847 if (any_new_descriptors)
2848 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_BINDINGS_VS << stage;
2849 }
2850
2851 /**
2852 * Update the current shader variants for the given state.
2853 *
2854 * This should be called on every draw call to ensure that the correct
2855 * shaders are bound. It will also flag any dirty state triggered by
2856 * swapping out those shaders.
2857 */
2858 void
2859 iris_update_compiled_shaders(struct iris_context *ice)
2860 {
2861 const uint64_t stage_dirty = ice->state.stage_dirty;
2862
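/* Tessellation stages come and go as a pair: with a TES bound, update both
 * the TCS and TES variants; otherwise drop any existing variants and flag
 * the associated state dirty.
 */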
2863 if (stage_dirty & (IRIS_STAGE_DIRTY_UNCOMPILED_TCS |
2864 IRIS_STAGE_DIRTY_UNCOMPILED_TES)) {
2865 struct iris_uncompiled_shader *tes =
2866 ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
2867 if (tes) {
2868 iris_update_compiled_tcs(ice);
2869 iris_update_compiled_tes(ice);
2870 } else {
2871 iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_CTRL], NULL);
2872 iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_EVAL], NULL);
2873 ice->state.stage_dirty |=
2874 IRIS_STAGE_DIRTY_TCS | IRIS_STAGE_DIRTY_TES |
2875 IRIS_STAGE_DIRTY_BINDINGS_TCS | IRIS_STAGE_DIRTY_BINDINGS_TES |
2876 IRIS_STAGE_DIRTY_CONSTANTS_TCS | IRIS_STAGE_DIRTY_CONSTANTS_TES;
2877
2878 if (ice->shaders.urb.constrained)
2879 ice->state.dirty |= IRIS_DIRTY_URB;
2880 }
2881 }
2882
2883 if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_VS)
2884 iris_update_compiled_vs(ice);
2885 if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_GS)
2886 iris_update_compiled_gs(ice);
2887
2888 if (stage_dirty & (IRIS_STAGE_DIRTY_UNCOMPILED_GS |
2889 IRIS_STAGE_DIRTY_UNCOMPILED_TES)) {
2890 const struct iris_compiled_shader *gs =
2891 ice->shaders.prog[MESA_SHADER_GEOMETRY];
2892 const struct iris_compiled_shader *tes =
2893 ice->shaders.prog[MESA_SHADER_TESS_EVAL];
2894
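/* Work out whether the final geometry stage emits points or lines, since
 * the clip state (XY clip enables) depends on it.
 */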
2895 bool points_or_lines = false;
2896
2897 if (gs) {
2898 const struct iris_gs_data *gs_data = iris_gs_data_const(gs);
2899 points_or_lines =
2900 gs_data->output_topology == _3DPRIM_POINTLIST ||
2901 gs_data->output_topology == _3DPRIM_LINESTRIP;
2902 } else if (tes) {
2903 const struct iris_tes_data *tes_data = iris_tes_data_const(tes);
2904 points_or_lines =
2905 tes_data->output_topology == INTEL_TESS_OUTPUT_TOPOLOGY_LINE ||
2906 tes_data->output_topology == INTEL_TESS_OUTPUT_TOPOLOGY_POINT;
2907 }
2908
2909 if (ice->shaders.output_topology_is_points_or_lines != points_or_lines) {
2910 /* Outbound to XY Clip enables */
2911 ice->shaders.output_topology_is_points_or_lines = points_or_lines;
2912 ice->state.dirty |= IRIS_DIRTY_CLIP;
2913 }
2914 }
2915
2916 gl_shader_stage last_stage = last_vue_stage(ice);
2917 struct iris_compiled_shader *shader = ice->shaders.prog[last_stage];
2918 struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[last_stage];
2919 update_last_vue_map(ice, shader);
2920 if (ice->state.streamout != shader->streamout) {
2921 ice->state.streamout = shader->streamout;
2922 ice->state.dirty |= IRIS_DIRTY_SO_DECL_LIST | IRIS_DIRTY_STREAMOUT;
2923 }
2924
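/* Refresh the stream output target strides from the last VUE stage's SO
 * info; Gallium stores strides in DWords, so convert to bytes here.
 */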
2925 if (ice->state.streamout_active) {
2926 for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
2927 struct iris_stream_output_target *so =
2928 (void *) ice->state.so_target[i];
2929 if (so)
2930 so->stride = ish->stream_output.stride[i] * sizeof(uint32_t);
2931 }
2932 }
2933
2934 if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_FS)
2935 iris_update_compiled_fs(ice);
2936
2937 for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_FRAGMENT; i++) {
2938 if (ice->state.stage_dirty & (IRIS_STAGE_DIRTY_CONSTANTS_VS << i))
2939 iris_update_pull_constant_descriptors(ice, i);
2940 }
2941 }
2942
2943 static void
2944 iris_compile_cs(struct iris_screen *screen,
2945 struct u_upload_mgr *uploader,
2946 struct util_debug_callback *dbg,
2947 struct iris_uncompiled_shader *ish,
2948 struct iris_compiled_shader *shader)
2949 {
2950 void *mem_ctx = ralloc_context(NULL);
2951 uint32_t *system_values;
2952 const struct intel_device_info *devinfo = screen->devinfo;
2953 unsigned num_system_values;
2954 unsigned num_cbufs;
2955
2956 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
2957 const struct iris_cs_prog_key *const key = &shader->key.cs;
2958
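/* Lower compute-specific intrinsics (workgroup and subgroup IDs, etc.)
 * into forms the backend compiler handles directly.
 */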
2959 if (screen->brw)
2960 NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics, devinfo, NULL);
2961 else
2962 NIR_PASS_V(nir, elk_nir_lower_cs_intrinsics, devinfo, NULL);
2963
2964 iris_setup_uniforms(devinfo, mem_ctx, nir, ish->kernel_input_size,
2965 &system_values, &num_system_values, &num_cbufs);
2966
2967 struct iris_binding_table bt;
2968 iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
2969 num_system_values, num_cbufs);
2970
2971 const char *error;
2972 const unsigned *program;
2973
2974 if (screen->brw) {
2975 struct brw_cs_prog_key brw_key = iris_to_brw_cs_key(screen, key);
2976
2977 struct brw_cs_prog_data *brw_prog_data =
2978 rzalloc(mem_ctx, struct brw_cs_prog_data);
2979
2980 struct brw_compile_cs_params params = {
2981 .base = {
2982 .mem_ctx = mem_ctx,
2983 .nir = nir,
2984 .log_data = dbg,
2985 .source_hash = ish->source_hash,
2986 },
2987 .key = &brw_key,
2988 .prog_data = brw_prog_data,
2989 };
2990
2991 program = brw_compile_cs(screen->brw, &params);
2992 error = params.base.error_str;
2993 if (program) {
2994 iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
2995 iris_apply_brw_prog_data(shader, &brw_prog_data->base);
2996 }
2997 } else {
2998 struct elk_cs_prog_key elk_key = iris_to_elk_cs_key(screen, key);
2999
3000 struct elk_cs_prog_data *elk_prog_data =
3001 rzalloc(mem_ctx, struct elk_cs_prog_data);
3002
3003 struct elk_compile_cs_params params = {
3004 .base = {
3005 .mem_ctx = mem_ctx,
3006 .nir = nir,
3007 .log_data = dbg,
3008 .source_hash = ish->source_hash,
3009 },
3010 .key = &elk_key,
3011 .prog_data = elk_prog_data,
3012 };
3013
3014 program = elk_compile_cs(screen->elk, &params);
3015 error = params.base.error_str;
3016 if (program) {
3017 iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
3018 iris_apply_elk_prog_data(shader, &elk_prog_data->base);
3019 }
3020 }
3021
3022 if (program == NULL) {
3023 dbg_printf("Failed to compile compute shader: %s\n", error);
3024
3025 shader->compilation_failed = true;
3026 util_queue_fence_signal(&shader->ready);
3027
3028 return;
3029 }
3030
3031 shader->compilation_failed = false;
3032
3033 iris_finalize_program(shader, NULL, system_values,
3034 num_system_values, ish->kernel_input_size, num_cbufs,
3035 &bt);
3036
3037 iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_CS,
3038 sizeof(*key), key, program);
3039
3040 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
3041
3042 ralloc_free(mem_ctx);
3043 }
3044
3045 static void
3046 iris_update_compiled_cs(struct iris_context *ice)
3047 {
3048 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_COMPUTE];
3049 struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
3050 struct iris_uncompiled_shader *ish =
3051 ice->shaders.uncompiled[MESA_SHADER_COMPUTE];
3052 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
3053 struct iris_cs_prog_key key = { KEY_INIT(base) };
3054 screen->vtbl.populate_cs_key(ice, &key);
3055
3056 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_CS];
3057 bool added;
3058 struct iris_compiled_shader *shader =
3059 find_or_add_variant(screen, ish, IRIS_CACHE_CS, &key,
3060 sizeof(key), &added);
3061
3062 if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
3063 &key, sizeof(key))) {
3064 iris_compile_cs(screen, uploader, &ice->dbg, ish, shader);
3065 }
3066
3067 if (shader->compilation_failed)
3068 shader = NULL;
3069
3070 if (old != shader) {
3071 iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_COMPUTE],
3072 shader);
3073 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CS |
3074 IRIS_STAGE_DIRTY_BINDINGS_CS |
3075 IRIS_STAGE_DIRTY_CONSTANTS_CS;
3076 shs->sysvals_need_upload = true;
3077 }
3078 }
3079
3080 void
3081 iris_update_compiled_compute_shader(struct iris_context *ice)
3082 {
3083 if (ice->state.stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_CS)
3084 iris_update_compiled_cs(ice);
3085
3086 if (ice->state.stage_dirty & IRIS_STAGE_DIRTY_CONSTANTS_CS)
3087 iris_update_pull_constant_descriptors(ice, MESA_SHADER_COMPUTE);
3088 }
3089
3090 void
3091 iris_fill_cs_push_const_buffer(struct iris_screen *screen,
3092 struct iris_compiled_shader *shader,
3093 unsigned threads,
3094 uint32_t *dst)
3095 {
3096 struct iris_cs_data *cs_data = iris_cs_data(shader);
3097 assert(iris_cs_push_const_total_size(shader, threads) > 0);
3098 assert(cs_data->push.cross_thread.size == 0);
3099 assert(cs_data->push.per_thread.dwords == 1);
3100 assert(cs_data->first_param_is_builtin_subgroup_id);
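/* Per-thread push constants occupy one 256-bit register (8 DWords) per
 * thread; the only value needed is the thread's subgroup ID in DWord 0.
 */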
3101 for (unsigned t = 0; t < threads; t++)
3102 dst[8 * t] = t;
3103 }
3104
3105 /**
3106 * Allocate scratch BOs as needed for the given per-thread size and stage.
3107 */
3108 struct iris_bo *
3109 iris_get_scratch_space(struct iris_context *ice,
3110 unsigned per_thread_scratch,
3111 gl_shader_stage stage)
3112 {
3113 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
3114 struct iris_bufmgr *bufmgr = screen->bufmgr;
3115 const struct intel_device_info *devinfo = screen->devinfo;
3116
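/* Per-thread scratch sizes are powers of two starting at 1KB; encode them
 * as log2(size / 1KB) to index the scratch BO array.
 */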
3117 unsigned encoded_size = ffs(per_thread_scratch) - 11;
3118 assert(encoded_size < ARRAY_SIZE(ice->shaders.scratch_bos));
3119 assert(per_thread_scratch == 1 << (encoded_size + 10));
3120
3121 /* On GFX version 12.5, scratch access changed to a surface-based model.
3122 * Instead of each shader type having its own layout based on IDs passed
3123 * from the relevant fixed-function unit, all scratch access is based on
3124 * thread IDs like it always has been for compute.
3125 */
3126 if (devinfo->verx10 >= 125)
3127 stage = MESA_SHADER_COMPUTE;
3128
3129 struct iris_bo **bop = &ice->shaders.scratch_bos[encoded_size][stage];
3130
3131 if (!*bop) {
3132 assert(stage < ARRAY_SIZE(devinfo->max_scratch_ids));
3133 uint32_t size = per_thread_scratch * devinfo->max_scratch_ids[stage];
3134 *bop = iris_bo_alloc(bufmgr, "scratch", size, 1024,
3135 IRIS_MEMZONE_SHADER, BO_ALLOC_PLAIN);
3136 }
3137
3138 return *bop;
3139 }
3140
3141 const struct iris_state_ref *
3142 iris_get_scratch_surf(struct iris_context *ice,
3143 unsigned per_thread_scratch)
3144 {
3145 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
3146 ASSERTED const struct intel_device_info *devinfo = screen->devinfo;
3147
3148 assert(devinfo->verx10 >= 125);
3149
3150 unsigned encoded_size = ffs(per_thread_scratch) - 11;
3151 assert(encoded_size < ARRAY_SIZE(ice->shaders.scratch_surfs));
3152 assert(per_thread_scratch == 1 << (encoded_size + 10));
3153
3154 struct iris_state_ref *ref = &ice->shaders.scratch_surfs[encoded_size];
3155
3156 if (ref->res)
3157 return ref;
3158
3159 struct iris_bo *scratch_bo =
3160 iris_get_scratch_space(ice, per_thread_scratch, MESA_SHADER_COMPUTE);
3161
3162 void *map = upload_state(ice->state.scratch_surface_uploader, ref,
3163 screen->isl_dev.ss.size, 64);
3164
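/* Describe the whole scratch BO with a RAW buffer surface.  stride_B is
 * the per-thread scratch size, which lets the hardware turn a thread ID
 * into an offset within the BO, and is_scratch makes ISL emit the
 * scratch-specific surface layout used on Gfx12.5+.
 */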
3165 isl_buffer_fill_state(&screen->isl_dev, map,
3166 .address = scratch_bo->address,
3167 .size_B = scratch_bo->size,
3168 .format = ISL_FORMAT_RAW,
3169 .swizzle = ISL_SWIZZLE_IDENTITY,
3170 .mocs = iris_mocs(scratch_bo, &screen->isl_dev, 0),
3171 .stride_B = per_thread_scratch,
3172 .is_scratch = true);
3173
3174 return ref;
3175 }
3176
3177 /* ------------------------------------------------------------------- */
3178
3179 /**
3180 * The pipe->create_[stage]_state() driver hooks.
3181 *
3182 * Performs basic NIR preprocessing, records any state dependencies, and
3183 * returns an iris_uncompiled_shader as the Gallium CSO.
3184 *
3185 * Actual shader compilation to assembly happens later, at first use.
3186 */
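/* From the frontend's point of view the CSO lifecycle looks roughly like
 * this (a sketch, not code from any particular state tracker):
 *
 *    void *cso = ctx->create_fs_state(ctx, &ss);  // preprocess, maybe precompile
 *    ctx->bind_fs_state(ctx, cso);                // becomes the current FS
 *    ...draws trigger iris_update_compiled_* for the bound shaders...
 *    ctx->bind_fs_state(ctx, NULL);
 *    ctx->delete_fs_state(ctx, cso);              // drops the reference
 */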
3187 static void *
3188 iris_create_uncompiled_shader(struct iris_screen *screen,
3189 nir_shader *nir,
3190 const struct pipe_stream_output_info *so_info)
3191 {
3192 struct iris_uncompiled_shader *ish =
3193 calloc(1, sizeof(struct iris_uncompiled_shader));
3194 if (!ish)
3195 return NULL;
3196
3197 pipe_reference_init(&ish->ref, 1);
3198 list_inithead(&ish->variants);
3199 simple_mtx_init(&ish->lock, mtx_plain);
3200 util_queue_fence_init(&ish->ready);
3201
3202 ish->uses_atomic_load_store = iris_uses_image_atomic(nir);
3203
3204 ish->program_id = get_new_program_id(screen);
3205 ish->nir = nir;
3206 if (so_info) {
3207 memcpy(&ish->stream_output, so_info, sizeof(*so_info));
3208 update_so_info(&ish->stream_output, nir->info.outputs_written);
3209 }
3210
3211 /* Use lowest dword of source shader blake3 for shader hash. */
3212 ish->source_hash = *(uint32_t*)nir->info.source_blake3;
3213
3214 if (screen->disk_cache) {
3215 /* Serialize the NIR to a binary blob that we can hash for the disk
3216 * cache. Drop unnecessary information (like variable names)
3217 * so the serialized NIR is smaller, and also to let us detect more
3218 * isomorphic shaders when hashing, increasing cache hits.
3219 */
3220 struct blob blob;
3221 blob_init(&blob);
3222 nir_serialize(&blob, nir, true);
3223 _mesa_sha1_compute(blob.data, blob.size, ish->nir_sha1);
3224 blob_finish(&blob);
3225 }
3226
3227 return ish;
3228 }
3229
3230 static void *
3231 iris_create_compute_state(struct pipe_context *ctx,
3232 const struct pipe_compute_state *state)
3233 {
3234 struct iris_context *ice = (void *) ctx;
3235 struct iris_screen *screen = (void *) ctx->screen;
3236 struct u_upload_mgr *uploader = ice->shaders.uploader_unsync;
3237 const nir_shader_compiler_options *options =
3238 screen->brw ? screen->brw->nir_options[MESA_SHADER_COMPUTE]
3239 : screen->elk->nir_options[MESA_SHADER_COMPUTE];
3240
3241 nir_shader *nir;
3242 switch (state->ir_type) {
3243 case PIPE_SHADER_IR_NIR:
3244 nir = (void *)state->prog;
3245 break;
3246
3247 case PIPE_SHADER_IR_NIR_SERIALIZED: {
3248 struct blob_reader reader;
3249 const struct pipe_binary_program_header *hdr = state->prog;
3250 blob_reader_init(&reader, hdr->blob, hdr->num_bytes);
3251 nir = nir_deserialize(NULL, options, &reader);
3252 break;
3253 }
3254
3255 default:
3256 unreachable("Unsupported IR");
3257 }
3258
3259 /* Most of iris doesn't really care about the difference between compute
3260 * shaders and kernels. We also tend to hard-code COMPUTE everywhere so
3261 * it's way easier if we just normalize to COMPUTE here.
3262 */
3263 assert(nir->info.stage == MESA_SHADER_COMPUTE ||
3264 nir->info.stage == MESA_SHADER_KERNEL);
3265 nir->info.stage = MESA_SHADER_COMPUTE;
3266
3267 struct iris_uncompiled_shader *ish =
3268 iris_create_uncompiled_shader(screen, nir, NULL);
3269 ish->kernel_input_size = state->req_input_mem;
3270 ish->kernel_shared_size = state->static_shared_mem;
3271
3272 // XXX: disallow more than 64KB of shared variables
3273
3274 if (screen->precompile) {
3275 struct iris_cs_prog_key key = { KEY_INIT(base) };
3276
3277 struct iris_compiled_shader *shader =
3278 iris_create_shader_variant(screen, NULL, MESA_SHADER_COMPUTE,
3279 IRIS_CACHE_CS, sizeof(key), &key);
3280
3281 /* Append our new variant to the shader's variant list. */
3282 list_addtail(&shader->link, &ish->variants);
3283
3284 if (!iris_disk_cache_retrieve(screen, uploader, ish, shader,
3285 &key, sizeof(key))) {
3286 iris_compile_cs(screen, uploader, &ice->dbg, ish, shader);
3287 }
3288 }
3289
3290 return ish;
3291 }
3292
3293 static void
3294 iris_get_compute_state_info(struct pipe_context *ctx, void *state,
3295 struct pipe_compute_state_object_info *info)
3296 {
3297 struct iris_screen *screen = (void *) ctx->screen;
3298 struct iris_uncompiled_shader *ish = state;
3299
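/* max_threads is the largest workgroup we can launch: the widest SIMD mode
 * (32) times the number of hardware threads per workgroup, capped at 1024
 * invocations.  simd_sizes is a bitmask of the SIMD8/16/32 widths the
 * compiler may pick; private_memory is filled in from the variants below.
 */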
3300 info->max_threads = MIN2(1024, 32 * screen->devinfo->max_cs_workgroup_threads);
3301 info->private_memory = 0;
3302 info->preferred_simd_size = 32;
3303 info->simd_sizes = 8 | 16 | 32;
3304
3305 list_for_each_entry_safe(struct iris_compiled_shader, shader,
3306 &ish->variants, link) {
3307 info->private_memory = MAX2(info->private_memory,
3308 shader->total_scratch);
3309 }
3310 }
3311
3312 static uint32_t
3313 iris_get_compute_state_subgroup_size(struct pipe_context *ctx, void *state,
3314 const uint32_t block[3])
3315 {
3316 struct iris_context *ice = (void *) ctx;
3317 struct iris_screen *screen = (void *) ctx->screen;
3318 struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
3319 struct iris_uncompiled_shader *ish = state;
3320
3321 struct iris_cs_prog_key key = { KEY_INIT(base) };
3322 screen->vtbl.populate_cs_key(ice, &key);
3323
3324 bool added;
3325 struct iris_compiled_shader *shader =
3326 find_or_add_variant(screen, ish, IRIS_CACHE_CS, &key,
3327 sizeof(key), &added);
3328
3329 if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
3330 &key, sizeof(key))) {
3331 iris_compile_cs(screen, uploader, &ice->dbg, ish, shader);
3332 }
3333
3334 return iris_get_cs_dispatch_info(screen->devinfo, shader, block).simd_size;
3335 }
3336
3337 static void
3338 iris_compile_shader(void *_job, UNUSED void *_gdata, UNUSED int thread_index)
3339 {
3340 const struct iris_threaded_compile_job *job =
3341 (struct iris_threaded_compile_job *) _job;
3342
3343 struct iris_screen *screen = job->screen;
3344 struct u_upload_mgr *uploader = job->uploader;
3345 struct util_debug_callback *dbg = job->dbg;
3346 struct iris_uncompiled_shader *ish = job->ish;
3347 struct iris_compiled_shader *shader = job->shader;
3348
3349 switch (ish->nir->info.stage) {
3350 case MESA_SHADER_VERTEX:
3351 iris_compile_vs(screen, uploader, dbg, ish, shader);
3352 break;
3353 case MESA_SHADER_TESS_CTRL:
3354 iris_compile_tcs(screen, NULL, uploader, dbg, ish, shader);
3355 break;
3356 case MESA_SHADER_TESS_EVAL:
3357 iris_compile_tes(screen, uploader, dbg, ish, shader);
3358 break;
3359 case MESA_SHADER_GEOMETRY:
3360 iris_compile_gs(screen, uploader, dbg, ish, shader);
3361 break;
3362 case MESA_SHADER_FRAGMENT:
3363 iris_compile_fs(screen, uploader, dbg, ish, shader, NULL);
3364 break;
3365
3366 default:
3367 unreachable("Invalid shader stage.");
3368 }
3369 }
3370
3371 static void *
3372 iris_create_shader_state(struct pipe_context *ctx,
3373 const struct pipe_shader_state *state)
3374 {
3375 struct iris_context *ice = (void *) ctx;
3376 struct iris_screen *screen = (void *) ctx->screen;
3377 struct nir_shader *nir;
3378
3379 if (state->type == PIPE_SHADER_IR_TGSI)
3380 nir = tgsi_to_nir(state->tokens, ctx->screen, false);
3381 else
3382 nir = state->ir.nir;
3383
3384 const struct shader_info *const info = &nir->info;
3385 struct iris_uncompiled_shader *ish =
3386 iris_create_uncompiled_shader(screen, nir, &state->stream_output);
3387
3388 union iris_any_prog_key key;
3389 unsigned key_size = 0;
3390
3391 memset(&key, 0, sizeof(key));
3392
3393 switch (info->stage) {
3394 case MESA_SHADER_VERTEX:
3395 /* User clip planes */
3396 if (info->clip_distance_array_size == 0)
3397 ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
3398
3399 key.vs = (struct iris_vs_prog_key) { KEY_INIT(vue.base) };
3400 key_size = sizeof(key.vs);
3401 break;
3402
3403 case MESA_SHADER_TESS_CTRL: {
3404 key.tcs = (struct iris_tcs_prog_key) {
3405 KEY_INIT(vue.base),
3406 // XXX: make sure the linker fills this out from the TES...
3407 ._tes_primitive_mode =
3408 info->tess._primitive_mode ? info->tess._primitive_mode
3409 : TESS_PRIMITIVE_TRIANGLES,
3410 .outputs_written = info->outputs_written,
3411 .patch_outputs_written = info->patch_outputs_written,
3412 };
3413
3414 /* MULTI_PATCH mode needs the key to contain the input patch dimensionality.
3415 * We don't have that information, so we guess that the input
3416 * and output patches are the same size. This is a bad guess, but we
3417 * can't do much better.
3418 */
3419 if (iris_use_tcs_multi_patch(screen))
3420 key.tcs.input_vertices = info->tess.tcs_vertices_out;
3421
3422 key_size = sizeof(key.tcs);
3423 break;
3424 }
3425
3426 case MESA_SHADER_TESS_EVAL:
3427 /* User clip planes */
3428 if (info->clip_distance_array_size == 0)
3429 ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
3430
3431 key.tes = (struct iris_tes_prog_key) {
3432 KEY_INIT(vue.base),
3433 // XXX: not ideal, need TCS output/TES input unification
3434 .inputs_read = info->inputs_read,
3435 .patch_inputs_read = info->patch_inputs_read,
3436 };
3437
3438 key_size = sizeof(key.tes);
3439 break;
3440
3441 case MESA_SHADER_GEOMETRY:
3442 /* User clip planes */
3443 if (info->clip_distance_array_size == 0)
3444 ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
3445
3446 key.gs = (struct iris_gs_prog_key) { KEY_INIT(vue.base) };
3447 key_size = sizeof(key.gs);
3448 break;
3449
3450 case MESA_SHADER_FRAGMENT:
3451 ish->nos |= (1ull << IRIS_NOS_FRAMEBUFFER) |
3452 (1ull << IRIS_NOS_DEPTH_STENCIL_ALPHA) |
3453 (1ull << IRIS_NOS_RASTERIZER) |
3454 (1ull << IRIS_NOS_BLEND);
3455
3456 STATIC_ASSERT(BRW_FS_VARYING_INPUT_MASK == ELK_FS_VARYING_INPUT_MASK);
3457
3458 /* The program key needs the VUE map if there are > 16 inputs */
3459 if (util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) > 16) {
3460 ish->nos |= (1ull << IRIS_NOS_LAST_VUE_MAP);
3461 }
3462
3463 const uint64_t color_outputs = info->outputs_written &
3464 ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
3465 BITFIELD64_BIT(FRAG_RESULT_STENCIL) |
3466 BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK));
3467
3468 bool can_rearrange_varyings =
3469 util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) <= 16;
3470
3471 const struct intel_device_info *devinfo = screen->devinfo;
3472
3473 key.fs = (struct iris_fs_prog_key) {
3474 KEY_INIT(base),
3475 .nr_color_regions = util_bitcount(color_outputs),
3476 .coherent_fb_fetch = devinfo->ver >= 9 && devinfo->ver < 20,
3477 .input_slots_valid =
3478 can_rearrange_varyings ? 0 : info->inputs_read | VARYING_BIT_POS,
3479 };
3480
3481 key_size = sizeof(key.fs);
3482 break;
3483
3484 default:
3485 unreachable("Invalid shader stage.");
3486 }
3487
3488 if (screen->precompile) {
3489 struct u_upload_mgr *uploader = ice->shaders.uploader_unsync;
3490
3491 struct iris_compiled_shader *shader =
3492 iris_create_shader_variant(screen, NULL, info->stage,
3493 (enum iris_program_cache_id) info->stage,
3494 key_size, &key);
3495
3496 /* Append our new variant to the shader's variant list. */
3497 list_addtail(&shader->link, &ish->variants);
3498
3499 if (!iris_disk_cache_retrieve(screen, uploader, ish, shader,
3500 &key, key_size)) {
3501 assert(!util_queue_fence_is_signalled(&shader->ready));
3502
3503 struct iris_threaded_compile_job *job = calloc(1, sizeof(*job));
3504
3505 job->screen = screen;
3506 job->uploader = uploader;
3507 job->ish = ish;
3508 job->shader = shader;
3509
3510 iris_schedule_compile(screen, &ish->ready, &ice->dbg, job,
3511 iris_compile_shader);
3512 }
3513 }
3514
3515 return ish;
3516 }
3517
3518 /**
3519 * Called when the refcount on the iris_uncompiled_shader reaches 0.
3520 *
3521 * Frees the iris_uncompiled_shader.
3522 *
3523 * \sa iris_delete_shader_state
3524 */
3525 void
3526 iris_destroy_shader_state(struct pipe_context *ctx, void *state)
3527 {
3528 struct iris_uncompiled_shader *ish = state;
3529
3530 /* No need to take ish->lock; we hold the last reference to ish */
3531 list_for_each_entry_safe(struct iris_compiled_shader, shader,
3532 &ish->variants, link) {
3533 list_del(&shader->link);
3534
3535 iris_shader_variant_reference(&shader, NULL);
3536 }
3537
3538 simple_mtx_destroy(&ish->lock);
3539 util_queue_fence_destroy(&ish->ready);
3540
3541 ralloc_free(ish->nir);
3542 free(ish);
3543 }
3544
3545 /**
3546 * The pipe->delete_[stage]_state() driver hooks.
3547 *
3548 * \sa iris_destroy_shader_state
3549 */
3550 static void
3551 iris_delete_shader_state(struct pipe_context *ctx, void *state)
3552 {
3553 struct iris_uncompiled_shader *ish = state;
3554 struct iris_context *ice = (void *) ctx;
3555
3556 const gl_shader_stage stage = ish->nir->info.stage;
3557
3558 if (ice->shaders.uncompiled[stage] == ish) {
3559 ice->shaders.uncompiled[stage] = NULL;
3560 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_UNCOMPILED_VS << stage;
3561 }
3562
3563 if (pipe_reference(&ish->ref, NULL))
3564 iris_destroy_shader_state(ctx, state);
3565 }
3566
3567 /**
3568 * The pipe->bind_[stage]_state() driver hook.
3569 *
3570 * Binds an uncompiled shader as the current one for a particular stage.
3571 * Updates dirty tracking to account for the shader's NOS.
3572 */
3573 static void
3574 bind_shader_state(struct iris_context *ice,
3575 struct iris_uncompiled_shader *ish,
3576 gl_shader_stage stage)
3577 {
3578 uint64_t stage_dirty_bit = IRIS_STAGE_DIRTY_UNCOMPILED_VS << stage;
3579 const uint64_t nos = ish ? ish->nos : 0;
3580
3581 const struct shader_info *old_info = iris_get_shader_info(ice, stage);
3582 const struct shader_info *new_info = ish ? &ish->nir->info : NULL;
3583
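/* The number of sampler states we upload for a stage depends on the
 * highest sampler binding the shader uses, so if that changes the sampler
 * state table needs to be re-uploaded.
 */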
3584 if ((old_info ? BITSET_LAST_BIT(old_info->samplers_used) : 0) !=
3585 (new_info ? BITSET_LAST_BIT(new_info->samplers_used) : 0)) {
3586 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_SAMPLER_STATES_VS << stage;
3587 }
3588
3589 ice->shaders.uncompiled[stage] = ish;
3590 ice->state.stage_dirty |= stage_dirty_bit;
3591
3592 /* Record that CSOs need to mark IRIS_STAGE_DIRTY_UNCOMPILED_XS when they change
3593 * (or that they no longer need to do so).
3594 */
3595 for (int i = 0; i < IRIS_NOS_COUNT; i++) {
3596 if (nos & (1 << i))
3597 ice->state.stage_dirty_for_nos[i] |= stage_dirty_bit;
3598 else
3599 ice->state.stage_dirty_for_nos[i] &= ~stage_dirty_bit;
3600 }
3601 }
3602
3603 static void
3604 iris_bind_vs_state(struct pipe_context *ctx, void *state)
3605 {
3606 struct iris_context *ice = (struct iris_context *)ctx;
3607 struct iris_uncompiled_shader *ish = state;
3608
3609 if (ish) {
3610 const struct shader_info *info = &ish->nir->info;
3611 if (ice->state.window_space_position != info->vs.window_space_position) {
3612 ice->state.window_space_position = info->vs.window_space_position;
3613
3614 ice->state.dirty |= IRIS_DIRTY_CLIP |
3615 IRIS_DIRTY_RASTER |
3616 IRIS_DIRTY_CC_VIEWPORT;
3617 }
3618
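/* gl_VertexID, gl_InstanceID, gl_DrawID, and the base vertex/instance draw
 * parameters are sourced from extra vertex elements and buffers, so if the
 * new shader changes which of them it reads, the vertex buffer and vertex
 * element setup must be re-emitted (checked below).
 */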
3619 const bool uses_draw_params =
3620 BITSET_TEST(info->system_values_read, SYSTEM_VALUE_FIRST_VERTEX) ||
3621 BITSET_TEST(info->system_values_read, SYSTEM_VALUE_BASE_INSTANCE);
3622 const bool uses_derived_draw_params =
3623 BITSET_TEST(info->system_values_read, SYSTEM_VALUE_DRAW_ID) ||
3624 BITSET_TEST(info->system_values_read, SYSTEM_VALUE_IS_INDEXED_DRAW);
3625 const bool needs_sgvs_element = uses_draw_params ||
3626 BITSET_TEST(info->system_values_read, SYSTEM_VALUE_INSTANCE_ID) ||
3627 BITSET_TEST(info->system_values_read,
3628 SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);
3629
3630 if (ice->state.vs_uses_draw_params != uses_draw_params ||
3631 ice->state.vs_uses_derived_draw_params != uses_derived_draw_params ||
3632 ice->state.vs_needs_edge_flag != info->vs.needs_edge_flag ||
3633 ice->state.vs_needs_sgvs_element != needs_sgvs_element) {
3634 ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS |
3635 IRIS_DIRTY_VERTEX_ELEMENTS;
3636 }
3637
3638 ice->state.vs_uses_draw_params = uses_draw_params;
3639 ice->state.vs_uses_derived_draw_params = uses_derived_draw_params;
3640 ice->state.vs_needs_sgvs_element = needs_sgvs_element;
3641 ice->state.vs_needs_edge_flag = info->vs.needs_edge_flag;
3642 }
3643
3644 bind_shader_state((void *) ctx, state, MESA_SHADER_VERTEX);
3645 }
3646
3647 static void
3648 iris_bind_tcs_state(struct pipe_context *ctx, void *state)
3649 {
3650 bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_CTRL);
3651 }
3652
3653 static void
3654 iris_bind_tes_state(struct pipe_context *ctx, void *state)
3655 {
3656 struct iris_context *ice = (struct iris_context *)ctx;
3657 struct iris_screen *screen = (struct iris_screen *) ctx->screen;
3658 const struct intel_device_info *devinfo = screen->devinfo;
3659
3660 /* Enabling/disabling optional stages requires a URB reconfiguration. */
3661 if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
3662 ice->state.dirty |= IRIS_DIRTY_URB | (devinfo->verx10 >= 125 ?
3663 IRIS_DIRTY_VFG : 0);
3664
3665 bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_EVAL);
3666 }
3667
3668 static void
3669 iris_bind_gs_state(struct pipe_context *ctx, void *state)
3670 {
3671 struct iris_context *ice = (struct iris_context *)ctx;
3672
3673 /* Enabling/disabling optional stages requires a URB reconfiguration. */
3674 if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
3675 ice->state.dirty |= IRIS_DIRTY_URB;
3676
3677 bind_shader_state((void *) ctx, state, MESA_SHADER_GEOMETRY);
3678 }
3679
3680 static void
3681 iris_bind_fs_state(struct pipe_context *ctx, void *state)
3682 {
3683 struct iris_context *ice = (struct iris_context *) ctx;
3684 struct iris_screen *screen = (struct iris_screen *) ctx->screen;
3685 const struct intel_device_info *devinfo = screen->devinfo;
3686 struct iris_uncompiled_shader *old_ish =
3687 ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
3688 struct iris_uncompiled_shader *new_ish = state;
3689
3690 const unsigned color_bits =
3691 BITFIELD64_BIT(FRAG_RESULT_COLOR) |
3692 BITFIELD64_RANGE(FRAG_RESULT_DATA0, IRIS_MAX_DRAW_BUFFERS);
3693
3694 /* Fragment shader outputs influence HasWriteableRT */
3695 if (!old_ish || !new_ish ||
3696 (old_ish->nir->info.outputs_written & color_bits) !=
3697 (new_ish->nir->info.outputs_written & color_bits))
3698 ice->state.dirty |= IRIS_DIRTY_PS_BLEND;
3699
3700 if (devinfo->ver == 8)
3701 ice->state.dirty |= IRIS_DIRTY_PMA_FIX;
3702
3703 bind_shader_state((void *) ctx, state, MESA_SHADER_FRAGMENT);
3704 }
3705
3706 static void
3707 iris_bind_cs_state(struct pipe_context *ctx, void *state)
3708 {
3709 bind_shader_state((void *) ctx, state, MESA_SHADER_COMPUTE);
3710 }
3711
3712 static char *
3713 iris_finalize_nir(struct pipe_screen *_screen, void *nirptr)
3714 {
3715 struct iris_screen *screen = (struct iris_screen *)_screen;
3716 struct nir_shader *nir = (struct nir_shader *) nirptr;
3717 const struct intel_device_info *devinfo = screen->devinfo;
3718
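/* This is the screen's finalize_nir hook: it runs once per shader at
 * create time, so only compilation-key-independent lowering belongs here;
 * key-dependent passes run later in iris_compile_*.
 */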
3719 NIR_PASS_V(nir, iris_fix_edge_flags);
3720
3721 if (screen->brw) {
3722 struct brw_nir_compiler_opts opts = {};
3723 brw_preprocess_nir(screen->brw, nir, &opts);
3724
3725 NIR_PASS_V(nir, brw_nir_lower_storage_image,
3726 &(struct brw_nir_lower_storage_image_opts) {
3727 .devinfo = devinfo,
3728 .lower_loads = true,
3729 .lower_stores = true,
3730 });
3731 } else {
3732 assert(screen->elk);
3733
3734 struct elk_nir_compiler_opts opts = {};
3735 elk_preprocess_nir(screen->elk, nir, &opts);
3736
3737 NIR_PASS_V(nir, elk_nir_lower_storage_image,
3738 &(struct elk_nir_lower_storage_image_opts) {
3739 .devinfo = devinfo,
3740 .lower_loads = true,
3741 .lower_stores = true,
3742
3743 /* Iris only uploads the image params used
3744 * by get_size lowering on Gfx8.
3745 */
3746 .lower_get_size = true,
3747 });
3748 }
3749
3750 NIR_PASS_V(nir, iris_lower_storage_image_derefs);
3751
3752 nir_sweep(nir);
3753
3754 return NULL;
3755 }
3756
3757 static void
3758 iris_set_max_shader_compiler_threads(struct pipe_screen *pscreen,
3759 unsigned max_threads)
3760 {
3761 struct iris_screen *screen = (struct iris_screen *) pscreen;
3762 util_queue_adjust_num_threads(&screen->shader_compiler_queue, max_threads,
3763 false);
3764 }
3765
3766 static bool
3767 iris_is_parallel_shader_compilation_finished(struct pipe_screen *pscreen,
3768 void *v_shader,
3769 enum pipe_shader_type p_stage)
3770 {
3771 struct iris_screen *screen = (struct iris_screen *) pscreen;
3772
3773 /* Threaded compilation is only used for the precompile. If precompile is
3774 * disabled, threaded compilation is "done."
3775 */
3776 if (!screen->precompile)
3777 return true;
3778
3779 struct iris_uncompiled_shader *ish = v_shader;
3780
3781 /* When precompile is enabled, the first entry is the precompile variant.
3782 * Check the ready fence of the precompile variant.
3783 */
3784 struct iris_compiled_shader *first =
3785 list_first_entry(&ish->variants, struct iris_compiled_shader, link);
3786
3787 return util_queue_fence_is_signalled(&first->ready);
3788 }
3789
3790 void
3791 iris_init_screen_program_functions(struct pipe_screen *pscreen)
3792 {
3793 pscreen->is_parallel_shader_compilation_finished =
3794 iris_is_parallel_shader_compilation_finished;
3795 pscreen->set_max_shader_compiler_threads =
3796 iris_set_max_shader_compiler_threads;
3797 pscreen->finalize_nir = iris_finalize_nir;
3798 }
3799
3800 void
3801 iris_init_program_functions(struct pipe_context *ctx)
3802 {
3803 ctx->create_vs_state = iris_create_shader_state;
3804 ctx->create_tcs_state = iris_create_shader_state;
3805 ctx->create_tes_state = iris_create_shader_state;
3806 ctx->create_gs_state = iris_create_shader_state;
3807 ctx->create_fs_state = iris_create_shader_state;
3808 ctx->create_compute_state = iris_create_compute_state;
3809
3810 ctx->delete_vs_state = iris_delete_shader_state;
3811 ctx->delete_tcs_state = iris_delete_shader_state;
3812 ctx->delete_tes_state = iris_delete_shader_state;
3813 ctx->delete_gs_state = iris_delete_shader_state;
3814 ctx->delete_fs_state = iris_delete_shader_state;
3815 ctx->delete_compute_state = iris_delete_shader_state;
3816
3817 ctx->bind_vs_state = iris_bind_vs_state;
3818 ctx->bind_tcs_state = iris_bind_tcs_state;
3819 ctx->bind_tes_state = iris_bind_tes_state;
3820 ctx->bind_gs_state = iris_bind_gs_state;
3821 ctx->bind_fs_state = iris_bind_fs_state;
3822 ctx->bind_compute_state = iris_bind_cs_state;
3823
3824 ctx->get_compute_state_info = iris_get_compute_state_info;
3825 ctx->get_compute_state_subgroup_size = iris_get_compute_state_subgroup_size;
3826 }
3827
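/* The helpers below hide the brw/elk compiler split: each compiled shader
 * carries prog_data from whichever backend built it, so these just forward
 * to the matching backend's query.
 */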
3828 struct intel_cs_dispatch_info
3829 iris_get_cs_dispatch_info(const struct intel_device_info *devinfo,
3830 const struct iris_compiled_shader *shader,
3831 const uint32_t block[3])
3832 {
3833 if (shader->brw_prog_data) {
3834 return brw_cs_get_dispatch_info(devinfo,
3835 brw_cs_prog_data(shader->brw_prog_data),
3836 block);
3837 } else {
3838 assert(shader->elk_prog_data);
3839 return elk_cs_get_dispatch_info(devinfo,
3840 elk_cs_prog_data(shader->elk_prog_data),
3841 block);
3842 }
3843 }
3844
3845 unsigned
3846 iris_cs_push_const_total_size(const struct iris_compiled_shader *shader,
3847 unsigned threads)
3848 {
3849 if (shader->brw_prog_data) {
3850 return brw_cs_push_const_total_size(brw_cs_prog_data(shader->brw_prog_data),
3851 threads);
3852 } else {
3853 assert(shader->elk_prog_data);
3854 return elk_cs_push_const_total_size(elk_cs_prog_data(shader->elk_prog_data),
3855 threads);
3856 }
3857 }
3858
3859 uint32_t
3860 iris_fs_barycentric_modes(const struct iris_compiled_shader *shader,
3861 enum intel_msaa_flags pushed_msaa_flags)
3862 {
3863 if (shader->brw_prog_data) {
3864 return wm_prog_data_barycentric_modes(brw_wm_prog_data(shader->brw_prog_data),
3865 pushed_msaa_flags);
3866 } else {
3867 assert(shader->elk_prog_data);
3868 return elk_wm_prog_data_barycentric_modes(elk_wm_prog_data(shader->elk_prog_data),
3869 pushed_msaa_flags);
3870 }
3871 }
3872
3873 bool
3874 iris_use_tcs_multi_patch(struct iris_screen *screen)
3875 {
3876 return screen->brw && screen->brw->use_tcs_multi_patch;
3877 }
3878
3879 bool
3880 iris_indirect_ubos_use_sampler(struct iris_screen *screen)
3881 {
3882 if (screen->brw) {
3883 return screen->brw->indirect_ubos_use_sampler;
3884 } else {
3885 assert(screen->elk);
3886 return screen->elk->indirect_ubos_use_sampler;
3887 }
3888 }
3889
3890 static void
3891 iris_shader_debug_log(void *data, unsigned *id, const char *fmt, ...)
3892 {
3893 struct util_debug_callback *dbg = data;
3894 va_list args;
3895
3896 if (!dbg->debug_message)
3897 return;
3898
3899 va_start(args, fmt);
3900 dbg->debug_message(dbg->data, id, UTIL_DEBUG_TYPE_SHADER_INFO, fmt, args);
3901 va_end(args);
3902 }
3903
3904 static void
3905 iris_shader_perf_log(void *data, unsigned *id, const char *fmt, ...)
3906 {
3907 struct util_debug_callback *dbg = data;
3908 va_list args;
3909 va_start(args, fmt);
3910
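/* vfprintf consumes its va_list, so print from a copy and keep the
 * original intact for the debug callback below.
 */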
3911 if (INTEL_DEBUG(DEBUG_PERF)) {
3912 va_list args_copy;
3913 va_copy(args_copy, args);
3914 vfprintf(stderr, fmt, args_copy);
3915 va_end(args_copy);
3916 }
3917
3918 if (dbg->debug_message) {
3919 dbg->debug_message(dbg->data, id, UTIL_DEBUG_TYPE_PERF_INFO, fmt, args);
3920 }
3921
3922 va_end(args);
3923 }
3924
3925 const void *
3926 iris_get_compiler_options(struct pipe_screen *pscreen,
3927 enum pipe_shader_ir ir,
3928 enum pipe_shader_type pstage)
3929 {
3930 struct iris_screen *screen = (struct iris_screen *) pscreen;
3931 gl_shader_stage stage = stage_from_pipe(pstage);
3932 assert(ir == PIPE_SHADER_IR_NIR);
3933
3934 return screen->brw ? screen->brw->nir_options[stage]
3935 : screen->elk->nir_options[stage];
3936 }
3937
3938 void
3939 iris_compiler_init(struct iris_screen *screen)
3940 {
3941 STATIC_ASSERT(IRIS_MAX_DRAW_BUFFERS == BRW_MAX_DRAW_BUFFERS);
3942 STATIC_ASSERT(IRIS_MAX_DRAW_BUFFERS == ELK_MAX_DRAW_BUFFERS);
3943 STATIC_ASSERT(IRIS_MAX_SOL_BINDINGS == ELK_MAX_SOL_BINDINGS);
3944
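/* Gfx9+ (Skylake and later) uses the brw compiler; Gfx8 and earlier use
 * the elk fork, which keeps support for the older hardware.
 */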
3945 if (screen->devinfo->ver >= 9) {
3946 screen->brw = brw_compiler_create(screen, screen->devinfo);
3947 screen->brw->shader_debug_log = iris_shader_debug_log;
3948 screen->brw->shader_perf_log = iris_shader_perf_log;
3949 } else {
3950 screen->elk = elk_compiler_create(screen, screen->devinfo);
3951 screen->elk->shader_debug_log = iris_shader_debug_log;
3952 screen->elk->shader_perf_log = iris_shader_perf_log;
3953 screen->elk->supports_shader_constants = true;
3954 }
3955 }
3956