/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file iris_program.c
 *
 * This file contains the driver interface for compiling shaders.
 *
 * See iris_program_cache.c for the in-memory program cache where the
 * compiled shaders are stored.
 */

#include <stdio.h>
#include <errno.h>
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_atomic.h"
#include "util/u_upload_mgr.h"
#include "util/u_debug.h"
#include "util/u_async_debug.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_serialize.h"
#include "intel/compiler/brw_compiler.h"
#include "intel/compiler/brw_nir.h"
#include "intel/compiler/intel_nir.h"
#include "intel/compiler/brw_prim.h"
#include "intel/compiler/elk/elk_compiler.h"
#include "intel/compiler/elk/elk_nir.h"
#include "intel/compiler/elk/elk_prim.h"
#include "iris_context.h"
#include "iris_pipe.h"
#include "nir/tgsi_to_nir.h"

#define KEY_INIT(prefix)                                                   \
   .prefix.program_string_id = ish->program_id,                            \
   .prefix.limit_trig_input_range = screen->driconf.limit_trig_input_range
#define BRW_KEY_INIT(gen, prog_id, limit_trig_input)       \
   .base.program_string_id = prog_id,                      \
   .base.limit_trig_input_range = limit_trig_input
#define ELK_KEY_INIT(gen, prog_id, limit_trig_input)       \
   .base.program_string_id = prog_id,                      \
   .base.limit_trig_input_range = limit_trig_input
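
/* For illustration, a sketch of how these initializer-fragment macros are
 * used (mirroring iris_to_brw_vs_key() below):
 *
 *    return (struct brw_vs_prog_key) {
 *       BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
 *                    key->vue.base.limit_trig_input_range),
 *    };
 *
 * which expands to
 *
 *    return (struct brw_vs_prog_key) {
 *       .base.program_string_id = key->vue.base.program_string_id,
 *       .base.limit_trig_input_range = key->vue.base.limit_trig_input_range,
 *    };
 *
 * Note that the `gen` argument is unused by both BRW_KEY_INIT and
 * ELK_KEY_INIT; it is kept so call sites read uniformly.
 */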

struct iris_threaded_compile_job {
   struct iris_screen *screen;
   struct u_upload_mgr *uploader;
   struct util_debug_callback *dbg;
   struct iris_uncompiled_shader *ish;
   struct iris_compiled_shader *shader;
};

static unsigned
get_new_program_id(struct iris_screen *screen)
{
   return p_atomic_inc_return(&screen->program_id);
}

static void
iris_apply_brw_wm_prog_data(struct iris_compiled_shader *shader,
                            const struct brw_wm_prog_data *brw)
{
   assert(shader->stage == MESA_SHADER_FRAGMENT);
   struct iris_fs_data *iris = &shader->fs;

   STATIC_ASSERT(ARRAY_SIZE(iris->urb_setup) == ARRAY_SIZE(brw->urb_setup));
   STATIC_ASSERT(ARRAY_SIZE(iris->urb_setup_attribs) == ARRAY_SIZE(brw->urb_setup_attribs));
   memcpy(iris->urb_setup, brw->urb_setup, sizeof(iris->urb_setup));
   memcpy(iris->urb_setup_attribs, brw->urb_setup_attribs, brw->urb_setup_attribs_count);
   iris->urb_setup_attribs_count = brw->urb_setup_attribs_count;

   iris->num_varying_inputs   = brw->num_varying_inputs;
   iris->msaa_flags_param     = brw->msaa_flags_param;
   iris->flat_inputs          = brw->flat_inputs;
   iris->inputs               = brw->inputs;
   iris->computed_depth_mode  = brw->computed_depth_mode;
   iris->max_polygons         = brw->max_polygons;
   iris->dispatch_multi       = brw->dispatch_multi;
   iris->computed_stencil     = brw->computed_stencil;
   iris->early_fragment_tests = brw->early_fragment_tests;
   iris->post_depth_coverage  = brw->post_depth_coverage;
   iris->inner_coverage       = brw->inner_coverage;
   iris->dispatch_8           = brw->dispatch_8;
   iris->dispatch_16          = brw->dispatch_16;
   iris->dispatch_32          = brw->dispatch_32;
   iris->dual_src_blend       = brw->dual_src_blend;
   iris->uses_pos_offset      = brw->uses_pos_offset;
   iris->uses_omask           = brw->uses_omask;
   iris->uses_kill            = brw->uses_kill;
   iris->uses_src_depth       = brw->uses_src_depth;
   iris->uses_src_w           = brw->uses_src_w;
   iris->uses_sample_mask     = brw->uses_sample_mask;
   iris->uses_vmask           = brw->uses_vmask;
   iris->has_side_effects     = brw->has_side_effects;
   iris->pulls_bary           = brw->pulls_bary;

   iris->uses_sample_offsets        = brw->uses_sample_offsets;
   iris->uses_npc_bary_coefficients = brw->uses_npc_bary_coefficients;
   iris->uses_pc_bary_coefficients  = brw->uses_pc_bary_coefficients;
   iris->uses_depth_w_coefficients  = brw->uses_depth_w_coefficients;

   iris->uses_nonperspective_interp_modes = brw->uses_nonperspective_interp_modes;

   iris->is_per_sample = brw_wm_prog_data_is_persample(brw, 0);
}

static void
iris_apply_brw_cs_prog_data(struct iris_compiled_shader *shader,
                            const struct brw_cs_prog_data *brw)
{
   assert(shader->stage == MESA_SHADER_COMPUTE);
   struct iris_cs_data *iris = &shader->cs;

   iris->push.cross_thread.dwords = brw->push.cross_thread.dwords;
   iris->push.cross_thread.regs   = brw->push.cross_thread.regs;
   iris->push.cross_thread.size   = brw->push.cross_thread.size;

   iris->push.per_thread.dwords = brw->push.per_thread.dwords;
   iris->push.per_thread.regs   = brw->push.per_thread.regs;
   iris->push.per_thread.size   = brw->push.per_thread.size;

   iris->local_size[0]  = brw->local_size[0];
   iris->local_size[1]  = brw->local_size[1];
   iris->local_size[2]  = brw->local_size[2];
   iris->prog_offset[0] = brw->prog_offset[0];
   iris->prog_offset[1] = brw->prog_offset[1];
   iris->prog_offset[2] = brw->prog_offset[2];

   iris->generate_local_id = brw->generate_local_id;
   iris->walk_order        = brw->walk_order;
   iris->uses_barrier      = brw->uses_barrier;
   iris->prog_mask         = brw->prog_mask;

   iris->first_param_is_builtin_subgroup_id =
      brw->base.nr_params > 0 &&
      brw->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID;
}

static void
iris_apply_brw_vue_prog_data(const struct brw_vue_prog_data *brw,
                             struct iris_vue_data *iris)
{
   memcpy(&iris->vue_map, &brw->vue_map, sizeof(struct intel_vue_map));

   iris->urb_read_length     = brw->urb_read_length;
   iris->cull_distance_mask  = brw->cull_distance_mask;
   iris->urb_entry_size      = brw->urb_entry_size;
   iris->dispatch_mode       = brw->dispatch_mode;
   iris->include_vue_handles = brw->include_vue_handles;
}

static void
iris_apply_brw_vs_prog_data(struct iris_compiled_shader *shader,
                            const struct brw_vs_prog_data *brw)
{
   assert(shader->stage == MESA_SHADER_VERTEX);
   struct iris_vs_data *iris = &shader->vs;

   iris_apply_brw_vue_prog_data(&brw->base, &iris->base);

   iris->uses_vertexid     = brw->uses_vertexid;
   iris->uses_instanceid   = brw->uses_instanceid;
   iris->uses_firstvertex  = brw->uses_firstvertex;
   iris->uses_baseinstance = brw->uses_baseinstance;
   iris->uses_drawid       = brw->uses_drawid;
}

static void
iris_apply_brw_tcs_prog_data(struct iris_compiled_shader *shader,
                             const struct brw_tcs_prog_data *brw)
{
   assert(shader->stage == MESA_SHADER_TESS_CTRL);
   struct iris_tcs_data *iris = &shader->tcs;

   iris_apply_brw_vue_prog_data(&brw->base, &iris->base);

   iris->instances             = brw->instances;
   iris->patch_count_threshold = brw->patch_count_threshold;
   iris->include_primitive_id  = brw->include_primitive_id;
}

static void
iris_apply_brw_tes_prog_data(struct iris_compiled_shader *shader,
                             const struct brw_tes_prog_data *brw)
{
   assert(shader->stage == MESA_SHADER_TESS_EVAL);
   struct iris_tes_data *iris = &shader->tes;

   iris_apply_brw_vue_prog_data(&brw->base, &iris->base);

   iris->partitioning         = brw->partitioning;
   iris->output_topology      = brw->output_topology;
   iris->domain               = brw->domain;
   iris->include_primitive_id = brw->include_primitive_id;
}

static void
iris_apply_brw_gs_prog_data(struct iris_compiled_shader *shader,
                            const struct brw_gs_prog_data *brw)
{
   assert(shader->stage == MESA_SHADER_GEOMETRY);
   struct iris_gs_data *iris = &shader->gs;

   iris_apply_brw_vue_prog_data(&brw->base, &iris->base);

   iris->vertices_in                     = brw->vertices_in;
   iris->output_vertex_size_hwords       = brw->output_vertex_size_hwords;
   iris->output_topology                 = brw->output_topology;
   iris->control_data_header_size_hwords = brw->control_data_header_size_hwords;
   iris->control_data_format             = brw->control_data_format;
   iris->static_vertex_count             = brw->static_vertex_count;
   iris->invocations                     = brw->invocations;
   iris->include_primitive_id            = brw->include_primitive_id;
}

void
iris_apply_brw_prog_data(struct iris_compiled_shader *shader,
                         struct brw_stage_prog_data *brw)
{
   STATIC_ASSERT(ARRAY_SIZE(brw->ubo_ranges) == ARRAY_SIZE(shader->ubo_ranges));
   for (int i = 0; i < ARRAY_SIZE(shader->ubo_ranges); i++) {
      shader->ubo_ranges[i].block  = brw->ubo_ranges[i].block;
      shader->ubo_ranges[i].start  = brw->ubo_ranges[i].start;
      shader->ubo_ranges[i].length = brw->ubo_ranges[i].length;
   }

   shader->nr_params              = brw->nr_params;
   shader->total_scratch          = brw->total_scratch;
   shader->total_shared           = brw->total_shared;
   shader->program_size           = brw->program_size;
   shader->const_data_offset      = brw->const_data_offset;
   shader->dispatch_grf_start_reg = brw->dispatch_grf_start_reg;
   shader->has_ubo_pull           = brw->has_ubo_pull;
   shader->use_alt_mode           = brw->use_alt_mode;

   switch (shader->stage) {
   case MESA_SHADER_FRAGMENT:
      iris_apply_brw_wm_prog_data(shader, brw_wm_prog_data_const(brw));
      break;
   case MESA_SHADER_COMPUTE:
      iris_apply_brw_cs_prog_data(shader, brw_cs_prog_data_const(brw));
      break;
   case MESA_SHADER_VERTEX:
      iris_apply_brw_vs_prog_data(shader, brw_vs_prog_data_const(brw));
      break;
   case MESA_SHADER_TESS_CTRL:
      iris_apply_brw_tcs_prog_data(shader, brw_tcs_prog_data_const(brw));
      break;
   case MESA_SHADER_TESS_EVAL:
      iris_apply_brw_tes_prog_data(shader, brw_tes_prog_data_const(brw));
      break;
   case MESA_SHADER_GEOMETRY:
      iris_apply_brw_gs_prog_data(shader, brw_gs_prog_data_const(brw));
      break;
   default:
      unreachable("invalid shader stage");
   }

   shader->brw_prog_data = brw;

   ralloc_steal(shader, shader->brw_prog_data);
   ralloc_steal(shader->brw_prog_data, (void *)brw->relocs);
   ralloc_steal(shader->brw_prog_data, brw->param);
}

static void
iris_apply_elk_wm_prog_data(struct iris_compiled_shader *shader,
                            const struct elk_wm_prog_data *elk)
{
   assert(shader->stage == MESA_SHADER_FRAGMENT);
   struct iris_fs_data *iris = &shader->fs;

   STATIC_ASSERT(ARRAY_SIZE(iris->urb_setup) == ARRAY_SIZE(elk->urb_setup));
   STATIC_ASSERT(ARRAY_SIZE(iris->urb_setup_attribs) == ARRAY_SIZE(elk->urb_setup_attribs));
   memcpy(iris->urb_setup, elk->urb_setup, sizeof(iris->urb_setup));
   memcpy(iris->urb_setup_attribs, elk->urb_setup_attribs, elk->urb_setup_attribs_count);
   iris->urb_setup_attribs_count = elk->urb_setup_attribs_count;

   iris->num_varying_inputs   = elk->num_varying_inputs;
   iris->msaa_flags_param     = elk->msaa_flags_param;
   iris->flat_inputs          = elk->flat_inputs;
   iris->inputs               = elk->inputs;
   iris->computed_depth_mode  = elk->computed_depth_mode;
   iris->max_polygons         = 1;
   iris->dispatch_multi       = 0;
   iris->computed_stencil     = elk->computed_stencil;
   iris->early_fragment_tests = elk->early_fragment_tests;
   iris->post_depth_coverage  = elk->post_depth_coverage;
   iris->inner_coverage       = elk->inner_coverage;
   iris->dispatch_8           = elk->dispatch_8;
   iris->dispatch_16          = elk->dispatch_16;
   iris->dispatch_32          = elk->dispatch_32;
   iris->dual_src_blend       = elk->dual_src_blend;
   iris->uses_pos_offset      = elk->uses_pos_offset;
   iris->uses_omask           = elk->uses_omask;
   iris->uses_kill            = elk->uses_kill;
   iris->uses_src_depth       = elk->uses_src_depth;
   iris->uses_src_w           = elk->uses_src_w;
   iris->uses_sample_mask     = elk->uses_sample_mask;
   iris->uses_vmask           = elk->uses_vmask;
   iris->pulls_bary           = elk->pulls_bary;
   iris->has_side_effects     = elk->has_side_effects;

   iris->uses_nonperspective_interp_modes = elk->uses_nonperspective_interp_modes;

   iris->is_per_sample = elk_wm_prog_data_is_persample(elk, 0);
}

static void
iris_apply_elk_cs_prog_data(struct iris_compiled_shader *shader,
                            const struct elk_cs_prog_data *elk)
{
   assert(shader->stage == MESA_SHADER_COMPUTE);
   struct iris_cs_data *iris = &shader->cs;

   iris->push.cross_thread.dwords = elk->push.cross_thread.dwords;
   iris->push.cross_thread.regs   = elk->push.cross_thread.regs;
   iris->push.cross_thread.size   = elk->push.cross_thread.size;

   iris->push.per_thread.dwords = elk->push.per_thread.dwords;
   iris->push.per_thread.regs   = elk->push.per_thread.regs;
   iris->push.per_thread.size   = elk->push.per_thread.size;

   iris->local_size[0]  = elk->local_size[0];
   iris->local_size[1]  = elk->local_size[1];
   iris->local_size[2]  = elk->local_size[2];
   iris->prog_offset[0] = elk->prog_offset[0];
   iris->prog_offset[1] = elk->prog_offset[1];
   iris->prog_offset[2] = elk->prog_offset[2];

   iris->uses_barrier      = elk->uses_barrier;
   iris->prog_mask         = elk->prog_mask;

   iris->first_param_is_builtin_subgroup_id =
      elk->base.nr_params > 0 &&
      elk->base.param[0] == ELK_PARAM_BUILTIN_SUBGROUP_ID;
}

static void
iris_apply_elk_vue_prog_data(const struct elk_vue_prog_data *elk,
                             struct iris_vue_data *iris)
{
   memcpy(&iris->vue_map, &elk->vue_map, sizeof(struct intel_vue_map));

   iris->urb_read_length     = elk->urb_read_length;
   iris->cull_distance_mask  = elk->cull_distance_mask;
   iris->urb_entry_size      = elk->urb_entry_size;
   iris->dispatch_mode       = elk->dispatch_mode;
   iris->include_vue_handles = elk->include_vue_handles;
}

static void
iris_apply_elk_vs_prog_data(struct iris_compiled_shader *shader,
                            const struct elk_vs_prog_data *elk)
{
   assert(shader->stage == MESA_SHADER_VERTEX);
   struct iris_vs_data *iris = &shader->vs;

   iris_apply_elk_vue_prog_data(&elk->base, &iris->base);

   iris->uses_vertexid     = elk->uses_vertexid;
   iris->uses_instanceid   = elk->uses_instanceid;
   iris->uses_firstvertex  = elk->uses_firstvertex;
   iris->uses_baseinstance = elk->uses_baseinstance;
   iris->uses_drawid       = elk->uses_drawid;
}

static void
iris_apply_elk_tcs_prog_data(struct iris_compiled_shader *shader,
                             const struct elk_tcs_prog_data *elk)
{
   assert(shader->stage == MESA_SHADER_TESS_CTRL);
   struct iris_tcs_data *iris = &shader->tcs;

   iris_apply_elk_vue_prog_data(&elk->base, &iris->base);

   iris->instances             = elk->instances;
   iris->patch_count_threshold = elk->patch_count_threshold;
   iris->include_primitive_id  = elk->include_primitive_id;
}

static void
iris_apply_elk_tes_prog_data(struct iris_compiled_shader *shader,
                             const struct elk_tes_prog_data *elk)
{
   assert(shader->stage == MESA_SHADER_TESS_EVAL);
   struct iris_tes_data *iris = &shader->tes;

   iris_apply_elk_vue_prog_data(&elk->base, &iris->base);

   iris->partitioning         = elk->partitioning;
   iris->output_topology      = elk->output_topology;
   iris->domain               = elk->domain;
   iris->include_primitive_id = elk->include_primitive_id;
}

static void
iris_apply_elk_gs_prog_data(struct iris_compiled_shader *shader,
                            const struct elk_gs_prog_data *elk)
{
   assert(shader->stage == MESA_SHADER_GEOMETRY);
   struct iris_gs_data *iris = &shader->gs;

   iris_apply_elk_vue_prog_data(&elk->base, &iris->base);

   iris->vertices_in                     = elk->vertices_in;
   iris->output_vertex_size_hwords       = elk->output_vertex_size_hwords;
   iris->output_topology                 = elk->output_topology;
   iris->control_data_header_size_hwords = elk->control_data_header_size_hwords;
   iris->control_data_format             = elk->control_data_format;
   iris->static_vertex_count             = elk->static_vertex_count;
   iris->invocations                     = elk->invocations;
   iris->include_primitive_id            = elk->include_primitive_id;
}

void
iris_apply_elk_prog_data(struct iris_compiled_shader *shader,
                         struct elk_stage_prog_data *elk)
{
   STATIC_ASSERT(ARRAY_SIZE(elk->ubo_ranges) == ARRAY_SIZE(shader->ubo_ranges));
   for (int i = 0; i < ARRAY_SIZE(shader->ubo_ranges); i++) {
      shader->ubo_ranges[i].block  = elk->ubo_ranges[i].block;
      shader->ubo_ranges[i].start  = elk->ubo_ranges[i].start;
      shader->ubo_ranges[i].length = elk->ubo_ranges[i].length;
   }

   shader->nr_params              = elk->nr_params;
   shader->total_scratch          = elk->total_scratch;
   shader->total_shared           = elk->total_shared;
   shader->program_size           = elk->program_size;
   shader->const_data_offset      = elk->const_data_offset;
   shader->dispatch_grf_start_reg = elk->dispatch_grf_start_reg;
   shader->has_ubo_pull           = elk->has_ubo_pull;
   shader->use_alt_mode           = elk->use_alt_mode;

   switch (shader->stage) {
   case MESA_SHADER_FRAGMENT:
      iris_apply_elk_wm_prog_data(shader, elk_wm_prog_data_const(elk));
      break;
   case MESA_SHADER_COMPUTE:
      iris_apply_elk_cs_prog_data(shader, elk_cs_prog_data_const(elk));
      break;
   case MESA_SHADER_VERTEX:
      iris_apply_elk_vs_prog_data(shader, elk_vs_prog_data_const(elk));
      break;
   case MESA_SHADER_TESS_CTRL:
      iris_apply_elk_tcs_prog_data(shader, elk_tcs_prog_data_const(elk));
      break;
   case MESA_SHADER_TESS_EVAL:
      iris_apply_elk_tes_prog_data(shader, elk_tes_prog_data_const(elk));
      break;
   case MESA_SHADER_GEOMETRY:
      iris_apply_elk_gs_prog_data(shader, elk_gs_prog_data_const(elk));
      break;
   default:
      unreachable("invalid shader stage");
   }

   shader->elk_prog_data = elk;

   ralloc_steal(shader, shader->elk_prog_data);
   ralloc_steal(shader->elk_prog_data, (void *)elk->relocs);
   ralloc_steal(shader->elk_prog_data, elk->param);
}

void
iris_finalize_program(struct iris_compiled_shader *shader,
                      uint32_t *streamout,
                      uint32_t *system_values,
                      unsigned num_system_values,
                      unsigned kernel_input_size,
                      unsigned num_cbufs,
                      const struct iris_binding_table *bt)
{
   /* There can be only one. */
   assert((shader->brw_prog_data == NULL) != (shader->elk_prog_data == NULL));

   shader->streamout = streamout;
   shader->system_values = system_values;
   shader->num_system_values = num_system_values;
   shader->kernel_input_size = kernel_input_size;
   shader->num_cbufs = num_cbufs;
   shader->bt = *bt;

   ralloc_steal(shader, shader->streamout);
   ralloc_steal(shader, shader->system_values);
}

static struct brw_vs_prog_key
iris_to_brw_vs_key(const struct iris_screen *screen,
                   const struct iris_vs_prog_key *key)
{
   return (struct brw_vs_prog_key) {
      BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
                   key->vue.base.limit_trig_input_range),
   };
}

static struct brw_tcs_prog_key
iris_to_brw_tcs_key(const struct iris_screen *screen,
                    const struct iris_tcs_prog_key *key)
{
   return (struct brw_tcs_prog_key) {
      BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
                   key->vue.base.limit_trig_input_range),
      ._tes_primitive_mode = key->_tes_primitive_mode,
      .input_vertices = key->input_vertices,
      .patch_outputs_written = key->patch_outputs_written,
      .outputs_written = key->outputs_written,
   };
}

static struct brw_tes_prog_key
iris_to_brw_tes_key(const struct iris_screen *screen,
                    const struct iris_tes_prog_key *key)
{
   return (struct brw_tes_prog_key) {
      BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
                   key->vue.base.limit_trig_input_range),
      .patch_inputs_read = key->patch_inputs_read,
      .inputs_read = key->inputs_read,
   };
}

static struct brw_gs_prog_key
iris_to_brw_gs_key(const struct iris_screen *screen,
                   const struct iris_gs_prog_key *key)
{
   return (struct brw_gs_prog_key) {
      BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
                   key->vue.base.limit_trig_input_range),
   };
}

static struct brw_wm_prog_key
iris_to_brw_fs_key(const struct iris_screen *screen,
                   const struct iris_fs_prog_key *key)
{
   return (struct brw_wm_prog_key) {
      BRW_KEY_INIT(screen->devinfo->ver, key->base.program_string_id,
                   key->base.limit_trig_input_range),
      .nr_color_regions = key->nr_color_regions,
      .flat_shade = key->flat_shade,
      .alpha_test_replicate_alpha = key->alpha_test_replicate_alpha,
      .alpha_to_coverage = key->alpha_to_coverage ? BRW_ALWAYS : BRW_NEVER,
      .clamp_fragment_color = key->clamp_fragment_color,
      .persample_interp = key->persample_interp ? BRW_ALWAYS : BRW_NEVER,
      .multisample_fbo = key->multisample_fbo ? BRW_ALWAYS : BRW_NEVER,
      .force_dual_color_blend = key->force_dual_color_blend,
      .coherent_fb_fetch = key->coherent_fb_fetch,
      .color_outputs_valid = key->color_outputs_valid,
      .input_slots_valid = key->input_slots_valid,
      .ignore_sample_mask_out = !key->multisample_fbo,
      .null_push_constant_tbimr_workaround =
         screen->devinfo->needs_null_push_constant_tbimr_workaround,
   };
}

static struct brw_cs_prog_key
iris_to_brw_cs_key(const struct iris_screen *screen,
                   const struct iris_cs_prog_key *key)
{
   return (struct brw_cs_prog_key) {
      BRW_KEY_INIT(screen->devinfo->ver, key->base.program_string_id,
                   key->base.limit_trig_input_range),
   };
}

static struct elk_vs_prog_key
iris_to_elk_vs_key(const struct iris_screen *screen,
                   const struct iris_vs_prog_key *key)
{
   return (struct elk_vs_prog_key) {
      ELK_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
                   key->vue.base.limit_trig_input_range),

      /* Don't tell the backend about our clip plane constants, we've
       * already lowered them in NIR and don't want it doing it again.
       */
      .nr_userclip_plane_consts = 0,
   };
}

static struct elk_tcs_prog_key
iris_to_elk_tcs_key(const struct iris_screen *screen,
                    const struct iris_tcs_prog_key *key)
{
   return (struct elk_tcs_prog_key) {
      ELK_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
                   key->vue.base.limit_trig_input_range),
      ._tes_primitive_mode = key->_tes_primitive_mode,
      .input_vertices = key->input_vertices,
      .patch_outputs_written = key->patch_outputs_written,
      .outputs_written = key->outputs_written,
      .quads_workaround = key->quads_workaround,
   };
}

static struct elk_tes_prog_key
iris_to_elk_tes_key(const struct iris_screen *screen,
                    const struct iris_tes_prog_key *key)
{
   return (struct elk_tes_prog_key) {
      ELK_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
                   key->vue.base.limit_trig_input_range),
      .patch_inputs_read = key->patch_inputs_read,
      .inputs_read = key->inputs_read,
   };
}

static struct elk_gs_prog_key
iris_to_elk_gs_key(const struct iris_screen *screen,
                   const struct iris_gs_prog_key *key)
{
   return (struct elk_gs_prog_key) {
      ELK_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
                   key->vue.base.limit_trig_input_range),
   };
}

static struct elk_wm_prog_key
iris_to_elk_fs_key(const struct iris_screen *screen,
                   const struct iris_fs_prog_key *key)
{
   return (struct elk_wm_prog_key) {
      ELK_KEY_INIT(screen->devinfo->ver, key->base.program_string_id,
                   key->base.limit_trig_input_range),
      .nr_color_regions = key->nr_color_regions,
      .flat_shade = key->flat_shade,
      .alpha_test_replicate_alpha = key->alpha_test_replicate_alpha,
      .alpha_to_coverage = key->alpha_to_coverage ? ELK_ALWAYS : ELK_NEVER,
      .clamp_fragment_color = key->clamp_fragment_color,
      .persample_interp = key->persample_interp ? ELK_ALWAYS : ELK_NEVER,
      .multisample_fbo = key->multisample_fbo ? ELK_ALWAYS : ELK_NEVER,
      .force_dual_color_blend = key->force_dual_color_blend,
      .coherent_fb_fetch = key->coherent_fb_fetch,
      .color_outputs_valid = key->color_outputs_valid,
      .input_slots_valid = key->input_slots_valid,
      .ignore_sample_mask_out = !key->multisample_fbo,
   };
}

static struct elk_cs_prog_key
iris_to_elk_cs_key(const struct iris_screen *screen,
                   const struct iris_cs_prog_key *key)
{
   return (struct elk_cs_prog_key) {
      ELK_KEY_INIT(screen->devinfo->ver, key->base.program_string_id,
                   key->base.limit_trig_input_range),
   };
}

static void *
upload_state(struct u_upload_mgr *uploader,
             struct iris_state_ref *ref,
             unsigned size,
             unsigned alignment)
{
   void *p = NULL;
   u_upload_alloc(uploader, 0, size, alignment, &ref->offset, &ref->res, &p);
   return p;
}

void
iris_upload_ubo_ssbo_surf_state(struct iris_context *ice,
                                struct pipe_shader_buffer *buf,
                                struct iris_state_ref *surf_state,
                                isl_surf_usage_flags_t usage)
{
   struct pipe_context *ctx = &ice->ctx;
   struct iris_screen *screen = (struct iris_screen *) ctx->screen;
   bool ssbo = usage & ISL_SURF_USAGE_STORAGE_BIT;

   void *map =
      upload_state(ice->state.surface_uploader, surf_state,
                   screen->isl_dev.ss.size, 64);
   if (unlikely(!map)) {
      surf_state->res = NULL;
      return;
   }

   struct iris_resource *res = (void *) buf->buffer;
   struct iris_bo *surf_bo = iris_resource_bo(surf_state->res);
   surf_state->offset += iris_bo_offset_from_base_address(surf_bo);

   const bool dataport = ssbo || !iris_indirect_ubos_use_sampler(screen);

   isl_buffer_fill_state(&screen->isl_dev, map,
                         .address = res->bo->address + res->offset +
                                    buf->buffer_offset,
                         .size_B = buf->buffer_size - res->offset,
                         .format = dataport ? ISL_FORMAT_RAW
                                            : ISL_FORMAT_R32G32B32A32_FLOAT,
                         .swizzle = ISL_SWIZZLE_IDENTITY,
                         .stride_B = 1,
                         .mocs = iris_mocs(res->bo, &screen->isl_dev, usage));
}

static nir_def *
get_aoa_deref_offset(nir_builder *b,
                     nir_deref_instr *deref,
                     unsigned elem_size)
{
   unsigned array_size = elem_size;
   nir_def *offset = nir_imm_int(b, 0);

   while (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);

      /* This level's element size is the previous level's array size */
      nir_def *index = deref->arr.index.ssa;
      assert(deref->arr.index.ssa);
      offset = nir_iadd(b, offset,
                           nir_imul_imm(b, index, array_size));

      deref = nir_deref_instr_parent(deref);
      assert(glsl_type_is_array(deref->type));
      array_size *= glsl_get_length(deref->type);
   }

   /* Accessing an invalid surface index with the dataport can result in a
    * hang.  According to the spec "if the index used to select an individual
    * element is negative or greater than or equal to the size of the array,
    * the results of the operation are undefined but may not lead to
    * termination" -- which is one of the possible outcomes of the hang.
    * Clamp the index to prevent access outside of the array bounds.
    */
   return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size));
}
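
/* Worked example (hypothetical shader): for a GLSL declaration such as
 *
 *    layout(rgba8) uniform image2D img[4][3];
 *
 * and an access img[i][j] with elem_size == 1, the walk visits the derefs
 * innermost-first and accumulates offset = j * 1 + i * 3, i.e. a row-major
 * flattening of the array-of-arrays.  The final nir_umin then clamps the
 * result to 4 * 3 - 1 == 11, the last valid flattened element.
 */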

static void
iris_lower_storage_image_derefs(nir_shader *nir)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(nir);

   nir_builder b = nir_builder_create(impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_image_deref_load:
         case nir_intrinsic_image_deref_store:
         case nir_intrinsic_image_deref_atomic:
         case nir_intrinsic_image_deref_atomic_swap:
         case nir_intrinsic_image_deref_size:
         case nir_intrinsic_image_deref_samples:
         case nir_intrinsic_image_deref_load_raw_intel:
         case nir_intrinsic_image_deref_store_raw_intel: {
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            nir_variable *var = nir_deref_instr_get_variable(deref);

            b.cursor = nir_before_instr(&intrin->instr);
            nir_def *index =
               nir_iadd_imm(&b, get_aoa_deref_offset(&b, deref, 1),
                                var->data.driver_location);
            nir_rewrite_image_intrinsic(intrin, index, false);
            break;
         }

         default:
            break;
         }
      }
   }
}

static bool
iris_uses_image_atomic(const nir_shader *shader)
{
   nir_foreach_function_impl(impl, shader) {
      nir_foreach_block(block, impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            switch (intrin->intrinsic) {
            case nir_intrinsic_image_deref_atomic:
            case nir_intrinsic_image_deref_atomic_swap:
               unreachable("Should have been lowered in "
                           "iris_lower_storage_image_derefs");

            case nir_intrinsic_image_atomic:
            case nir_intrinsic_image_atomic_swap:
               return true;

            default:
               break;
            }
         }
      }
   }

   return false;
}

/**
 * Undo nir_lower_passthrough_edgeflags but keep the inputs_read flag.
 */
static bool
iris_fix_edge_flags(nir_shader *nir)
{
   if (nir->info.stage != MESA_SHADER_VERTEX) {
      nir_shader_preserve_all_metadata(nir);
      return false;
   }

   nir_variable *var = nir_find_variable_with_location(nir, nir_var_shader_out,
                                                       VARYING_SLOT_EDGE);
   if (!var) {
      nir_shader_preserve_all_metadata(nir);
      return false;
   }

   var->data.mode = nir_var_shader_temp;
   nir->info.outputs_written &= ~VARYING_BIT_EDGE;
   nir->info.inputs_read &= ~VERT_BIT_EDGEFLAG;
   nir_fixup_deref_modes(nir);

   nir_foreach_function_impl(impl, nir) {
      nir_metadata_preserve(impl, nir_metadata_control_flow |
                                  nir_metadata_live_defs |
                                  nir_metadata_loop_analysis);
   }

   return true;
}

/**
 * Fix an uncompiled shader's stream output info.
 *
 * Core Gallium stores output->register_index as a "slot" number, where
 * slots are assigned consecutively to all outputs in info->outputs_written.
 * This naive packing of outputs doesn't work for us - we too have slots,
 * but the layout is defined by the VUE map, which we won't have until we
 * compile a specific shader variant.  So, we remap these and simply store
 * VARYING_SLOT_* in our copy's output->register_index fields.
 *
 * We also fix up VARYING_SLOT_{LAYER,VIEWPORT,PSIZ} to select the Y/Z/W
 * components of our VUE header.  See brw_vue_map.c for the layout.
 */
static void
update_so_info(struct pipe_stream_output_info *so_info,
               uint64_t outputs_written)
{
   uint8_t reverse_map[64] = {};
   unsigned slot = 0;
   while (outputs_written) {
      reverse_map[slot++] = u_bit_scan64(&outputs_written);
   }

   for (unsigned i = 0; i < so_info->num_outputs; i++) {
      struct pipe_stream_output *output = &so_info->output[i];

      /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
      output->register_index = reverse_map[output->register_index];

      /* The VUE header contains three scalar fields packed together:
       * - gl_PointSize is stored in VARYING_SLOT_PSIZ.w
       * - gl_Layer is stored in VARYING_SLOT_PSIZ.y
       * - gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
       */
      switch (output->register_index) {
      case VARYING_SLOT_LAYER:
         assert(output->num_components == 1);
         output->register_index = VARYING_SLOT_PSIZ;
         output->start_component = 1;
         break;
      case VARYING_SLOT_VIEWPORT:
         assert(output->num_components == 1);
         output->register_index = VARYING_SLOT_PSIZ;
         output->start_component = 2;
         break;
      case VARYING_SLOT_PSIZ:
         assert(output->num_components == 1);
         output->start_component = 3;
         break;
      }

      //info->outputs_written |= 1ull << output->register_index;
   }
}
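
/* Worked example (hypothetical values): if outputs_written contains only
 * VARYING_SLOT_POS, VARYING_SLOT_PSIZ, and VARYING_SLOT_VAR0, Gallium
 * numbers them as slots 0, 1, and 2 in ascending bit order, so reverse_map
 * becomes { VARYING_SLOT_POS, VARYING_SLOT_PSIZ, VARYING_SLOT_VAR0 } and a
 * stream output recorded against slot 2 is rewritten to register_index
 * VARYING_SLOT_VAR0.  A gl_Layer output is additionally redirected to
 * component 1 (.y) of VARYING_SLOT_PSIZ, per the VUE header layout
 * described above.
 */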

static void
setup_vec4_image_sysval(uint32_t *sysvals, uint32_t idx,
                        unsigned offset, unsigned n)
{
   assert(offset % sizeof(uint32_t) == 0);

   for (unsigned i = 0; i < n; ++i)
      sysvals[i] = ELK_PARAM_IMAGE(idx, offset / sizeof(uint32_t) + i);

   for (unsigned i = n; i < 4; ++i)
      sysvals[i] = ELK_PARAM_BUILTIN_ZERO;
}
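
/* For illustration (hypothetical call): setup_vec4_image_sysval(sv, 2,
 * offsetof(struct isl_image_param, size), 3) fills one vec4 worth of
 * system values:
 *
 *    sv[0] = ELK_PARAM_IMAGE(2, offsetof(...) / 4 + 0)
 *    sv[1] = ELK_PARAM_IMAGE(2, offsetof(...) / 4 + 1)
 *    sv[2] = ELK_PARAM_IMAGE(2, offsetof(...) / 4 + 2)
 *    sv[3] = ELK_PARAM_BUILTIN_ZERO
 *
 * so every image parameter occupies a full vec4 slot, zero-padded past the
 * parameter's actual component count.
 */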

/**
 * Associate NIR uniform variables with the prog_data->param[] mechanism
 * used by the backend.  Also, decide which UBOs we'd like to push in an
 * ideal situation (though the backend can reduce this).
 */
static void
iris_setup_uniforms(ASSERTED const struct intel_device_info *devinfo,
                    void *mem_ctx,
                    nir_shader *nir,
                    unsigned kernel_input_size,
                    uint32_t **out_system_values,
                    unsigned *out_num_system_values,
                    unsigned *out_num_cbufs)
{
   unsigned system_values_start = ALIGN(kernel_input_size, sizeof(uint32_t));

   const unsigned IRIS_MAX_SYSTEM_VALUES =
      PIPE_MAX_SHADER_IMAGES * ISL_IMAGE_PARAM_SIZE;
   unsigned *system_values =
      rzalloc_array(mem_ctx, unsigned, IRIS_MAX_SYSTEM_VALUES);
   unsigned num_system_values = 0;

   unsigned patch_vert_idx = -1;
   unsigned tess_outer_default_idx = -1;
   unsigned tess_inner_default_idx = -1;
   unsigned ucp_idx[IRIS_MAX_CLIP_PLANES];
   unsigned img_idx[PIPE_MAX_SHADER_IMAGES];
   unsigned variable_group_size_idx = -1;
   unsigned work_dim_idx = -1;
   memset(ucp_idx, -1, sizeof(ucp_idx));
   memset(img_idx, -1, sizeof(img_idx));

   nir_function_impl *impl = nir_shader_get_entrypoint(nir);

   nir_builder b = nir_builder_at(nir_before_impl(impl));

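   /* Placeholder "UBO index" for the system-value constant buffer.  Loads
    * created below reference this undef; once the real cbuf index is known
    * (after counting user UBOs), those sources are rewritten to it.
    */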
   nir_def *temp_ubo_name = nir_undef(&b, 1, 32);

   /* Turn system value intrinsics into uniforms */
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         nir_def *offset;

         switch (intrin->intrinsic) {
         case nir_intrinsic_load_base_workgroup_id: {
            /* GL doesn't have a concept of base workgroup */
            b.cursor = nir_instr_remove(&intrin->instr);
            nir_def_rewrite_uses(&intrin->def,
                                     nir_imm_zero(&b, 3, 32));
            continue;
         }
         case nir_intrinsic_load_constant: {
            unsigned load_size = intrin->def.num_components *
                                 intrin->def.bit_size / 8;
            unsigned load_align = intrin->def.bit_size / 8;

            /* This one is special because it reads from the shader constant
             * data and not cbuf0 which gallium uploads for us.
             */
            b.cursor = nir_instr_remove(&intrin->instr);

            nir_def *offset =
               nir_iadd_imm(&b, intrin->src[0].ssa,
                                nir_intrinsic_base(intrin));

            assert(load_size < b.shader->constant_data_size);
            unsigned max_offset = b.shader->constant_data_size - load_size;
            offset = nir_umin(&b, offset, nir_imm_int(&b, max_offset));

            /* Constant data lives in buffers within IRIS_MEMZONE_SHADER
             * and cannot cross that 4GB boundary, so we can do the address
             * calculation with 32-bit adds.  Also, we can ignore the high
             * bits because IRIS_MEMZONE_SHADER is in the [0, 4GB) range.
             */
            assert(IRIS_MEMZONE_SHADER_START >> 32 == 0ull);

            nir_def *const_data_addr =
               nir_iadd(&b, nir_load_reloc_const_intel(&b, BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW), offset);

            nir_def *data =
               nir_load_global_constant(&b, nir_u2u64(&b, const_data_addr),
                                        load_align,
                                        intrin->def.num_components,
                                        intrin->def.bit_size);

            nir_def_rewrite_uses(&intrin->def,
                                     data);
            continue;
         }
         case nir_intrinsic_load_user_clip_plane: {
            unsigned ucp = nir_intrinsic_ucp_id(intrin);

            if (ucp_idx[ucp] == -1) {
               ucp_idx[ucp] = num_system_values;
               num_system_values += 4;
            }

            for (int i = 0; i < 4; i++) {
               system_values[ucp_idx[ucp] + i] =
                  BRW_PARAM_BUILTIN_CLIP_PLANE(ucp, i);
            }

            b.cursor = nir_before_instr(instr);
            offset = nir_imm_int(&b, system_values_start +
                                     ucp_idx[ucp] * sizeof(uint32_t));
            break;
         }
         case nir_intrinsic_load_patch_vertices_in:
            if (patch_vert_idx == -1)
               patch_vert_idx = num_system_values++;

            system_values[patch_vert_idx] =
               BRW_PARAM_BUILTIN_PATCH_VERTICES_IN;

            b.cursor = nir_before_instr(instr);
            offset = nir_imm_int(&b, system_values_start +
                                     patch_vert_idx * sizeof(uint32_t));
            break;
         case nir_intrinsic_load_tess_level_outer_default:
            if (tess_outer_default_idx == -1) {
               tess_outer_default_idx = num_system_values;
               num_system_values += 4;
            }

            for (int i = 0; i < 4; i++) {
               system_values[tess_outer_default_idx + i] =
                  BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;
            }

            b.cursor = nir_before_instr(instr);
            offset = nir_imm_int(&b, system_values_start +
                                 tess_outer_default_idx * sizeof(uint32_t));
            break;
         case nir_intrinsic_load_tess_level_inner_default:
            if (tess_inner_default_idx == -1) {
               tess_inner_default_idx = num_system_values;
               num_system_values += 2;
            }

            for (int i = 0; i < 2; i++) {
               system_values[tess_inner_default_idx + i] =
                  BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X + i;
            }

            b.cursor = nir_before_instr(instr);
            offset = nir_imm_int(&b, system_values_start +
                                 tess_inner_default_idx * sizeof(uint32_t));
            break;
         case nir_intrinsic_image_deref_load_param_intel: {
            assert(devinfo->ver < 9);
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            nir_variable *var = nir_deref_instr_get_variable(deref);

            if (img_idx[var->data.binding] == -1) {
               /* GL only allows arrays of arrays of images. */
               assert(glsl_type_is_image(glsl_without_array(var->type)));
               unsigned num_images = MAX2(1, glsl_get_aoa_size(var->type));

               for (int i = 0; i < num_images; i++) {
                  const unsigned img = var->data.binding + i;

                  img_idx[img] = num_system_values;
                  num_system_values += ISL_IMAGE_PARAM_SIZE;

                  uint32_t *img_sv = &system_values[img_idx[img]];

                  setup_vec4_image_sysval(
                     img_sv + ISL_IMAGE_PARAM_OFFSET_OFFSET, img,
                     offsetof(struct isl_image_param, offset), 2);
                  setup_vec4_image_sysval(
                     img_sv + ISL_IMAGE_PARAM_SIZE_OFFSET, img,
                     offsetof(struct isl_image_param, size), 3);
                  setup_vec4_image_sysval(
                     img_sv + ISL_IMAGE_PARAM_STRIDE_OFFSET, img,
                     offsetof(struct isl_image_param, stride), 4);
                  setup_vec4_image_sysval(
                     img_sv + ISL_IMAGE_PARAM_TILING_OFFSET, img,
                     offsetof(struct isl_image_param, tiling), 3);
                  setup_vec4_image_sysval(
                     img_sv + ISL_IMAGE_PARAM_SWIZZLING_OFFSET, img,
                     offsetof(struct isl_image_param, swizzling), 2);
               }
            }

            b.cursor = nir_before_instr(instr);
            offset = nir_iadd_imm(&b,
               get_aoa_deref_offset(&b, deref, ISL_IMAGE_PARAM_SIZE * 4),
               system_values_start +
               img_idx[var->data.binding] * 4 +
               nir_intrinsic_base(intrin) * 16);
            break;
         }
         case nir_intrinsic_load_workgroup_size: {
            assert(nir->info.workgroup_size_variable);
            if (variable_group_size_idx == -1) {
               variable_group_size_idx = num_system_values;
               num_system_values += 3;
               for (int i = 0; i < 3; i++) {
                  system_values[variable_group_size_idx + i] =
                     BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X + i;
               }
            }

            b.cursor = nir_before_instr(instr);
            offset = nir_imm_int(&b, system_values_start +
                                     variable_group_size_idx * sizeof(uint32_t));
            break;
         }
         case nir_intrinsic_load_work_dim: {
            if (work_dim_idx == -1) {
               work_dim_idx = num_system_values++;
               system_values[work_dim_idx] = BRW_PARAM_BUILTIN_WORK_DIM;
            }
            b.cursor = nir_before_instr(instr);
            offset = nir_imm_int(&b, system_values_start +
                                     work_dim_idx * sizeof(uint32_t));
            break;
         }
         case nir_intrinsic_load_kernel_input: {
            assert(nir_intrinsic_base(intrin) +
                   nir_intrinsic_range(intrin) <= kernel_input_size);
            b.cursor = nir_before_instr(instr);
            offset = nir_iadd_imm(&b, intrin->src[0].ssa,
                                      nir_intrinsic_base(intrin));
            break;
         }
         default:
            continue;
         }

         nir_def *load =
            nir_load_ubo(&b, intrin->def.num_components, intrin->def.bit_size,
                         temp_ubo_name, offset,
                         .align_mul = 4,
                         .align_offset = 0,
                         .range_base = 0,
                         .range = ~0);

         nir_def_rewrite_uses(&intrin->def,
                                  load);
         nir_instr_remove(instr);
      }
   }

   nir_validate_shader(nir, "before remapping");

   /* Uniforms are stored in constant buffer 0; the user-facing UBOs are
    * therefore shifted up by one.  So if any constant buffer is needed,
    * constant buffer 0 will be needed, so account for it.
    */
   unsigned num_cbufs = nir->info.num_ubos;
   if (num_cbufs || nir->num_uniforms)
      num_cbufs++;

   /* Place the new params in a new cbuf. */
   if (num_system_values > 0 || kernel_input_size > 0) {
      unsigned sysval_cbuf_index = num_cbufs;
      num_cbufs++;

      system_values = reralloc(mem_ctx, system_values, unsigned,
                               num_system_values);

      nir_foreach_block(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);

            if (load->intrinsic != nir_intrinsic_load_ubo)
               continue;

            b.cursor = nir_before_instr(instr);

            if (load->src[0].ssa == temp_ubo_name) {
               nir_def *imm = nir_imm_int(&b, sysval_cbuf_index);
               nir_src_rewrite(&load->src[0], imm);
            }
         }
      }

      /* We need to fold the new iadds for brw_nir_analyze_ubo_ranges */
      nir_opt_constant_folding(nir);
   } else {
      ralloc_free(system_values);
      system_values = NULL;
   }

   assert(num_cbufs < PIPE_MAX_CONSTANT_BUFFERS);
   nir_validate_shader(nir, "after remap");

   /* We don't use params[], but Gallium leaves num_uniforms set.  We use it
    * to detect when cbuf0 exists but is no longer needed by the time we get
    * here.  Zero it out so the back-end doesn't get confused when
    * num_uniforms != nr_params * 4.
    */
   nir->num_uniforms = 0;
1223 
1224    *out_system_values = system_values;
1225    *out_num_system_values = num_system_values;
1226    *out_num_cbufs = num_cbufs;
1227 }
1228 
1229 static const char *surface_group_names[] = {
1230    [IRIS_SURFACE_GROUP_RENDER_TARGET]      = "render target",
1231    [IRIS_SURFACE_GROUP_RENDER_TARGET_READ] = "non-coherent render target read",
1232    [IRIS_SURFACE_GROUP_CS_WORK_GROUPS]     = "CS work groups",
1233    [IRIS_SURFACE_GROUP_TEXTURE_LOW64]      = "texture",
1234    [IRIS_SURFACE_GROUP_TEXTURE_HIGH64]     = "texture",
1235    [IRIS_SURFACE_GROUP_UBO]                = "ubo",
1236    [IRIS_SURFACE_GROUP_SSBO]               = "ssbo",
1237    [IRIS_SURFACE_GROUP_IMAGE]              = "image",
1238 };
1239 
1240 static void
iris_print_binding_table(FILE * fp,const char * name,const struct iris_binding_table * bt)1241 iris_print_binding_table(FILE *fp, const char *name,
1242                          const struct iris_binding_table *bt)
1243 {
1244    STATIC_ASSERT(ARRAY_SIZE(surface_group_names) == IRIS_SURFACE_GROUP_COUNT);
1245 
1246    uint32_t total = 0;
1247    uint32_t compacted = 0;
1248 
1249    for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
1250       uint32_t size = bt->sizes[i];
1251       total += size;
1252       if (size)
1253          compacted += util_bitcount64(bt->used_mask[i]);
1254    }
1255 
1256    if (total == 0) {
1257       fprintf(fp, "Binding table for %s is empty\n\n", name);
1258       return;
1259    }
1260 
1261    if (total != compacted) {
1262       fprintf(fp, "Binding table for %s "
1263               "(compacted to %u entries from %u entries)\n",
1264               name, compacted, total);
1265    } else {
1266       fprintf(fp, "Binding table for %s (%u entries)\n", name, total);
1267    }
1268 
1269    uint32_t entry = 0;
1270    for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
1271       uint64_t mask = bt->used_mask[i];
1272       while (mask) {
1273          int index = u_bit_scan64(&mask);
1274          fprintf(fp, "  [%u] %s #%d\n", entry++, surface_group_names[i], index);
1275       }
1276    }
1277    fprintf(fp, "\n");
1278 }
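/* Example INTEL_DEBUG=bt output (editor's illustration; the actual entries
 * depend entirely on the shader being compiled):
 *
 *    Binding table for fragment (compacted to 3 entries from 68 entries)
 *      [0] render target #0
 *      [1] ubo #0
 *      [2] ubo #1
 */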
1279 
1280 enum {
1281    /* Max elements in a surface group. */
1282    SURFACE_GROUP_MAX_ELEMENTS = 64,
1283 };
1284 
1285 /**
1286  * Map a <group, index> pair to a binding table index.
1287  *
1288  * For example: <UBO, 5> => binding table index 12
1289  */
1290 uint32_t
1291 iris_group_index_to_bti(const struct iris_binding_table *bt,
1292                         enum iris_surface_group group, uint32_t index)
1293 {
1294    assert(index < bt->sizes[group]);
1295    uint64_t mask = bt->used_mask[group];
1296    uint64_t bit = 1ull << index;
1297    if (bit & mask) {
1298       return bt->offsets[group] + util_bitcount64((bit - 1) & mask);
1299    } else {
1300       return IRIS_SURFACE_NOT_USED;
1301    }
1302 }
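/* Worked example (editor's illustration): with used_mask = 0b101101 and
 * offsets[group] = 4, group index 3 maps to
 *
 *    bti = 4 + util_bitcount64(0b000111 & 0b101101)
 *        = 4 + util_bitcount64(0b000101) = 6
 *
 * because only two lower-indexed surfaces (0 and 2) are actually used, so
 * index 3 lands two entries past the group's base offset.
 */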
1303 
1304 /**
1305  * Map a binding table index back to a <group, index> pair.
1306  *
1307  * For example: binding table index 12 => <UBO, 5>
1308  */
1309 uint32_t
1310 iris_bti_to_group_index(const struct iris_binding_table *bt,
1311                         enum iris_surface_group group, uint32_t bti)
1312 {
1313    uint64_t used_mask = bt->used_mask[group];
1314    assert(bti >= bt->offsets[group]);
1315 
1316    uint32_t c = bti - bt->offsets[group];
1317    while (used_mask) {
1318       int i = u_bit_scan64(&used_mask);
1319       if (c == 0)
1320          return i;
1321       c--;
1322    }
1323 
1324    return IRIS_SURFACE_NOT_USED;
1325 }
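/* Running the example above in reverse (editor's illustration): with
 * used_mask = 0b101101 and offsets[group] = 4, bti = 6 gives c = 2, and
 * scanning the set bits 0, 2, 3, 5 in order returns group index 3 once c
 * reaches zero.
 */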
1326 
1327 static void
1328 rewrite_src_with_bti(nir_builder *b, struct iris_binding_table *bt,
1329                      nir_instr *instr, nir_src *src,
1330                      enum iris_surface_group group)
1331 {
1332    assert(bt->sizes[group] > 0);
1333 
1334    b->cursor = nir_before_instr(instr);
1335    nir_def *bti;
1336    if (nir_src_is_const(*src)) {
1337       uint32_t index = nir_src_as_uint(*src);
1338       bti = nir_imm_intN_t(b, iris_group_index_to_bti(bt, group, index),
1339                            src->ssa->bit_size);
1340    } else {
1341          /* Indirect usage means all the surfaces of the group must be
1342           * available, so we can just add the base.
1343           */
1344       assert(bt->used_mask[group] == BITFIELD64_MASK(bt->sizes[group]));
1345       bti = nir_iadd_imm(b, src->ssa, bt->offsets[group]);
1346    }
1347    nir_src_rewrite(src, bti);
1348 }
1349 
1350 static void
1351 mark_used_with_src(struct iris_binding_table *bt, nir_src *src,
1352                    enum iris_surface_group group)
1353 {
1354    assert(bt->sizes[group] > 0);
1355 
1356    if (nir_src_is_const(*src)) {
1357       uint64_t index = nir_src_as_uint(*src);
1358       assert(index < bt->sizes[group]);
1359       bt->used_mask[group] |= 1ull << index;
1360    } else {
1361          /* There's an indirect usage; we need all the surfaces. */
1362       bt->used_mask[group] = BITFIELD64_MASK(bt->sizes[group]);
1363    }
1364 }
1365 
1366 static bool
1367 skip_compacting_binding_tables(void)
1368 {
1369    static int skip = -1;
1370    if (skip < 0)
1371       skip = debug_get_bool_option("INTEL_DISABLE_COMPACT_BINDING_TABLE", false);
1372    return skip;
1373 }
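/* For debugging, compaction can be switched off at runtime (editor's
 * illustration; the application name is hypothetical):
 *
 *    INTEL_DISABLE_COMPACT_BINDING_TABLE=1 ./my_gl_app
 *
 * in which case iris_setup_binding_table() below marks every slot of every
 * group as used.
 */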
1374 
1375 /**
1376  * Set up the binding table indices and apply to the shader.
1377  */
1378 static void
1379 iris_setup_binding_table(const struct intel_device_info *devinfo,
1380                          struct nir_shader *nir,
1381                          struct iris_binding_table *bt,
1382                          unsigned num_render_targets,
1383                          unsigned num_system_values,
1384                          unsigned num_cbufs)
1385 {
1386    const struct shader_info *info = &nir->info;
1387 
1388    memset(bt, 0, sizeof(*bt));
1389 
1390    /* Set the sizes for each surface group.  For some groups, we already know
1391     * upfront how many will be used, so mark them.
1392     */
1393    if (info->stage == MESA_SHADER_FRAGMENT) {
1394       bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET] = num_render_targets;
1395       /* All render targets used. */
1396       bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET] =
1397          BITFIELD64_MASK(num_render_targets);
1398 
1399       /* Set up the render target read surface group in order to support
1400        * non-coherent framebuffer fetch on Gfx8.
1401        */
1402       if (devinfo->ver == 8 && info->outputs_read) {
1403          bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET_READ] = num_render_targets;
1404          bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET_READ] =
1405             BITFIELD64_MASK(num_render_targets);
1406       }
1407    } else if (info->stage == MESA_SHADER_COMPUTE) {
1408       bt->sizes[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
1409    }
1410 
1411    assert(ARRAY_SIZE(info->textures_used) >= 4);
1412    int max_tex = BITSET_LAST_BIT(info->textures_used);
1413    assert(max_tex <= 128);
1414    bt->sizes[IRIS_SURFACE_GROUP_TEXTURE_LOW64] = MIN2(64, max_tex);
1415    bt->sizes[IRIS_SURFACE_GROUP_TEXTURE_HIGH64] = MAX2(0, max_tex - 64);
1416    bt->used_mask[IRIS_SURFACE_GROUP_TEXTURE_LOW64] =
1417       info->textures_used[0] | ((uint64_t)info->textures_used[1]) << 32;
1418    bt->used_mask[IRIS_SURFACE_GROUP_TEXTURE_HIGH64] =
1419       info->textures_used[2] | ((uint64_t)info->textures_used[3]) << 32;
1420    bt->samplers_used_mask = info->samplers_used[0];
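   /* Worked example (editor's illustration): a shader whose highest used
    * texture index is 69 has max_tex = 70, so the LOW64 group gets
    * MIN2(64, 70) = 64 slots and the HIGH64 group gets MAX2(0, 70 - 64) = 6
    * slots, with the used bits split across the two 64-bit masks above.
    */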
1421 
1422    bt->sizes[IRIS_SURFACE_GROUP_IMAGE] = BITSET_LAST_BIT(info->images_used);
1423 
1424    /* Allocate an extra slot in the UBO section for NIR constants.
1425     * Binding table compaction will remove it if unnecessary.
1426     *
1427     * We don't include them in iris_compiled_shader::num_cbufs because
1428     * they are uploaded separately from shs->constbuf[], but from a shader
1429     * point of view, they're another UBO (at the end of the section).
1430     */
1431    bt->sizes[IRIS_SURFACE_GROUP_UBO] = num_cbufs + 1;
1432 
1433    bt->sizes[IRIS_SURFACE_GROUP_SSBO] = info->num_ssbos;
1434 
1435    for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
1436       assert(bt->sizes[i] <= SURFACE_GROUP_MAX_ELEMENTS);
1437 
1438    /* Mark surfaces used for the cases where we don't have the information
1439     * available upfront.
1440     */
1441    nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1442    nir_foreach_block (block, impl) {
1443       nir_foreach_instr (instr, block) {
1444          if (instr->type != nir_instr_type_intrinsic)
1445             continue;
1446 
1447          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1448          switch (intrin->intrinsic) {
1449          case nir_intrinsic_load_num_workgroups:
1450             bt->used_mask[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
1451             break;
1452 
1453          case nir_intrinsic_load_output:
1454             if (devinfo->ver == 8) {
1455                mark_used_with_src(bt, &intrin->src[0],
1456                                   IRIS_SURFACE_GROUP_RENDER_TARGET_READ);
1457             }
1458             break;
1459 
1460          case nir_intrinsic_image_size:
1461          case nir_intrinsic_image_load:
1462          case nir_intrinsic_image_store:
1463          case nir_intrinsic_image_atomic:
1464          case nir_intrinsic_image_atomic_swap:
1465          case nir_intrinsic_image_load_raw_intel:
1466          case nir_intrinsic_image_store_raw_intel:
1467             mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_IMAGE);
1468             break;
1469 
1470          case nir_intrinsic_load_ubo:
1471             mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_UBO);
1472             break;
1473 
1474          case nir_intrinsic_store_ssbo:
1475             mark_used_with_src(bt, &intrin->src[1], IRIS_SURFACE_GROUP_SSBO);
1476             break;
1477 
1478          case nir_intrinsic_get_ssbo_size:
1479          case nir_intrinsic_ssbo_atomic:
1480          case nir_intrinsic_ssbo_atomic_swap:
1481          case nir_intrinsic_load_ssbo:
1482             mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_SSBO);
1483             break;
1484 
1485          default:
1486             break;
1487          }
1488       }
1489    }
1490 
1491    /* When compaction is disabled, we just mark everything as used. */
1492    if (unlikely(skip_compacting_binding_tables())) {
1493       for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
1494          bt->used_mask[i] = BITFIELD64_MASK(bt->sizes[i]);
1495    }
1496 
1497    /* Calculate the offsets and the binding table size based on the used
1498     * surfaces.  After this point, the functions to go between "group indices"
1499     * and binding table indices can be used.
1500     */
1501    uint32_t next = 0;
1502    for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
1503       if (bt->used_mask[i] != 0) {
1504          bt->offsets[i] = next;
1505          next += util_bitcount64(bt->used_mask[i]);
1506       }
1507    }
1508    bt->size_bytes = next * 4;
1509 
1510    if (INTEL_DEBUG(DEBUG_BT)) {
1511       iris_print_binding_table(stderr, gl_shader_stage_name(info->stage), bt);
1512    }
1513 
1514    /* Apply the binding table indices.  The backend compiler is not expected
1515     * to change those, as we haven't set any of the *_start entries in brw
1516     * binding_table.
1517     */
1518    nir_builder b = nir_builder_create(impl);
1519 
1520    nir_foreach_block (block, impl) {
1521       nir_foreach_instr (instr, block) {
1522          if (instr->type == nir_instr_type_tex) {
1523             nir_tex_instr *tex = nir_instr_as_tex(instr);
1524             if (tex->texture_index < 64) {
1525                tex->texture_index =
1526                   iris_group_index_to_bti(bt, IRIS_SURFACE_GROUP_TEXTURE_LOW64,
1527                                           tex->texture_index);
1528             } else {
1529                tex->texture_index =
1530                   iris_group_index_to_bti(bt, IRIS_SURFACE_GROUP_TEXTURE_HIGH64,
1531                                           tex->texture_index - 64);
1532             }
1533             continue;
1534          }
1535 
1536          if (instr->type != nir_instr_type_intrinsic)
1537             continue;
1538 
1539          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1540          switch (intrin->intrinsic) {
1541          case nir_intrinsic_image_size:
1542          case nir_intrinsic_image_load:
1543          case nir_intrinsic_image_store:
1544          case nir_intrinsic_image_atomic:
1545          case nir_intrinsic_image_atomic_swap:
1546          case nir_intrinsic_image_load_raw_intel:
1547          case nir_intrinsic_image_store_raw_intel:
1548             rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1549                                  IRIS_SURFACE_GROUP_IMAGE);
1550             break;
1551 
1552          case nir_intrinsic_load_ubo:
1553             rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1554                                  IRIS_SURFACE_GROUP_UBO);
1555             break;
1556 
1557          case nir_intrinsic_store_ssbo:
1558             rewrite_src_with_bti(&b, bt, instr, &intrin->src[1],
1559                                  IRIS_SURFACE_GROUP_SSBO);
1560             break;
1561 
1562          case nir_intrinsic_load_output:
1563             if (devinfo->ver == 8) {
1564                rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1565                                     IRIS_SURFACE_GROUP_RENDER_TARGET_READ);
1566             }
1567             break;
1568 
1569          case nir_intrinsic_get_ssbo_size:
1570          case nir_intrinsic_ssbo_atomic:
1571          case nir_intrinsic_ssbo_atomic_swap:
1572          case nir_intrinsic_load_ssbo:
1573             rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1574                                  IRIS_SURFACE_GROUP_SSBO);
1575             break;
1576 
1577          default:
1578             break;
1579          }
1580       }
1581    }
1582 }
1583 
1584 static void
1585 iris_debug_recompile_brw(struct iris_screen *screen,
1586                          struct util_debug_callback *dbg,
1587                          struct iris_uncompiled_shader *ish,
1588                          const struct brw_base_prog_key *key)
1589 {
1590    if (!ish || list_is_empty(&ish->variants)
1591             || list_is_singular(&ish->variants))
1592       return;
1593 
1594    const struct brw_compiler *c = screen->brw;
1595    const struct shader_info *info = &ish->nir->info;
1596 
1597    brw_shader_perf_log(c, dbg, "Recompiling %s shader for program %s: %s\n",
1598                        _mesa_shader_stage_to_string(info->stage),
1599                        info->name ? info->name : "(no identifier)",
1600                        info->label ? info->label : "");
1601 
1602    struct iris_compiled_shader *shader =
1603       list_first_entry(&ish->variants, struct iris_compiled_shader, link);
1604    const void *old_iris_key = &shader->key;
1605 
1606    union brw_any_prog_key old_key;
1607 
1608    switch (info->stage) {
1609    case MESA_SHADER_VERTEX:
1610       old_key.vs = iris_to_brw_vs_key(screen, old_iris_key);
1611       break;
1612    case MESA_SHADER_TESS_CTRL:
1613       old_key.tcs = iris_to_brw_tcs_key(screen, old_iris_key);
1614       break;
1615    case MESA_SHADER_TESS_EVAL:
1616       old_key.tes = iris_to_brw_tes_key(screen, old_iris_key);
1617       break;
1618    case MESA_SHADER_GEOMETRY:
1619       old_key.gs = iris_to_brw_gs_key(screen, old_iris_key);
1620       break;
1621    case MESA_SHADER_FRAGMENT:
1622       old_key.wm = iris_to_brw_fs_key(screen, old_iris_key);
1623       break;
1624    case MESA_SHADER_COMPUTE:
1625       old_key.cs = iris_to_brw_cs_key(screen, old_iris_key);
1626       break;
1627    default:
1628       unreachable("invalid shader stage");
1629    }
1630 
1631    brw_debug_key_recompile(c, dbg, info->stage, &old_key.base, key);
1632 }
1633 
1634 static void
1635 iris_debug_recompile_elk(struct iris_screen *screen,
1636                          struct util_debug_callback *dbg,
1637                          struct iris_uncompiled_shader *ish,
1638                          const struct elk_base_prog_key *key)
1639 {
1640    if (!ish || list_is_empty(&ish->variants)
1641             || list_is_singular(&ish->variants))
1642       return;
1643 
1644    const struct elk_compiler *c = screen->elk;
1645    const struct shader_info *info = &ish->nir->info;
1646 
1647    elk_shader_perf_log(c, dbg, "Recompiling %s shader for program %s: %s\n",
1648                        _mesa_shader_stage_to_string(info->stage),
1649                        info->name ? info->name : "(no identifier)",
1650                        info->label ? info->label : "");
1651 
1652    struct iris_compiled_shader *shader =
1653       list_first_entry(&ish->variants, struct iris_compiled_shader, link);
1654    const void *old_iris_key = &shader->key;
1655 
1656    union elk_any_prog_key old_key;
1657 
1658    switch (info->stage) {
1659    case MESA_SHADER_VERTEX:
1660       old_key.vs = iris_to_elk_vs_key(screen, old_iris_key);
1661       break;
1662    case MESA_SHADER_TESS_CTRL:
1663       old_key.tcs = iris_to_elk_tcs_key(screen, old_iris_key);
1664       break;
1665    case MESA_SHADER_TESS_EVAL:
1666       old_key.tes = iris_to_elk_tes_key(screen, old_iris_key);
1667       break;
1668    case MESA_SHADER_GEOMETRY:
1669       old_key.gs = iris_to_elk_gs_key(screen, old_iris_key);
1670       break;
1671    case MESA_SHADER_FRAGMENT:
1672       old_key.wm = iris_to_elk_fs_key(screen, old_iris_key);
1673       break;
1674    case MESA_SHADER_COMPUTE:
1675       old_key.cs = iris_to_elk_cs_key(screen, old_iris_key);
1676       break;
1677    default:
1678       unreachable("invalid shader stage");
1679    }
1680 
1681    elk_debug_key_recompile(c, dbg, info->stage, &old_key.base, key);
1682 }
1683 
1684 
1685 static void
1686 check_urb_size(struct iris_context *ice,
1687                unsigned needed_size,
1688                gl_shader_stage stage)
1689 {
1690    unsigned last_allocated_size = ice->shaders.urb.cfg.size[stage];
1691 
1692    /* If the last URB allocation wasn't large enough for our needs,
1693     * flag it as needing to be reconfigured.  Otherwise, we can use
1694     * the existing config.  However, if the URB is constrained, and
1695     * we can shrink our size for this stage, we may be able to gain
1696     * extra concurrency by reconfiguring it to be smaller.  Do so.
1697     */
1698    if (last_allocated_size < needed_size ||
1699        (ice->shaders.urb.constrained && last_allocated_size > needed_size)) {
1700       ice->state.dirty |= IRIS_DIRTY_URB;
1701    }
1702 }
1703 
1704 /**
1705  * Get the shader for the last enabled geometry stage.
1706  *
1707  * This stage is the one which will feed stream output and the rasterizer.
1708  */
1709 static gl_shader_stage
1710 last_vue_stage(struct iris_context *ice)
1711 {
1712    if (ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
1713       return MESA_SHADER_GEOMETRY;
1714 
1715    if (ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
1716       return MESA_SHADER_TESS_EVAL;
1717 
1718    return MESA_SHADER_VERTEX;
1719 }
1720 
1721 /**
1722  * \param added  Set to \c true if the variant was added to the list (i.e., a
1723  *               variant matching \c key was not found).  Set to \c false
1724  *               otherwise.
1725  */
1726 static inline struct iris_compiled_shader *
1727 find_or_add_variant(const struct iris_screen *screen,
1728                     struct iris_uncompiled_shader *ish,
1729                     enum iris_program_cache_id cache_id,
1730                     const void *key, unsigned key_size,
1731                     bool *added)
1732 {
1733    struct list_head *start = ish->variants.next;
1734 
1735    *added = false;
1736 
1737    if (screen->precompile) {
1738       /* Check the first list entry.  There will always be at least one
1739        * variant in the list (most likely the precompile variant), and
1740        * other contexts only append new variants, so we can safely check
1741        * it without locking, saving that cost in the common case.
1742        */
1743       struct iris_compiled_shader *first =
1744          list_first_entry(&ish->variants, struct iris_compiled_shader, link);
1745 
1746       if (memcmp(&first->key, key, key_size) == 0) {
1747          util_queue_fence_wait(&first->ready);
1748          return first;
1749       }
1750 
1751       /* Skip this one in the loop below */
1752       start = first->link.next;
1753    }
1754 
1755    struct iris_compiled_shader *variant = NULL;
1756 
1757    /* If it doesn't match, we have to walk the list; other contexts may be
1758     * concurrently appending shaders to it, so we need to lock here.
1759     */
1760    simple_mtx_lock(&ish->lock);
1761 
1762    list_for_each_entry_from(struct iris_compiled_shader, v, start,
1763                             &ish->variants, link) {
1764       if (memcmp(&v->key, key, key_size) == 0) {
1765          variant = v;
1766          break;
1767       }
1768    }
1769 
1770    gl_shader_stage stage = ish->nir->info.stage;
1771 
1772    if (variant == NULL) {
1773       variant = iris_create_shader_variant(screen, NULL, stage, cache_id,
1774                                            key_size, key);
1775 
1776       /* Append our new variant to the shader's variant list. */
1777       list_addtail(&variant->link, &ish->variants);
1778       *added = true;
1779 
1780       simple_mtx_unlock(&ish->lock);
1781    } else {
1782       simple_mtx_unlock(&ish->lock);
1783 
1784       util_queue_fence_wait(&variant->ready);
1785    }
1786 
1787    assert(stage == variant->stage);
1788    return variant;
1789 }
1790 
1791 static void
1792 iris_threaded_compile_job_delete(void *_job, UNUSED void *_gdata,
1793                                  UNUSED int thread_index)
1794 {
1795    free(_job);
1796 }
1797 
1798 static void
1799 iris_schedule_compile(struct iris_screen *screen,
1800                       struct util_queue_fence *ready_fence,
1801                       struct util_debug_callback *dbg,
1802                       struct iris_threaded_compile_job *job,
1803                       util_queue_execute_func execute)
1804 
1805 {
1806    struct util_async_debug_callback async_debug;
1807 
1808    if (dbg) {
1809       u_async_debug_init(&async_debug);
1810       job->dbg = &async_debug.base;
1811    }
1812 
1813    util_queue_add_job(&screen->shader_compiler_queue, job, ready_fence, execute,
1814                       iris_threaded_compile_job_delete, 0);
1815 
1816    if (screen->driconf.sync_compile || dbg)
1817       util_queue_fence_wait(ready_fence);
1818 
1819    if (dbg) {
1820       u_async_debug_drain(&async_debug, dbg);
1821       u_async_debug_cleanup(&async_debug);
1822    }
1823 }
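/* Typical usage (editor's sketch; execute_fn is a hypothetical callback):
 * the job must be heap-allocated, since the queue frees it with
 * iris_threaded_compile_job_delete after the execute callback has run:
 *
 *    struct iris_threaded_compile_job *job = calloc(1, sizeof(*job));
 *    job->screen = screen;
 *    job->uploader = uploader;
 *    job->ish = ish;
 *    job->shader = shader;
 *    iris_schedule_compile(screen, &shader->ready, dbg, job, execute_fn);
 *
 * With driconf sync_compile (or an active debug callback) this blocks on
 * the ready fence; otherwise the compile proceeds asynchronously.
 */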
1824 
1825 /**
1826  * Compile a vertex shader, and upload the assembly.
1827  */
1828 static void
1829 iris_compile_vs(struct iris_screen *screen,
1830                 struct u_upload_mgr *uploader,
1831                 struct util_debug_callback *dbg,
1832                 struct iris_uncompiled_shader *ish,
1833                 struct iris_compiled_shader *shader)
1834 {
1835    const struct intel_device_info *devinfo = screen->devinfo;
1836    void *mem_ctx = ralloc_context(NULL);
1837    uint32_t *system_values;
1838    unsigned num_system_values;
1839    unsigned num_cbufs;
1840 
1841    nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1842    const struct iris_vs_prog_key *const key = &shader->key.vs;
1843 
1844    if (key->vue.nr_userclip_plane_consts) {
1845       nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1846       /* Check if variables were found. */
1847       if (nir_lower_clip_vs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
1848                             true, false, NULL)) {
1849          nir_lower_io_to_temporaries(nir, impl, true, false);
1850          nir_lower_global_vars_to_local(nir);
1851          nir_lower_vars_to_ssa(nir);
1852          nir_shader_gather_info(nir, impl);
1853       }
1854    }
1855 
1856    iris_setup_uniforms(devinfo, mem_ctx, nir, 0, &system_values,
1857                        &num_system_values, &num_cbufs);
1858 
1859    struct iris_binding_table bt;
1860    iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1861                             num_system_values, num_cbufs);
1862 
1863    const char *error;
1864    const unsigned *program;
1865    if (screen->brw) {
1866       struct brw_vs_prog_data *brw_prog_data =
1867          rzalloc(mem_ctx, struct brw_vs_prog_data);
1868 
1869       brw_prog_data->base.base.use_alt_mode = nir->info.use_legacy_math_rules;
1870 
1871       brw_nir_analyze_ubo_ranges(screen->brw, nir, brw_prog_data->base.base.ubo_ranges);
1872 
1873       brw_compute_vue_map(devinfo,
1874                           &brw_prog_data->base.vue_map, nir->info.outputs_written,
1875                           nir->info.separate_shader, /* pos_slots */ 1);
1876 
1877       struct brw_vs_prog_key brw_key = iris_to_brw_vs_key(screen, key);
1878 
1879       struct brw_compile_vs_params params = {
1880          .base = {
1881             .mem_ctx = mem_ctx,
1882             .nir = nir,
1883             .log_data = dbg,
1884             .source_hash = ish->source_hash,
1885          },
1886          .key = &brw_key,
1887          .prog_data = brw_prog_data,
1888       };
1889 
1890       program = brw_compile_vs(screen->brw, &params);
1891       error = params.base.error_str;
1892       if (program) {
1893          iris_apply_brw_prog_data(shader, &brw_prog_data->base.base);
1894          iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
1895       }
1896    } else {
1897       struct elk_vs_prog_data *elk_prog_data =
1898          rzalloc(mem_ctx, struct elk_vs_prog_data);
1899 
1900       elk_prog_data->base.base.use_alt_mode = nir->info.use_legacy_math_rules;
1901 
1902       elk_nir_analyze_ubo_ranges(screen->elk, nir, elk_prog_data->base.base.ubo_ranges);
1903 
1904       elk_compute_vue_map(devinfo,
1905                           &elk_prog_data->base.vue_map, nir->info.outputs_written,
1906                           nir->info.separate_shader, /* pos_slots */ 1);
1907 
1908       struct elk_vs_prog_key elk_key = iris_to_elk_vs_key(screen, key);
1909 
1910       struct elk_compile_vs_params params = {
1911          .base = {
1912             .mem_ctx = mem_ctx,
1913             .nir = nir,
1914             .log_data = dbg,
1915             .source_hash = ish->source_hash,
1916          },
1917          .key = &elk_key,
1918          .prog_data = elk_prog_data,
1919       };
1920 
1921       program = elk_compile_vs(screen->elk, &params);
1922       error = params.base.error_str;
1923       if (program) {
1924          iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
1925          iris_apply_elk_prog_data(shader, &elk_prog_data->base.base);
1926       }
1927    }
1928 
1929    if (program == NULL) {
1930       dbg_printf("Failed to compile vertex shader: %s\n", error);
1931       ralloc_free(mem_ctx);
1932 
1933       shader->compilation_failed = true;
1934       util_queue_fence_signal(&shader->ready);
1935 
1936       return;
1937    }
1938 
1939    shader->compilation_failed = false;
1940 
1941    uint32_t *so_decls =
1942       screen->vtbl.create_so_decl_list(&ish->stream_output,
1943                                        &iris_vue_data(shader)->vue_map);
1944 
1945    iris_finalize_program(shader, so_decls,
1946                          system_values, num_system_values, 0, num_cbufs, &bt);
1947 
1948    iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_VS,
1949                       sizeof(*key), key, program);
1950 
1951    iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1952 
1953    ralloc_free(mem_ctx);
1954 }
1955 
1956 /**
1957  * Update the current vertex shader variant.
1958  *
1959  * Fill out the key, look in the cache, compile and bind if needed.
1960  */
1961 static void
1962 iris_update_compiled_vs(struct iris_context *ice)
1963 {
1964    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1965    struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_VERTEX];
1966    struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
1967    struct iris_uncompiled_shader *ish =
1968       ice->shaders.uncompiled[MESA_SHADER_VERTEX];
1969 
1970    struct iris_vs_prog_key key = { KEY_INIT(vue.base) };
1971    screen->vtbl.populate_vs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
1972 
1973    struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_VS];
1974    bool added;
1975    struct iris_compiled_shader *shader =
1976       find_or_add_variant(screen, ish, IRIS_CACHE_VS, &key, sizeof(key), &added);
1977 
1978    if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
1979                                           &key, sizeof(key))) {
1980       iris_compile_vs(screen, uploader, &ice->dbg, ish, shader);
1981    }
1982 
1983    if (shader->compilation_failed)
1984       shader = NULL;
1985 
1986    if (old != shader) {
1987       iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_VERTEX],
1988                                     shader);
1989       ice->state.dirty |= IRIS_DIRTY_VF_SGVS;
1990       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_VS |
1991                                 IRIS_STAGE_DIRTY_BINDINGS_VS |
1992                                 IRIS_STAGE_DIRTY_CONSTANTS_VS;
1993       shs->sysvals_need_upload = true;
1994 
1995       unsigned urb_entry_size = shader ?
1996          iris_vue_data(shader)->urb_entry_size : 0;
1997       check_urb_size(ice, urb_entry_size, MESA_SHADER_VERTEX);
1998    }
1999 }
2000 
2001 /**
2002  * Get the shader_info for a given stage, or NULL if the stage is disabled.
2003  */
2004 const struct shader_info *
2005 iris_get_shader_info(const struct iris_context *ice, gl_shader_stage stage)
2006 {
2007    const struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[stage];
2008 
2009    if (!ish)
2010       return NULL;
2011 
2012    const nir_shader *nir = ish->nir;
2013    return &nir->info;
2014 }
2015 
2016 /**
2017  * Get the union of TCS output and TES input slots.
2018  *
2019  * TCS and TES need to agree on a common URB entry layout.  In particular,
2020  * the data for all patch vertices is stored in a single URB entry (unlike
2021  * GS which has one entry per input vertex).  This means that per-vertex
2022  * array indexing needs a stride.
2023  *
2024  * SSO requires locations to match, but doesn't require the number of
2025  * outputs/inputs to match (in fact, the TCS often has extra outputs).
2026  * So, we need to take the extra step of unifying these on the fly.
2027  */
2028 static void
2029 get_unified_tess_slots(const struct iris_context *ice,
2030                        uint64_t *per_vertex_slots,
2031                        uint32_t *per_patch_slots)
2032 {
2033    const struct shader_info *tcs =
2034       iris_get_shader_info(ice, MESA_SHADER_TESS_CTRL);
2035    const struct shader_info *tes =
2036       iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
2037 
2038    *per_vertex_slots = tes->inputs_read;
2039    *per_patch_slots = tes->patch_inputs_read;
2040 
2041    if (tcs) {
2042       *per_vertex_slots |= tcs->outputs_written;
2043       *per_patch_slots |= tcs->patch_outputs_written;
2044    }
2045 }
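/* Worked example (editor's illustration): if the TES reads per-vertex
 * slots {POS, VAR0} while the TCS additionally writes VAR1 (unread by the
 * TES), the unified per-vertex mask is the union {POS, VAR0, VAR1}, so
 * both stages compute the same URB entry stride.
 */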
2046 
2047 /**
2048  * Compile a tessellation control shader, and upload the assembly.
2049  */
2050 static void
2051 iris_compile_tcs(struct iris_screen *screen,
2052                  struct hash_table *passthrough_ht,
2053                  struct u_upload_mgr *uploader,
2054                  struct util_debug_callback *dbg,
2055                  struct iris_uncompiled_shader *ish,
2056                  struct iris_compiled_shader *shader)
2057 {
2058    void *mem_ctx = ralloc_context(NULL);
2059    const struct intel_device_info *devinfo = screen->devinfo;
2060    uint32_t *system_values = NULL;
2061    unsigned num_system_values = 0;
2062    unsigned num_cbufs = 0;
2063 
2064    nir_shader *nir;
2065 
2066    struct iris_binding_table bt;
2067 
2068    const struct iris_tcs_prog_key *const key = &shader->key.tcs;
2069    struct brw_tcs_prog_key brw_key = iris_to_brw_tcs_key(screen, key);
2070    struct elk_tcs_prog_key elk_key = iris_to_elk_tcs_key(screen, key);
2071    uint32_t source_hash;
2072 
2073    if (ish) {
2074       nir = nir_shader_clone(mem_ctx, ish->nir);
2075       source_hash = ish->source_hash;
2076    } else {
2077       if (screen->brw) {
2078          nir = brw_nir_create_passthrough_tcs(mem_ctx, screen->brw, &brw_key);
2079       } else {
2080          assert(screen->elk);
2081          nir = elk_nir_create_passthrough_tcs(mem_ctx, screen->elk, &elk_key);
2082       }
2083       source_hash = *(uint32_t*)nir->info.source_blake3;
2084    }
2085 
2086    iris_setup_uniforms(devinfo, mem_ctx, nir, 0, &system_values,
2087                        &num_system_values, &num_cbufs);
2088    iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
2089                             num_system_values, num_cbufs);
2090 
2091    const char *error = NULL;
2092    const unsigned *program;
2093    if (screen->brw) {
2094       struct brw_tcs_prog_data *brw_prog_data =
2095          rzalloc(mem_ctx, struct brw_tcs_prog_data);
2096       brw_nir_analyze_ubo_ranges(screen->brw, nir, brw_prog_data->base.base.ubo_ranges);
2097 
2098       struct brw_compile_tcs_params params = {
2099          .base = {
2100             .mem_ctx = mem_ctx,
2101             .nir = nir,
2102             .log_data = dbg,
2103             .source_hash = source_hash,
2104          },
2105          .key = &brw_key,
2106          .prog_data = brw_prog_data,
2107       };
2108 
2109       program = brw_compile_tcs(screen->brw, &params);
2110       error = params.base.error_str;
2111 
2112       if (program) {
2113          iris_apply_brw_prog_data(shader, &brw_prog_data->base.base);
2114          iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
2115       }
2116    } else {
2117       assert(screen->elk);
2118       struct elk_tcs_prog_data *elk_prog_data =
2119          rzalloc(mem_ctx, struct elk_tcs_prog_data);
2120       elk_nir_analyze_ubo_ranges(screen->elk, nir, elk_prog_data->base.base.ubo_ranges);
2121 
2122       struct elk_compile_tcs_params params = {
2123          .base = {
2124             .mem_ctx = mem_ctx,
2125             .nir = nir,
2126             .log_data = dbg,
2127             .source_hash = source_hash,
2128          },
2129          .key = &elk_key,
2130          .prog_data = elk_prog_data,
2131       };
2132 
2133       program = elk_compile_tcs(screen->elk, &params);
2134       error = params.base.error_str;
2135 
2136       if (program) {
2137          iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
2138          iris_apply_elk_prog_data(shader, &elk_prog_data->base.base);
2139       }
2140    }
2141 
2142    if (program == NULL) {
2143       dbg_printf("Failed to compile control shader: %s\n", error);
2144       ralloc_free(mem_ctx);
2145 
2146       shader->compilation_failed = true;
2147       util_queue_fence_signal(&shader->ready);
2148 
2149       return;
2150    }
2151 
2152    shader->compilation_failed = false;
2153 
2154    iris_finalize_program(shader, NULL, system_values,
2155                          num_system_values, 0, num_cbufs, &bt);
2156 
2157    iris_upload_shader(screen, ish, shader, passthrough_ht, uploader,
2158                       IRIS_CACHE_TCS, sizeof(*key), key, program);
2159 
2160    if (ish)
2161       iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
2162 
2163    ralloc_free(mem_ctx);
2164 }
2165 
2166 /**
2167  * Update the current tessellation control shader variant.
2168  *
2169  * Fill out the key, look in the cache, compile and bind if needed.
2170  */
2171 static void
2172 iris_update_compiled_tcs(struct iris_context *ice)
2173 {
2174    struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_CTRL];
2175    struct iris_uncompiled_shader *tcs =
2176       ice->shaders.uncompiled[MESA_SHADER_TESS_CTRL];
2177    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2178    struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
2179    const struct intel_device_info *devinfo = screen->devinfo;
2180 
2181    const struct shader_info *tes_info =
2182       iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
2183    struct iris_tcs_prog_key key = {
2184       .vue.base.program_string_id = tcs ? tcs->program_id : 0,
2185       ._tes_primitive_mode = tes_info->tess._primitive_mode,
2186       .input_vertices =
2187          !tcs || iris_use_tcs_multi_patch(screen) ? ice->state.vertices_per_patch : 0,
2188       .quads_workaround = devinfo->ver < 9 &&
2189                           tes_info->tess._primitive_mode == TESS_PRIMITIVE_QUADS &&
2190                           tes_info->tess.spacing == TESS_SPACING_EQUAL,
2191    };
2192    get_unified_tess_slots(ice, &key.outputs_written,
2193                           &key.patch_outputs_written);
2194    screen->vtbl.populate_tcs_key(ice, &key);
2195 
2196    struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TCS];
2197    struct iris_compiled_shader *shader;
2198    bool added = false;
2199 
2200    if (tcs != NULL) {
2201       shader = find_or_add_variant(screen, tcs, IRIS_CACHE_TCS, &key,
2202                                    sizeof(key), &added);
2203    } else {
2204       /* Look for and possibly create a passthrough TCS */
2205       shader = iris_find_cached_shader(ice, IRIS_CACHE_TCS, sizeof(key), &key);
2206 
2207 
2208       if (shader == NULL) {
2209          shader = iris_create_shader_variant(screen, ice->shaders.cache,
2210                                              MESA_SHADER_TESS_CTRL,
2211                                              IRIS_CACHE_TCS, sizeof(key), &key);
2212          added = true;
2213       }
2214 
2215    }
2216 
2217    /* If a new variant was added to (whichever) cache, compile it now,
2218     * unless we have a real TCS whose assembly could be retrieved from the
2219     * disk cache.
2220     */
2221    if (added &&
2222        (tcs == NULL || !iris_disk_cache_retrieve(screen, uploader, tcs, shader,
2223                                                  &key, sizeof(key)))) {
2224       iris_compile_tcs(screen, ice->shaders.cache, uploader, &ice->dbg, tcs,
2225                        shader);
2226    }
2227 
2228    if (shader->compilation_failed)
2229       shader = NULL;
2230 
2231    if (old != shader) {
2232       iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_CTRL],
2233                                     shader);
2234       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_TCS |
2235                                 IRIS_STAGE_DIRTY_BINDINGS_TCS |
2236                                 IRIS_STAGE_DIRTY_CONSTANTS_TCS;
2237       shs->sysvals_need_upload = true;
2238 
2239       unsigned urb_entry_size = shader ?
2240          iris_vue_data(shader)->urb_entry_size : 0;
2241       check_urb_size(ice, urb_entry_size, MESA_SHADER_TESS_CTRL);
2242    }
2243 }
2244 
2245 /**
2246  * Compile a tessellation evaluation shader, and upload the assembly.
2247  */
2248 static void
2249 iris_compile_tes(struct iris_screen *screen,
2250                  struct u_upload_mgr *uploader,
2251                  struct util_debug_callback *dbg,
2252                  struct iris_uncompiled_shader *ish,
2253                  struct iris_compiled_shader *shader)
2254 {
2255    void *mem_ctx = ralloc_context(NULL);
2256    uint32_t *system_values;
2257    const struct intel_device_info *devinfo = screen->devinfo;
2258    unsigned num_system_values;
2259    unsigned num_cbufs;
2260 
2261    nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
2262    const struct iris_tes_prog_key *const key = &shader->key.tes;
2263 
2264    if (key->vue.nr_userclip_plane_consts) {
2265       nir_function_impl *impl = nir_shader_get_entrypoint(nir);
2266       nir_lower_clip_vs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
2267                         true, false, NULL);
2268       nir_lower_io_to_temporaries(nir, impl, true, false);
2269       nir_lower_global_vars_to_local(nir);
2270       nir_lower_vars_to_ssa(nir);
2271       nir_shader_gather_info(nir, impl);
2272    }
2273 
2274    iris_setup_uniforms(devinfo, mem_ctx, nir, 0, &system_values,
2275                        &num_system_values, &num_cbufs);
2276 
2277    struct iris_binding_table bt;
2278    iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
2279                             num_system_values, num_cbufs);
2280 
2281    const char *error;
2282    const unsigned *program;
2283 
2284    if (screen->brw) {
2285       struct brw_tes_prog_data *brw_prog_data =
2286          rzalloc(mem_ctx, struct brw_tes_prog_data);
2287 
2288       brw_nir_analyze_ubo_ranges(screen->brw, nir, brw_prog_data->base.base.ubo_ranges);
2289 
2290       struct intel_vue_map input_vue_map;
2291       brw_compute_tess_vue_map(&input_vue_map, key->inputs_read,
2292                                key->patch_inputs_read);
2293 
2294       struct brw_tes_prog_key brw_key = iris_to_brw_tes_key(screen, key);
2295 
2296       struct brw_compile_tes_params params = {
2297          .base = {
2298             .mem_ctx = mem_ctx,
2299             .nir = nir,
2300             .log_data = dbg,
2301             .source_hash = ish->source_hash,
2302          },
2303          .key = &brw_key,
2304          .prog_data = brw_prog_data,
2305          .input_vue_map = &input_vue_map,
2306       };
2307 
2308       program = brw_compile_tes(screen->brw, &params);
2309       error = params.base.error_str;
2310 
2311       if (program) {
2312          iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
2313          iris_apply_brw_prog_data(shader, &brw_prog_data->base.base);
2314       }
2315    } else {
2316       struct elk_tes_prog_data *elk_prog_data =
2317          rzalloc(mem_ctx, struct elk_tes_prog_data);
2318 
2319       elk_nir_analyze_ubo_ranges(screen->elk, nir, elk_prog_data->base.base.ubo_ranges);
2320 
2321       struct intel_vue_map input_vue_map;
2322       elk_compute_tess_vue_map(&input_vue_map, key->inputs_read,
2323                                key->patch_inputs_read);
2324 
2325       struct elk_tes_prog_key elk_key = iris_to_elk_tes_key(screen, key);
2326 
2327       struct elk_compile_tes_params params = {
2328          .base = {
2329             .mem_ctx = mem_ctx,
2330             .nir = nir,
2331             .log_data = dbg,
2332             .source_hash = ish->source_hash,
2333          },
2334          .key = &elk_key,
2335          .prog_data = elk_prog_data,
2336          .input_vue_map = &input_vue_map,
2337       };
2338 
2339       program = elk_compile_tes(screen->elk, &params);
2340       error = params.base.error_str;
2341 
2342       if (program) {
2343          iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
2344          iris_apply_elk_prog_data(shader, &elk_prog_data->base.base);
2345       }
2346    }
2347 
2348    if (program == NULL) {
2349       dbg_printf("Failed to compile evaluation shader: %s\n", error);
2350       ralloc_free(mem_ctx);
2351 
2352       shader->compilation_failed = true;
2353       util_queue_fence_signal(&shader->ready);
2354 
2355       return;
2356    }
2357 
2358    shader->compilation_failed = false;
2359 
2360    uint32_t *so_decls =
2361       screen->vtbl.create_so_decl_list(&ish->stream_output,
2362                                        &iris_vue_data(shader)->vue_map);
2363 
2364    iris_finalize_program(shader, so_decls, system_values,
2365                          num_system_values, 0, num_cbufs, &bt);
2366 
2367    iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_TES,
2368                       sizeof(*key), key, program);
2369 
2370    iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
2371 
2372    ralloc_free(mem_ctx);
2373 }
2374 
2375 /**
2376  * Update the current tessellation evaluation shader variant.
2377  *
2378  * Fill out the key, look in the cache, compile and bind if needed.
2379  */
2380 static void
2381 iris_update_compiled_tes(struct iris_context *ice)
2382 {
2383    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2384    struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
2385    struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_EVAL];
2386    struct iris_uncompiled_shader *ish =
2387       ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
2388 
2389    struct iris_tes_prog_key key = { KEY_INIT(vue.base) };
2390    get_unified_tess_slots(ice, &key.inputs_read, &key.patch_inputs_read);
2391    screen->vtbl.populate_tes_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
2392 
2393    struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TES];
2394    bool added;
2395    struct iris_compiled_shader *shader =
2396       find_or_add_variant(screen, ish, IRIS_CACHE_TES, &key, sizeof(key), &added);
2397 
2398    if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
2399                                           &key, sizeof(key))) {
2400       iris_compile_tes(screen, uploader, &ice->dbg, ish, shader);
2401    }
2402 
2403    if (shader->compilation_failed)
2404       shader = NULL;
2405 
2406    if (old != shader) {
2407       iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_EVAL],
2408                                     shader);
2409       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_TES |
2410                                 IRIS_STAGE_DIRTY_BINDINGS_TES |
2411                                 IRIS_STAGE_DIRTY_CONSTANTS_TES;
2412       shs->sysvals_need_upload = true;
2413 
2414       unsigned urb_entry_size = shader ?
2415          iris_vue_data(shader)->urb_entry_size : 0;
2416       check_urb_size(ice, urb_entry_size, MESA_SHADER_TESS_EVAL);
2417    }
2418 
2419    /* TODO: Could compare and avoid flagging this. */
2420    const struct shader_info *tes_info = &ish->nir->info;
2421    if (BITSET_TEST(tes_info->system_values_read, SYSTEM_VALUE_VERTICES_IN)) {
2422       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CONSTANTS_TES;
2423       ice->state.shaders[MESA_SHADER_TESS_EVAL].sysvals_need_upload = true;
2424    }
2425 }
2426 
2427 /**
2428  * Compile a geometry shader, and upload the assembly.
2429  */
2430 static void
2431 iris_compile_gs(struct iris_screen *screen,
2432                 struct u_upload_mgr *uploader,
2433                 struct util_debug_callback *dbg,
2434                 struct iris_uncompiled_shader *ish,
2435                 struct iris_compiled_shader *shader)
2436 {
2437    const struct intel_device_info *devinfo = screen->devinfo;
2438    void *mem_ctx = ralloc_context(NULL);
2439    uint32_t *system_values;
2440    unsigned num_system_values;
2441    unsigned num_cbufs;
2442 
2443    nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
2444    const struct iris_gs_prog_key *const key = &shader->key.gs;
2445 
2446    if (key->vue.nr_userclip_plane_consts) {
2447       nir_function_impl *impl = nir_shader_get_entrypoint(nir);
2448       nir_lower_clip_gs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
2449                         false, NULL);
2450       nir_lower_io_to_temporaries(nir, impl, true, false);
2451       nir_lower_global_vars_to_local(nir);
2452       nir_lower_vars_to_ssa(nir);
2453       nir_shader_gather_info(nir, impl);
2454    }
2455 
2456    iris_setup_uniforms(devinfo, mem_ctx, nir, 0, &system_values,
2457                        &num_system_values, &num_cbufs);
2458 
2459    struct iris_binding_table bt;
2460    iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
2461                             num_system_values, num_cbufs);
2462 
2463    const char *error;
2464    const unsigned *program;
2465    if (screen->brw) {
2466       struct brw_gs_prog_data *brw_prog_data =
2467          rzalloc(mem_ctx, struct brw_gs_prog_data);
2468 
2469       brw_nir_analyze_ubo_ranges(screen->brw, nir, brw_prog_data->base.base.ubo_ranges);
2470 
2471       brw_compute_vue_map(devinfo,
2472                           &brw_prog_data->base.vue_map, nir->info.outputs_written,
2473                           nir->info.separate_shader, /* pos_slots */ 1);
2474 
2475       struct brw_gs_prog_key brw_key = iris_to_brw_gs_key(screen, key);
2476 
2477       struct brw_compile_gs_params params = {
2478          .base = {
2479             .mem_ctx = mem_ctx,
2480             .nir = nir,
2481             .log_data = dbg,
2482             .source_hash = ish->source_hash,
2483          },
2484          .key = &brw_key,
2485          .prog_data = brw_prog_data,
2486       };
2487 
2488       program = brw_compile_gs(screen->brw, &params);
2489       error = params.base.error_str;
2490       if (program) {
2491          iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
2492          iris_apply_brw_prog_data(shader, &brw_prog_data->base.base);
2493       }
2494    } else {
2495       struct elk_gs_prog_data *elk_prog_data =
2496          rzalloc(mem_ctx, struct elk_gs_prog_data);
2497 
2498       elk_nir_analyze_ubo_ranges(screen->elk, nir, elk_prog_data->base.base.ubo_ranges);
2499 
2500       elk_compute_vue_map(devinfo,
2501                           &elk_prog_data->base.vue_map, nir->info.outputs_written,
2502                           nir->info.separate_shader, /* pos_slots */ 1);
2503 
2504       struct elk_gs_prog_key elk_key = iris_to_elk_gs_key(screen, key);
2505 
2506       struct elk_compile_gs_params params = {
2507          .base = {
2508             .mem_ctx = mem_ctx,
2509             .nir = nir,
2510             .log_data = dbg,
2511             .source_hash = ish->source_hash,
2512          },
2513          .key = &elk_key,
2514          .prog_data = elk_prog_data,
2515       };
2516 
2517       program = elk_compile_gs(screen->elk, &params);
2518       error = params.base.error_str;
2519       if (program) {
2520          iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
2521          iris_apply_elk_prog_data(shader, &elk_prog_data->base.base);
2522       }
2523    }
2524 
2525    if (program == NULL) {
2526       dbg_printf("Failed to compile geometry shader: %s\n", error);
2527       ralloc_free(mem_ctx);
2528 
2529       shader->compilation_failed = true;
2530       util_queue_fence_signal(&shader->ready);
2531 
2532       return;
2533    }
2534 
2535    shader->compilation_failed = false;
2536 
2537    uint32_t *so_decls =
2538       screen->vtbl.create_so_decl_list(&ish->stream_output,
2539                                        &iris_vue_data(shader)->vue_map);
2540 
2541    iris_finalize_program(shader, so_decls, system_values,
2542                          num_system_values, 0, num_cbufs, &bt);
2543 
2544    iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_GS,
2545                       sizeof(*key), key, program);
2546 
2547    iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
2548 
2549    ralloc_free(mem_ctx);
2550 }
2551 
2552 /**
2553  * Update the current geometry shader variant.
2554  *
2555  * Fill out the key, look in the cache, compile and bind if needed.
2556  */
2557 static void
2558 iris_update_compiled_gs(struct iris_context *ice)
2559 {
2560    struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_GEOMETRY];
2561    struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
2562    struct iris_uncompiled_shader *ish =
2563       ice->shaders.uncompiled[MESA_SHADER_GEOMETRY];
2564    struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_GS];
2565    struct iris_compiled_shader *shader = NULL;
2566    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2567 
2568    if (ish) {
2569       struct iris_gs_prog_key key = { KEY_INIT(vue.base) };
2570       screen->vtbl.populate_gs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
2571 
2572       bool added;
2573 
2574       shader = find_or_add_variant(screen, ish, IRIS_CACHE_GS, &key,
2575                                    sizeof(key), &added);
2576 
2577       if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
2578                                              &key, sizeof(key))) {
2579          iris_compile_gs(screen, uploader, &ice->dbg, ish, shader);
2580       }
2581 
2582       if (shader->compilation_failed)
2583          shader = NULL;
2584    }
2585 
2586    if (old != shader) {
2587       iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_GEOMETRY],
2588                                     shader);
2589       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_GS |
2590                                 IRIS_STAGE_DIRTY_BINDINGS_GS |
2591                                 IRIS_STAGE_DIRTY_CONSTANTS_GS;
2592       shs->sysvals_need_upload = true;
2593 
2594       unsigned urb_entry_size = shader ?
2595          iris_vue_data(shader)->urb_entry_size : 0;
2596       check_urb_size(ice, urb_entry_size, MESA_SHADER_GEOMETRY);
2597    }
2598 }
2599 
2600 /**
2601  * Compile a fragment (pixel) shader, and upload the assembly.
2602  */
2603 static void
2604 iris_compile_fs(struct iris_screen *screen,
2605                 struct u_upload_mgr *uploader,
2606                 struct util_debug_callback *dbg,
2607                 struct iris_uncompiled_shader *ish,
2608                 struct iris_compiled_shader *shader,
2609                 struct intel_vue_map *vue_map)
2610 {
2611    void *mem_ctx = ralloc_context(NULL);
2612    uint32_t *system_values;
2613    const struct intel_device_info *devinfo = screen->devinfo;
2614    unsigned num_system_values;
2615    unsigned num_cbufs;
2616 
2617    nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
2618    const struct iris_fs_prog_key *const key = &shader->key.fs;
2619 
2620    iris_setup_uniforms(devinfo, mem_ctx, nir, 0, &system_values,
2621                        &num_system_values, &num_cbufs);
2622 
2623    /* Lower output variables to load_output intrinsics before setting up
2624     * binding tables, so iris_setup_binding_table can map any load_output
2625     * intrinsics to IRIS_SURFACE_GROUP_RENDER_TARGET_READ on Gfx8 for
2626     * non-coherent framebuffer fetches.
2627     */
2628    brw_nir_lower_fs_outputs(nir);
2629 
2630    /* On Gfx11+, shader RT write messages have a "Null Render Target" bit
2631     * and do not need a binding table entry with a null surface.  Earlier
2632     * generations need an entry for a null surface.
2633     */
2634    int null_rts = devinfo->ver < 11 ? 1 : 0;
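   /* Worked example (editor's illustration): a depth-only pass with
    * nr_color_regions = 0 still gets MAX2(0, 1) = 1 null-surface RT entry
    * on Gfx9, while on Gfx12 the "Null Render Target" message bit lets the
    * group be MAX2(0, 0) = 0 entries.
    */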
2635 
2636    struct iris_binding_table bt;
2637    iris_setup_binding_table(devinfo, nir, &bt,
2638                             MAX2(key->nr_color_regions, null_rts),
2639                             num_system_values, num_cbufs);
2640 
2641    const char *error;
2642    const unsigned *program;
2643 
2644    if (screen->brw) {
2645       struct brw_wm_prog_data *brw_prog_data =
2646          rzalloc(mem_ctx, struct brw_wm_prog_data);
2647 
2648       brw_prog_data->base.use_alt_mode = nir->info.use_legacy_math_rules;
2649 
2650       brw_nir_analyze_ubo_ranges(screen->brw, nir, brw_prog_data->base.ubo_ranges);
2651 
2652       struct brw_wm_prog_key brw_key = iris_to_brw_fs_key(screen, key);
2653 
2654       struct brw_compile_fs_params params = {
2655          .base = {
2656             .mem_ctx = mem_ctx,
2657             .nir = nir,
2658             .log_data = dbg,
2659             .source_hash = ish->source_hash,
2660          },
2661          .key = &brw_key,
2662          .prog_data = brw_prog_data,
2663 
2664          .allow_spilling = true,
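         /* UCHAR_MAX leaves multi-polygon fragment dispatch effectively
          * uncapped; the backend is assumed to clamp this to whatever the
          * hardware's dispatch modes actually support.
          */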
2665          .max_polygons = UCHAR_MAX,
2666          .vue_map = vue_map,
2667       };
2668 
2669       program = brw_compile_fs(screen->brw, &params);
2670       error = params.base.error_str;
2671       if (program) {
2672          iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
2673          iris_apply_brw_prog_data(shader, &brw_prog_data->base);
2674       }
2675    } else {
2676       struct elk_wm_prog_data *elk_prog_data =
2677          rzalloc(mem_ctx, struct elk_wm_prog_data);
2678 
2679       elk_prog_data->base.use_alt_mode = nir->info.use_legacy_math_rules;
2680 
2681       elk_nir_analyze_ubo_ranges(screen->elk, nir, elk_prog_data->base.ubo_ranges);
2682 
2683       struct elk_wm_prog_key elk_key = iris_to_elk_fs_key(screen, key);
2684 
2685       struct elk_compile_fs_params params = {
2686          .base = {
2687             .mem_ctx = mem_ctx,
2688             .nir = nir,
2689             .log_data = dbg,
2690             .source_hash = ish->source_hash,
2691          },
2692          .key = &elk_key,
2693          .prog_data = elk_prog_data,
2694 
2695          .allow_spilling = true,
2696          .max_polygons = UCHAR_MAX,
2697          .vue_map = vue_map,
2698       };
2699 
2700       program = elk_compile_fs(screen->elk, &params);
2701       error = params.base.error_str;
2702       if (program) {
2703          iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
2704          iris_apply_elk_prog_data(shader, &elk_prog_data->base);
2705       }
2706    }
2707 
2708    if (program == NULL) {
2709       dbg_printf("Failed to compile fragment shader: %s\n", error);
2710       ralloc_free(mem_ctx);
2711 
2712       shader->compilation_failed = true;
2713       util_queue_fence_signal(&shader->ready);
2714 
2715       return;
2716    }
2717 
2718    shader->compilation_failed = false;
2719 
2720    iris_finalize_program(shader, NULL, system_values,
2721                          num_system_values, 0, num_cbufs, &bt);
2722 
2723    iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_FS,
2724                       sizeof(*key), key, program);
2725 
2726    iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
2727 
2728    ralloc_free(mem_ctx);
2729 }
2730 
2731 /**
2732  * Update the current fragment shader variant.
2733  *
2734  * Fill out the key, look in the cache, compile and bind if needed.
2735  */
2736 static void
2737 iris_update_compiled_fs(struct iris_context *ice)
2738 {
2739    struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_FRAGMENT];
2740    struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
2741    struct iris_uncompiled_shader *ish =
2742       ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
2743    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2744    struct iris_fs_prog_key key = { KEY_INIT(base) };
2745    screen->vtbl.populate_fs_key(ice, &ish->nir->info, &key);
2746 
2747    struct intel_vue_map *last_vue_map =
2748       &iris_vue_data(ice->shaders.last_vue_shader)->vue_map;
2749 
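   /* IRIS_NOS_LAST_VUE_MAP is only set for fragment shaders with more than
    * 16 inputs (see iris_create_shader_state), so most keys leave
    * input_slots_valid as zero and stay independent of the VUE map.
    */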
2750    if (ish->nos & (1ull << IRIS_NOS_LAST_VUE_MAP))
2751       key.input_slots_valid = last_vue_map->slots_valid;
2752 
2753    struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_FS];
2754    bool added;
2755    struct iris_compiled_shader *shader =
2756       find_or_add_variant(screen, ish, IRIS_CACHE_FS, &key,
2757                           sizeof(key), &added);
2758 
2759    if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
2760                                           &key, sizeof(key))) {
2761       iris_compile_fs(screen, uploader, &ice->dbg, ish, shader, last_vue_map);
2762    }
2763 
2764    if (shader->compilation_failed)
2765       shader = NULL;
2766 
2767    if (old != shader) {
2768       // XXX: only need to flag CLIP if barycentric has NONPERSPECTIVE
2769       // toggles.  might be able to avoid flagging SBE too.
2770       iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_FRAGMENT],
2771                                     shader);
2772       ice->state.dirty |= IRIS_DIRTY_WM |
2773                           IRIS_DIRTY_CLIP |
2774                           IRIS_DIRTY_SBE;
2775       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_FS |
2776                                 IRIS_STAGE_DIRTY_BINDINGS_FS |
2777                                 IRIS_STAGE_DIRTY_CONSTANTS_FS;
2778       shs->sysvals_need_upload = true;
2779    }
2780 }
2781 
2782 /**
2783  * Update the last enabled stage's VUE map.
2784  *
2785  * When the output interface of the shader feeding the rasterizer changes,
2786  * we need to re-emit various packets.
2787  */
2788 static void
2789 update_last_vue_map(struct iris_context *ice,
2790                     struct iris_compiled_shader *shader)
2791 {
2792    const struct intel_vue_map *vue_map = &iris_vue_data(shader)->vue_map;
2793    const struct intel_vue_map *old_map =
2794       !ice->shaders.last_vue_shader ? NULL :
2795       &iris_vue_data(ice->shaders.last_vue_shader)->vue_map;
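   /* XOR the old and new slot masks: a bit is set for each varying slot
    * that was either added or removed by the shader change.
    */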
2796    const uint64_t changed_slots =
2797       (old_map ? old_map->slots_valid : 0ull) ^ vue_map->slots_valid;
2798 
2799    if (changed_slots & VARYING_BIT_VIEWPORT) {
2800       ice->state.num_viewports =
2801          (vue_map->slots_valid & VARYING_BIT_VIEWPORT) ? IRIS_MAX_VIEWPORTS : 1;
2802       ice->state.dirty |= IRIS_DIRTY_CLIP |
2803                           IRIS_DIRTY_SF_CL_VIEWPORT |
2804                           IRIS_DIRTY_CC_VIEWPORT |
2805                           IRIS_DIRTY_SCISSOR_RECT;
2806       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_UNCOMPILED_FS |
2807          ice->state.stage_dirty_for_nos[IRIS_NOS_LAST_VUE_MAP];
2808    }
2809 
2810    if (changed_slots & VARYING_BIT_LAYER) {
2811       ice->state.dirty |= IRIS_DIRTY_CLIP;
2812    }
2813 
2814    if (changed_slots || (old_map && old_map->separate != vue_map->separate)) {
2815       ice->state.dirty |= IRIS_DIRTY_SBE;
2816    }
2817 
2818    iris_shader_variant_reference(&ice->shaders.last_vue_shader, shader);
2819 }
2820 
2821 static void
2822 iris_update_pull_constant_descriptors(struct iris_context *ice,
2823                                       gl_shader_stage stage)
2824 {
2825    struct iris_compiled_shader *shader = ice->shaders.prog[stage];
2826 
2827    if (!shader || !shader->has_ubo_pull)
2828       return;
2829 
2830    struct iris_shader_state *shs = &ice->state.shaders[stage];
2831    bool any_new_descriptors =
2832       shader->num_system_values > 0 && shs->sysvals_need_upload;
2833 
2834    unsigned bound_cbufs = shs->bound_cbufs;
2835 
2836    while (bound_cbufs) {
2837       const int i = u_bit_scan(&bound_cbufs);
2838       struct pipe_shader_buffer *cbuf = &shs->constbuf[i];
2839       struct iris_state_ref *surf_state = &shs->constbuf_surf_state[i];
2840       if (!surf_state->res && cbuf->buffer) {
2841          iris_upload_ubo_ssbo_surf_state(ice, cbuf, surf_state,
2842                                          ISL_SURF_USAGE_CONSTANT_BUFFER_BIT);
2843          any_new_descriptors = true;
2844       }
2845    }
2846 
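   /* The per-stage dirty bits are laid out consecutively, so shifting the
    * VS bit by the stage index selects the right stage's bit.
    */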
2847    if (any_new_descriptors)
2848       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_BINDINGS_VS << stage;
2849 }
2850 
2851 /**
2852  * Update the current shader variants for the given state.
2853  *
2854  * This should be called on every draw call to ensure that the correct
2855  * shaders are bound.  It will also flag any dirty state triggered by
2856  * swapping out those shaders.
2857  */
2858 void
2859 iris_update_compiled_shaders(struct iris_context *ice)
2860 {
2861    const uint64_t stage_dirty = ice->state.stage_dirty;
2862 
2863    if (stage_dirty & (IRIS_STAGE_DIRTY_UNCOMPILED_TCS |
2864                       IRIS_STAGE_DIRTY_UNCOMPILED_TES)) {
2865        struct iris_uncompiled_shader *tes =
2866           ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
2867        if (tes) {
2868           iris_update_compiled_tcs(ice);
2869           iris_update_compiled_tes(ice);
2870        } else {
2871           iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_CTRL], NULL);
2872           iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_EVAL], NULL);
2873           ice->state.stage_dirty |=
2874              IRIS_STAGE_DIRTY_TCS | IRIS_STAGE_DIRTY_TES |
2875              IRIS_STAGE_DIRTY_BINDINGS_TCS | IRIS_STAGE_DIRTY_BINDINGS_TES |
2876              IRIS_STAGE_DIRTY_CONSTANTS_TCS | IRIS_STAGE_DIRTY_CONSTANTS_TES;
2877 
2878           if (ice->shaders.urb.constrained)
2879              ice->state.dirty |= IRIS_DIRTY_URB;
2880        }
2881    }
2882 
2883    if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_VS)
2884       iris_update_compiled_vs(ice);
2885    if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_GS)
2886       iris_update_compiled_gs(ice);
2887 
2888    if (stage_dirty & (IRIS_STAGE_DIRTY_UNCOMPILED_GS |
2889                       IRIS_STAGE_DIRTY_UNCOMPILED_TES)) {
2890       const struct iris_compiled_shader *gs =
2891          ice->shaders.prog[MESA_SHADER_GEOMETRY];
2892       const struct iris_compiled_shader *tes =
2893          ice->shaders.prog[MESA_SHADER_TESS_EVAL];
2894 
2895       bool points_or_lines = false;
2896 
2897       if (gs) {
2898          const struct iris_gs_data *gs_data = iris_gs_data_const(gs);
2899          points_or_lines =
2900             gs_data->output_topology == _3DPRIM_POINTLIST ||
2901             gs_data->output_topology == _3DPRIM_LINESTRIP;
2902       } else if (tes) {
2903          const struct iris_tes_data *tes_data = iris_tes_data_const(tes);
2904          points_or_lines =
2905             tes_data->output_topology == INTEL_TESS_OUTPUT_TOPOLOGY_LINE ||
2906             tes_data->output_topology == INTEL_TESS_OUTPUT_TOPOLOGY_POINT;
2907       }
2908 
2909       if (ice->shaders.output_topology_is_points_or_lines != points_or_lines) {
2910          /* Outbound to XY Clip enables */
2911          ice->shaders.output_topology_is_points_or_lines = points_or_lines;
2912          ice->state.dirty |= IRIS_DIRTY_CLIP;
2913       }
2914    }
2915 
2916    gl_shader_stage last_stage = last_vue_stage(ice);
2917    struct iris_compiled_shader *shader = ice->shaders.prog[last_stage];
2918    struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[last_stage];
2919    update_last_vue_map(ice, shader);
2920    if (ice->state.streamout != shader->streamout) {
2921       ice->state.streamout = shader->streamout;
2922       ice->state.dirty |= IRIS_DIRTY_SO_DECL_LIST | IRIS_DIRTY_STREAMOUT;
2923    }
2924 
2925    if (ice->state.streamout_active) {
2926       for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
2927          struct iris_stream_output_target *so =
2928             (void *) ice->state.so_target[i];
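         /* Gallium expresses stream output strides in dwords; iris stores
          * them in bytes.
          */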
2929          if (so)
2930             so->stride = ish->stream_output.stride[i] * sizeof(uint32_t);
2931       }
2932    }
2933 
2934    if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_FS)
2935       iris_update_compiled_fs(ice);
2936 
2937    for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_FRAGMENT; i++) {
2938       if (ice->state.stage_dirty & (IRIS_STAGE_DIRTY_CONSTANTS_VS << i))
2939          iris_update_pull_constant_descriptors(ice, i);
2940    }
2941 }
2942 
2943 static void
2944 iris_compile_cs(struct iris_screen *screen,
2945                 struct u_upload_mgr *uploader,
2946                 struct util_debug_callback *dbg,
2947                 struct iris_uncompiled_shader *ish,
2948                 struct iris_compiled_shader *shader)
2949 {
2950    void *mem_ctx = ralloc_context(NULL);
2951    uint32_t *system_values;
2952    const struct intel_device_info *devinfo = screen->devinfo;
2953    unsigned num_system_values;
2954    unsigned num_cbufs;
2955 
2956    nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
2957    const struct iris_cs_prog_key *const key = &shader->key.cs;
2958 
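   /* Lower compute built-ins (workgroup/local invocation IDs and friends)
    * into the form the backend expects before setting up uniforms; a rough
    * summary, see the lowering pass itself for the details.
    */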
2959    if (screen->brw)
2960       NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics, devinfo, NULL);
2961    else
2962       NIR_PASS_V(nir, elk_nir_lower_cs_intrinsics, devinfo, NULL);
2963 
2964    iris_setup_uniforms(devinfo, mem_ctx, nir, ish->kernel_input_size,
2965                        &system_values, &num_system_values, &num_cbufs);
2966 
2967    struct iris_binding_table bt;
2968    iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
2969                             num_system_values, num_cbufs);
2970 
2971    const char *error;
2972    const unsigned *program;
2973 
2974    if (screen->brw) {
2975       struct brw_cs_prog_key brw_key = iris_to_brw_cs_key(screen, key);
2976 
2977       struct brw_cs_prog_data *brw_prog_data =
2978          rzalloc(mem_ctx, struct brw_cs_prog_data);
2979 
2980       struct brw_compile_cs_params params = {
2981          .base = {
2982             .mem_ctx = mem_ctx,
2983             .nir = nir,
2984             .log_data = dbg,
2985             .source_hash = ish->source_hash,
2986          },
2987          .key = &brw_key,
2988          .prog_data = brw_prog_data,
2989       };
2990 
2991       program = brw_compile_cs(screen->brw, &params);
2992       error = params.base.error_str;
2993       if (program) {
2994          iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
2995          iris_apply_brw_prog_data(shader, &brw_prog_data->base);
2996       }
2997    } else {
2998       struct elk_cs_prog_key elk_key = iris_to_elk_cs_key(screen, key);
2999 
3000       struct elk_cs_prog_data *elk_prog_data =
3001          rzalloc(mem_ctx, struct elk_cs_prog_data);
3002 
3003       struct elk_compile_cs_params params = {
3004          .base = {
3005             .mem_ctx = mem_ctx,
3006             .nir = nir,
3007             .log_data = dbg,
3008             .source_hash = ish->source_hash,
3009          },
3010          .key = &elk_key,
3011          .prog_data = elk_prog_data,
3012       };
3013 
3014       program = elk_compile_cs(screen->elk, &params);
3015       error = params.base.error_str;
3016       if (program) {
3017          iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
3018          iris_apply_elk_prog_data(shader, &elk_prog_data->base);
3019       }
3020    }
3021 
3022    if (program == NULL) {
3023       dbg_printf("Failed to compile compute shader: %s\n", error);
3024 
3025       shader->compilation_failed = true;
3026       util_queue_fence_signal(&shader->ready);
3027 
3028       return;
3029    }
3030 
3031    shader->compilation_failed = false;
3032 
3033    iris_finalize_program(shader, NULL, system_values,
3034                          num_system_values, ish->kernel_input_size, num_cbufs,
3035                          &bt);
3036 
3037    iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_CS,
3038                       sizeof(*key), key, program);
3039 
3040    iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
3041 
3042    ralloc_free(mem_ctx);
3043 }
3044 
3045 static void
3046 iris_update_compiled_cs(struct iris_context *ice)
3047 {
3048    struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_COMPUTE];
3049    struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
3050    struct iris_uncompiled_shader *ish =
3051       ice->shaders.uncompiled[MESA_SHADER_COMPUTE];
3052    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
3053    struct iris_cs_prog_key key = { KEY_INIT(base) };
3054    screen->vtbl.populate_cs_key(ice, &key);
3055 
3056    struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_CS];
3057    bool added;
3058    struct iris_compiled_shader *shader =
3059       find_or_add_variant(screen, ish, IRIS_CACHE_CS, &key,
3060                           sizeof(key), &added);
3061 
3062    if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
3063                                           &key, sizeof(key))) {
3064       iris_compile_cs(screen, uploader, &ice->dbg, ish, shader);
3065    }
3066 
3067    if (shader->compilation_failed)
3068       shader = NULL;
3069 
3070    if (old != shader) {
3071       iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_COMPUTE],
3072                                     shader);
3073       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CS |
3074                                 IRIS_STAGE_DIRTY_BINDINGS_CS |
3075                                 IRIS_STAGE_DIRTY_CONSTANTS_CS;
3076       shs->sysvals_need_upload = true;
3077    }
3078 }
3079 
3080 void
3081 iris_update_compiled_compute_shader(struct iris_context *ice)
3082 {
3083    if (ice->state.stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_CS)
3084       iris_update_compiled_cs(ice);
3085 
3086    if (ice->state.stage_dirty & IRIS_STAGE_DIRTY_CONSTANTS_CS)
3087       iris_update_pull_constant_descriptors(ice, MESA_SHADER_COMPUTE);
3088 }
3089 
3090 void
3091 iris_fill_cs_push_const_buffer(struct iris_screen *screen,
3092                                struct iris_compiled_shader *shader,
3093                                unsigned threads,
3094                                uint32_t *dst)
3095 {
3096    struct iris_cs_data *cs_data = iris_cs_data(shader);
3097    assert(iris_cs_push_const_total_size(shader, threads) > 0);
3098    assert(cs_data->push.cross_thread.size == 0);
3099    assert(cs_data->push.per_thread.dwords == 1);
3100    assert(cs_data->first_param_is_builtin_subgroup_id);
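   /* Each thread gets one 32-byte (8-dword) per-thread slot, and only its
    * first dword is used: the built-in subgroup ID, which is simply the
    * thread index t.
    */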
3101    for (unsigned t = 0; t < threads; t++)
3102       dst[8 * t] = t;
3103 }
3104 
3105 /**
3106  * Allocate scratch BOs as needed for the given per-thread size and stage.
3107  */
3108 struct iris_bo *
3109 iris_get_scratch_space(struct iris_context *ice,
3110                        unsigned per_thread_scratch,
3111                        gl_shader_stage stage)
3112 {
3113    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
3114    struct iris_bufmgr *bufmgr = screen->bufmgr;
3115    const struct intel_device_info *devinfo = screen->devinfo;
3116 
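   /* Per-thread scratch is a power of two starting at 1KB.  ffs() returns
    * the 1-based index of the lowest set bit, so sizes encode to small
    * array indices: 1KB -> 0, 2KB -> 1, 4KB -> 2 (ffs(2048) - 11 == 1).
    */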
3117    unsigned encoded_size = ffs(per_thread_scratch) - 11;
3118    assert(encoded_size < ARRAY_SIZE(ice->shaders.scratch_bos));
3119    assert(per_thread_scratch == 1 << (encoded_size + 10));
3120 
3121    /* On GFX version 12.5, scratch access changed to a surface-based model.
3122     * Instead of each shader type having its own layout based on IDs passed
3123     * from the relevant fixed-function unit, all scratch access is based on
3124     * thread IDs like it always has been for compute.
3125     */
3126    if (devinfo->verx10 >= 125)
3127       stage = MESA_SHADER_COMPUTE;
3128 
3129    struct iris_bo **bop = &ice->shaders.scratch_bos[encoded_size][stage];
3130 
3131    if (!*bop) {
3132       assert(stage < ARRAY_SIZE(devinfo->max_scratch_ids));
3133       uint32_t size = per_thread_scratch * devinfo->max_scratch_ids[stage];
3134       *bop = iris_bo_alloc(bufmgr, "scratch", size, 1024,
3135                            IRIS_MEMZONE_SHADER, BO_ALLOC_PLAIN);
3136    }
3137 
3138    return *bop;
3139 }
3140 
3141 const struct iris_state_ref *
3142 iris_get_scratch_surf(struct iris_context *ice,
3143                       unsigned per_thread_scratch)
3144 {
3145    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
3146    ASSERTED const struct intel_device_info *devinfo = screen->devinfo;
3147 
3148    assert(devinfo->verx10 >= 125);
3149 
3150    unsigned encoded_size = ffs(per_thread_scratch) - 11;
3151    assert(encoded_size < ARRAY_SIZE(ice->shaders.scratch_surfs));
3152    assert(per_thread_scratch == 1 << (encoded_size + 10));
3153 
3154    struct iris_state_ref *ref = &ice->shaders.scratch_surfs[encoded_size];
3155 
3156    if (ref->res)
3157       return ref;
3158 
3159    struct iris_bo *scratch_bo =
3160       iris_get_scratch_space(ice, per_thread_scratch, MESA_SHADER_COMPUTE);
3161 
3162    void *map = upload_state(ice->state.scratch_surface_uploader, ref,
3163                             screen->isl_dev.ss.size, 64);
3164 
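   /* Describe the entire scratch BO as a RAW buffer whose stride is the
    * per-thread size, so each thread can presumably address its own slice
    * by thread ID under the Gfx12.5+ surface-based scratch model.
    */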
3165    isl_buffer_fill_state(&screen->isl_dev, map,
3166                          .address = scratch_bo->address,
3167                          .size_B = scratch_bo->size,
3168                          .format = ISL_FORMAT_RAW,
3169                          .swizzle = ISL_SWIZZLE_IDENTITY,
3170                          .mocs = iris_mocs(scratch_bo, &screen->isl_dev, 0),
3171                          .stride_B = per_thread_scratch,
3172                          .is_scratch = true);
3173 
3174    return ref;
3175 }
3176 
3177 /* ------------------------------------------------------------------- */
3178 
3179 /**
3180  * The pipe->create_[stage]_state() driver hooks.
3181  *
3182  * Performs basic NIR preprocessing, records any state dependencies, and
3183  * returns an iris_uncompiled_shader as the Gallium CSO.
3184  *
3185  * Actual shader compilation to assembly happens later, at first use.
3186  */
3187 static void *
3188 iris_create_uncompiled_shader(struct iris_screen *screen,
3189                               nir_shader *nir,
3190                               const struct pipe_stream_output_info *so_info)
3191 {
3192    struct iris_uncompiled_shader *ish =
3193       calloc(1, sizeof(struct iris_uncompiled_shader));
3194    if (!ish)
3195       return NULL;
3196 
3197    pipe_reference_init(&ish->ref, 1);
3198    list_inithead(&ish->variants);
3199    simple_mtx_init(&ish->lock, mtx_plain);
3200    util_queue_fence_init(&ish->ready);
3201 
3202    ish->uses_atomic_load_store = iris_uses_image_atomic(nir);
3203 
3204    ish->program_id = get_new_program_id(screen);
3205    ish->nir = nir;
3206    if (so_info) {
3207       memcpy(&ish->stream_output, so_info, sizeof(*so_info));
3208       update_so_info(&ish->stream_output, nir->info.outputs_written);
3209    }
3210 
3211    /* Use lowest dword of source shader blake3 for shader hash. */
3212    ish->source_hash = *(uint32_t*)nir->info.source_blake3;
3213 
3214    if (screen->disk_cache) {
3215       /* Serialize the NIR to a binary blob that we can hash for the disk
3216        * cache.  Drop unnecessary information (like variable names)
3217        * so the serialized NIR is smaller, and also to let us detect more
3218        * isomorphic shaders when hashing, increasing cache hits.
3219        */
3220       struct blob blob;
3221       blob_init(&blob);
3222       nir_serialize(&blob, nir, true);
3223       _mesa_sha1_compute(blob.data, blob.size, ish->nir_sha1);
3224       blob_finish(&blob);
3225    }
3226 
3227    return ish;
3228 }
3229 
3230 static void *
3231 iris_create_compute_state(struct pipe_context *ctx,
3232                           const struct pipe_compute_state *state)
3233 {
3234    struct iris_context *ice = (void *) ctx;
3235    struct iris_screen *screen = (void *) ctx->screen;
3236    struct u_upload_mgr *uploader = ice->shaders.uploader_unsync;
3237    const nir_shader_compiler_options *options =
3238       screen->brw ? screen->brw->nir_options[MESA_SHADER_COMPUTE]
3239                   : screen->elk->nir_options[MESA_SHADER_COMPUTE];
3240 
3241    nir_shader *nir;
3242    switch (state->ir_type) {
3243    case PIPE_SHADER_IR_NIR:
3244       nir = (void *)state->prog;
3245       break;
3246 
3247    case PIPE_SHADER_IR_NIR_SERIALIZED: {
3248       struct blob_reader reader;
3249       const struct pipe_binary_program_header *hdr = state->prog;
3250       blob_reader_init(&reader, hdr->blob, hdr->num_bytes);
3251       nir = nir_deserialize(NULL, options, &reader);
3252       break;
3253    }
3254 
3255    default:
3256       unreachable("Unsupported IR");
3257    }
3258 
3259    /* Most of iris doesn't really care about the difference between compute
3260     * shaders and kernels.  We also tend to hard-code COMPUTE everywhere so
3261     * it's way easier if we just normalize to COMPUTE here.
3262     */
3263    assert(nir->info.stage == MESA_SHADER_COMPUTE ||
3264           nir->info.stage == MESA_SHADER_KERNEL);
3265    nir->info.stage = MESA_SHADER_COMPUTE;
3266 
3267    struct iris_uncompiled_shader *ish =
3268       iris_create_uncompiled_shader(screen, nir, NULL);
3269    ish->kernel_input_size = state->req_input_mem;
3270    ish->kernel_shared_size = state->static_shared_mem;
3271 
3272    // XXX: disallow more than 64KB of shared variables
3273 
3274    if (screen->precompile) {
3275       struct iris_cs_prog_key key = { KEY_INIT(base) };
3276 
3277       struct iris_compiled_shader *shader =
3278          iris_create_shader_variant(screen, NULL, MESA_SHADER_COMPUTE,
3279                                     IRIS_CACHE_CS, sizeof(key), &key);
3280 
3281       /* Append our new variant to the shader's variant list. */
3282       list_addtail(&shader->link, &ish->variants);
3283 
3284       if (!iris_disk_cache_retrieve(screen, uploader, ish, shader,
3285                                     &key, sizeof(key))) {
3286          iris_compile_cs(screen, uploader, &ice->dbg, ish, shader);
3287       }
3288    }
3289 
3290    return ish;
3291 }
3292 
3293 static void
3294 iris_get_compute_state_info(struct pipe_context *ctx, void *state,
3295                             struct pipe_compute_state_object_info *info)
3296 {
3297    struct iris_screen *screen = (void *) ctx->screen;
3298    struct iris_uncompiled_shader *ish = state;
3299 
3300    info->max_threads = MIN2(1024, 32 * screen->devinfo->max_cs_workgroup_threads);
3301    info->private_memory = 0;
3302    info->preferred_simd_size = 32;
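   /* simd_sizes is a bitmask; OR-ing the widths together sets bits 3-5,
    * advertising SIMD8, SIMD16, and SIMD32 dispatch.
    */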
3303    info->simd_sizes = 8 | 16 | 32;
3304 
3305    list_for_each_entry_safe(struct iris_compiled_shader, shader,
3306                             &ish->variants, link) {
3307       info->private_memory = MAX2(info->private_memory,
3308                                   shader->total_scratch);
3309    }
3310 }
3311 
3312 static uint32_t
3313 iris_get_compute_state_subgroup_size(struct pipe_context *ctx, void *state,
3314                                      const uint32_t block[3])
3315 {
3316    struct iris_context *ice = (void *) ctx;
3317    struct iris_screen *screen = (void *) ctx->screen;
3318    struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
3319    struct iris_uncompiled_shader *ish = state;
3320 
3321    struct iris_cs_prog_key key = { KEY_INIT(base) };
3322    screen->vtbl.populate_cs_key(ice, &key);
3323 
3324    bool added;
3325    struct iris_compiled_shader *shader =
3326       find_or_add_variant(screen, ish, IRIS_CACHE_CS, &key,
3327                           sizeof(key), &added);
3328 
3329    if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
3330                                           &key, sizeof(key))) {
3331       iris_compile_cs(screen, uploader, &ice->dbg, ish, shader);
3332    }
3333 
3334    return iris_get_cs_dispatch_info(screen->devinfo, shader, block).simd_size;
3335 }
3336 
3337 static void
3338 iris_compile_shader(void *_job, UNUSED void *_gdata, UNUSED int thread_index)
3339 {
3340    const struct iris_threaded_compile_job *job =
3341       (struct iris_threaded_compile_job *) _job;
3342 
3343    struct iris_screen *screen = job->screen;
3344    struct u_upload_mgr *uploader = job->uploader;
3345    struct util_debug_callback *dbg = job->dbg;
3346    struct iris_uncompiled_shader *ish = job->ish;
3347    struct iris_compiled_shader *shader = job->shader;
3348 
3349    switch (ish->nir->info.stage) {
3350    case MESA_SHADER_VERTEX:
3351       iris_compile_vs(screen, uploader, dbg, ish, shader);
3352       break;
3353    case MESA_SHADER_TESS_CTRL:
3354       iris_compile_tcs(screen, NULL, uploader, dbg, ish, shader);
3355       break;
3356    case MESA_SHADER_TESS_EVAL:
3357       iris_compile_tes(screen, uploader, dbg, ish, shader);
3358       break;
3359    case MESA_SHADER_GEOMETRY:
3360       iris_compile_gs(screen, uploader, dbg, ish, shader);
3361       break;
3362    case MESA_SHADER_FRAGMENT:
3363       iris_compile_fs(screen, uploader, dbg, ish, shader, NULL);
3364       break;
3365 
3366    default:
3367       unreachable("Invalid shader stage.");
3368    }
3369 }
3370 
3371 static void *
3372 iris_create_shader_state(struct pipe_context *ctx,
3373                          const struct pipe_shader_state *state)
3374 {
3375    struct iris_context *ice = (void *) ctx;
3376    struct iris_screen *screen = (void *) ctx->screen;
3377    struct nir_shader *nir;
3378 
3379    if (state->type == PIPE_SHADER_IR_TGSI)
3380       nir = tgsi_to_nir(state->tokens, ctx->screen, false);
3381    else
3382       nir = state->ir.nir;
3383 
3384    const struct shader_info *const info = &nir->info;
3385    struct iris_uncompiled_shader *ish =
3386       iris_create_uncompiled_shader(screen, nir, &state->stream_output);
3387 
3388    union iris_any_prog_key key;
3389    unsigned key_size = 0;
3390 
3391    memset(&key, 0, sizeof(key));
3392 
3393    switch (info->stage) {
3394    case MESA_SHADER_VERTEX:
3395       /* User clip planes */
3396       if (info->clip_distance_array_size == 0)
3397          ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
3398 
3399       key.vs = (struct iris_vs_prog_key) { KEY_INIT(vue.base) };
3400       key_size = sizeof(key.vs);
3401       break;
3402 
3403    case MESA_SHADER_TESS_CTRL: {
3404       key.tcs = (struct iris_tcs_prog_key) {
3405          KEY_INIT(vue.base),
3406          // XXX: make sure the linker fills this out from the TES...
3407          ._tes_primitive_mode =
3408          info->tess._primitive_mode ? info->tess._primitive_mode
3409                                    : TESS_PRIMITIVE_TRIANGLES,
3410          .outputs_written = info->outputs_written,
3411          .patch_outputs_written = info->patch_outputs_written,
3412       };
3413 
3414       /* MULTI_PATCH mode needs the key to contain the input patch dimensionality.
3415        * We don't have that information, so we randomly guess that the input
3416        * and output patches are the same size.  This is a bad guess, but we
3417        * can't do much better.
3418        */
3419       if (iris_use_tcs_multi_patch(screen))
3420          key.tcs.input_vertices = info->tess.tcs_vertices_out;
3421 
3422       key_size = sizeof(key.tcs);
3423       break;
3424    }
3425 
3426    case MESA_SHADER_TESS_EVAL:
3427       /* User clip planes */
3428       if (info->clip_distance_array_size == 0)
3429          ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
3430 
3431       key.tes = (struct iris_tes_prog_key) {
3432          KEY_INIT(vue.base),
3433          // XXX: not ideal, need TCS output/TES input unification
3434          .inputs_read = info->inputs_read,
3435          .patch_inputs_read = info->patch_inputs_read,
3436       };
3437 
3438       key_size = sizeof(key.tes);
3439       break;
3440 
3441    case MESA_SHADER_GEOMETRY:
3442       /* User clip planes */
3443       if (info->clip_distance_array_size == 0)
3444          ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
3445 
3446       key.gs = (struct iris_gs_prog_key) { KEY_INIT(vue.base) };
3447       key_size = sizeof(key.gs);
3448       break;
3449 
3450    case MESA_SHADER_FRAGMENT:
3451       ish->nos |= (1ull << IRIS_NOS_FRAMEBUFFER) |
3452                   (1ull << IRIS_NOS_DEPTH_STENCIL_ALPHA) |
3453                   (1ull << IRIS_NOS_RASTERIZER) |
3454                   (1ull << IRIS_NOS_BLEND);
3455 
3456       STATIC_ASSERT(BRW_FS_VARYING_INPUT_MASK == ELK_FS_VARYING_INPUT_MASK);
3457 
3458       /* The program key needs the VUE map if there are > 16 inputs */
3459       if (util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) > 16) {
3460          ish->nos |= (1ull << IRIS_NOS_LAST_VUE_MAP);
3461       }
3462 
3463       const uint64_t color_outputs = info->outputs_written &
3464          ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
3465            BITFIELD64_BIT(FRAG_RESULT_STENCIL) |
3466            BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK));
3467 
3468       bool can_rearrange_varyings =
3469          util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) <= 16;
3470 
3471       const struct intel_device_info *devinfo = screen->devinfo;
3472 
3473       key.fs = (struct iris_fs_prog_key) {
3474          KEY_INIT(base),
3475          .nr_color_regions = util_bitcount(color_outputs),
3476          .coherent_fb_fetch = devinfo->ver >= 9 && devinfo->ver < 20,
3477          .input_slots_valid =
3478             can_rearrange_varyings ? 0 : info->inputs_read | VARYING_BIT_POS,
3479       };
3480 
3481       key_size = sizeof(key.fs);
3482       break;
3483 
3484    default:
3485       unreachable("Invalid shader stage.");
3486    }
3487 
3488    if (screen->precompile) {
3489       struct u_upload_mgr *uploader = ice->shaders.uploader_unsync;
3490 
3491       struct iris_compiled_shader *shader =
3492          iris_create_shader_variant(screen, NULL, info->stage,
3493                                     (enum iris_program_cache_id) info->stage,
3494                                     key_size, &key);
3495 
3496       /* Append our new variant to the shader's variant list. */
3497       list_addtail(&shader->link, &ish->variants);
3498 
3499       if (!iris_disk_cache_retrieve(screen, uploader, ish, shader,
3500                                     &key, key_size)) {
3501          assert(!util_queue_fence_is_signalled(&shader->ready));
3502 
3503          struct iris_threaded_compile_job *job = calloc(1, sizeof(*job));
3504 
3505          job->screen = screen;
3506          job->uploader = uploader;
3507          job->ish = ish;
3508          job->shader = shader;
3509 
3510          iris_schedule_compile(screen, &ish->ready, &ice->dbg, job,
3511                                iris_compile_shader);
3512       }
3513    }
3514 
3515    return ish;
3516 }
3517 
3518 /**
3519  * Called when the refcount on the iris_uncompiled_shader reaches 0.
3520  *
3521  * Frees the iris_uncompiled_shader.
3522  *
3523  * \sa iris_delete_shader_state
3524  */
3525 void
3526 iris_destroy_shader_state(struct pipe_context *ctx, void *state)
3527 {
3528    struct iris_uncompiled_shader *ish = state;
3529 
3530    /* No need to take ish->lock; we hold the last reference to ish */
3531    list_for_each_entry_safe(struct iris_compiled_shader, shader,
3532                             &ish->variants, link) {
3533       list_del(&shader->link);
3534 
3535       iris_shader_variant_reference(&shader, NULL);
3536    }
3537 
3538    simple_mtx_destroy(&ish->lock);
3539    util_queue_fence_destroy(&ish->ready);
3540 
3541    ralloc_free(ish->nir);
3542    free(ish);
3543 }
3544 
3545 /**
3546  * The pipe->delete_[stage]_state() driver hooks.
3547  *
3548  * \sa iris_destroy_shader_state
3549  */
3550 static void
3551 iris_delete_shader_state(struct pipe_context *ctx, void *state)
3552 {
3553    struct iris_uncompiled_shader *ish = state;
3554    struct iris_context *ice = (void *) ctx;
3555 
3556    const gl_shader_stage stage = ish->nir->info.stage;
3557 
3558    if (ice->shaders.uncompiled[stage] == ish) {
3559       ice->shaders.uncompiled[stage] = NULL;
3560       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_UNCOMPILED_VS << stage;
3561    }
3562 
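   /* pipe_reference() returns true when the last reference was released,
    * in which case we must actually destroy the shader state.
    */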
3563    if (pipe_reference(&ish->ref, NULL))
3564       iris_destroy_shader_state(ctx, state);
3565 }
3566 
3567 /**
3568  * The pipe->bind_[stage]_state() driver hook.
3569  *
3570  * Binds an uncompiled shader as the current one for a particular stage.
3571  * Updates dirty tracking to account for the shader's NOS.
3572  */
3573 static void
3574 bind_shader_state(struct iris_context *ice,
3575                   struct iris_uncompiled_shader *ish,
3576                   gl_shader_stage stage)
3577 {
3578    uint64_t stage_dirty_bit = IRIS_STAGE_DIRTY_UNCOMPILED_VS << stage;
3579    const uint64_t nos = ish ? ish->nos : 0;
3580 
3581    const struct shader_info *old_info = iris_get_shader_info(ice, stage);
3582    const struct shader_info *new_info = ish ? &ish->nir->info : NULL;
3583 
3584    if ((old_info ? BITSET_LAST_BIT(old_info->samplers_used) : 0) !=
3585        (new_info ? BITSET_LAST_BIT(new_info->samplers_used) : 0)) {
3586       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_SAMPLER_STATES_VS << stage;
3587    }
3588 
3589    ice->shaders.uncompiled[stage] = ish;
3590    ice->state.stage_dirty |= stage_dirty_bit;
3591 
3592    /* Record that CSOs need to mark IRIS_DIRTY_UNCOMPILED_XS when they change
3593     * (or that they no longer need to do so).
3594     */
3595    for (int i = 0; i < IRIS_NOS_COUNT; i++) {
3596       if (nos & (1 << i))
3597          ice->state.stage_dirty_for_nos[i] |= stage_dirty_bit;
3598       else
3599          ice->state.stage_dirty_for_nos[i] &= ~stage_dirty_bit;
3600    }
3601 }
3602 
3603 static void
3604 iris_bind_vs_state(struct pipe_context *ctx, void *state)
3605 {
3606    struct iris_context *ice = (struct iris_context *)ctx;
3607    struct iris_uncompiled_shader *ish = state;
3608 
3609    if (ish) {
3610       const struct shader_info *info = &ish->nir->info;
3611       if (ice->state.window_space_position != info->vs.window_space_position) {
3612          ice->state.window_space_position = info->vs.window_space_position;
3613 
3614          ice->state.dirty |= IRIS_DIRTY_CLIP |
3615                              IRIS_DIRTY_RASTER |
3616                              IRIS_DIRTY_CC_VIEWPORT;
3617       }
3618 
3619       const bool uses_draw_params =
3620          BITSET_TEST(info->system_values_read, SYSTEM_VALUE_FIRST_VERTEX) ||
3621          BITSET_TEST(info->system_values_read, SYSTEM_VALUE_BASE_INSTANCE);
3622       const bool uses_derived_draw_params =
3623          BITSET_TEST(info->system_values_read, SYSTEM_VALUE_DRAW_ID) ||
3624          BITSET_TEST(info->system_values_read, SYSTEM_VALUE_IS_INDEXED_DRAW);
3625       const bool needs_sgvs_element = uses_draw_params ||
3626          BITSET_TEST(info->system_values_read, SYSTEM_VALUE_INSTANCE_ID) ||
3627          BITSET_TEST(info->system_values_read,
3628                      SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);
3629 
3630       if (ice->state.vs_uses_draw_params != uses_draw_params ||
3631           ice->state.vs_uses_derived_draw_params != uses_derived_draw_params ||
3632           ice->state.vs_needs_edge_flag != info->vs.needs_edge_flag ||
3633           ice->state.vs_needs_sgvs_element != needs_sgvs_element) {
3634          ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS |
3635                              IRIS_DIRTY_VERTEX_ELEMENTS;
3636       }
3637 
3638       ice->state.vs_uses_draw_params = uses_draw_params;
3639       ice->state.vs_uses_derived_draw_params = uses_derived_draw_params;
3640       ice->state.vs_needs_sgvs_element = needs_sgvs_element;
3641       ice->state.vs_needs_edge_flag = info->vs.needs_edge_flag;
3642    }
3643 
3644    bind_shader_state((void *) ctx, state, MESA_SHADER_VERTEX);
3645 }
3646 
3647 static void
3648 iris_bind_tcs_state(struct pipe_context *ctx, void *state)
3649 {
3650    bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_CTRL);
3651 }
3652 
3653 static void
3654 iris_bind_tes_state(struct pipe_context *ctx, void *state)
3655 {
3656    struct iris_context *ice = (struct iris_context *)ctx;
3657    struct iris_screen *screen = (struct iris_screen *) ctx->screen;
3658    const struct intel_device_info *devinfo = screen->devinfo;
3659 
3660    /* Enabling/disabling optional stages requires a URB reconfiguration. */
3661    if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
3662       ice->state.dirty |= IRIS_DIRTY_URB | (devinfo->verx10 >= 125 ?
3663                                             IRIS_DIRTY_VFG : 0);
3664 
3665    bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_EVAL);
3666 }
3667 
3668 static void
3669 iris_bind_gs_state(struct pipe_context *ctx, void *state)
3670 {
3671    struct iris_context *ice = (struct iris_context *)ctx;
3672 
3673    /* Enabling/disabling optional stages requires a URB reconfiguration. */
3674    if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
3675       ice->state.dirty |= IRIS_DIRTY_URB;
3676 
3677    bind_shader_state((void *) ctx, state, MESA_SHADER_GEOMETRY);
3678 }
3679 
3680 static void
3681 iris_bind_fs_state(struct pipe_context *ctx, void *state)
3682 {
3683    struct iris_context *ice = (struct iris_context *) ctx;
3684    struct iris_screen *screen = (struct iris_screen *) ctx->screen;
3685    const struct intel_device_info *devinfo = screen->devinfo;
3686    struct iris_uncompiled_shader *old_ish =
3687       ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
3688    struct iris_uncompiled_shader *new_ish = state;
3689 
3690    const unsigned color_bits =
3691       BITFIELD64_BIT(FRAG_RESULT_COLOR) |
3692       BITFIELD64_RANGE(FRAG_RESULT_DATA0, IRIS_MAX_DRAW_BUFFERS);
3693 
3694    /* Fragment shader outputs influence HasWriteableRT */
3695    if (!old_ish || !new_ish ||
3696        (old_ish->nir->info.outputs_written & color_bits) !=
3697        (new_ish->nir->info.outputs_written & color_bits))
3698       ice->state.dirty |= IRIS_DIRTY_PS_BLEND;
3699 
3700    if (devinfo->ver == 8)
3701       ice->state.dirty |= IRIS_DIRTY_PMA_FIX;
3702 
3703    bind_shader_state((void *) ctx, state, MESA_SHADER_FRAGMENT);
3704 }
3705 
3706 static void
3707 iris_bind_cs_state(struct pipe_context *ctx, void *state)
3708 {
3709    bind_shader_state((void *) ctx, state, MESA_SHADER_COMPUTE);
3710 }
3711 
3712 static char *
3713 iris_finalize_nir(struct pipe_screen *_screen, void *nirptr)
3714 {
3715    struct iris_screen *screen = (struct iris_screen *)_screen;
3716    struct nir_shader *nir = (struct nir_shader *) nirptr;
3717    const struct intel_device_info *devinfo = screen->devinfo;
3718 
3719    NIR_PASS_V(nir, iris_fix_edge_flags);
3720 
3721    if (screen->brw) {
3722       struct brw_nir_compiler_opts opts = {};
3723       brw_preprocess_nir(screen->brw, nir, &opts);
3724 
3725       NIR_PASS_V(nir, brw_nir_lower_storage_image,
3726                  &(struct brw_nir_lower_storage_image_opts) {
3727                     .devinfo      = devinfo,
3728                     .lower_loads  = true,
3729                     .lower_stores = true,
3730                  });
3731    } else {
3732       assert(screen->elk);
3733 
3734       struct elk_nir_compiler_opts opts = {};
3735       elk_preprocess_nir(screen->elk, nir, &opts);
3736 
3737       NIR_PASS_V(nir, elk_nir_lower_storage_image,
3738                  &(struct elk_nir_lower_storage_image_opts) {
3739                     .devinfo        = devinfo,
3740                     .lower_loads    = true,
3741                     .lower_stores   = true,
3742 
3743                     /* Iris uploads image params used by
3744                      * get_size lowering only in Gfx8.
3745                      */
3746                     .lower_get_size = true,
3747                  });
3748    }
3749 
3750    NIR_PASS_V(nir, iris_lower_storage_image_derefs);
3751 
3752    nir_sweep(nir);
3753 
3754    return NULL;
3755 }
3756 
3757 static void
3758 iris_set_max_shader_compiler_threads(struct pipe_screen *pscreen,
3759                                      unsigned max_threads)
3760 {
3761    struct iris_screen *screen = (struct iris_screen *) pscreen;
3762    util_queue_adjust_num_threads(&screen->shader_compiler_queue, max_threads,
3763                                  false);
3764 }
3765 
3766 static bool
3767 iris_is_parallel_shader_compilation_finished(struct pipe_screen *pscreen,
3768                                              void *v_shader,
3769                                              enum pipe_shader_type p_stage)
3770 {
3771    struct iris_screen *screen = (struct iris_screen *) pscreen;
3772 
3773    /* Threaded compilation is only used for the precompile.  If precompile is
3774     * disabled, threaded compilation is "done."
3775     */
3776    if (!screen->precompile)
3777       return true;
3778 
3779    struct iris_uncompiled_shader *ish = v_shader;
3780 
3781    /* When precompile is enabled, the first entry is the precompile variant.
3782     * Check the ready fence of the precompile variant.
3783     */
3784    struct iris_compiled_shader *first =
3785       list_first_entry(&ish->variants, struct iris_compiled_shader, link);
3786 
3787    return util_queue_fence_is_signalled(&first->ready);
3788 }
3789 
3790 void
3791 iris_init_screen_program_functions(struct pipe_screen *pscreen)
3792 {
3793    pscreen->is_parallel_shader_compilation_finished =
3794       iris_is_parallel_shader_compilation_finished;
3795    pscreen->set_max_shader_compiler_threads =
3796       iris_set_max_shader_compiler_threads;
3797    pscreen->finalize_nir = iris_finalize_nir;
3798 }
3799 
3800 void
3801 iris_init_program_functions(struct pipe_context *ctx)
3802 {
3803    ctx->create_vs_state  = iris_create_shader_state;
3804    ctx->create_tcs_state = iris_create_shader_state;
3805    ctx->create_tes_state = iris_create_shader_state;
3806    ctx->create_gs_state  = iris_create_shader_state;
3807    ctx->create_fs_state  = iris_create_shader_state;
3808    ctx->create_compute_state = iris_create_compute_state;
3809 
3810    ctx->delete_vs_state  = iris_delete_shader_state;
3811    ctx->delete_tcs_state = iris_delete_shader_state;
3812    ctx->delete_tes_state = iris_delete_shader_state;
3813    ctx->delete_gs_state  = iris_delete_shader_state;
3814    ctx->delete_fs_state  = iris_delete_shader_state;
3815    ctx->delete_compute_state = iris_delete_shader_state;
3816 
3817    ctx->bind_vs_state  = iris_bind_vs_state;
3818    ctx->bind_tcs_state = iris_bind_tcs_state;
3819    ctx->bind_tes_state = iris_bind_tes_state;
3820    ctx->bind_gs_state  = iris_bind_gs_state;
3821    ctx->bind_fs_state  = iris_bind_fs_state;
3822    ctx->bind_compute_state = iris_bind_cs_state;
3823 
3824    ctx->get_compute_state_info = iris_get_compute_state_info;
3825    ctx->get_compute_state_subgroup_size = iris_get_compute_state_subgroup_size;
3826 }
3827 
3828 struct intel_cs_dispatch_info
3829 iris_get_cs_dispatch_info(const struct intel_device_info *devinfo,
3830                           const struct iris_compiled_shader *shader,
3831                           const uint32_t block[3])
3832 {
3833    if (shader->brw_prog_data) {
3834       return brw_cs_get_dispatch_info(devinfo,
3835                                       brw_cs_prog_data(shader->brw_prog_data),
3836                                       block);
3837    } else {
3838       assert(shader->elk_prog_data);
3839       return elk_cs_get_dispatch_info(devinfo,
3840                                       elk_cs_prog_data(shader->elk_prog_data),
3841                                       block);
3842    }
3843 }
3844 
3845 unsigned
3846 iris_cs_push_const_total_size(const struct iris_compiled_shader *shader,
3847                               unsigned threads)
3848 {
3849    if (shader->brw_prog_data) {
3850       return brw_cs_push_const_total_size(brw_cs_prog_data(shader->brw_prog_data),
3851                                           threads);
3852    } else {
3853       assert(shader->elk_prog_data);
3854       return elk_cs_push_const_total_size(elk_cs_prog_data(shader->elk_prog_data),
3855                                           threads);
3856    }
3857 }
3858 
3859 uint32_t
3860 iris_fs_barycentric_modes(const struct iris_compiled_shader *shader,
3861                           enum intel_msaa_flags pushed_msaa_flags)
3862 {
3863    if (shader->brw_prog_data) {
3864       return wm_prog_data_barycentric_modes(brw_wm_prog_data(shader->brw_prog_data),
3865                                             pushed_msaa_flags);
3866    } else {
3867       assert(shader->elk_prog_data);
3868       return elk_wm_prog_data_barycentric_modes(elk_wm_prog_data(shader->elk_prog_data),
3869                                                 pushed_msaa_flags);
3870    }
3871 }
3872 
3873 bool
3874 iris_use_tcs_multi_patch(struct iris_screen *screen)
3875 {
3876    return screen->brw && screen->brw->use_tcs_multi_patch;
3877 }
3878 
3879 bool
3880 iris_indirect_ubos_use_sampler(struct iris_screen *screen)
3881 {
3882    if (screen->brw) {
3883       return screen->brw->indirect_ubos_use_sampler;
3884    } else {
3885       assert(screen->elk);
3886       return screen->elk->indirect_ubos_use_sampler;
3887    }
3888 }
3889 
3890 static void
3891 iris_shader_debug_log(void *data, unsigned *id, const char *fmt, ...)
3892 {
3893    struct util_debug_callback *dbg = data;
3894    va_list args;
3895 
3896    if (!dbg->debug_message)
3897       return;
3898 
3899    va_start(args, fmt);
3900    dbg->debug_message(dbg->data, id, UTIL_DEBUG_TYPE_SHADER_INFO, fmt, args);
3901    va_end(args);
3902 }
3903 
3904 static void
3905 iris_shader_perf_log(void *data, unsigned *id, const char *fmt, ...)
3906 {
3907    struct util_debug_callback *dbg = data;
3908    va_list args;
3909    va_start(args, fmt);
3910 
3911    if (INTEL_DEBUG(DEBUG_PERF)) {
3912       va_list args_copy;
3913       va_copy(args_copy, args);
3914       vfprintf(stderr, fmt, args_copy);
3915       va_end(args_copy);
3916    }
3917 
3918    if (dbg->debug_message) {
3919       dbg->debug_message(dbg->data, id, UTIL_DEBUG_TYPE_PERF_INFO, fmt, args);
3920    }
3921 
3922    va_end(args);
3923 }
3924 
3925 const void *
3926 iris_get_compiler_options(struct pipe_screen *pscreen,
3927                           enum pipe_shader_ir ir,
3928                           enum pipe_shader_type pstage)
3929 {
3930    struct iris_screen *screen = (struct iris_screen *) pscreen;
3931    gl_shader_stage stage = stage_from_pipe(pstage);
3932    assert(ir == PIPE_SHADER_IR_NIR);
3933 
3934    return screen->brw ? screen->brw->nir_options[stage]
3935                       : screen->elk->nir_options[stage];
3936 }
3937 
3938 void
3939 iris_compiler_init(struct iris_screen *screen)
3940 {
3941    STATIC_ASSERT(IRIS_MAX_DRAW_BUFFERS == BRW_MAX_DRAW_BUFFERS);
3942    STATIC_ASSERT(IRIS_MAX_DRAW_BUFFERS == ELK_MAX_DRAW_BUFFERS);
3943    STATIC_ASSERT(IRIS_MAX_SOL_BINDINGS == ELK_MAX_SOL_BINDINGS);
3944 
3945    if (screen->devinfo->ver >= 9) {
3946       screen->brw = brw_compiler_create(screen, screen->devinfo);
3947       screen->brw->shader_debug_log = iris_shader_debug_log;
3948       screen->brw->shader_perf_log = iris_shader_perf_log;
3949    } else {
3950       screen->elk = elk_compiler_create(screen, screen->devinfo);
3951       screen->elk->shader_debug_log = iris_shader_debug_log;
3952       screen->elk->shader_perf_log = iris_shader_perf_log;
3953       screen->elk->supports_shader_constants = true;
3954    }
3955 }
3956