/*
 * Copyright 2018 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "zink_program.h"

#include "zink_compiler.h"
#include "zink_context.h"
#include "zink_descriptors.h"
#include "zink_helpers.h"
#include "zink_pipeline.h"
#include "zink_render_pass.h"
#include "zink_resource.h"
#include "zink_screen.h"
#include "zink_state.h"
#include "zink_inlines.h"

#include "util/memstream.h"
#include "util/u_debug.h"
#include "util/u_memory.h"
#include "util/u_prim.h"
#include "nir_serialize.h"
#include "nir/nir_draw_helpers.h"

/* for pipeline cache */
#define XXH_INLINE_ALL
#include "util/xxhash.h"

static void
gfx_program_precompile_job(void *data, void *gdata, int thread_index);
struct zink_gfx_program *
create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stages, unsigned vertices_per_patch);

void
debug_describe_zink_gfx_program(char *buf, const struct zink_gfx_program *ptr)
{
   sprintf(buf, "zink_gfx_program");
}

void
debug_describe_zink_compute_program(char *buf, const struct zink_compute_program *ptr)
{
   sprintf(buf, "zink_compute_program");
}

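/* Match a cached module against the current key for a non-generated TCS:
 * only the uniform count, nonseamless mask, and zs-swizzle flag are compared,
 * since a user-supplied TCS ignores the rest of the shader key.
 */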
ALWAYS_INLINE static bool
shader_key_matches_tcs_nongenerated(const struct zink_shader_module *zm, const struct zink_shader_key *key, unsigned num_uniforms)
{
   if (zm->num_uniforms != num_uniforms || zm->has_nonseamless != !!key->base.nonseamless_cube_mask ||
       zm->needs_zs_shader_swizzle != key->base.needs_zs_shader_swizzle)
      return false;
   const uint32_t nonseamless_size = zm->has_nonseamless ? sizeof(uint32_t) : 0;
   return (!nonseamless_size || !memcmp(zm->key + zm->key_size, &key->base.nonseamless_cube_mask, nonseamless_size)) &&
          (!num_uniforms || !memcmp(zm->key + zm->key_size + nonseamless_size,
                                    key->base.inlined_uniform_values, zm->num_uniforms * sizeof(uint32_t)));
}

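/* Full shader-key comparison for a cached module. The data appended after
 * zm->key (nonseamless mask, then inlined uniform values) is only checked
 * when the corresponding feature isn't handled natively by the driver.
 */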
ALWAYS_INLINE static bool
shader_key_matches(const struct zink_shader_module *zm,
                   const struct zink_shader_key *key, unsigned num_uniforms,
                   bool has_inline, bool has_nonseamless)
{
   const uint32_t nonseamless_size = !has_nonseamless && zm->has_nonseamless ? sizeof(uint32_t) : 0;
   if (has_inline) {
      if (zm->num_uniforms != num_uniforms ||
          (num_uniforms &&
           memcmp(zm->key + zm->key_size + nonseamless_size,
                  key->base.inlined_uniform_values, zm->num_uniforms * sizeof(uint32_t))))
         return false;
   }
   if (!has_nonseamless) {
      if (zm->has_nonseamless != !!key->base.nonseamless_cube_mask ||
          (nonseamless_size && memcmp(zm->key + zm->key_size, &key->base.nonseamless_cube_mask, nonseamless_size)))
         return false;
   }
   if (zm->needs_zs_shader_swizzle != key->base.needs_zs_shader_swizzle)
      return false;
   return !memcmp(zm->key, key, zm->key_size);
}

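/* Hash the base key plus any appended nonseamless/inlined-uniform data. */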
static uint32_t
shader_module_hash(const struct zink_shader_module *zm)
{
   const uint32_t nonseamless_size = zm->has_nonseamless ? sizeof(uint32_t) : 0;
   unsigned key_size = zm->key_size + nonseamless_size + zm->num_uniforms * sizeof(uint32_t);
   return _mesa_hash_data(zm->key, key_size);
}

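/* Determine how many bytes of inlined-uniform and nonseamless data will be
 * appended to this stage's shader key, disabling inlining once the variant
 * limit is reached.
 */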
ALWAYS_INLINE static void
gather_shader_module_info(struct zink_context *ctx, struct zink_screen *screen,
                          struct zink_shader *zs, struct zink_gfx_program *prog,
                          struct zink_gfx_pipeline_state *state,
                          bool has_inline, //is inlining enabled?
                          bool has_nonseamless, //is nonseamless ext present?
                          unsigned *inline_size, unsigned *nonseamless_size)
{
   gl_shader_stage stage = zs->info.stage;
   struct zink_shader_key *key = &state->shader_keys.key[stage];
   if (has_inline && ctx && zs->info.num_inlinable_uniforms &&
       ctx->inlinable_uniforms_valid_mask & BITFIELD64_BIT(stage)) {
      if (zs->can_inline && (screen->is_cpu || prog->inlined_variant_count[stage] < ZINK_MAX_INLINED_VARIANTS))
         *inline_size = zs->info.num_inlinable_uniforms;
      else
         key->inline_uniforms = false;
   }
   if (!has_nonseamless && key->base.nonseamless_cube_mask)
      *nonseamless_size = sizeof(uint32_t);
}

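/* Compile a new shader variant for one stage and add it to the program's
 * shader cache. The key (plus appended nonseamless mask, inlined uniforms,
 * and zs-swizzle data) is stored inline after the module struct.
 */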
ALWAYS_INLINE static struct zink_shader_module *
create_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *screen,
                               struct zink_shader *zs, struct zink_gfx_program *prog,
                               gl_shader_stage stage,
                               struct zink_gfx_pipeline_state *state,
                               unsigned inline_size, unsigned nonseamless_size,
                               bool has_inline, //is inlining enabled?
                               bool has_nonseamless) //is nonseamless ext present?
{
   struct zink_shader_module *zm;
   const struct zink_shader_key *key = &state->shader_keys.key[stage];
   /* non-generated tcs won't use the shader key */
   const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
   const bool shadow_needs_shader_swizzle = key->base.needs_zs_shader_swizzle ||
                                            (stage == MESA_SHADER_FRAGMENT && key->key.fs.base.shadow_needs_shader_swizzle);
   zm = malloc(sizeof(struct zink_shader_module) + key->size +
               (!has_nonseamless ? nonseamless_size : 0) + inline_size * sizeof(uint32_t) +
               (shadow_needs_shader_swizzle ? sizeof(struct zink_zs_swizzle_key) : 0));
   if (!zm) {
      return NULL;
   }
   unsigned patch_vertices = state->shader_keys.key[MESA_SHADER_TESS_CTRL].key.tcs.patch_vertices;
   if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated && zs->spirv) {
      assert(ctx); //TODO async
      zm->obj = zink_shader_tcs_compile(screen, zs, patch_vertices, prog->base.uses_shobj, &prog->base);
   } else {
      zm->obj = zink_shader_compile(screen, prog->base.uses_shobj, zs, zink_shader_blob_deserialize(screen, &prog->blobs[stage]), key, &ctx->di.zs_swizzle[stage], &prog->base);
   }
   if (!zm->obj.mod) {
      FREE(zm);
      return NULL;
   }
   zm->shobj = prog->base.uses_shobj;
   zm->num_uniforms = inline_size;
   if (!is_nongenerated_tcs) {
      zm->key_size = key->size;
      memcpy(zm->key, key, key->size);
   } else {
      zm->key_size = 0;
      memset(zm->key, 0, key->size);
   }
   if (!has_nonseamless && nonseamless_size) {
      /* nonseamless mask gets added to base key if it exists */
      memcpy(zm->key + key->size, &key->base.nonseamless_cube_mask, nonseamless_size);
   }
   zm->needs_zs_shader_swizzle = shadow_needs_shader_swizzle;
   zm->has_nonseamless = has_nonseamless ? 0 : !!nonseamless_size;
   if (inline_size)
      memcpy(zm->key + key->size + nonseamless_size, key->base.inlined_uniform_values, inline_size * sizeof(uint32_t));
   if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated)
      zm->hash = patch_vertices;
   else
      zm->hash = shader_module_hash(zm);
   if (unlikely(shadow_needs_shader_swizzle)) {
      memcpy(zm->key + key->size + nonseamless_size + inline_size * sizeof(uint32_t), &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key));
      zm->hash ^= _mesa_hash_data(&ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key));
   }
   zm->default_variant = !shadow_needs_shader_swizzle && !inline_size && !util_dynarray_contains(&prog->shader_cache[stage][0][0], void*);
   if (inline_size)
      prog->inlined_variant_count[stage]++;
   util_dynarray_append(&prog->shader_cache[stage][has_nonseamless ? 0 : !!nonseamless_size][!!inline_size], void*, zm);
   return zm;
}

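/* Look up an existing variant in the per-stage shader cache. On a hit, the
 * module is swapped to the front of the array (MRU ordering) before return.
 */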
ALWAYS_INLINE static struct zink_shader_module *
get_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *screen,
                            struct zink_shader *zs, struct zink_gfx_program *prog,
                            gl_shader_stage stage,
                            struct zink_gfx_pipeline_state *state,
                            unsigned inline_size, unsigned nonseamless_size,
                            bool has_inline, //is inlining enabled?
                            bool has_nonseamless) //is nonseamless ext present?
{
   const struct zink_shader_key *key = &state->shader_keys.key[stage];
   /* non-generated tcs won't use the shader key */
   const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
   const bool shadow_needs_shader_swizzle = unlikely(key->base.needs_zs_shader_swizzle) ||
                                            (stage == MESA_SHADER_FRAGMENT && unlikely(key->key.fs.base.shadow_needs_shader_swizzle));

   struct util_dynarray *shader_cache = &prog->shader_cache[stage][!has_nonseamless ? !!nonseamless_size : 0][has_inline ? !!inline_size : 0];
   unsigned count = util_dynarray_num_elements(shader_cache, struct zink_shader_module *);
   struct zink_shader_module **pzm = shader_cache->data;
   for (unsigned i = 0; i < count; i++) {
      struct zink_shader_module *iter = pzm[i];
      if (is_nongenerated_tcs) {
         if (!shader_key_matches_tcs_nongenerated(iter, key, has_inline ? !!inline_size : 0))
            continue;
      } else {
         if (stage == MESA_SHADER_VERTEX && iter->key_size != key->size)
            continue;
         if (!shader_key_matches(iter, key, inline_size, has_inline, has_nonseamless))
            continue;
         if (unlikely(shadow_needs_shader_swizzle)) {
            /* shadow swizzle data needs a manual compare since it's so fat */
            if (memcmp(iter->key + iter->key_size + nonseamless_size + iter->num_uniforms * sizeof(uint32_t),
                       &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key)))
               continue;
         }
      }
      if (i > 0) {
         struct zink_shader_module *zero = pzm[0];
         pzm[0] = iter;
         pzm[i] = zero;
      }
      return iter;
   }

   return NULL;
}

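/* "Optimal key" variant creation: keys are packed into at most 16 bits per
 * stage, so only that uint16_t (plus optional zs-swizzle data) is stored.
 */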
ALWAYS_INLINE static struct zink_shader_module *
create_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_screen *screen,
                                       struct zink_shader *zs, struct zink_gfx_program *prog,
                                       gl_shader_stage stage,
                                       struct zink_gfx_pipeline_state *state)
{
   struct zink_shader_module *zm;
   uint16_t *key;
   unsigned mask = stage == MESA_SHADER_FRAGMENT ? BITFIELD_MASK(16) : BITFIELD_MASK(8);
   bool shadow_needs_shader_swizzle = false;
   if (zs == prog->last_vertex_stage) {
      key = (uint16_t*)&state->shader_keys_optimal.key.vs_base;
   } else if (stage == MESA_SHADER_FRAGMENT) {
      key = (uint16_t*)&state->shader_keys_optimal.key.fs;
      shadow_needs_shader_swizzle = ctx ? ctx->gfx_pipeline_state.shader_keys_optimal.key.fs.shadow_needs_shader_swizzle : false;
   } else if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated) {
      key = (uint16_t*)&state->shader_keys_optimal.key.tcs;
   } else {
      key = NULL;
   }
   size_t key_size = sizeof(uint16_t);
   zm = calloc(1, sizeof(struct zink_shader_module) + (key ? key_size : 0) + (unlikely(shadow_needs_shader_swizzle) ? sizeof(struct zink_zs_swizzle_key) : 0));
   if (!zm) {
      return NULL;
   }
   if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated && zs->spirv) {
      assert(ctx || screen->info.dynamic_state2_feats.extendedDynamicState2PatchControlPoints);
      unsigned patch_vertices = 3;
      if (ctx) {
         struct zink_tcs_key *tcs = (struct zink_tcs_key*)key;
         patch_vertices = tcs->patch_vertices;
      }
      zm->obj = zink_shader_tcs_compile(screen, zs, patch_vertices, prog->base.uses_shobj, &prog->base);
   } else {
      zm->obj = zink_shader_compile(screen, prog->base.uses_shobj, zs, zink_shader_blob_deserialize(screen, &prog->blobs[stage]),
                                    (struct zink_shader_key*)key, shadow_needs_shader_swizzle ? &ctx->di.zs_swizzle[stage] : NULL, &prog->base);
   }
   if (!zm->obj.mod) {
      FREE(zm);
      return NULL;
   }
   zm->shobj = prog->base.uses_shobj;
   /* non-generated tcs won't use the shader key */
   const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
   if (key && !is_nongenerated_tcs) {
      zm->key_size = key_size;
      uint16_t *data = (uint16_t*)zm->key;
      /* sanitize actual key bits */
      *data = (*key) & mask;
      if (unlikely(shadow_needs_shader_swizzle))
         memcpy(&data[1], &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key));
   }
   zm->default_variant = !util_dynarray_contains(&prog->shader_cache[stage][0][0], void*);
   util_dynarray_append(&prog->shader_cache[stage][0][0], void*, zm);
   return zm;
}

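/* Optimal-key cache lookup; like get_shader_module_for_stage() but the key
 * compare is a single masked uint16_t.
 */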
ALWAYS_INLINE static struct zink_shader_module *
get_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_screen *screen,
                                    struct zink_shader *zs, struct zink_gfx_program *prog,
                                    gl_shader_stage stage,
                                    struct zink_gfx_pipeline_state *state)
{
   /* non-generated tcs won't use the shader key */
   const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
   bool shadow_needs_shader_swizzle = false;
   uint16_t *key;
   unsigned mask = stage == MESA_SHADER_FRAGMENT ? BITFIELD_MASK(16) : BITFIELD_MASK(8);
   if (zs == prog->last_vertex_stage) {
      key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.vs_base;
   } else if (stage == MESA_SHADER_FRAGMENT) {
      key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.fs;
      shadow_needs_shader_swizzle = ctx->gfx_pipeline_state.shader_keys_optimal.key.fs.shadow_needs_shader_swizzle;
   } else if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated) {
      key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.tcs;
   } else {
      key = NULL;
   }
   struct util_dynarray *shader_cache = &prog->shader_cache[stage][0][0];
   unsigned count = util_dynarray_num_elements(shader_cache, struct zink_shader_module *);
   struct zink_shader_module **pzm = shader_cache->data;
   for (unsigned i = 0; i < count; i++) {
      struct zink_shader_module *iter = pzm[i];
      if (is_nongenerated_tcs) {
         /* always match */
      } else if (key) {
         uint16_t val = (*key) & mask;
         /* no key is bigger than uint16_t */
         if (memcmp(iter->key, &val, sizeof(uint16_t)))
            continue;
         if (unlikely(shadow_needs_shader_swizzle)) {
            /* shadow swizzle data needs a manual compare since it's so fat */
            if (memcmp(iter->key + sizeof(uint16_t), &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key)))
               continue;
         }
      }
      if (i > 0) {
         struct zink_shader_module *zero = pzm[0];
         pzm[0] = iter;
         pzm[i] = zero;
      }
      return iter;
   }

   return NULL;
}

static void
zink_destroy_shader_module(struct zink_screen *screen, struct zink_shader_module *zm)
{
   if (zm->shobj)
      VKSCR(DestroyShaderEXT)(screen->dev, zm->obj.obj, NULL);
   else
      VKSCR(DestroyShaderModule)(screen->dev, zm->obj.mod, NULL);
   ralloc_free(zm->obj.spirv);
   free(zm);
}

static void
destroy_shader_cache(struct zink_screen *screen, struct util_dynarray *sc)
{
   while (util_dynarray_contains(sc, void*)) {
      struct zink_shader_module *zm = util_dynarray_pop(sc, struct zink_shader_module*);
      zink_destroy_shader_module(screen, zm);
   }
}

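/* Recheck every dirty stage against the current shader keys, compiling new
 * variants as needed, and fold the resulting module hashes into the
 * program's variant hash.
 */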
ALWAYS_INLINE static void
update_gfx_shader_modules(struct zink_context *ctx,
                          struct zink_screen *screen,
                          struct zink_gfx_program *prog, uint32_t mask,
                          struct zink_gfx_pipeline_state *state,
                          bool has_inline, //is inlining enabled?
                          bool has_nonseamless) //is nonseamless ext present?
{
   bool hash_changed = false;
   bool default_variants = true;
   assert(prog->objs[MESA_SHADER_VERTEX].mod);
   uint32_t variant_hash = prog->last_variant_hash;
   prog->has_edgeflags = prog->shaders[MESA_SHADER_VERTEX]->has_edgeflags;
   for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
      if (!(mask & BITFIELD_BIT(i)))
         continue;

      assert(prog->shaders[i]);

      unsigned inline_size = 0, nonseamless_size = 0;
      gather_shader_module_info(ctx, screen, prog->shaders[i], prog, state, has_inline, has_nonseamless, &inline_size, &nonseamless_size);
      struct zink_shader_module *zm = get_shader_module_for_stage(ctx, screen, prog->shaders[i], prog, i, state,
                                                                  inline_size, nonseamless_size, has_inline, has_nonseamless);
      if (!zm)
         zm = create_shader_module_for_stage(ctx, screen, prog->shaders[i], prog, i, state,
                                             inline_size, nonseamless_size, has_inline, has_nonseamless);
      state->modules[i] = zm->obj.mod;
      if (prog->objs[i].mod == zm->obj.mod)
         continue;
      prog->optimal_keys &= !prog->shaders[i]->non_fs.is_generated;
      variant_hash ^= prog->module_hash[i];
      hash_changed = true;
      default_variants &= zm->default_variant;
      prog->objs[i] = zm->obj;
      prog->objects[i] = zm->obj.obj;
      prog->module_hash[i] = zm->hash;
      if (has_inline) {
         if (zm->num_uniforms)
            prog->inline_variants |= BITFIELD_BIT(i);
         else
            prog->inline_variants &= ~BITFIELD_BIT(i);
      }
      variant_hash ^= prog->module_hash[i];
   }

   if (hash_changed && state) {
      if (default_variants)
         prog->last_variant_hash = prog->default_variant_hash;
      else
         prog->last_variant_hash = variant_hash;

      state->modules_changed = true;
   }
}

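/* First-time module generation for a freshly created program: every present
 * stage gets a variant compiled for the current pipeline state.
 */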
static void
generate_gfx_program_modules(struct zink_context *ctx, struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state)
{
   assert(!prog->objs[MESA_SHADER_VERTEX].mod);
   uint32_t variant_hash = 0;
   bool default_variants = true;
   for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
      if (!(prog->stages_present & BITFIELD_BIT(i)))
         continue;

      assert(prog->shaders[i]);

      unsigned inline_size = 0, nonseamless_size = 0;
      gather_shader_module_info(ctx, screen, prog->shaders[i], prog, state,
                                screen->driconf.inline_uniforms, screen->info.have_EXT_non_seamless_cube_map,
                                &inline_size, &nonseamless_size);
      struct zink_shader_module *zm = create_shader_module_for_stage(ctx, screen, prog->shaders[i], prog, i, state,
                                                                     inline_size, nonseamless_size,
                                                                     screen->driconf.inline_uniforms, screen->info.have_EXT_non_seamless_cube_map);
      state->modules[i] = zm->obj.mod;
      prog->objs[i] = zm->obj;
      prog->objects[i] = zm->obj.obj;
      prog->module_hash[i] = zm->hash;
      if (zm->num_uniforms)
         prog->inline_variants |= BITFIELD_BIT(i);
      default_variants &= zm->default_variant;
      variant_hash ^= prog->module_hash[i];
   }

   state->modules_changed = true;

   prog->last_variant_hash = variant_hash;
   if (default_variants)
      prog->default_variant_hash = prog->last_variant_hash;
}

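/* Optimal-key counterpart of the above; the variant hash is simply the
 * packed optimal key.
 */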
static void
generate_gfx_program_modules_optimal(struct zink_context *ctx, struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state)
{
   assert(!prog->objs[MESA_SHADER_VERTEX].mod);
   for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
      if (!(prog->stages_present & BITFIELD_BIT(i)))
         continue;

      assert(prog->shaders[i]);

      struct zink_shader_module *zm = create_shader_module_for_stage_optimal(ctx, screen, prog->shaders[i], prog, i, state);
      prog->objs[i] = zm->obj;
      prog->objects[i] = zm->obj.obj;
   }

   state->modules_changed = true;
   prog->last_variant_hash = state->optimal_key;
}

static uint32_t
hash_pipeline_lib_generated_tcs(const void *key)
{
   const struct zink_gfx_library_key *gkey = key;
   return gkey->optimal_key;
}


static bool
equals_pipeline_lib_generated_tcs(const void *a, const void *b)
{
   return !memcmp(a, b, sizeof(uint32_t));
}

static uint32_t
hash_pipeline_lib(const void *key)
{
   const struct zink_gfx_library_key *gkey = key;
   /* remove generated tcs bits */
   return zink_shader_key_optimal_no_tcs(gkey->optimal_key);
}

static bool
equals_pipeline_lib(const void *a, const void *b)
{
   const struct zink_gfx_library_key *ak = a;
   const struct zink_gfx_library_key *bk = b;
   /* remove generated tcs bits */
   uint32_t val_a = zink_shader_key_optimal_no_tcs(ak->optimal_key);
   uint32_t val_b = zink_shader_key_optimal_no_tcs(bk->optimal_key);
   return val_a == val_b;
}

uint32_t
hash_gfx_input_dynamic(const void *key)
{
   const struct zink_gfx_input_key *ikey = key;
   return ikey->idx;
}

static bool
equals_gfx_input_dynamic(const void *a, const void *b)
{
   const struct zink_gfx_input_key *ikey_a = a;
   const struct zink_gfx_input_key *ikey_b = b;
   return ikey_a->idx == ikey_b->idx;
}

uint32_t
hash_gfx_input(const void *key)
{
   const struct zink_gfx_input_key *ikey = key;
   if (ikey->uses_dynamic_stride)
      return ikey->input;
   return _mesa_hash_data(key, offsetof(struct zink_gfx_input_key, pipeline));
}

static bool
equals_gfx_input(const void *a, const void *b)
{
   const struct zink_gfx_input_key *ikey_a = a;
   const struct zink_gfx_input_key *ikey_b = b;
   if (ikey_a->uses_dynamic_stride)
      return ikey_a->element_state == ikey_b->element_state &&
             !memcmp(a, b, offsetof(struct zink_gfx_input_key, vertex_buffers_enabled_mask));
   return !memcmp(a, b, offsetof(struct zink_gfx_input_key, pipeline));
}

uint32_t
hash_gfx_output_ds3(const void *key)
{
   const uint8_t *data = key;
   return _mesa_hash_data(data, sizeof(uint32_t));
}

static bool
equals_gfx_output_ds3(const void *a, const void *b)
{
   const uint8_t *da = a;
   const uint8_t *db = b;
   return !memcmp(da, db, sizeof(uint32_t));
}

uint32_t
hash_gfx_output(const void *key)
{
   const uint8_t *data = key;
   return _mesa_hash_data(data, offsetof(struct zink_gfx_output_key, pipeline));
}

static bool
equals_gfx_output(const void *a, const void *b)
{
   const uint8_t *da = a;
   const uint8_t *db = b;
   return !memcmp(da, db, offsetof(struct zink_gfx_output_key, pipeline));
}

ALWAYS_INLINE static void
update_gfx_program_nonseamless(struct zink_context *ctx, struct zink_gfx_program *prog, bool has_nonseamless)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   if (screen->driconf.inline_uniforms || prog->needs_inlining)
      update_gfx_shader_modules(ctx, screen, prog,
                                ctx->dirty_gfx_stages & prog->stages_present, &ctx->gfx_pipeline_state,
                                true, has_nonseamless);
   else
      update_gfx_shader_modules(ctx, screen, prog,
                                ctx->dirty_gfx_stages & prog->stages_present, &ctx->gfx_pipeline_state,
                                false, has_nonseamless);
}

static void
update_gfx_program(struct zink_context *ctx, struct zink_gfx_program *prog)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   if (screen->info.have_EXT_non_seamless_cube_map)
      update_gfx_program_nonseamless(ctx, prog, true);
   else
      update_gfx_program_nonseamless(ctx, prog, false);
}

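/* Non-optimal-keys path: find or create the program for the bound shader
 * stages and refresh its modules, keeping final_hash in sync with the
 * program's variant hash.
 */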
void
zink_gfx_program_update(struct zink_context *ctx)
{
   if (ctx->last_vertex_stage_dirty) {
      gl_shader_stage pstage = ctx->last_vertex_stage->info.stage;
      ctx->dirty_gfx_stages |= BITFIELD_BIT(pstage);
      memcpy(&ctx->gfx_pipeline_state.shader_keys.key[pstage].key.vs_base,
             &ctx->gfx_pipeline_state.shader_keys.last_vertex.key.vs_base,
             sizeof(struct zink_vs_key_base));
      ctx->last_vertex_stage_dirty = false;
   }
   if (ctx->gfx_dirty) {
      struct zink_gfx_program *prog = NULL;

      simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
      struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(ctx->shader_stages)];
      const uint32_t hash = ctx->gfx_hash;
      struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages);
      /* this must be done before prog is updated */
      if (ctx->curr_program)
         ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
      if (entry) {
         prog = (struct zink_gfx_program*)entry->data;
         for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
            if (prog->stages_present & ~ctx->dirty_gfx_stages & BITFIELD_BIT(i))
               ctx->gfx_pipeline_state.modules[i] = prog->objs[i].mod;
         }
         /* ensure variants are always updated if keys have changed since last use */
         ctx->dirty_gfx_stages |= prog->stages_present;
         update_gfx_program(ctx, prog);
      } else {
         ctx->dirty_gfx_stages |= ctx->shader_stages;
         prog = zink_create_gfx_program(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch, hash);
         zink_screen_get_pipeline_cache(zink_screen(ctx->base.screen), &prog->base, false);
         _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog);
         prog->base.removed = false;
         generate_gfx_program_modules(ctx, zink_screen(ctx->base.screen), prog, &ctx->gfx_pipeline_state);
      }
      simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
      if (prog && prog != ctx->curr_program)
         zink_batch_reference_program(ctx, &prog->base);
      ctx->curr_program = prog;
      ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
      ctx->gfx_dirty = false;
   } else if (ctx->dirty_gfx_stages) {
      /* remove old hash */
      ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
      update_gfx_program(ctx, ctx->curr_program);
      /* apply new hash */
      ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
   }
   ctx->dirty_gfx_stages = 0;
}

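/* Swap in the module matching the current optimal key for one stage,
 * returning whether the bound module actually changed.
 */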
ALWAYS_INLINE static bool
update_gfx_shader_module_optimal(struct zink_context *ctx, struct zink_gfx_program *prog, gl_shader_stage pstage)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   if (screen->info.have_EXT_graphics_pipeline_library)
      util_queue_fence_wait(&prog->base.cache_fence);
   struct zink_shader_module *zm = get_shader_module_for_stage_optimal(ctx, screen, prog->shaders[pstage], prog, pstage, &ctx->gfx_pipeline_state);
   if (!zm) {
      zm = create_shader_module_for_stage_optimal(ctx, screen, prog->shaders[pstage], prog, pstage, &ctx->gfx_pipeline_state);
      perf_debug(ctx, "zink[gfx_compile]: %s shader variant required\n", _mesa_shader_stage_to_string(pstage));
   }

   bool changed = prog->objs[pstage].mod != zm->obj.mod;
   prog->objs[pstage] = zm->obj;
   prog->objects[pstage] = zm->obj.obj;
   return changed;
}

static void
update_gfx_program_optimal(struct zink_context *ctx, struct zink_gfx_program *prog)
{
   const union zink_shader_key_optimal *key = (union zink_shader_key_optimal*)&ctx->gfx_pipeline_state.optimal_key;
   const union zink_shader_key_optimal *last_prog_key = (union zink_shader_key_optimal*)&prog->last_variant_hash;
   if (key->vs_bits != last_prog_key->vs_bits) {
      assert(!prog->is_separable);
      bool changed = update_gfx_shader_module_optimal(ctx, prog, ctx->last_vertex_stage->info.stage);
      ctx->gfx_pipeline_state.modules_changed |= changed;
   }
   const bool shadow_needs_shader_swizzle = last_prog_key->fs.shadow_needs_shader_swizzle && (ctx->dirty_gfx_stages & BITFIELD_BIT(MESA_SHADER_FRAGMENT));
   if (key->fs_bits != last_prog_key->fs_bits ||
       /* always recheck shadow swizzles since they aren't directly part of the key */
       unlikely(shadow_needs_shader_swizzle)) {
      assert(!prog->is_separable);
      bool changed = update_gfx_shader_module_optimal(ctx, prog, MESA_SHADER_FRAGMENT);
      ctx->gfx_pipeline_state.modules_changed |= changed;
      if (unlikely(shadow_needs_shader_swizzle)) {
         struct zink_shader_module **pzm = prog->shader_cache[MESA_SHADER_FRAGMENT][0][0].data;
         ctx->gfx_pipeline_state.shadow = (struct zink_zs_swizzle_key*)pzm[0]->key + sizeof(uint16_t);
      }
   }
   if (prog->shaders[MESA_SHADER_TESS_CTRL] && prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated &&
       key->tcs_bits != last_prog_key->tcs_bits) {
      assert(!prog->is_separable);
      bool changed = update_gfx_shader_module_optimal(ctx, prog, MESA_SHADER_TESS_CTRL);
      ctx->gfx_pipeline_state.modules_changed |= changed;
   }
   prog->last_variant_hash = ctx->gfx_pipeline_state.optimal_key;
}

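/* Swap a separable program's hash table entry over to its optimized linked
 * version, dropping the separable program's ownership of it.
 */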
static struct zink_gfx_program *
replace_separable_prog(struct zink_context *ctx, struct hash_entry *entry, struct zink_gfx_program *prog)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   struct zink_gfx_program *real = prog->full_prog ?
                                   prog->full_prog :
                                   /* this will be NULL with ZINK_DEBUG_NOOPT */
                                   zink_create_gfx_program(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch, ctx->gfx_hash);
   entry->data = real;
   entry->key = real->shaders;
   real->base.removed = false;
   zink_gfx_program_reference(screen, &prog->full_prog, NULL);
   prog->base.removed = true;
   return real;
}

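/* Optimal-keys path of zink_gfx_program_update(): also handles promoting
 * fast-linked separable programs to fully linked ones once those finish
 * compiling (or immediately, if a non-default variant is required).
 */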
void
zink_gfx_program_update_optimal(struct zink_context *ctx)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   if (ctx->gfx_dirty) {
      struct zink_gfx_program *prog = NULL;
      ctx->gfx_pipeline_state.optimal_key = zink_sanitize_optimal_key(ctx->gfx_stages, ctx->gfx_pipeline_state.shader_keys_optimal.key.val);
      struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(ctx->shader_stages)];
      const uint32_t hash = ctx->gfx_hash;
      simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
      struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages);

      if (ctx->curr_program)
         ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
      if (entry) {
         prog = (struct zink_gfx_program*)entry->data;
         bool must_replace = prog->base.uses_shobj ? !zink_can_use_shader_objects(ctx) : (prog->is_separable && !zink_can_use_pipeline_libs(ctx));
         if (prog->is_separable) {
            /* shader variants can't be handled by separable programs: sync and compile */
            if (!ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key) || must_replace)
               util_queue_fence_wait(&prog->base.cache_fence);
            /* If the optimized linked pipeline is done compiling, swap it into place. */
            if (util_queue_fence_is_signalled(&prog->base.cache_fence) &&
                /* but only if needed for ZINK_DEBUG=noopt */
                (!(zink_debug & ZINK_DEBUG_NOOPT) || !ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key) || must_replace)) {
               prog = replace_separable_prog(ctx, entry, prog);
            }
         }
         update_gfx_program_optimal(ctx, prog);
      } else {
         ctx->dirty_gfx_stages |= ctx->shader_stages;
         prog = create_gfx_program_separable(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch);
         prog->base.removed = false;
         _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog);
         if (!prog->is_separable) {
            zink_screen_get_pipeline_cache(screen, &prog->base, false);
            perf_debug(ctx, "zink[gfx_compile]: new program created (probably legacy GL features in use)\n");
            generate_gfx_program_modules_optimal(ctx, screen, prog, &ctx->gfx_pipeline_state);
         }
      }
      simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
      if (prog && prog != ctx->curr_program)
         zink_batch_reference_program(ctx, &prog->base);
      ctx->curr_program = prog;
      ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
   } else if (ctx->dirty_gfx_stages) {
      /* remove old hash */
      ctx->gfx_pipeline_state.optimal_key = zink_sanitize_optimal_key(ctx->gfx_stages, ctx->gfx_pipeline_state.shader_keys_optimal.key.val);
      ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;

      bool must_replace = ctx->curr_program->base.uses_shobj ? !zink_can_use_shader_objects(ctx) : (ctx->curr_program->is_separable && !zink_can_use_pipeline_libs(ctx));
      if (must_replace || (ctx->curr_program->is_separable && !ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key))) {
         struct zink_gfx_program *prog = ctx->curr_program;

         util_queue_fence_wait(&prog->base.cache_fence);
         /* shader variants can't be handled by separable programs: sync and compile */
         perf_debug(ctx, "zink[gfx_compile]: non-default shader variant required with separate shader object program\n");
         struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(ctx->shader_stages)];
         const uint32_t hash = ctx->gfx_hash;
         simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
         struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages);
         ctx->curr_program = replace_separable_prog(ctx, entry, prog);
         simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
      }
      update_gfx_program_optimal(ctx, ctx->curr_program);
      /* apply new hash */
      ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
   }
   ctx->dirty_gfx_stages = 0;
   ctx->gfx_dirty = false;
   ctx->last_vertex_stage_dirty = false;
}

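/* Background job: compile the optimized pipeline for a cache entry and stash
 * the fast-linked one as unoptimized_pipeline.
 */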
static void
optimized_compile_job(void *data, void *gdata, int thread_index)
{
   struct zink_gfx_pipeline_cache_entry *pc_entry = data;
   struct zink_screen *screen = gdata;
   VkPipeline pipeline;
   if (pc_entry->gpl.gkey)
      pipeline = zink_create_gfx_pipeline_combined(screen, pc_entry->prog, pc_entry->gpl.ikey->pipeline, &pc_entry->gpl.gkey->pipeline, 1, pc_entry->gpl.okey->pipeline, true, false);
   else
      pipeline = zink_create_gfx_pipeline(screen, pc_entry->prog, pc_entry->prog->objs, &pc_entry->state, pc_entry->state.element_state->binding_map, zink_primitive_topology(pc_entry->state.gfx_prim_mode), true);
   if (pipeline) {
      pc_entry->gpl.unoptimized_pipeline = pc_entry->pipeline;
      pc_entry->pipeline = pipeline;
   }
}

static void
optimized_shobj_compile_job(void *data, void *gdata, int thread_index)
{
   struct zink_gfx_pipeline_cache_entry *pc_entry = data;
   struct zink_screen *screen = gdata;

   struct zink_shader_object objs[ZINK_GFX_SHADER_COUNT];
   for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
      objs[i].mod = VK_NULL_HANDLE;
      objs[i].spirv = pc_entry->shobjs[i].spirv;
   }
   pc_entry->pipeline = zink_create_gfx_pipeline(screen, pc_entry->prog, objs, &pc_entry->state, NULL, zink_primitive_topology(pc_entry->state.gfx_prim_mode), true);
   /* no unoptimized_pipeline dance */
}

void
zink_gfx_program_compile_queue(struct zink_context *ctx, struct zink_gfx_pipeline_cache_entry *pc_entry)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   if (screen->driver_workarounds.disable_optimized_compile)
      return;
   if (zink_debug & ZINK_DEBUG_NOBGC) {
      if (pc_entry->prog->base.uses_shobj)
         optimized_shobj_compile_job(pc_entry, screen, 0);
      else
         optimized_compile_job(pc_entry, screen, 0);
   } else {
      util_queue_add_job(&screen->cache_get_thread, pc_entry, &pc_entry->fence,
                         pc_entry->prog->base.uses_shobj ? optimized_shobj_compile_job : optimized_compile_job, NULL, 0);
   }
}

void
zink_program_finish(struct zink_context *ctx, struct zink_program *pg)
{
   util_queue_fence_wait(&pg->cache_fence);
   if (pg->is_compute)
      return;
   struct zink_gfx_program *prog = (struct zink_gfx_program*)pg;
   for (int r = 0; r < ARRAY_SIZE(prog->pipelines); ++r) {
      for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) {
         hash_table_foreach(&prog->pipelines[r][i], entry) {
            struct zink_gfx_pipeline_cache_entry *pc_entry = entry->data;
            util_queue_fence_wait(&pc_entry->fence);
         }
      }
   }
}

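/* Compute counterpart of the gfx module update: select or compile the
 * compute shader variant for the current compute pipeline key.
 */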
static void
update_cs_shader_module(struct zink_context *ctx, struct zink_compute_program *comp)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   struct zink_shader *zs = comp->shader;
   struct zink_shader_module *zm = NULL;
   unsigned inline_size = 0, nonseamless_size = 0, zs_swizzle_size = 0;
   struct zink_shader_key *key = &ctx->compute_pipeline_state.key;
   ASSERTED bool check_robustness = screen->driver_compiler_workarounds.lower_robustImageAccess2 && (ctx->flags & PIPE_CONTEXT_ROBUST_BUFFER_ACCESS);
   assert(zink_cs_key(key)->robust_access == check_robustness);

   if (ctx && zs->info.num_inlinable_uniforms &&
       ctx->inlinable_uniforms_valid_mask & BITFIELD64_BIT(MESA_SHADER_COMPUTE)) {
      if (screen->is_cpu || comp->inlined_variant_count < ZINK_MAX_INLINED_VARIANTS)
         inline_size = zs->info.num_inlinable_uniforms;
      else
         key->inline_uniforms = false;
   }
   if (key->base.nonseamless_cube_mask)
      nonseamless_size = sizeof(uint32_t);
   if (key->base.needs_zs_shader_swizzle)
      zs_swizzle_size = sizeof(struct zink_zs_swizzle_key);

   if (inline_size || nonseamless_size || zink_cs_key(key)->robust_access || zs_swizzle_size) {
      struct util_dynarray *shader_cache = &comp->shader_cache[!!nonseamless_size];
      unsigned count = util_dynarray_num_elements(shader_cache, struct zink_shader_module *);
      struct zink_shader_module **pzm = shader_cache->data;
      for (unsigned i = 0; i < count; i++) {
         struct zink_shader_module *iter = pzm[i];
         if (!shader_key_matches(iter, key, inline_size,
                                 screen->driconf.inline_uniforms,
                                 screen->info.have_EXT_non_seamless_cube_map))
            continue;
         if (unlikely(zs_swizzle_size)) {
            /* zs swizzle data needs a manual compare since it's so fat */
            if (memcmp(iter->key + iter->key_size + nonseamless_size + inline_size * sizeof(uint32_t),
                       &ctx->di.zs_swizzle[MESA_SHADER_COMPUTE], zs_swizzle_size))
               continue;
         }
         if (i > 0) {
            struct zink_shader_module *zero = pzm[0];
            pzm[0] = iter;
            pzm[i] = zero;
         }
         zm = iter;
      }
   } else {
      zm = comp->module;
   }

   if (!zm) {
      zm = malloc(sizeof(struct zink_shader_module) + nonseamless_size + inline_size * sizeof(uint32_t) + zs_swizzle_size);
      if (!zm) {
         return;
      }
      zm->shobj = false;
      zm->obj = zink_shader_compile(screen, false, zs, zink_shader_blob_deserialize(screen, &comp->shader->blob), key, zs_swizzle_size ? &ctx->di.zs_swizzle[MESA_SHADER_COMPUTE] : NULL, &comp->base);
      if (!zm->obj.spirv) {
         FREE(zm);
         return;
      }
      zm->num_uniforms = inline_size;
      zm->key_size = key->size;
      memcpy(zm->key, key, key->size);
      zm->has_nonseamless = !!nonseamless_size;
      zm->needs_zs_shader_swizzle = !!zs_swizzle_size;
      assert(nonseamless_size || inline_size || zink_cs_key(key)->robust_access || zs_swizzle_size);
      if (nonseamless_size)
         memcpy(zm->key + zm->key_size, &key->base.nonseamless_cube_mask, nonseamless_size);
      if (inline_size)
         memcpy(zm->key + zm->key_size + nonseamless_size, key->base.inlined_uniform_values, inline_size * sizeof(uint32_t));
      if (zs_swizzle_size)
         memcpy(zm->key + zm->key_size + nonseamless_size + inline_size * sizeof(uint32_t), &ctx->di.zs_swizzle[MESA_SHADER_COMPUTE], zs_swizzle_size);

      zm->hash = shader_module_hash(zm);
      zm->default_variant = false;
      if (inline_size)
         comp->inlined_variant_count++;

      /* this is otherwise the default variant, which is stored as comp->module */
      if (zm->num_uniforms || nonseamless_size || zink_cs_key(key)->robust_access || zs_swizzle_size)
         util_dynarray_append(&comp->shader_cache[!!nonseamless_size], void*, zm);
   }
   if (comp->curr == zm)
      return;
   ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash;
   comp->curr = zm;
   ctx->compute_pipeline_state.module_hash = zm->hash;
   ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash;
   ctx->compute_pipeline_state.module_changed = true;
}

void
zink_update_compute_program(struct zink_context *ctx)
{
   util_queue_fence_wait(&ctx->curr_compute->base.cache_fence);
   update_cs_shader_module(ctx, ctx->curr_compute);
}

VkPipelineLayout
zink_pipeline_layout_create(struct zink_screen *screen, VkDescriptorSetLayout *dsl, unsigned num_dsl, bool is_compute, VkPipelineLayoutCreateFlags flags)
{
   VkPipelineLayoutCreateInfo plci = {0};
   plci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
   plci.flags = flags;

   plci.pSetLayouts = dsl;
   plci.setLayoutCount = num_dsl;

   VkPushConstantRange pcr;
   if (!is_compute) {
      pcr.stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
      pcr.offset = 0;
      pcr.size = sizeof(struct zink_gfx_push_constant);
      plci.pushConstantRangeCount = 1;
      plci.pPushConstantRanges = &pcr;
   }

   VkPipelineLayout layout;
   VkResult result = VKSCR(CreatePipelineLayout)(screen->dev, &plci, NULL, &layout);
   if (result != VK_SUCCESS) {
      mesa_loge("vkCreatePipelineLayout failed (%s)", vk_Result_to_str(result));
      return VK_NULL_HANDLE;
   }

   return layout;
}

static void *
create_program(struct zink_context *ctx, bool is_compute)
{
   struct zink_program *pg = rzalloc_size(NULL, is_compute ? sizeof(struct zink_compute_program) : sizeof(struct zink_gfx_program));
   if (!pg)
      return NULL;

   pipe_reference_init(&pg->reference, 1);
   u_rwlock_init(&pg->pipeline_cache_lock);
   util_queue_fence_init(&pg->cache_fence);
   pg->is_compute = is_compute;
   pg->ctx = ctx;
   return (void*)pg;
}

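/* Link IO between each producer/consumer pair of adjacent active stages. */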
static void
assign_io(struct zink_screen *screen,
          nir_shader *shaders[ZINK_GFX_SHADER_COUNT])
{
   for (unsigned i = 0; i < MESA_SHADER_FRAGMENT;) {
      nir_shader *producer = shaders[i];
      for (unsigned j = i + 1; j < ZINK_GFX_SHADER_COUNT; i++, j++) {
         nir_shader *consumer = shaders[j];
         if (!consumer)
            continue;
         zink_compiler_assign_io(screen, producer, consumer);
         i = j;
         break;
      }
   }
}

void
zink_gfx_lib_cache_unref(struct zink_screen *screen, struct zink_gfx_lib_cache *libs)
{
   if (!p_atomic_dec_zero(&libs->refcount))
      return;

   simple_mtx_destroy(&libs->lock);
   set_foreach_remove(&libs->libs, he) {
      struct zink_gfx_library_key *gkey = (void*)he->key;
      VKSCR(DestroyPipeline)(screen->dev, gkey->pipeline, NULL);
      FREE(gkey);
   }
   ralloc_free(libs->libs.table);
   FREE(libs);
}

static struct zink_gfx_lib_cache *
create_lib_cache(struct zink_gfx_program *prog, bool generated_tcs)
{
   struct zink_gfx_lib_cache *libs = CALLOC_STRUCT(zink_gfx_lib_cache);
   libs->stages_present = prog->stages_present;
   if (generated_tcs)
      libs->stages_present &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
   simple_mtx_init(&libs->lock, mtx_plain);
   if (generated_tcs)
      _mesa_set_init(&libs->libs, NULL, hash_pipeline_lib_generated_tcs, equals_pipeline_lib_generated_tcs);
   else
      _mesa_set_init(&libs->libs, NULL, hash_pipeline_lib, equals_pipeline_lib);
   return libs;
}

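/* Pipeline libraries are shared across programs built from the same shaders;
 * a generated TCS is excluded from the lookup since it isn't part of the
 * user-visible shader set.
 */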
static struct zink_gfx_lib_cache *
find_or_create_lib_cache(struct zink_screen *screen, struct zink_gfx_program *prog)
{
   unsigned stages_present = prog->stages_present;
   bool generated_tcs = prog->shaders[MESA_SHADER_TESS_CTRL] && prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated;
   if (generated_tcs)
      stages_present &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
   unsigned idx = zink_program_cache_stages(stages_present);
   struct set *ht = &screen->pipeline_libs[idx];
   const uint32_t hash = prog->gfx_hash;

   simple_mtx_lock(&screen->pipeline_libs_lock[idx]);
   bool found = false;
   struct set_entry *entry = _mesa_set_search_or_add_pre_hashed(ht, hash, prog->shaders, &found);
   struct zink_gfx_lib_cache *libs;
   if (found) {
      libs = (void*)entry->key;
   } else {
      libs = create_lib_cache(prog, generated_tcs);
      memcpy(libs->shaders, prog->shaders, sizeof(prog->shaders));
      entry->key = libs;
      unsigned refs = 0;
      for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
         if (prog->shaders[i] && (!generated_tcs || i != MESA_SHADER_TESS_CTRL)) {
            simple_mtx_lock(&prog->shaders[i]->lock);
            util_dynarray_append(&prog->shaders[i]->pipeline_libs, struct zink_gfx_lib_cache*, libs);
            simple_mtx_unlock(&prog->shaders[i]->lock);
            refs++;
         }
      }
      p_atomic_set(&libs->refcount, refs);
   }
   simple_mtx_unlock(&screen->pipeline_libs_lock[idx]);
   return libs;
}

static struct zink_gfx_program *
gfx_program_create(struct zink_context *ctx,
                   struct zink_shader **stages,
                   unsigned vertices_per_patch,
                   uint32_t gfx_hash)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   struct zink_gfx_program *prog = create_program(ctx, false);
   if (!prog)
      goto fail;

   prog->gfx_hash = gfx_hash;
   prog->base.removed = true;
   prog->optimal_keys = screen->optimal_keys;

   prog->has_edgeflags = prog->shaders[MESA_SHADER_VERTEX] &&
                         prog->shaders[MESA_SHADER_VERTEX]->has_edgeflags;
   for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
      util_dynarray_init(&prog->shader_cache[i][0][0], prog);
      util_dynarray_init(&prog->shader_cache[i][0][1], prog);
      util_dynarray_init(&prog->shader_cache[i][1][0], prog);
      util_dynarray_init(&prog->shader_cache[i][1][1], prog);
      if (stages[i]) {
         prog->shaders[i] = stages[i];
         prog->stages_present |= BITFIELD_BIT(i);
         if (i != MESA_SHADER_FRAGMENT)
            prog->optimal_keys &= !prog->shaders[i]->non_fs.is_generated;
         prog->needs_inlining |= prog->shaders[i]->needs_inlining;
      }
   }
   if (stages[MESA_SHADER_TESS_EVAL] && !stages[MESA_SHADER_TESS_CTRL]) {
      util_queue_fence_wait(&stages[MESA_SHADER_TESS_EVAL]->precompile.fence);
      prog->shaders[MESA_SHADER_TESS_EVAL]->non_fs.generated_tcs =
      prog->shaders[MESA_SHADER_TESS_CTRL] =
         zink_shader_tcs_create(screen, vertices_per_patch);
      prog->stages_present |= BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
   }
   prog->stages_remaining = prog->stages_present;
   for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
      if (prog->shaders[i]) {
         simple_mtx_lock(&prog->shaders[i]->lock);
         _mesa_set_add(prog->shaders[i]->programs, prog);
         simple_mtx_unlock(&prog->shaders[i]->lock);
         zink_gfx_program_reference(screen, NULL, prog);
      }
   }
   p_atomic_dec(&prog->base.reference.count);

   if (stages[MESA_SHADER_GEOMETRY])
      prog->last_vertex_stage = stages[MESA_SHADER_GEOMETRY];
   else if (stages[MESA_SHADER_TESS_EVAL])
      prog->last_vertex_stage = stages[MESA_SHADER_TESS_EVAL];
   else
      prog->last_vertex_stage = stages[MESA_SHADER_VERTEX];

   for (int r = 0; r < ARRAY_SIZE(prog->pipelines); ++r) {
      for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) {
         _mesa_hash_table_init(&prog->pipelines[r][i], prog, NULL, zink_get_gfx_pipeline_eq_func(screen, prog));
         /* only need first 3/4 for point/line/tri/patch */
         if (screen->info.have_EXT_extended_dynamic_state &&
             i == (prog->last_vertex_stage->info.stage == MESA_SHADER_TESS_EVAL ? 4 : 3))
            break;
      }
   }
   return prog;

fail:
   if (prog)
      zink_destroy_gfx_program(screen, prog);
   return NULL;
}

/* NO THREAD-UNSAFE ctx USAGE! */
static struct zink_gfx_program *
gfx_program_init(struct zink_context *ctx, struct zink_gfx_program *prog)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   nir_shader *nir[ZINK_GFX_SHADER_COUNT];

   /* iterate in reverse order to create TES before generated TCS */
   for (int i = MESA_SHADER_FRAGMENT; i >= MESA_SHADER_VERTEX; i--) {
      if (prog->shaders[i]) {
         util_queue_fence_wait(&prog->shaders[i]->precompile.fence);
         /* this may have already been precompiled for separate shader */
         if (i == MESA_SHADER_TESS_CTRL && prog->shaders[i]->non_fs.is_generated && prog->shaders[MESA_SHADER_TESS_CTRL]->nir)
            zink_shader_tcs_init(screen, prog->shaders[MESA_SHADER_TESS_CTRL], nir[MESA_SHADER_TESS_EVAL], &nir[i]);
         else
            nir[i] = zink_shader_deserialize(screen, prog->shaders[i]);
      } else {
         nir[i] = NULL;
      }
   }
   assign_io(screen, nir);
   for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
      if (nir[i])
         zink_shader_serialize_blob(nir[i], &prog->blobs[i]);
      ralloc_free(nir[i]);
   }

   if (screen->optimal_keys)
      prog->libs = find_or_create_lib_cache(screen, prog);
   if (prog->libs)
      p_atomic_inc(&prog->libs->refcount);

   struct mesa_blake3 sctx;
   _mesa_blake3_init(&sctx);
   for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
      if (prog->shaders[i])
         _mesa_blake3_update(&sctx, prog->shaders[i]->base.sha1, sizeof(prog->shaders[i]->base.sha1));
   }
   _mesa_blake3_final(&sctx, prog->base.blake3);

   if (!zink_descriptor_program_init(ctx, &prog->base))
      goto fail;

   return prog;

fail:
   if (prog)
      zink_destroy_gfx_program(screen, prog);
   return NULL;
}

struct zink_gfx_program *
zink_create_gfx_program(struct zink_context *ctx,
                        struct zink_shader **stages,
                        unsigned vertices_per_patch,
                        uint32_t gfx_hash)
{
   struct zink_gfx_program *prog = gfx_program_create(ctx, stages, vertices_per_patch, gfx_hash);
   if (prog)
      prog = gfx_program_init(ctx, prog);
   return prog;
}

/* Creates a replacement, optimized zink_gfx_program for this set of separate shaders, which will
 * be swapped in in place of the fast-linked separable program once it's done compiling.
 */
static void
create_linked_separable_job(void *data, void *gdata, int thread_index)
{
   struct zink_gfx_program *prog = data;
   /* this is a dead program */
   if (prog->base.removed)
      return;
   prog->full_prog = gfx_program_create(prog->base.ctx, prog->shaders, 0, prog->gfx_hash);
   /* block gfx_shader_prune in the main thread */
   util_queue_fence_reset(&prog->full_prog->base.cache_fence);
   /* add an ownership ref */
   zink_gfx_program_reference(zink_screen(prog->base.ctx->base.screen), NULL, prog->full_prog);
   /* this is otherwise a dead program */
   if (prog->full_prog->stages_present == prog->full_prog->stages_remaining)
      gfx_program_precompile_job(prog->full_prog, gdata, thread_index);
   util_queue_fence_signal(&prog->full_prog->base.cache_fence);
}

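/* Create a fast-linked separable program from precompiled separate shaders,
 * falling back to a normal linked program whenever fast-linking can't be
 * used; the optimized replacement is produced asynchronously (see
 * create_linked_separable_job above).
 */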
1238 struct zink_gfx_program *
create_gfx_program_separable(struct zink_context * ctx,struct zink_shader ** stages,unsigned vertices_per_patch)1239 create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stages, unsigned vertices_per_patch)
1240 {
1241 struct zink_screen *screen = zink_screen(ctx->base.screen);
1242 bool is_separate = true;
1243 for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++)
1244 is_separate &= !stages[i] || stages[i]->info.separate_shader;
1245 /* filter cases that need real pipelines */
1246 if (!is_separate ||
1247 /* TODO: maybe try variants? grimace */
1248 !ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key) ||
1249 !zink_can_use_pipeline_libs(ctx))
1250 return zink_create_gfx_program(ctx, stages, vertices_per_patch, ctx->gfx_hash);
1251 for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
1252 /* ensure async shader creation is done */
1253 if (stages[i]) {
1254 util_queue_fence_wait(&stages[i]->precompile.fence);
1255 if (!stages[i]->precompile.obj.mod)
1256 return zink_create_gfx_program(ctx, stages, vertices_per_patch, ctx->gfx_hash);
1257 }
1258 }
1259
1260 struct zink_gfx_program *prog = create_program(ctx, false);
1261 if (!prog)
1262 goto fail;
1263
1264 prog->is_separable = true;
1265 prog->gfx_hash = ctx->gfx_hash;
1266 prog->base.uses_shobj = screen->info.have_EXT_shader_object;
1267
1268 prog->stages_remaining = prog->stages_present = ctx->shader_stages;
1269 memcpy(prog->shaders, stages, sizeof(prog->shaders));
1270 prog->last_vertex_stage = ctx->last_vertex_stage;
1271
1272 if (stages[MESA_SHADER_TESS_EVAL] && !stages[MESA_SHADER_TESS_CTRL]) {
1273 prog->shaders[MESA_SHADER_TESS_CTRL] = stages[MESA_SHADER_TESS_EVAL]->non_fs.generated_tcs;
1274 prog->stages_present |= BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
1275 }
1276
1277 if (!screen->info.have_EXT_shader_object) {
1278 prog->libs = create_lib_cache(prog, false);
1279 /* this libs cache is owned by the program */
1280 p_atomic_set(&prog->libs->refcount, 1);
1281 }
1282
1283 unsigned refs = 0;
1284 for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
1285 if (prog->shaders[i]) {
1286 simple_mtx_lock(&prog->shaders[i]->lock);
1287 _mesa_set_add(prog->shaders[i]->programs, prog);
1288 simple_mtx_unlock(&prog->shaders[i]->lock);
1289 if (screen->info.have_EXT_shader_object) {
1290 if (!prog->objects[i])
1291 prog->objects[i] = prog->shaders[i]->precompile.obj.obj;
1292 }
1293 refs++;
1294 }
1295 }
1296 /* We can do this add after the _mesa_set_adds above because we know the prog->shaders[] are
1297 * referenced by the draw state and zink_gfx_shader_free() can't be called on them while we're in here.
1298 */
1299 p_atomic_add(&prog->base.reference.count, refs - 1);
1300
1301 for (int r = 0; r < ARRAY_SIZE(prog->pipelines); ++r) {
1302 for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) {
1303 _mesa_hash_table_init(&prog->pipelines[r][i], prog, NULL, zink_get_gfx_pipeline_eq_func(screen, prog));
1304 /* only need first 3/4 for point/line/tri/patch */
1305 if (screen->info.have_EXT_extended_dynamic_state &&
1306 i == (prog->last_vertex_stage->info.stage == MESA_SHADER_TESS_EVAL ? 4 : 3))
1307 break;
1308 }
1309 }
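
   /* Illustrative note (an inference from the loop above): with
    * EXT_extended_dynamic_state each pipelines[] row only needs entries for
    * the reduced primitive classes, so initialization stops early:
    *
    *    idx 0 -> points, idx 1 -> lines, idx 2 -> triangles,
    *    idx 3 -> patches (only when tessellation is the last vertex stage)
    */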
1310
1311 for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
1312 if (!prog->shaders[i] || !prog->shaders[i]->precompile.dsl)
1313 continue;
1314 int idx = !i ? 0 : screen->info.have_EXT_shader_object ? i : 1;
1315 prog->base.dd.binding_usage |= BITFIELD_BIT(idx);
1316 prog->base.dsl[idx] = prog->shaders[i]->precompile.dsl;
1317 /* guarantee a null dsl if previous stages don't have descriptors */
1318 if (prog->shaders[i]->precompile.dsl)
1319 prog->base.num_dsl = idx + 1;
1320 prog->base.dd.bindless |= prog->shaders[i]->bindless;
1321 }
1322 if (prog->base.dd.bindless) {
1323 prog->base.num_dsl = screen->compact_descriptors ? ZINK_DESCRIPTOR_ALL_TYPES - ZINK_DESCRIPTOR_COMPACT : ZINK_DESCRIPTOR_ALL_TYPES;
1324 prog->base.dsl[screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS]] = screen->bindless_layout;
1325 }
1326 prog->base.layout = zink_pipeline_layout_create(screen, prog->base.dsl, prog->base.num_dsl, false, VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT);
1327
1328 prog->last_variant_hash = ctx->gfx_pipeline_state.optimal_key;
1329
1330 if (!screen->info.have_EXT_shader_object) {
1331 VkPipeline libs[] = {stages[MESA_SHADER_VERTEX]->precompile.gpl, stages[MESA_SHADER_FRAGMENT]->precompile.gpl};
1332 struct zink_gfx_library_key *gkey = CALLOC_STRUCT(zink_gfx_library_key);
1333 if (!gkey) {
1334 mesa_loge("ZINK: failed to allocate gkey!");
1335 goto fail;
1336 }
1337 gkey->optimal_key = prog->last_variant_hash;
1338 assert(gkey->optimal_key);
1339 gkey->pipeline = zink_create_gfx_pipeline_combined(screen, prog, VK_NULL_HANDLE, libs, 2, VK_NULL_HANDLE, false, false);
1340 _mesa_set_add(&prog->libs->libs, gkey);
1341 }
1342
1343 if (!(zink_debug & ZINK_DEBUG_NOOPT))
1344 util_queue_add_job(&screen->cache_get_thread, prog, &prog->base.cache_fence, create_linked_separable_job, NULL, 0);
1345
1346 return prog;
1347 fail:
1348 if (prog)
1349 zink_destroy_gfx_program(screen, prog);
1350 return NULL;
1351 }
1352
1353 static void
print_pipeline_stats(struct zink_screen *screen, VkPipeline pipeline, struct util_debug_callback *debug)
1355 {
1356 VkPipelineInfoKHR pinfo = {
1357 VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR,
1358 NULL,
1359 pipeline
1360 };
1361 unsigned exe_count = 0;
1362 VkPipelineExecutablePropertiesKHR props[10] = {0};
1363 for (unsigned i = 0; i < ARRAY_SIZE(props); i++) {
1364 props[i].sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_PROPERTIES_KHR;
1365 props[i].pNext = NULL;
1366 }
1367 VKSCR(GetPipelineExecutablePropertiesKHR)(screen->dev, &pinfo, &exe_count, NULL);
1368 VKSCR(GetPipelineExecutablePropertiesKHR)(screen->dev, &pinfo, &exe_count, props);
1369 for (unsigned e = 0; e < exe_count; e++) {
1370 VkPipelineExecutableInfoKHR info = {
1371 VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INFO_KHR,
1372 NULL,
1373 pipeline,
1374 e
1375 };
1376 unsigned count = 0;
1377
1378 struct u_memstream stream;
1379 char *print_buf;
1380 size_t print_buf_sz;
1381
1382 if (!u_memstream_open(&stream, &print_buf, &print_buf_sz)) {
1383 mesa_loge("ZINK: failed to open memstream!");
1384 return;
1385 }
1386
1387 FILE *f = u_memstream_get(&stream);
1388 fprintf(f, "%s shader: ", props[e].name);
1389 VkPipelineExecutableStatisticKHR *stats = NULL;
1390 VKSCR(GetPipelineExecutableStatisticsKHR)(screen->dev, &info, &count, NULL);
      stats = calloc(count, sizeof(VkPipelineExecutableStatisticKHR));
      if (!stats) {
         mesa_loge("ZINK: failed to allocate stats!");
         /* don't leak the memstream on the error path */
         u_memstream_close(&stream);
         free(print_buf);
         return;
      }
1396
1397 for (unsigned i = 0; i < count; i++)
1398 stats[i].sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_STATISTIC_KHR;
1399 VKSCR(GetPipelineExecutableStatisticsKHR)(screen->dev, &info, &count, stats);
1400
1401 for (unsigned i = 0; i < count; i++) {
1402 if (i)
1403 fprintf(f, ", ");
1404
1405 switch (stats[i].format) {
1406 case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_BOOL32_KHR:
1407 fprintf(f, "%u %s", stats[i].value.b32, stats[i].name);
1408 break;
1409 case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_INT64_KHR:
1410 fprintf(f, "%" PRIi64 " %s", stats[i].value.i64, stats[i].name);
1411 break;
1412 case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR:
1413 fprintf(f, "%" PRIu64 " %s", stats[i].value.u64, stats[i].name);
1414 break;
1415 case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_FLOAT64_KHR:
1416 fprintf(f, "%g %s", stats[i].value.f64, stats[i].name);
1417 break;
1418 default:
1419 unreachable("unknown statistic");
1420 }
1421 }
1422
1423 /* print_buf is only valid after flushing. */
1424 fflush(f);
1425 util_debug_message(debug, SHADER_INFO, "%s", print_buf);
1426
      u_memstream_close(&stream);
      free(print_buf);
      /* the statistics array is per-executable and must be freed each iteration */
      free(stats);
1429 }
1430 }
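
/* Example of a line emitted above (illustrative only; statistic names and
 * values depend on the driver's VK_KHR_pipeline_executable_properties
 * implementation):
 *
 *    Fragment Shader shader: 128 Instructions, 12 SGPRs, 24 VGPRs
 */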
1431
1432 static uint32_t
hash_compute_pipeline_state_local_size(const void *key)
1434 {
1435 const struct zink_compute_pipeline_state *state = key;
1436 uint32_t hash = _mesa_hash_data(state, offsetof(struct zink_compute_pipeline_state, hash));
1437 hash = XXH32(&state->local_size[0], sizeof(state->local_size), hash);
1438 return hash;
1439 }
1440
1441 static uint32_t
hash_compute_pipeline_state(const void *key)
1443 {
1444 const struct zink_compute_pipeline_state *state = key;
1445 return _mesa_hash_data(state, offsetof(struct zink_compute_pipeline_state, hash));
1446 }
1447
1448 void
zink_program_update_compute_pipeline_state(struct zink_context *ctx, struct zink_compute_program *comp, const struct pipe_grid_info *info)
1450 {
1451 if (comp->use_local_size) {
1452 for (int i = 0; i < ARRAY_SIZE(ctx->compute_pipeline_state.local_size); i++) {
1453 if (ctx->compute_pipeline_state.local_size[i] != info->block[i])
1454 ctx->compute_pipeline_state.dirty = true;
1455 ctx->compute_pipeline_state.local_size[i] = info->block[i];
1456 }
1457 }
1458 if (ctx->compute_pipeline_state.variable_shared_mem != info->variable_shared_mem) {
1459 ctx->compute_pipeline_state.dirty = true;
1460 ctx->compute_pipeline_state.variable_shared_mem = info->variable_shared_mem;
1461 }
1462 }
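
/* Example usage at dispatch time (a sketch; 'info' is the pipe_grid_info
 * passed to pipe_context::launch_grid):
 *
 *    zink_program_update_compute_pipeline_state(ctx, comp, info);
 *    VkPipeline pipeline =
 *       zink_get_compute_pipeline(screen, comp, &ctx->compute_pipeline_state);
 */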
1463
1464 static bool
equals_compute_pipeline_state(const void *a, const void *b)
1466 {
1467 const struct zink_compute_pipeline_state *sa = a;
1468 const struct zink_compute_pipeline_state *sb = b;
1469 return !memcmp(a, b, offsetof(struct zink_compute_pipeline_state, hash)) &&
1470 sa->module == sb->module;
1471 }
1472
1473 static bool
equals_compute_pipeline_state_local_size(const void *a, const void *b)
1475 {
1476 const struct zink_compute_pipeline_state *sa = a;
1477 const struct zink_compute_pipeline_state *sb = b;
1478 return !memcmp(a, b, offsetof(struct zink_compute_pipeline_state, hash)) &&
1479 !memcmp(sa->local_size, sb->local_size, sizeof(sa->local_size)) &&
1480 sa->module == sb->module;
1481 }
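
/* The hash and equality callbacks above must agree on the fields they
 * consider: a program with a variable workgroup size keys on local_size in
 * both hash_compute_pipeline_state_local_size() and
 * equals_compute_pipeline_state_local_size(); a mismatch would make the
 * pre-hashed table lookups silently miss.
 */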
1482
1483 static void
precompile_compute_job(void *data, void *gdata, int thread_index)
1485 {
1486 struct zink_compute_program *comp = data;
1487 struct zink_screen *screen = gdata;
1488
1489 comp->shader = zink_shader_create(screen, comp->nir);
1490 zink_shader_init(screen, comp->shader);
1491 comp->curr = comp->module = CALLOC_STRUCT(zink_shader_module);
1492 assert(comp->module);
1493 comp->module->shobj = false;
1494 comp->module->obj = zink_shader_compile(screen, false, comp->shader, comp->nir, NULL, NULL, &comp->base);
1495 /* comp->nir will be freed by zink_shader_compile */
1496 comp->nir = NULL;
1497 assert(comp->module->obj.spirv);
1498 util_dynarray_init(&comp->shader_cache[0], comp);
1499 util_dynarray_init(&comp->shader_cache[1], comp);
1500
1501 struct mesa_blake3 blake3_ctx;
1502 _mesa_blake3_init(&blake3_ctx);
1503 _mesa_blake3_update(&blake3_ctx, comp->shader->blob.data, comp->shader->blob.size);
1504 _mesa_blake3_final(&blake3_ctx, comp->base.blake3);
1505
1506 zink_descriptor_program_init(comp->base.ctx, &comp->base);
1507
1508 zink_screen_get_pipeline_cache(screen, &comp->base, true);
1509 if (comp->base.can_precompile)
1510 comp->base_pipeline = zink_create_compute_pipeline(screen, comp, NULL);
1511 if (comp->base_pipeline)
1512 zink_screen_update_pipeline_cache(screen, &comp->base, true);
1513 }
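
/* Note on cache keying (an inference from the code above): the program's
 * blake3 is computed over the serialized NIR blob, so any change to the
 * lowered shader produces a new key for the pipeline cache fetched via
 * zink_screen_get_pipeline_cache().
 */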
1514
1515 static struct zink_compute_program *
create_compute_program(struct zink_context *ctx, nir_shader *nir)
1517 {
1518 struct zink_screen *screen = zink_screen(ctx->base.screen);
1519 struct zink_compute_program *comp = create_program(ctx, true);
1520 if (!comp)
1521 return NULL;
1522 simple_mtx_init(&comp->cache_lock, mtx_plain);
1523 comp->scratch_size = nir->scratch_size;
1524 comp->nir = nir;
1525 comp->num_inlinable_uniforms = nir->info.num_inlinable_uniforms;
1526
1527 comp->use_local_size = !(nir->info.workgroup_size[0] ||
1528 nir->info.workgroup_size[1] ||
1529 nir->info.workgroup_size[2]);
1530 comp->has_variable_shared_mem = nir->info.cs.has_variable_shared_mem;
1531 comp->base.can_precompile = !comp->use_local_size &&
1532 (screen->info.have_EXT_non_seamless_cube_map || !zink_shader_has_cubes(nir)) &&
1533 (screen->info.rb2_feats.robustImageAccess2 || !(ctx->flags & PIPE_CONTEXT_ROBUST_BUFFER_ACCESS));
1534 _mesa_hash_table_init(&comp->pipelines, comp, NULL, comp->use_local_size ?
1535 equals_compute_pipeline_state_local_size :
1536 equals_compute_pipeline_state);
1537
1538 if (zink_debug & (ZINK_DEBUG_NOBGC|ZINK_DEBUG_SHADERDB))
1539 precompile_compute_job(comp, screen, 0);
1540 else
1541 util_queue_add_job(&screen->cache_get_thread, comp, &comp->base.cache_fence,
1542 precompile_compute_job, NULL, 0);
1543
1544 if (zink_debug & ZINK_DEBUG_SHADERDB) {
1545 print_pipeline_stats(screen, comp->base_pipeline, &ctx->dbg);
1546 }
1547
1548 return comp;
1549 }
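
/* Illustration: use_local_size is set for kernels with a variable workgroup
 * size, e.g. GLSL's
 *
 *    layout(local_size_variable) in;
 *
 * where nir->info.workgroup_size[] is all zero and the real size only
 * arrives with each launch_grid() call.
 */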
1550
1551 bool
zink_program_descriptor_is_buffer(struct zink_context *ctx, gl_shader_stage stage, enum zink_descriptor_type type, unsigned i)
1553 {
1554 struct zink_shader *zs = NULL;
1555 switch (stage) {
1556 case MESA_SHADER_VERTEX:
1557 case MESA_SHADER_TESS_CTRL:
1558 case MESA_SHADER_TESS_EVAL:
1559 case MESA_SHADER_GEOMETRY:
1560 case MESA_SHADER_FRAGMENT:
1561 zs = ctx->gfx_stages[stage];
1562 break;
1563 case MESA_SHADER_COMPUTE: {
1564 zs = ctx->curr_compute->shader;
1565 break;
1566 }
1567 default:
1568 unreachable("unknown shader type");
1569 }
1570 if (!zs)
1571 return false;
1572 return zink_shader_descriptor_is_buffer(zs, type, i);
1573 }
1574
1575 static unsigned
get_num_bindings(struct zink_shader *zs, enum zink_descriptor_type type)
1577 {
1578 switch (type) {
1579 case ZINK_DESCRIPTOR_TYPE_UNIFORMS:
1580 return !!zs->has_uniforms;
1581 case ZINK_DESCRIPTOR_TYPE_UBO:
1582 case ZINK_DESCRIPTOR_TYPE_SSBO:
1583 return zs->num_bindings[type];
1584 default:
1585 break;
1586 }
1587 unsigned num_bindings = 0;
1588 for (int i = 0; i < zs->num_bindings[type]; i++)
1589 num_bindings += zs->bindings[type][i].size;
1590 return num_bindings;
1591 }
1592
1593 unsigned
zink_program_num_bindings_typed(const struct zink_program *pg, enum zink_descriptor_type type)
1595 {
1596 unsigned num_bindings = 0;
1597 if (pg->is_compute) {
1598 struct zink_compute_program *comp = (void*)pg;
1599 return get_num_bindings(comp->shader, type);
1600 }
1601 struct zink_gfx_program *prog = (void*)pg;
1602 for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
1603 if (prog->shaders[i])
1604 num_bindings += get_num_bindings(prog->shaders[i], type);
1605 }
1606 return num_bindings;
1607 }
1608
1609 unsigned
zink_program_num_bindings(const struct zink_program *pg)
1611 {
1612 unsigned num_bindings = 0;
1613 for (unsigned i = 0; i < ZINK_DESCRIPTOR_BASE_TYPES; i++)
1614 num_bindings += zink_program_num_bindings_typed(pg, i);
1615 return num_bindings;
1616 }
1617
1618 static void
deinit_program(struct zink_screen *screen, struct zink_program *pg)
1620 {
1621 util_queue_fence_wait(&pg->cache_fence);
1622 if (pg->layout)
1623 VKSCR(DestroyPipelineLayout)(screen->dev, pg->layout, NULL);
1624
1625 if (pg->pipeline_cache)
1626 VKSCR(DestroyPipelineCache)(screen->dev, pg->pipeline_cache, NULL);
1627 u_rwlock_destroy(&pg->pipeline_cache_lock);
1628 zink_descriptor_program_deinit(screen, pg);
1629 }
1630
1631 void
zink_destroy_gfx_program(struct zink_screen *screen,
                         struct zink_gfx_program *prog)
1634 {
1635 unsigned max_idx = ARRAY_SIZE(prog->pipelines[0]);
1636 if (screen->info.have_EXT_extended_dynamic_state) {
1637 /* only need first 3/4 for point/line/tri/patch */
1638 if ((prog->stages_present &
1639 (BITFIELD_BIT(MESA_SHADER_TESS_EVAL) | BITFIELD_BIT(MESA_SHADER_GEOMETRY))) ==
1640 BITFIELD_BIT(MESA_SHADER_TESS_EVAL))
1641 max_idx = 4;
1642 else
1643 max_idx = 3;
1644 max_idx++;
1645 }
1646
1647 if (prog->is_separable)
1648 zink_gfx_program_reference(screen, &prog->full_prog, NULL);
1649 for (unsigned r = 0; r < ARRAY_SIZE(prog->pipelines); r++) {
1650 for (int i = 0; i < max_idx; ++i) {
1651 hash_table_foreach(&prog->pipelines[r][i], entry) {
1652 struct zink_gfx_pipeline_cache_entry *pc_entry = entry->data;
1653
1654 util_queue_fence_wait(&pc_entry->fence);
1655 VKSCR(DestroyPipeline)(screen->dev, pc_entry->pipeline, NULL);
1656 VKSCR(DestroyPipeline)(screen->dev, pc_entry->gpl.unoptimized_pipeline, NULL);
1657 free(pc_entry);
1658 }
1659 }
1660 }
1661
1662 deinit_program(screen, &prog->base);
1663
1664 for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
1665 if (prog->shaders[i]) {
1666 _mesa_set_remove_key(prog->shaders[i]->programs, prog);
1667 prog->shaders[i] = NULL;
1668 }
1669 if (!prog->is_separable) {
1670 destroy_shader_cache(screen, &prog->shader_cache[i][0][0]);
1671 destroy_shader_cache(screen, &prog->shader_cache[i][0][1]);
1672 destroy_shader_cache(screen, &prog->shader_cache[i][1][0]);
1673 destroy_shader_cache(screen, &prog->shader_cache[i][1][1]);
1674 blob_finish(&prog->blobs[i]);
1675 }
1676 }
1677 if (prog->libs)
1678 zink_gfx_lib_cache_unref(screen, prog->libs);
1679
1680 ralloc_free(prog);
1681 }
1682
1683 void
zink_destroy_compute_program(struct zink_screen *screen,
                             struct zink_compute_program *comp)
1686 {
1687 deinit_program(screen, &comp->base);
1688
1689 assert(comp->shader);
1690 assert(!comp->shader->spirv);
1691
1692 zink_shader_free(screen, comp->shader);
1693
1694 destroy_shader_cache(screen, &comp->shader_cache[0]);
1695 destroy_shader_cache(screen, &comp->shader_cache[1]);
1696
1697 hash_table_foreach(&comp->pipelines, entry) {
1698 struct compute_pipeline_cache_entry *pc_entry = entry->data;
1699
1700 VKSCR(DestroyPipeline)(screen->dev, pc_entry->pipeline, NULL);
1701 free(pc_entry);
1702 }
1703 VKSCR(DestroyPipeline)(screen->dev, comp->base_pipeline, NULL);
1704 zink_destroy_shader_module(screen, comp->module);
1705
1706 ralloc_free(comp);
1707 }
1708
1709 ALWAYS_INLINE static bool
compute_can_shortcut(const struct zink_compute_program *comp)
1711 {
1712 return !comp->use_local_size && !comp->curr->num_uniforms && !comp->curr->has_nonseamless;
1713 }
1714
1715 VkPipeline
zink_get_compute_pipeline(struct zink_screen *screen,
                          struct zink_compute_program *comp,
                          struct zink_compute_pipeline_state *state)
1719 {
1720 struct hash_entry *entry = NULL;
1721 struct compute_pipeline_cache_entry *cache_entry;
1722
1723 if (!state->dirty && !state->module_changed)
1724 return state->pipeline;
1725 if (state->dirty) {
1726 if (state->pipeline) //avoid on first hash
1727 state->final_hash ^= state->hash;
1728 if (comp->use_local_size)
1729 state->hash = hash_compute_pipeline_state_local_size(state);
1730 else
1731 state->hash = hash_compute_pipeline_state(state);
1732 state->dirty = false;
1733 state->final_hash ^= state->hash;
1734 }
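
   /* the XOR pairs above implement a cheap "replace one component" update
    * (sketch): XORing the stale sub-hash out and the fresh one in leaves any
    * other contributions to final_hash (e.g. the module hash) intact:
    *
    *    final_hash ^= old_hash;   // remove stale contribution
    *    final_hash ^= new_hash;   // mix in fresh contribution
    */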
1735
1736 util_queue_fence_wait(&comp->base.cache_fence);
1737 if (comp->base_pipeline && compute_can_shortcut(comp)) {
1738 state->pipeline = comp->base_pipeline;
1739 return state->pipeline;
1740 }
1741 entry = _mesa_hash_table_search_pre_hashed(&comp->pipelines, state->final_hash, state);
1742
1743 if (!entry) {
1744 simple_mtx_lock(&comp->cache_lock);
1745 entry = _mesa_hash_table_search_pre_hashed(&comp->pipelines, state->final_hash, state);
1746 if (entry) {
1747 simple_mtx_unlock(&comp->cache_lock);
1748 goto out;
1749 }
1750 VkPipeline pipeline = zink_create_compute_pipeline(screen, comp, state);
1751
1752 if (pipeline == VK_NULL_HANDLE) {
1753 simple_mtx_unlock(&comp->cache_lock);
1754 return VK_NULL_HANDLE;
1755 }
1756
1757 zink_screen_update_pipeline_cache(screen, &comp->base, false);
1758 if (compute_can_shortcut(comp)) {
1759 simple_mtx_unlock(&comp->cache_lock);
1760 /* don't add base pipeline to cache */
1761 state->pipeline = comp->base_pipeline = pipeline;
1762 return state->pipeline;
1763 }
1764
1765 struct compute_pipeline_cache_entry *pc_entry = CALLOC_STRUCT(compute_pipeline_cache_entry);
1766 if (!pc_entry) {
1767 simple_mtx_unlock(&comp->cache_lock);
1768 return VK_NULL_HANDLE;
1769 }
1770
1771 memcpy(&pc_entry->state, state, sizeof(*state));
1772 pc_entry->pipeline = pipeline;
1773
1774 entry = _mesa_hash_table_insert_pre_hashed(&comp->pipelines, state->final_hash, pc_entry, pc_entry);
1775 assert(entry);
1776 simple_mtx_unlock(&comp->cache_lock);
1777 }
1778 out:
1779 cache_entry = entry->data;
1780 state->pipeline = cache_entry->pipeline;
1781 return state->pipeline;
1782 }
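
/* The lookup above follows the classic double-checked pattern (sketch):
 *
 *    if (!search(table, key)) {             // unlocked fast path
 *       lock();
 *       if (!search(table, key))            // recheck under the lock
 *          insert(table, key, create());    // only one thread creates
 *       unlock();
 *    }
 */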
1783
1784 static void
bind_gfx_stage(struct zink_context *ctx, gl_shader_stage stage, struct zink_shader *shader)
1786 {
1787 if (shader && shader->info.num_inlinable_uniforms)
1788 ctx->shader_has_inlinable_uniforms_mask |= 1 << stage;
1789 else
1790 ctx->shader_has_inlinable_uniforms_mask &= ~(1 << stage);
1791
1792 if (ctx->gfx_stages[stage])
1793 ctx->gfx_hash ^= ctx->gfx_stages[stage]->hash;
1794
1795 if (stage == MESA_SHADER_GEOMETRY && ctx->is_generated_gs_bound && (!shader || !shader->non_fs.parent)) {
1796 ctx->inlinable_uniforms_valid_mask &= ~BITFIELD64_BIT(MESA_SHADER_GEOMETRY);
1797 ctx->is_generated_gs_bound = false;
1798 }
1799
1800 ctx->gfx_stages[stage] = shader;
1801 ctx->gfx_dirty = ctx->gfx_stages[MESA_SHADER_FRAGMENT] && ctx->gfx_stages[MESA_SHADER_VERTEX];
1802 ctx->gfx_pipeline_state.modules_changed = true;
1803 if (shader) {
1804 ctx->shader_stages |= BITFIELD_BIT(stage);
1805 ctx->gfx_hash ^= ctx->gfx_stages[stage]->hash;
1806 } else {
1807 ctx->gfx_pipeline_state.modules[stage] = VK_NULL_HANDLE;
1808 if (ctx->curr_program)
1809 ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
1810 ctx->curr_program = NULL;
1811 ctx->shader_stages &= ~BITFIELD_BIT(stage);
1812 }
1813 }
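
/* gfx_hash bookkeeping sketch: every bound shader XORs its hash into
 * ctx->gfx_hash, so rebinding a stage is "XOR the old hash out, XOR the
 * new one in":
 *
 *    ctx->gfx_hash ^= old_shader->hash;   // before the swap above
 *    ctx->gfx_hash ^= new_shader->hash;   // after, if non-NULL
 */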
1814
1815 static enum mesa_prim
gs_output_to_reduced_prim_type(struct shader_info *info)
1817 {
1818 switch (info->gs.output_primitive) {
1819 case MESA_PRIM_POINTS:
1820 return MESA_PRIM_POINTS;
1821
1822 case MESA_PRIM_LINES:
1823 case MESA_PRIM_LINE_LOOP:
1824 case MESA_PRIM_LINE_STRIP:
1825 case MESA_PRIM_LINES_ADJACENCY:
1826 case MESA_PRIM_LINE_STRIP_ADJACENCY:
1827 return MESA_PRIM_LINES;
1828
1829 case MESA_PRIM_TRIANGLES:
1830 case MESA_PRIM_TRIANGLE_STRIP:
1831 case MESA_PRIM_TRIANGLE_FAN:
1832 case MESA_PRIM_TRIANGLES_ADJACENCY:
1833 case MESA_PRIM_TRIANGLE_STRIP_ADJACENCY:
1834 return MESA_PRIM_TRIANGLES;
1835
1836 default:
1837 unreachable("unexpected output primitive type");
1838 }
1839 }
1840
1841 static enum mesa_prim
update_rast_prim(struct zink_shader *shader)
1843 {
1844 struct shader_info *info = &shader->info;
1845 if (info->stage == MESA_SHADER_GEOMETRY)
1846 return gs_output_to_reduced_prim_type(info);
1847 else if (info->stage == MESA_SHADER_TESS_EVAL) {
1848 if (info->tess.point_mode)
1849 return MESA_PRIM_POINTS;
1850 else {
1851 switch (info->tess._primitive_mode) {
1852 case TESS_PRIMITIVE_ISOLINES:
1853 return MESA_PRIM_LINES;
1854 case TESS_PRIMITIVE_TRIANGLES:
1855 case TESS_PRIMITIVE_QUADS:
1856 return MESA_PRIM_TRIANGLES;
1857 default:
1858 return MESA_PRIM_COUNT;
1859 }
1860 }
1861 }
1862 return MESA_PRIM_COUNT;
1863 }
1864
1865 static void
unbind_generated_gs(struct zink_context *ctx, gl_shader_stage stage, struct zink_shader *prev_shader)
1867 {
1868 if (prev_shader->non_fs.is_generated)
1869 ctx->inlinable_uniforms_valid_mask &= ~BITFIELD64_BIT(MESA_SHADER_GEOMETRY);
1870
1871 if (ctx->gfx_stages[MESA_SHADER_GEOMETRY] &&
1872 ctx->gfx_stages[MESA_SHADER_GEOMETRY]->non_fs.parent ==
1873 prev_shader) {
1874 bind_gfx_stage(ctx, MESA_SHADER_GEOMETRY, NULL);
1875 }
1876 }
1877
1878 static void
bind_last_vertex_stage(struct zink_context *ctx, gl_shader_stage stage, struct zink_shader *prev_shader)
1880 {
1881 if (prev_shader && stage < MESA_SHADER_GEOMETRY)
1882 unbind_generated_gs(ctx, stage, prev_shader);
1883
1884 gl_shader_stage old = ctx->last_vertex_stage ? ctx->last_vertex_stage->info.stage : MESA_SHADER_STAGES;
1885 if (ctx->gfx_stages[MESA_SHADER_GEOMETRY])
1886 ctx->last_vertex_stage = ctx->gfx_stages[MESA_SHADER_GEOMETRY];
1887 else if (ctx->gfx_stages[MESA_SHADER_TESS_EVAL])
1888 ctx->last_vertex_stage = ctx->gfx_stages[MESA_SHADER_TESS_EVAL];
1889 else
1890 ctx->last_vertex_stage = ctx->gfx_stages[MESA_SHADER_VERTEX];
1891 gl_shader_stage current = ctx->last_vertex_stage ? ctx->last_vertex_stage->info.stage : MESA_SHADER_VERTEX;
1892
1893 /* update rast_prim */
1894 ctx->gfx_pipeline_state.shader_rast_prim =
1895 ctx->last_vertex_stage ? update_rast_prim(ctx->last_vertex_stage) :
1896 MESA_PRIM_COUNT;
1897
1898 if (old != current) {
1899 if (!zink_screen(ctx->base.screen)->optimal_keys) {
1900 if (old != MESA_SHADER_STAGES) {
1901 memset(&ctx->gfx_pipeline_state.shader_keys.key[old].key.vs_base, 0, sizeof(struct zink_vs_key_base));
1902 ctx->dirty_gfx_stages |= BITFIELD_BIT(old);
1903 } else {
1904 /* always unset vertex shader values when changing to a non-vs last stage */
1905 memset(&ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_VERTEX].key.vs_base, 0, sizeof(struct zink_vs_key_base));
1906 }
1907 }
1908
1909 unsigned num_viewports = ctx->vp_state.num_viewports;
1910 struct zink_screen *screen = zink_screen(ctx->base.screen);
1911 /* number of enabled viewports is based on whether last vertex stage writes viewport index */
1912 if (ctx->last_vertex_stage) {
1913 if (ctx->last_vertex_stage->info.outputs_written & (VARYING_BIT_VIEWPORT | VARYING_BIT_VIEWPORT_MASK))
1914 ctx->vp_state.num_viewports = MIN2(screen->info.props.limits.maxViewports, PIPE_MAX_VIEWPORTS);
1915 else
1916 ctx->vp_state.num_viewports = 1;
1917 } else {
1918 ctx->vp_state.num_viewports = 1;
1919 }
1920 ctx->vp_state_changed |= num_viewports != ctx->vp_state.num_viewports;
1921 if (!screen->info.have_EXT_extended_dynamic_state) {
1922 if (ctx->gfx_pipeline_state.dyn_state1.num_viewports != ctx->vp_state.num_viewports)
1923 ctx->gfx_pipeline_state.dirty = true;
1924 ctx->gfx_pipeline_state.dyn_state1.num_viewports = ctx->vp_state.num_viewports;
1925 }
1926 ctx->last_vertex_stage_dirty = true;
1927 }
1928 }
1929
1930 static void
zink_bind_vs_state(struct pipe_context *pctx,
                   void *cso)
1933 {
1934 struct zink_context *ctx = zink_context(pctx);
1935 if (!cso && !ctx->gfx_stages[MESA_SHADER_VERTEX])
1936 return;
1937 struct zink_shader *prev_shader = ctx->gfx_stages[MESA_SHADER_VERTEX];
1938 bind_gfx_stage(ctx, MESA_SHADER_VERTEX, cso);
1939 bind_last_vertex_stage(ctx, MESA_SHADER_VERTEX, prev_shader);
1940 if (cso) {
1941 struct zink_shader *zs = cso;
1942 ctx->shader_reads_drawid = BITSET_TEST(zs->info.system_values_read, SYSTEM_VALUE_DRAW_ID);
1943 ctx->shader_reads_basevertex = BITSET_TEST(zs->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX);
1944 } else {
1945 ctx->shader_reads_drawid = false;
1946 ctx->shader_reads_basevertex = false;
1947 }
1948 }
1949
1950 /* if gl_SampleMask[] is written to, we have to ensure that we get a shader with the same sample count:
1951 * in GL, samples==1 means ignore gl_SampleMask[]
1952 * in VK, gl_SampleMask[] is never ignored
1953 */
1954 void
zink_update_fs_key_samples(struct zink_context *ctx)
1956 {
1957 if (!ctx->gfx_stages[MESA_SHADER_FRAGMENT])
1958 return;
1959 if (zink_shader_uses_samples(ctx->gfx_stages[MESA_SHADER_FRAGMENT])) {
1960 bool samples = zink_get_fs_base_key(ctx)->samples;
1961 if (samples != (ctx->fb_state.samples > 1))
1962 zink_set_fs_base_key(ctx)->samples = ctx->fb_state.samples > 1;
1963 }
1964 }
1965
void zink_update_gs_key_rectangular_line(struct zink_context *ctx)
1967 {
1968 bool line_rectangular = zink_get_gs_key(ctx)->line_rectangular;
1969 if (line_rectangular != ctx->rast_state->base.line_rectangular)
1970 zink_set_gs_key(ctx)->line_rectangular = ctx->rast_state->base.line_rectangular;
1971 }
1972
1973 static void
zink_bind_fs_state(struct pipe_context *pctx,
                   void *cso)
1976 {
1977 struct zink_context *ctx = zink_context(pctx);
1978 if (!cso && !ctx->gfx_stages[MESA_SHADER_FRAGMENT])
1979 return;
1980 if (ctx->disable_fs && !ctx->disable_color_writes && cso != ctx->null_fs) {
1981 ctx->saved_fs = cso;
1982 zink_set_null_fs(ctx);
1983 return;
1984 }
1985 bool writes_cbuf0 = ctx->gfx_stages[MESA_SHADER_FRAGMENT] ? (ctx->gfx_stages[MESA_SHADER_FRAGMENT]->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_DATA0)) > 0 : true;
1986 unsigned shadow_mask = ctx->gfx_stages[MESA_SHADER_FRAGMENT] ? ctx->gfx_stages[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask : 0;
1987 bind_gfx_stage(ctx, MESA_SHADER_FRAGMENT, cso);
1988 ctx->fbfetch_outputs = 0;
1989 if (cso) {
1990 shader_info *info = &ctx->gfx_stages[MESA_SHADER_FRAGMENT]->info;
1991 bool new_writes_cbuf0 = (info->outputs_written & BITFIELD_BIT(FRAG_RESULT_DATA0)) > 0;
1992 if (ctx->gfx_pipeline_state.blend_state && ctx->gfx_pipeline_state.blend_state->alpha_to_coverage &&
1993 writes_cbuf0 != new_writes_cbuf0 && zink_screen(pctx->screen)->info.have_EXT_extended_dynamic_state3) {
1994 ctx->blend_state_changed = true;
1995 ctx->ds3_states |= BITFIELD_BIT(ZINK_DS3_BLEND_A2C);
1996 }
1997 if (info->fs.uses_fbfetch_output) {
1998 if (info->outputs_read & (BITFIELD_BIT(FRAG_RESULT_DEPTH) | BITFIELD_BIT(FRAG_RESULT_STENCIL)))
1999 ctx->fbfetch_outputs |= BITFIELD_BIT(PIPE_MAX_COLOR_BUFS);
2000 ctx->fbfetch_outputs |= info->outputs_read >> FRAG_RESULT_DATA0;
2001 }
2002 zink_update_fs_key_samples(ctx);
2003 if (zink_screen(pctx->screen)->info.have_EXT_rasterization_order_attachment_access) {
2004 if (ctx->gfx_pipeline_state.rast_attachment_order != info->fs.uses_fbfetch_output)
2005 ctx->gfx_pipeline_state.dirty = true;
2006 ctx->gfx_pipeline_state.rast_attachment_order = info->fs.uses_fbfetch_output;
2007 }
2008 zink_set_zs_needs_shader_swizzle_key(ctx, MESA_SHADER_FRAGMENT, false);
2009 if (shadow_mask != ctx->gfx_stages[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask &&
2010 !zink_screen(pctx->screen)->driver_compiler_workarounds.needs_zs_shader_swizzle)
2011 zink_update_shadow_samplerviews(ctx, shadow_mask | ctx->gfx_stages[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask);
2012 if (!ctx->track_renderpasses && !ctx->blitting)
2013 ctx->rp_tc_info_updated = true;
2014 }
2015 zink_update_fbfetch(ctx);
2016 }
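
/* Layout of ctx->fbfetch_outputs as built above (an inference from the
 * shifts used): bit N means color buffer N is read via fbfetch, and bit
 * PIPE_MAX_COLOR_BUFS flags a depth/stencil read:
 *
 *    fbfetch_outputs |= outputs_read >> FRAG_RESULT_DATA0;    // colors
 *    fbfetch_outputs |= BITFIELD_BIT(PIPE_MAX_COLOR_BUFS);    // Z/S
 */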
2017
2018 static void
zink_bind_gs_state(struct pipe_context *pctx,
                   void *cso)
2021 {
2022 struct zink_context *ctx = zink_context(pctx);
2023 if (!cso && !ctx->gfx_stages[MESA_SHADER_GEOMETRY])
2024 return;
2025 bind_gfx_stage(ctx, MESA_SHADER_GEOMETRY, cso);
2026 bind_last_vertex_stage(ctx, MESA_SHADER_GEOMETRY, NULL);
2027 }
2028
2029 static void
zink_bind_tcs_state(struct pipe_context *pctx,
                    void *cso)
2032 {
2033 bind_gfx_stage(zink_context(pctx), MESA_SHADER_TESS_CTRL, cso);
2034 }
2035
2036 static void
zink_bind_tes_state(struct pipe_context *pctx,
                    void *cso)
2039 {
2040 struct zink_context *ctx = zink_context(pctx);
2041 if (!cso && !ctx->gfx_stages[MESA_SHADER_TESS_EVAL])
2042 return;
2043 if (!!ctx->gfx_stages[MESA_SHADER_TESS_EVAL] != !!cso) {
2044 if (!cso) {
2045 /* if unsetting a TESS that uses a generated TCS, ensure the TCS is unset */
2046 if (ctx->gfx_stages[MESA_SHADER_TESS_CTRL] == ctx->gfx_stages[MESA_SHADER_TESS_EVAL]->non_fs.generated_tcs)
2047 ctx->gfx_stages[MESA_SHADER_TESS_CTRL] = NULL;
2048 }
2049 }
2050 struct zink_shader *prev_shader = ctx->gfx_stages[MESA_SHADER_TESS_EVAL];
2051 bind_gfx_stage(ctx, MESA_SHADER_TESS_EVAL, cso);
2052 bind_last_vertex_stage(ctx, MESA_SHADER_TESS_EVAL, prev_shader);
2053 }
2054
2055 static void *
zink_create_cs_state(struct pipe_context *pctx,
                     const struct pipe_compute_state *shader)
2058 {
2059 struct nir_shader *nir;
2060 if (shader->ir_type != PIPE_SHADER_IR_NIR)
2061 nir = zink_tgsi_to_nir(pctx->screen, shader->prog);
2062 else
2063 nir = (struct nir_shader *)shader->prog;
2064
2065 if (nir->info.uses_bindless)
2066 zink_descriptors_init_bindless(zink_context(pctx));
2067
2068 return create_compute_program(zink_context(pctx), nir);
2069 }
2070
2071 static void
zink_bind_cs_state(struct pipe_context *pctx,
                   void *cso)
2074 {
2075 struct zink_context *ctx = zink_context(pctx);
2076 struct zink_compute_program *comp = cso;
2077 if (comp && comp->num_inlinable_uniforms)
2078 ctx->shader_has_inlinable_uniforms_mask |= 1 << MESA_SHADER_COMPUTE;
2079 else
2080 ctx->shader_has_inlinable_uniforms_mask &= ~(1 << MESA_SHADER_COMPUTE);
2081
2082 if (ctx->curr_compute) {
2083 zink_batch_reference_program(ctx, &ctx->curr_compute->base);
2084 ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash;
2085 ctx->compute_pipeline_state.module = VK_NULL_HANDLE;
2086 ctx->compute_pipeline_state.module_hash = 0;
2087 }
   ctx->compute_pipeline_state.dirty = true;
   /* this comparison must happen before curr_compute is overwritten,
    * otherwise it can never detect a program change
    */
   if (comp && comp != ctx->curr_compute) {
      ctx->compute_pipeline_state.module_hash = comp->curr->hash;
      if (util_queue_fence_is_signalled(&comp->base.cache_fence))
         ctx->compute_pipeline_state.module = comp->curr->obj.mod;
      ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash;
      if (ctx->compute_pipeline_state.key.base.nonseamless_cube_mask)
         ctx->compute_dirty = true;
   }
   ctx->curr_compute = comp;
2098 zink_select_launch_grid(ctx);
2099 }
2100
2101 static void
zink_get_compute_state_info(struct pipe_context *pctx, void *cso, struct pipe_compute_state_object_info *info)
2103 {
2104 struct zink_compute_program *comp = cso;
2105 struct zink_screen *screen = zink_screen(pctx->screen);
2106
2107 info->max_threads = screen->info.props.limits.maxComputeWorkGroupInvocations;
2108 info->private_memory = comp->scratch_size;
2109 if (screen->info.props11.subgroupSize) {
2110 info->preferred_simd_size = screen->info.props11.subgroupSize;
2111 info->simd_sizes = info->preferred_simd_size;
2112 } else {
2113 // just guess it
2114 info->preferred_simd_size = 64;
2115 // only used for actual subgroup support
2116 info->simd_sizes = 0;
2117 }
2118 }
2119
2120 static void
zink_delete_cs_shader_state(struct pipe_context *pctx, void *cso)
2122 {
2123 struct zink_compute_program *comp = cso;
2124 zink_compute_program_reference(zink_screen(pctx->screen), &comp, NULL);
2125 }
2126
2127 /* caller must lock prog->libs->lock */
2128 struct zink_gfx_library_key *
zink_create_pipeline_lib(struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state)
2130 {
2131 struct zink_gfx_library_key *gkey = CALLOC_STRUCT(zink_gfx_library_key);
2132 if (!gkey) {
2133 mesa_loge("ZINK: failed to allocate gkey!");
2134 return NULL;
2135 }
2136
2137 gkey->optimal_key = state->optimal_key;
2138 assert(gkey->optimal_key);
2139 for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++)
2140 gkey->modules[i] = prog->objs[i].mod;
2141 gkey->pipeline = zink_create_gfx_pipeline_library(screen, prog);
2142 _mesa_set_add(&prog->libs->libs, gkey);
2143 return gkey;
2144 }
2145
2146 static const char *
print_exe_stages(VkShaderStageFlags stages)
2148 {
2149 if (stages == VK_SHADER_STAGE_VERTEX_BIT)
2150 return "VS";
2151 if (stages == (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_GEOMETRY_BIT))
2152 return "VS+GS";
2153 if (stages == (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT))
2154 return "VS+TCS+TES";
2155 if (stages == (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | VK_SHADER_STAGE_GEOMETRY_BIT))
2156 return "VS+TCS+TES+GS";
2157 if (stages == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
2158 return "TCS";
2159 if (stages == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
2160 return "TES";
2161 if (stages == VK_SHADER_STAGE_GEOMETRY_BIT)
2162 return "GS";
2163 if (stages == VK_SHADER_STAGE_FRAGMENT_BIT)
2164 return "FS";
2165 if (stages == VK_SHADER_STAGE_COMPUTE_BIT)
2166 return "CS";
2167 unreachable("unhandled combination of stages!");
2168 }
2169
2170 static void
gfx_program_precompile_job(void *data, void *gdata, int thread_index)
2172 {
2173 struct zink_screen *screen = gdata;
2174 struct zink_gfx_program *prog = data;
2175
2176 /* this is threadsafe */
2177 gfx_program_init(prog->base.ctx, prog);
2178
2179 struct zink_gfx_pipeline_state state = {0};
2180 state.shader_keys_optimal.key.vs_base.last_vertex_stage = true;
2181 state.shader_keys_optimal.key.tcs.patch_vertices = 3; //random guess, generated tcs precompile is hard
2182 state.optimal_key = state.shader_keys_optimal.key.val;
2183 generate_gfx_program_modules_optimal(NULL, screen, prog, &state);
2184 zink_screen_get_pipeline_cache(screen, &prog->base, true);
2185 if (!screen->info.have_EXT_shader_object) {
2186 simple_mtx_lock(&prog->libs->lock);
2187 zink_create_pipeline_lib(screen, prog, &state);
2188 simple_mtx_unlock(&prog->libs->lock);
2189 }
2190 zink_screen_update_pipeline_cache(screen, &prog->base, true);
2191 }
2192
2193 static void
zink_link_gfx_shader(struct pipe_context *pctx, void **shaders)
2195 {
2196 struct zink_context *ctx = zink_context(pctx);
2197 struct zink_shader **zshaders = (struct zink_shader **)shaders;
2198 if (shaders[MESA_SHADER_COMPUTE])
2199 return;
2200 /* explicitly block sample shading: this needs full pipelines always */
2201 if (zshaders[MESA_SHADER_FRAGMENT] && zshaders[MESA_SHADER_FRAGMENT]->info.fs.uses_sample_shading)
2202 return;
2203 /* can't precompile fixedfunc */
2204 if (!shaders[MESA_SHADER_VERTEX] || !shaders[MESA_SHADER_FRAGMENT]) {
2205 /* handled directly from shader create */
2206 return;
2207 }
2208 unsigned hash = 0;
2209 unsigned shader_stages = 0;
2210 for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
2211 if (zshaders[i]) {
2212 hash ^= zshaders[i]->hash;
2213 shader_stages |= BITFIELD_BIT(i);
2214 }
2215 }
2216 unsigned tess_stages = BITFIELD_BIT(MESA_SHADER_TESS_CTRL) | BITFIELD_BIT(MESA_SHADER_TESS_EVAL);
2217 unsigned tess = shader_stages & tess_stages;
2218 /* can't do fixedfunc tes either */
2219 if (tess && !shaders[MESA_SHADER_TESS_EVAL])
2220 return;
2221 struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(shader_stages)];
2222 simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(shader_stages)]);
2223 /* link can be called repeatedly with the same shaders: ignore */
2224 if (_mesa_hash_table_search_pre_hashed(ht, hash, shaders)) {
2225 simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(shader_stages)]);
2226 return;
2227 }
2228 struct zink_gfx_program *prog = gfx_program_create(ctx, zshaders, 3, hash);
2229 u_foreach_bit(i, shader_stages)
2230 assert(prog->shaders[i]);
2231 _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog);
2232 prog->base.removed = false;
2233 simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(shader_stages)]);
2234 if (zink_debug & ZINK_DEBUG_SHADERDB) {
2235 struct zink_screen *screen = zink_screen(pctx->screen);
2236 gfx_program_init(ctx, prog);
2237 if (screen->optimal_keys)
2238 generate_gfx_program_modules_optimal(ctx, screen, prog, &ctx->gfx_pipeline_state);
2239 else
2240 generate_gfx_program_modules(ctx, screen, prog, &ctx->gfx_pipeline_state);
2241 VkPipeline pipeline = zink_create_gfx_pipeline(screen, prog, prog->objs, &ctx->gfx_pipeline_state,
2242 ctx->gfx_pipeline_state.element_state->binding_map,
2243 shaders[MESA_SHADER_TESS_EVAL] ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, true);
2244 print_pipeline_stats(screen, pipeline, &ctx->dbg);
2245 VKSCR(DestroyPipeline)(screen->dev, pipeline, NULL);
2246 } else {
2247 if (zink_screen(pctx->screen)->info.have_EXT_shader_object)
2248 prog->base.uses_shobj = !BITSET_TEST(zshaders[MESA_SHADER_FRAGMENT]->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);
2249 if (zink_debug & ZINK_DEBUG_NOBGC)
2250 gfx_program_precompile_job(prog, pctx->screen, 0);
2251 else
2252 util_queue_add_job(&zink_screen(pctx->screen)->cache_get_thread, prog, &prog->base.cache_fence, gfx_program_precompile_job, NULL, 0);
2253 }
2254 }
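
/* Program-cache keying sketch: the lookup hash is the XOR of the bound
 * shaders' hashes, so it is order-independent and changes whenever any
 * stage changes:
 *
 *    hash = 0;
 *    u_foreach_bit(i, shader_stages)
 *       hash ^= zshaders[i]->hash;
 *    _mesa_hash_table_search_pre_hashed(ht, hash, shaders);
 */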
2255
2256 void
zink_delete_shader_state(struct pipe_context *pctx, void *cso)
2258 {
2259 zink_gfx_shader_free(zink_screen(pctx->screen), cso);
2260 }
2261
2262 static void
precompile_separate_shader(struct zink_shader *zs, struct zink_screen *screen)
2264 {
2265 zs->precompile.obj = zink_shader_compile_separate(screen, zs);
2266 if (!screen->info.have_EXT_shader_object) {
2267 struct zink_shader_object objs[ZINK_GFX_SHADER_COUNT] = {0};
2268 objs[zs->info.stage].mod = zs->precompile.obj.mod;
2269 zs->precompile.gpl = zink_create_gfx_pipeline_separate(screen, objs, zs->precompile.layout, zs->info.stage);
2270 }
2271 }
2272
2273 static void
gfx_shader_init_job(void *data, void *gdata, int thread_index)
2275 {
2276 struct zink_screen *screen = gdata;
2277 struct zink_shader *zs = data;
2278
2279 zink_shader_init(screen, zs);
2280
2281 if (zink_debug & ZINK_DEBUG_NOPC) {
2282 ralloc_free(zs->nir);
2283 zs->nir = NULL;
2284 return;
2285 }
2286 if (zs->info.separate_shader && zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB &&
2287 (screen->info.have_EXT_shader_object ||
2288 (screen->info.have_EXT_graphics_pipeline_library && (zs->info.stage == MESA_SHADER_FRAGMENT || zs->info.stage == MESA_SHADER_VERTEX)))) {
2289 /* sample shading can't precompile */
2290 if (zs->info.stage != MESA_SHADER_FRAGMENT || !zs->info.fs.uses_sample_shading)
2291 precompile_separate_shader(zs, screen);
2292 }
2293 ralloc_free(zs->nir);
2294 zs->nir = NULL;
2295 }
2296
2297 void *
zink_create_gfx_shader_state(struct pipe_context *pctx, const struct pipe_shader_state *shader)
2299 {
2300 struct zink_screen *screen = zink_screen(pctx->screen);
2301 nir_shader *nir;
2302 if (shader->type != PIPE_SHADER_IR_NIR)
2303 nir = zink_tgsi_to_nir(pctx->screen, shader->tokens);
2304 else
2305 nir = (struct nir_shader *)shader->ir.nir;
2306
2307 if (nir->info.stage == MESA_SHADER_FRAGMENT && nir->info.fs.uses_fbfetch_output)
2308 zink_descriptor_util_init_fbfetch(zink_context(pctx));
2309 if (nir->info.uses_bindless)
2310 zink_descriptors_init_bindless(zink_context(pctx));
2311
2312 struct zink_shader *zs = zink_shader_create(zink_screen(pctx->screen), nir);
2313 if (zink_debug & ZINK_DEBUG_NOBGC)
2314 gfx_shader_init_job(zs, screen, 0);
2315 else
2316 util_queue_add_job(&screen->cache_get_thread, zs, &zs->precompile.fence, gfx_shader_init_job, NULL, 0);
2317
2318 return zs;
2319 }
2320
2321 static void
zink_delete_cached_shader_state(struct pipe_context *pctx, void *cso)
2323 {
2324 struct zink_screen *screen = zink_screen(pctx->screen);
2325 util_shader_reference(pctx, &screen->shaders, &cso, NULL);
2326 }
2327
2328 static void *
zink_create_cached_shader_state(struct pipe_context *pctx, const struct pipe_shader_state *shader)
2330 {
2331 bool cache_hit;
2332 struct zink_screen *screen = zink_screen(pctx->screen);
2333 return util_live_shader_cache_get(pctx, &screen->shaders, shader, &cache_hit);
2334 }
2335
2336
2337 void
zink_program_init(struct zink_context *ctx)
2339 {
2340 ctx->base.create_vs_state = zink_create_cached_shader_state;
2341 ctx->base.bind_vs_state = zink_bind_vs_state;
2342 ctx->base.delete_vs_state = zink_delete_cached_shader_state;
2343
2344 ctx->base.create_fs_state = zink_create_cached_shader_state;
2345 ctx->base.bind_fs_state = zink_bind_fs_state;
2346 ctx->base.delete_fs_state = zink_delete_cached_shader_state;
2347
2348 ctx->base.create_gs_state = zink_create_cached_shader_state;
2349 ctx->base.bind_gs_state = zink_bind_gs_state;
2350 ctx->base.delete_gs_state = zink_delete_cached_shader_state;
2351
2352 ctx->base.create_tcs_state = zink_create_cached_shader_state;
2353 ctx->base.bind_tcs_state = zink_bind_tcs_state;
2354 ctx->base.delete_tcs_state = zink_delete_cached_shader_state;
2355
2356 ctx->base.create_tes_state = zink_create_cached_shader_state;
2357 ctx->base.bind_tes_state = zink_bind_tes_state;
2358 ctx->base.delete_tes_state = zink_delete_cached_shader_state;
2359
2360 ctx->base.create_compute_state = zink_create_cs_state;
2361 ctx->base.bind_compute_state = zink_bind_cs_state;
2362 ctx->base.get_compute_state_info = zink_get_compute_state_info;
2363 ctx->base.delete_compute_state = zink_delete_cs_shader_state;
2364
2365 if (zink_screen(ctx->base.screen)->info.have_EXT_vertex_input_dynamic_state)
2366 _mesa_set_init(&ctx->gfx_inputs, ctx, hash_gfx_input_dynamic, equals_gfx_input_dynamic);
2367 else
2368 _mesa_set_init(&ctx->gfx_inputs, ctx, hash_gfx_input, equals_gfx_input);
2369 if (zink_screen(ctx->base.screen)->have_full_ds3)
2370 _mesa_set_init(&ctx->gfx_outputs, ctx, hash_gfx_output_ds3, equals_gfx_output_ds3);
2371 else
2372 _mesa_set_init(&ctx->gfx_outputs, ctx, hash_gfx_output, equals_gfx_output);
2373 /* validate struct packing */
2374 STATIC_ASSERT(offsetof(struct zink_gfx_output_key, sample_mask) == sizeof(uint32_t));
2375 STATIC_ASSERT(offsetof(struct zink_gfx_pipeline_state, vertex_buffers_enabled_mask) - offsetof(struct zink_gfx_pipeline_state, input) ==
2376 offsetof(struct zink_gfx_input_key, vertex_buffers_enabled_mask) - offsetof(struct zink_gfx_input_key, input));
2377 STATIC_ASSERT(offsetof(struct zink_gfx_pipeline_state, vertex_strides) - offsetof(struct zink_gfx_pipeline_state, input) ==
2378 offsetof(struct zink_gfx_input_key, vertex_strides) - offsetof(struct zink_gfx_input_key, input));
2379 STATIC_ASSERT(offsetof(struct zink_gfx_pipeline_state, element_state) - offsetof(struct zink_gfx_pipeline_state, input) ==
2380 offsetof(struct zink_gfx_input_key, element_state) - offsetof(struct zink_gfx_input_key, input));
2381
2382 STATIC_ASSERT(sizeof(union zink_shader_key_optimal) == sizeof(uint32_t));
2383
2384 /* no precompile at all */
2385 if (zink_debug & ZINK_DEBUG_NOPC)
2386 return;
2387
2388 struct zink_screen *screen = zink_screen(ctx->base.screen);
2389 if (screen->info.have_EXT_graphics_pipeline_library || screen->info.have_EXT_shader_object || zink_debug & ZINK_DEBUG_SHADERDB)
2390 ctx->base.link_shader = zink_link_gfx_shader;
2391 }
2392
2393 bool
zink_set_rasterizer_discard(struct zink_context *ctx, bool disable)
2395 {
2396 bool value = disable ? false : (ctx->rast_state ? ctx->rast_state->base.rasterizer_discard : false);
2397 bool changed = ctx->gfx_pipeline_state.dyn_state2.rasterizer_discard != value;
2398 ctx->gfx_pipeline_state.dyn_state2.rasterizer_discard = value;
2399 if (!changed)
2400 return false;
2401 if (!zink_screen(ctx->base.screen)->info.have_EXT_extended_dynamic_state2)
2402 ctx->gfx_pipeline_state.dirty |= true;
2403 ctx->rasterizer_discard_changed = true;
2404 return true;
2405 }
2406
2407 void
zink_driver_thread_add_job(struct pipe_screen *pscreen, void *data,
                           struct util_queue_fence *fence,
                           pipe_driver_thread_func execute,
                           pipe_driver_thread_func cleanup,
                           const size_t job_size)
2413 {
2414 struct zink_screen *screen = zink_screen(pscreen);
2415 util_queue_add_job(&screen->cache_get_thread, data, fence, execute, cleanup, job_size);
2416 }
2417
2418 static bool
has_edge_flags(struct zink_context *ctx)
2420 {
2421 switch(ctx->gfx_pipeline_state.gfx_prim_mode) {
2422 case MESA_PRIM_POINTS:
2423 case MESA_PRIM_LINE_STRIP:
2424 case MESA_PRIM_LINE_STRIP_ADJACENCY:
2425 case MESA_PRIM_LINES:
2426 case MESA_PRIM_LINE_LOOP:
2427 case MESA_PRIM_LINES_ADJACENCY:
2428 case MESA_PRIM_TRIANGLE_STRIP:
2429 case MESA_PRIM_TRIANGLE_FAN:
2430 case MESA_PRIM_TRIANGLE_STRIP_ADJACENCY:
2431 case MESA_PRIM_QUAD_STRIP:
2432 case MESA_PRIM_PATCHES:
2433 return false;
2434 case MESA_PRIM_TRIANGLES:
2435 case MESA_PRIM_TRIANGLES_ADJACENCY:
2436 case MESA_PRIM_QUADS:
2437 case MESA_PRIM_POLYGON:
2438 case MESA_PRIM_COUNT:
2439 default:
2440 break;
2441 }
2442 return (ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_LINES ||
2443 ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_POINTS) &&
2444 ctx->gfx_stages[MESA_SHADER_VERTEX]->has_edgeflags;
2445 }
2446
2447 static enum zink_rast_prim
zink_rast_prim_for_pipe(enum mesa_prim prim)
2449 {
2450 switch (prim) {
2451 case MESA_PRIM_POINTS:
2452 return ZINK_PRIM_POINTS;
2453 case MESA_PRIM_LINES:
2454 return ZINK_PRIM_LINES;
2455 case MESA_PRIM_TRIANGLES:
2456 default:
2457 return ZINK_PRIM_TRIANGLES;
2458 }
2459 }
2460
2461 static enum mesa_prim
zink_tess_prim_type(struct zink_shader *tess)
2463 {
2464 if (tess->info.tess.point_mode)
2465 return MESA_PRIM_POINTS;
2466 else {
2467 switch (tess->info.tess._primitive_mode) {
2468 case TESS_PRIMITIVE_ISOLINES:
2469 return MESA_PRIM_LINES;
2470 case TESS_PRIMITIVE_TRIANGLES:
2471 case TESS_PRIMITIVE_QUADS:
2472 return MESA_PRIM_TRIANGLES;
2473 default:
2474 return MESA_PRIM_COUNT;
2475 }
2476 }
2477 }
2478
2479 static inline void
zink_add_inline_uniform(nir_shader *shader, int offset)
2481 {
2482 shader->info.inlinable_uniform_dw_offsets[shader->info.num_inlinable_uniforms] = offset;
2483 ++shader->info.num_inlinable_uniforms;
2484 }
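
/* Example: registering three consecutive dwords for inlining, matching the
 * generated-GS constants set up in zink_set_primitive_emulation_keys() below:
 *
 *    zink_add_inline_uniform(nir, ZINK_INLINE_VAL_FLAT_MASK);
 *    zink_add_inline_uniform(nir, ZINK_INLINE_VAL_FLAT_MASK + 1);
 *    zink_add_inline_uniform(nir, ZINK_INLINE_VAL_PV_LAST_VERT);
 */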
2485
2486 static unsigned
encode_lower_pv_mode(enum mesa_prim prim_type)
2488 {
2489 switch (prim_type) {
2490 case MESA_PRIM_TRIANGLE_STRIP:
2491 case MESA_PRIM_QUAD_STRIP:
2492 return ZINK_PVE_PRIMITIVE_TRISTRIP;
2493 case MESA_PRIM_TRIANGLE_FAN:
2494 return ZINK_PVE_PRIMITIVE_FAN;
2495 default:
2496 return ZINK_PVE_PRIMITIVE_SIMPLE;
2497 }
2498 }
2499
2500 void
zink_set_primitive_emulation_keys(struct zink_context *ctx)
2502 {
2503 struct zink_screen *screen = zink_screen(ctx->base.screen);
2504 bool lower_line_stipple = false, lower_line_smooth = false;
2505 unsigned lower_pv_mode = 0;
2506 if (!screen->optimal_keys) {
2507 lower_line_stipple = ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_LINES &&
2508 screen->driver_workarounds.no_linestipple &&
2509 ctx->rast_state->base.line_stipple_enable &&
2510 !ctx->num_so_targets;
2511
2512 bool lower_point_smooth = ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_POINTS &&
2513 screen->driconf.emulate_point_smooth &&
2514 ctx->rast_state->base.point_smooth;
2515 if (zink_get_fs_key(ctx)->lower_line_stipple != lower_line_stipple) {
2516 assert(zink_get_gs_key(ctx)->lower_line_stipple ==
2517 zink_get_fs_key(ctx)->lower_line_stipple);
2518 zink_set_fs_key(ctx)->lower_line_stipple = lower_line_stipple;
2519 zink_set_gs_key(ctx)->lower_line_stipple = lower_line_stipple;
2520 }
2521
2522 lower_line_smooth = ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_LINES &&
2523 screen->driver_workarounds.no_linesmooth &&
2524 ctx->rast_state->base.line_smooth &&
2525 !ctx->num_so_targets;
2526
2527 if (zink_get_fs_key(ctx)->lower_line_smooth != lower_line_smooth) {
2528 assert(zink_get_gs_key(ctx)->lower_line_smooth ==
2529 zink_get_fs_key(ctx)->lower_line_smooth);
2530 zink_set_fs_key(ctx)->lower_line_smooth = lower_line_smooth;
2531 zink_set_gs_key(ctx)->lower_line_smooth = lower_line_smooth;
2532 }
2533
2534 if (zink_get_fs_key(ctx)->lower_point_smooth != lower_point_smooth) {
2535 zink_set_fs_key(ctx)->lower_point_smooth = lower_point_smooth;
2536 }
2537
2538 lower_pv_mode = ctx->gfx_pipeline_state.dyn_state3.pv_last &&
2539 !screen->info.have_EXT_provoking_vertex;
2540 if (lower_pv_mode)
2541 lower_pv_mode = encode_lower_pv_mode(ctx->gfx_pipeline_state.gfx_prim_mode);
2542
2543 if (zink_get_gs_key(ctx)->lower_pv_mode != lower_pv_mode)
2544 zink_set_gs_key(ctx)->lower_pv_mode = lower_pv_mode;
2545 }
2546
2547 bool lower_edge_flags = has_edge_flags(ctx);
2548
2549 bool lower_quad_prim = ctx->gfx_pipeline_state.gfx_prim_mode == MESA_PRIM_QUADS;
2550
2551 bool lower_filled_quad = lower_quad_prim &&
2552 ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_TRIANGLES;
2553
2554 if (lower_line_stipple || lower_line_smooth ||
2555 lower_edge_flags || lower_quad_prim ||
2556 lower_pv_mode || zink_get_gs_key(ctx)->lower_gl_point) {
      gl_shader_stage prev_vertex_stage =
2558 ctx->gfx_stages[MESA_SHADER_TESS_EVAL] ?
2559 MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
2560 enum zink_rast_prim zink_prim_type =
2561 zink_rast_prim_for_pipe(ctx->gfx_pipeline_state.rast_prim);
2562
      //when transform feedback is in use, quad primitives must be tessellated into triangles
2564 lower_filled_quad |= lower_quad_prim && ctx->gfx_stages[prev_vertex_stage]->info.has_transform_feedback_varyings;
2565
2566 if (!ctx->gfx_stages[MESA_SHADER_GEOMETRY] || (ctx->gfx_stages[MESA_SHADER_GEOMETRY]->non_fs.is_generated &&
2567 ctx->gfx_stages[MESA_SHADER_GEOMETRY]->info.gs.input_primitive != ctx->gfx_pipeline_state.gfx_prim_mode)) {
2568
2569 if (!ctx->gfx_stages[prev_vertex_stage]->non_fs.generated_gs[ctx->gfx_pipeline_state.gfx_prim_mode][zink_prim_type]) {
2570 util_queue_fence_wait(&ctx->gfx_stages[prev_vertex_stage]->precompile.fence);
2571 nir_shader *prev_stage = zink_shader_deserialize(screen, ctx->gfx_stages[prev_vertex_stage]);
2572 nir_shader *nir;
2573 if (lower_filled_quad) {
2574 nir = zink_create_quads_emulation_gs(
2575 &screen->nir_options,
2576 prev_stage);
2577 } else {
2578 enum mesa_prim prim = ctx->gfx_pipeline_state.gfx_prim_mode;
2579 if (prev_vertex_stage == MESA_SHADER_TESS_EVAL)
2580 prim = zink_tess_prim_type(ctx->gfx_stages[MESA_SHADER_TESS_EVAL]);
2581 nir = nir_create_passthrough_gs(
2582 &screen->nir_options,
2583 prev_stage,
2584 prim,
2585 ctx->gfx_pipeline_state.rast_prim,
2586 lower_edge_flags,
2587 lower_line_stipple || lower_quad_prim);
2588 }
2589 zink_lower_system_values_to_inlined_uniforms(nir);
2590
2591 zink_add_inline_uniform(nir, ZINK_INLINE_VAL_FLAT_MASK);
2592 zink_add_inline_uniform(nir, ZINK_INLINE_VAL_FLAT_MASK+1);
2593 zink_add_inline_uniform(nir, ZINK_INLINE_VAL_PV_LAST_VERT);
2594 ralloc_free(prev_stage);
2595 struct zink_shader *shader = zink_shader_create(screen, nir);
2596 zink_shader_init(screen, shader);
2597 shader->needs_inlining = true;
2598 ctx->gfx_stages[prev_vertex_stage]->non_fs.generated_gs[ctx->gfx_pipeline_state.gfx_prim_mode][zink_prim_type] = shader;
2599 shader->non_fs.is_generated = true;
2600 shader->non_fs.parent = ctx->gfx_stages[prev_vertex_stage];
2601 shader->can_inline = true;
2602 memcpy(shader->sinfo.stride, ctx->gfx_stages[prev_vertex_stage]->sinfo.stride, sizeof(shader->sinfo.stride));
2603 }
2604
2605 ctx->base.bind_gs_state(&ctx->base,
2606 ctx->gfx_stages[prev_vertex_stage]->non_fs.generated_gs[ctx->gfx_pipeline_state.gfx_prim_mode][zink_prim_type]);
2607 ctx->is_generated_gs_bound = true;
2608 }
2609
2610 ctx->base.set_inlinable_constants(&ctx->base, MESA_SHADER_GEOMETRY, 3,
2611 (uint32_t []){ctx->gfx_stages[MESA_SHADER_FRAGMENT]->flat_flags,
2612 ctx->gfx_stages[MESA_SHADER_FRAGMENT]->flat_flags >> 32,
2613 ctx->gfx_pipeline_state.dyn_state3.pv_last});
2614 } else if (ctx->gfx_stages[MESA_SHADER_GEOMETRY] &&
2615 ctx->gfx_stages[MESA_SHADER_GEOMETRY]->non_fs.is_generated)
2616 ctx->base.bind_gs_state(&ctx->base, NULL);
2617 }
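
/* The three inlined constants passed to set_inlinable_constants() above map
 * to the dword offsets registered via zink_add_inline_uniform() when the GS
 * was generated:
 *
 *    [0] flat_flags (low 32 bits)  -> ZINK_INLINE_VAL_FLAT_MASK
 *    [1] flat_flags >> 32          -> ZINK_INLINE_VAL_FLAT_MASK + 1
 *    [2] dyn_state3.pv_last        -> ZINK_INLINE_VAL_PV_LAST_VERT
 */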
2618