1 /*
2 * Copyright © Microsoft Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "dzn_private.h"
25
26 #include "spirv/nir_spirv.h"
27
28 #include "dxil_nir.h"
29 #include "nir_to_dxil.h"
30 #include "dxil_spirv_nir.h"
31 #include "spirv_to_dxil.h"
32
33 #include "dxil_validator.h"
34
35 #include "vk_alloc.h"
36 #include "vk_util.h"
37 #include "vk_format.h"
38 #include "vk_pipeline.h"
39 #include "vk_pipeline_cache.h"
40
41 #include "util/u_debug.h"
42
43 #define d3d12_pipeline_state_stream_new_desc(__stream, __maxstreamsz, __id, __type, __desc) \
44 __type *__desc; \
45 do { \
46 struct { \
47 D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type; \
48 __type desc; \
49 } *__wrapper; \
50 (__stream)->SizeInBytes = ALIGN_POT((__stream)->SizeInBytes, alignof(void *)); \
51 __wrapper = (void *)((uint8_t *)(__stream)->pPipelineStateSubobjectStream + (__stream)->SizeInBytes); \
52 (__stream)->SizeInBytes += sizeof(*__wrapper); \
53 assert((__stream)->SizeInBytes <= __maxstreamsz); \
54 __wrapper->type = __id; \
55 __desc = &__wrapper->desc; \
56 memset(__desc, 0, sizeof(*__desc)); \
57 } while (0)
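/* These helpers append a sub-object to a D3D12 pipeline state stream: each
 * entry is a { subobject-type enum, payload } pair, aligned to pointer size,
 * appended at the current SizeInBytes offset and zero-initialized. For
 * example (illustrative expansion only):
 *
 *   d3d12_gfx_pipeline_state_stream_new_desc(stream, VS, D3D12_SHADER_BYTECODE, desc);
 *
 * declares a local `D3D12_SHADER_BYTECODE *desc` pointing at the payload of a
 * freshly appended D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_VS entry in `stream`. */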
58
59 #define d3d12_pipeline_state_stream_new_desc_abbrev(__stream, __maxstreamsz, __id, __type, __desc) \
60 d3d12_pipeline_state_stream_new_desc(__stream, __maxstreamsz, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_ ## __id, __type, __desc)
61
62 #define d3d12_gfx_pipeline_state_stream_new_desc(__stream, __id, __type, __desc) \
63 d3d12_pipeline_state_stream_new_desc_abbrev(__stream, MAX_GFX_PIPELINE_STATE_STREAM_SIZE, __id, __type, __desc)
64
65 #define d3d12_compute_pipeline_state_stream_new_desc(__stream, __id, __type, __desc) \
66 d3d12_pipeline_state_stream_new_desc_abbrev(__stream, MAX_COMPUTE_PIPELINE_STATE_STREAM_SIZE, __id, __type, __desc)
67
68 static bool
69 gfx_pipeline_variant_key_equal(const void *a, const void *b)
70 {
71 return !memcmp(a, b, sizeof(struct dzn_graphics_pipeline_variant_key));
72 }
73
74 static uint32_t
75 gfx_pipeline_variant_key_hash(const void *key)
76 {
77 return _mesa_hash_data(key, sizeof(struct dzn_graphics_pipeline_variant_key));
78 }
79
80 static bool
81 gfx_pipeline_cmd_signature_key_equal(const void *a, const void *b)
82 {
83 return !memcmp(a, b, sizeof(struct dzn_indirect_draw_cmd_sig_key));
84 }
85
86 static uint32_t
87 gfx_pipeline_cmd_signature_key_hash(const void *key)
88 {
89 return _mesa_hash_data(key, sizeof(struct dzn_indirect_draw_cmd_sig_key));
90 }
91
92 struct dzn_cached_blob {
93 struct vk_pipeline_cache_object base;
94 uint8_t hash[SHA1_DIGEST_LENGTH];
95 const void *data;
96 size_t size;
97 };
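/* Generic pipeline-cache object for this driver: an opaque byte blob keyed
 * by a SHA1 hash. Cached DXIL shaders and cached graphics pipeline
 * descriptions are both stored through this wrapper. */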
98
99 static bool
100 dzn_cached_blob_serialize(struct vk_pipeline_cache_object *object,
101 struct blob *blob)
102 {
103 struct dzn_cached_blob *cached_blob =
104 container_of(object, struct dzn_cached_blob, base);
105
106 blob_write_bytes(blob, cached_blob->data, cached_blob->size);
107 return true;
108 }
109
110 static void
111 dzn_cached_blob_destroy(struct vk_device *device,
112 struct vk_pipeline_cache_object *object)
113 {
114 struct dzn_cached_blob *shader =
115 container_of(object, struct dzn_cached_blob, base);
116
117 vk_free(&device->alloc, shader);
118 }
119
120 static struct vk_pipeline_cache_object *
121 dzn_cached_blob_create(struct vk_device *device,
122 const void *hash,
123 const void *data,
124 size_t data_size);
125
126 static struct vk_pipeline_cache_object *
127 dzn_cached_blob_deserialize(struct vk_pipeline_cache *cache,
128 const void *key_data, size_t key_size,
129 struct blob_reader *blob)
130 {
131 size_t data_size = blob->end - blob->current;
132 assert(key_size == SHA1_DIGEST_LENGTH);
133
134 return dzn_cached_blob_create(cache->base.device, key_data,
135 blob_read_bytes(blob, data_size), data_size);
136 }
137
138 const struct vk_pipeline_cache_object_ops dzn_cached_blob_ops = {
139 .serialize = dzn_cached_blob_serialize,
140 .deserialize = dzn_cached_blob_deserialize,
141 .destroy = dzn_cached_blob_destroy,
142 };
143
144
145 static struct vk_pipeline_cache_object *
146 dzn_cached_blob_create(struct vk_device *device,
147 const void *hash,
148 const void *data,
149 size_t data_size)
150 {
151 VK_MULTIALLOC(ma);
152 VK_MULTIALLOC_DECL(&ma, struct dzn_cached_blob, blob, 1);
153 VK_MULTIALLOC_DECL(&ma, uint8_t, copy, data_size);
154
155 if (!vk_multialloc_alloc(&ma, &device->alloc,
156 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
157 return NULL;
158
159 memcpy(blob->hash, hash, sizeof(blob->hash));
160
161 vk_pipeline_cache_object_init(device, &blob->base,
162 &dzn_cached_blob_ops,
163 blob->hash, sizeof(blob->hash));
164
165 if (data)
166 memcpy(copy, data, data_size);
167 blob->data = copy;
168 blob->size = data_size;
169
170 return &blob->base;
171 }
172
173 static VkResult
174 dzn_graphics_pipeline_prepare_for_variants(struct dzn_device *device,
175 struct dzn_graphics_pipeline *pipeline)
176 {
177 if (pipeline->variants)
178 return VK_SUCCESS;
179
180 pipeline->variants =
181 _mesa_hash_table_create(NULL,
182 gfx_pipeline_variant_key_hash,
183 gfx_pipeline_variant_key_equal);
184 if (!pipeline->variants)
185 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
186
187 return VK_SUCCESS;
188 }
189
190 static dxil_spirv_shader_stage
191 to_dxil_shader_stage(VkShaderStageFlagBits in)
192 {
193 switch (in) {
194 case VK_SHADER_STAGE_VERTEX_BIT: return DXIL_SPIRV_SHADER_VERTEX;
195 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: return DXIL_SPIRV_SHADER_TESS_CTRL;
196 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: return DXIL_SPIRV_SHADER_TESS_EVAL;
197 case VK_SHADER_STAGE_GEOMETRY_BIT: return DXIL_SPIRV_SHADER_GEOMETRY;
198 case VK_SHADER_STAGE_FRAGMENT_BIT: return DXIL_SPIRV_SHADER_FRAGMENT;
199 case VK_SHADER_STAGE_COMPUTE_BIT: return DXIL_SPIRV_SHADER_COMPUTE;
200 default: unreachable("Unsupported stage");
201 }
202 }
203
204 struct dzn_nir_options {
205 enum dxil_spirv_yz_flip_mode yz_flip_mode;
206 uint16_t y_flip_mask, z_flip_mask;
207 bool force_sample_rate_shading;
208 bool lower_view_index;
209 bool lower_view_index_to_rt_layer;
210 enum pipe_format *vi_conversions;
211 const nir_shader_compiler_options *nir_opts;
212 };
213
214 static VkResult
215 dzn_pipeline_get_nir_shader(struct dzn_device *device,
216 const struct dzn_pipeline_layout *layout,
217 struct vk_pipeline_cache *cache,
218 const uint8_t *hash,
219 VkPipelineCreateFlags2KHR pipeline_flags,
220 const VkPipelineShaderStageCreateInfo *stage_info,
221 gl_shader_stage stage,
222 const struct dzn_nir_options *options,
223 struct dxil_spirv_metadata *metadata,
224 nir_shader **nir)
225 {
226 if (cache) {
227 *nir = vk_pipeline_cache_lookup_nir(cache, hash, SHA1_DIGEST_LENGTH,
228 options->nir_opts, NULL, NULL);
229 if (*nir) {
230 /* This bit is explicitly set in the shader info before caching: the sysval isn't
231 * actually present in the lowered shader, so info gathering would never set it. */
232 if ((*nir)->info.stage == MESA_SHADER_VERTEX &&
233 BITSET_TEST((*nir)->info.system_values_read, SYSTEM_VALUE_FIRST_VERTEX))
234 metadata->needs_draw_sysvals = true;
235 return VK_SUCCESS;
236 }
237 }
238
239 struct dzn_physical_device *pdev =
240 container_of(device->vk.physical, struct dzn_physical_device, vk);
241 const struct spirv_to_nir_options *spirv_opts = dxil_spirv_nir_get_spirv_options();
242
243 VkResult result =
244 vk_pipeline_shader_stage_to_nir(&device->vk, pipeline_flags, stage_info,
245 spirv_opts, options->nir_opts, NULL, nir);
246 if (result != VK_SUCCESS)
247 return result;
248
249 struct dxil_spirv_runtime_conf conf = {
250 .runtime_data_cbv = {
251 .register_space = DZN_REGISTER_SPACE_SYSVALS,
252 .base_shader_register = 0,
253 },
254 .push_constant_cbv = {
255 .register_space = DZN_REGISTER_SPACE_PUSH_CONSTANT,
256 .base_shader_register = 0,
257 },
258 .first_vertex_and_base_instance_mode = pdev->options21.ExtendedCommandInfoSupported ?
259 DXIL_SPIRV_SYSVAL_TYPE_NATIVE : DXIL_SPIRV_SYSVAL_TYPE_RUNTIME_DATA,
260 .workgroup_id_mode = DXIL_SPIRV_SYSVAL_TYPE_RUNTIME_DATA,
261 .yz_flip = {
262 .mode = options->yz_flip_mode,
263 .y_mask = options->y_flip_mask,
264 .z_mask = options->z_flip_mask,
265 },
266 .declared_read_only_images_as_srvs = !device->bindless,
267 .inferred_read_only_images_as_srvs = !device->bindless,
268 .force_sample_rate_shading = options->force_sample_rate_shading,
269 .lower_view_index = options->lower_view_index,
270 .lower_view_index_to_rt_layer = options->lower_view_index_to_rt_layer,
271 .shader_model_max = dzn_get_shader_model(pdev),
272 };
273
274 dxil_spirv_nir_passes(*nir, &conf, metadata);
275
276 if (stage == MESA_SHADER_VERTEX) {
277 bool needs_conv = false;
278 for (uint32_t i = 0; i < MAX_VERTEX_GENERIC_ATTRIBS; i++) {
279 if (options->vi_conversions[i] != PIPE_FORMAT_NONE)
280 needs_conv = true;
281 }
282
283 if (needs_conv)
284 NIR_PASS_V(*nir, dxil_nir_lower_vs_vertex_conversion, options->vi_conversions);
285 }
286
287 if (cache) {
288 /* Cache this additional metadata */
289 if (metadata->needs_draw_sysvals)
290 BITSET_SET((*nir)->info.system_values_read, SYSTEM_VALUE_FIRST_VERTEX);
291 vk_pipeline_cache_add_nir(cache, hash, SHA1_DIGEST_LENGTH, *nir);
292 }
293
294 return VK_SUCCESS;
295 }
296
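/* The helpers below rewrite Vulkan (set, binding) pairs into the register
 * layout described by the pipeline layout's binding_translation tables. In
 * the non-bindless path the binding index is simply replaced with the
 * translated base register; the bindless path additionally redirects dynamic
 * buffers to a dedicated set and tags static samplers so they aren't turned
 * into descriptors (see adjust_to_bindless_cb). */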
297 static bool
298 adjust_resource_index_binding(struct nir_builder *builder,
299 nir_intrinsic_instr *intrin,
300 void *cb_data)
301 {
302 if (intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
303 return false;
304
305 const struct dzn_pipeline_layout *layout = cb_data;
306 unsigned set = nir_intrinsic_desc_set(intrin);
307 unsigned binding = nir_intrinsic_binding(intrin);
308
309 if (set >= layout->set_count ||
310 binding >= layout->binding_translation[set].binding_count)
311 return false;
312
313 binding = layout->binding_translation[set].base_reg[binding];
314 nir_intrinsic_set_binding(intrin, binding);
315
316 return true;
317 }
318
319 static void
320 adjust_to_bindless_cb(struct dxil_spirv_binding_remapping *inout, void *context)
321 {
322 const struct dzn_pipeline_layout *layout = context;
323 assert(inout->descriptor_set < layout->set_count);
324 uint32_t new_binding = layout->binding_translation[inout->descriptor_set].base_reg[inout->binding];
325 switch (layout->binding_translation[inout->descriptor_set].binding_class[inout->binding]) {
326 case DZN_PIPELINE_BINDING_DYNAMIC_BUFFER:
327 inout->descriptor_set = layout->set_count;
328 FALLTHROUGH;
329 case DZN_PIPELINE_BINDING_STATIC_SAMPLER:
330 if (inout->is_sampler) {
331 inout->descriptor_set = ~0;
332 break;
333 }
334 FALLTHROUGH;
335 case DZN_PIPELINE_BINDING_NORMAL:
336 inout->binding = new_binding;
337 break;
338 default:
339 unreachable("Invalid binding type");
340 }
341 }
342
343 static bool
344 adjust_var_bindings(nir_shader *shader,
345 struct dzn_device *device,
346 const struct dzn_pipeline_layout *layout,
347 uint8_t *bindings_hash)
348 {
349 uint32_t modes = nir_var_image | nir_var_uniform | nir_var_mem_ubo | nir_var_mem_ssbo;
350 struct mesa_sha1 bindings_hash_ctx;
351
352 if (bindings_hash)
353 _mesa_sha1_init(&bindings_hash_ctx);
354
355 nir_foreach_variable_with_modes(var, shader, modes) {
356 if (var->data.mode == nir_var_uniform) {
357 const struct glsl_type *type = glsl_without_array(var->type);
358
359 if (!glsl_type_is_sampler(type) && !glsl_type_is_texture(type))
360 continue;
361 }
362
363 unsigned s = var->data.descriptor_set, b = var->data.binding;
364
365 if (s >= layout->set_count)
366 continue;
367
368 assert(b < layout->binding_translation[s].binding_count);
369 if (!device->bindless)
370 var->data.binding = layout->binding_translation[s].base_reg[b];
371
372 if (bindings_hash) {
373 _mesa_sha1_update(&bindings_hash_ctx, &s, sizeof(s));
374 _mesa_sha1_update(&bindings_hash_ctx, &b, sizeof(b));
375 _mesa_sha1_update(&bindings_hash_ctx, &var->data.binding, sizeof(var->data.binding));
376 }
377 }
378
379 if (bindings_hash)
380 _mesa_sha1_final(&bindings_hash_ctx, bindings_hash);
381
382 if (device->bindless) {
383 struct dxil_spirv_nir_lower_bindless_options options = {
384 .dynamic_buffer_binding = layout->dynamic_buffer_count ? layout->set_count : ~0,
385 .num_descriptor_sets = layout->set_count,
386 .callback_context = (void *)layout,
387 .remap_binding = adjust_to_bindless_cb
388 };
389 bool ret = dxil_spirv_nir_lower_bindless(shader, &options);
390 /* We skipped remapping variable bindings in the hashing loop, but if there are
391 * still static samplers declared, we need to remap those now. */
392 nir_foreach_variable_with_modes(var, shader, nir_var_uniform) {
393 assert(glsl_type_is_sampler(glsl_without_array(var->type)));
394 var->data.binding = layout->binding_translation[var->data.descriptor_set].base_reg[var->data.binding];
395 }
396 return ret;
397 } else {
398 return nir_shader_intrinsics_pass(shader, adjust_resource_index_binding,
399 nir_metadata_all, (void *)layout);
400 }
401 }
402
403 enum dxil_shader_model
404 dzn_get_shader_model(const struct dzn_physical_device *pdev)
405 {
406 static_assert(D3D_SHADER_MODEL_6_0 == 0x60 && SHADER_MODEL_6_0 == 0x60000, "Validating math below");
407 static_assert(D3D_SHADER_MODEL_6_8 == 0x68 && SHADER_MODEL_6_8 == 0x60008, "Validating math below");
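/* Worked example: shader_model = D3D_SHADER_MODEL_6_8 = 0x68 gives
 * ((0x68 & 0xf0) << 12) | (0x68 & 0xf) = 0x60000 | 0x8 = 0x60008, which is
 * SHADER_MODEL_6_8, as the static_asserts above verify. */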
408 return ((pdev->shader_model & 0xf0) << 12) | (pdev->shader_model & 0xf);
409 }
410
411 static VkResult
412 dzn_pipeline_compile_shader(struct dzn_device *device,
413 nir_shader *nir,
414 uint32_t input_clip_size,
415 D3D12_SHADER_BYTECODE *slot)
416 {
417 struct dzn_instance *instance =
418 container_of(device->vk.physical->instance, struct dzn_instance, vk);
419 struct dzn_physical_device *pdev =
420 container_of(device->vk.physical, struct dzn_physical_device, vk);
421 struct nir_to_dxil_options opts = {
422 .environment = DXIL_ENVIRONMENT_VULKAN,
423 .lower_int16 = !pdev->options4.Native16BitShaderOpsSupported &&
424 /* Don't lower 16-bit types if they can only come from min-precision */
425 (device->vk.enabled_extensions.KHR_shader_float16_int8 ||
426 device->vk.enabled_features.shaderFloat16 ||
427 device->vk.enabled_features.shaderInt16),
428 .shader_model_max = dzn_get_shader_model(pdev),
429 .input_clip_size = input_clip_size,
430 #ifdef _WIN32
431 .validator_version_max = dxil_get_validator_version(instance->dxil_validator),
432 #endif
433 };
434 struct blob dxil_blob;
435 VkResult result = VK_SUCCESS;
436
437 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
438 if (instance->debug_flags & DZN_DEBUG_NIR)
439 nir_print_shader(nir, stderr);
440
441 if (nir_to_dxil(nir, &opts, NULL, &dxil_blob)) {
442 blob_finish_get_buffer(&dxil_blob, (void **)&slot->pShaderBytecode,
443 (size_t *)&slot->BytecodeLength);
444 } else {
445 result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
446 }
447
448 if (dxil_blob.allocated)
449 blob_finish(&dxil_blob);
450
451 if (result != VK_SUCCESS)
452 return result;
453
454 #ifdef _WIN32
455 char *err;
456 bool res = dxil_validate_module(instance->dxil_validator,
457 (void *)slot->pShaderBytecode,
458 slot->BytecodeLength, &err);
459
460 if (instance->debug_flags & DZN_DEBUG_DXIL) {
461 char *disasm = dxil_disasm_module(instance->dxil_validator,
462 (void *)slot->pShaderBytecode,
463 slot->BytecodeLength);
464 if (disasm) {
465 fprintf(stderr,
466 "== BEGIN SHADER ============================================\n"
467 "%s\n"
468 "== END SHADER ==============================================\n",
469 disasm);
470 ralloc_free(disasm);
471 }
472 }
473
474 if (!res && !(instance->debug_flags & DZN_DEBUG_EXPERIMENTAL)) {
475 if (err) {
476 mesa_loge(
477 "== VALIDATION ERROR =============================================\n"
478 "%s\n"
479 "== END ==========================================================\n",
480 err);
481 ralloc_free(err);
482 }
483 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
484 }
485 #endif
486
487 return VK_SUCCESS;
488 }
489
490 static D3D12_SHADER_BYTECODE *
491 dzn_pipeline_get_gfx_shader_slot(D3D12_PIPELINE_STATE_STREAM_DESC *stream,
492 gl_shader_stage in)
493 {
494 switch (in) {
495 case MESA_SHADER_VERTEX: {
496 d3d12_gfx_pipeline_state_stream_new_desc(stream, VS, D3D12_SHADER_BYTECODE, desc);
497 return desc;
498 }
499 case MESA_SHADER_TESS_CTRL: {
500 d3d12_gfx_pipeline_state_stream_new_desc(stream, HS, D3D12_SHADER_BYTECODE, desc);
501 return desc;
502 }
503 case MESA_SHADER_TESS_EVAL: {
504 d3d12_gfx_pipeline_state_stream_new_desc(stream, DS, D3D12_SHADER_BYTECODE, desc);
505 return desc;
506 }
507 case MESA_SHADER_GEOMETRY: {
508 d3d12_gfx_pipeline_state_stream_new_desc(stream, GS, D3D12_SHADER_BYTECODE, desc);
509 return desc;
510 }
511 case MESA_SHADER_FRAGMENT: {
512 d3d12_gfx_pipeline_state_stream_new_desc(stream, PS, D3D12_SHADER_BYTECODE, desc);
513 return desc;
514 }
515 default: unreachable("Unsupported stage");
516 }
517 }
518
519 struct dzn_cached_dxil_shader_header {
520 gl_shader_stage stage;
521 size_t size;
522 uint8_t data[0];
523 };
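/* Cached DXIL shaders are stored as a dzn_cached_blob whose payload is this
 * header immediately followed by `size` bytes of DXIL bytecode. */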
524
525 static VkResult
526 dzn_pipeline_cache_lookup_dxil_shader(struct vk_pipeline_cache *cache,
527 const uint8_t *dxil_hash,
528 gl_shader_stage *stage,
529 D3D12_SHADER_BYTECODE *bc)
530 {
531 *stage = MESA_SHADER_NONE;
532
533 if (!cache)
534 return VK_SUCCESS;
535
536 struct vk_pipeline_cache_object *cache_obj = NULL;
537
538 cache_obj =
539 vk_pipeline_cache_lookup_object(cache, dxil_hash, SHA1_DIGEST_LENGTH,
540 &dzn_cached_blob_ops,
541 NULL);
542 if (!cache_obj)
543 return VK_SUCCESS;
544
545 struct dzn_cached_blob *cached_blob =
546 container_of(cache_obj, struct dzn_cached_blob, base);
547 VkResult ret = VK_SUCCESS;
548
549 assert(sizeof(struct dzn_cached_dxil_shader_header) <= cached_blob->size);
550
551 const struct dzn_cached_dxil_shader_header *info =
552 (struct dzn_cached_dxil_shader_header *)(cached_blob->data);
553
554 assert(sizeof(struct dzn_cached_dxil_shader_header) + info->size <= cached_blob->size);
555 assert(info->stage > MESA_SHADER_NONE && info->stage < MESA_VULKAN_SHADER_STAGES);
556 assert(info->size > 0);
557
558 void *code = malloc(info->size);
559 if (!code) {
560 ret = vk_error(cache->base.device, VK_ERROR_OUT_OF_HOST_MEMORY);
561 goto out;
562 }
563
564 memcpy(code, info->data, info->size);
565
566 bc->pShaderBytecode = code;
567 bc->BytecodeLength = info->size;
568 *stage = info->stage;
569
570 out:
571 vk_pipeline_cache_object_unref(cache->base.device, cache_obj);
572 return ret;
573 }
574
575 static void
576 dzn_pipeline_cache_add_dxil_shader(struct vk_pipeline_cache *cache,
577 const uint8_t *dxil_hash,
578 gl_shader_stage stage,
579 const D3D12_SHADER_BYTECODE *bc)
580 {
581 size_t size = sizeof(struct dzn_cached_dxil_shader_header) +
582 bc->BytecodeLength;
583
584 struct vk_pipeline_cache_object *cache_obj =
585 dzn_cached_blob_create(cache->base.device, dxil_hash, NULL, size);
586 if (!cache_obj)
587 return;
588
589 struct dzn_cached_blob *cached_blob =
590 container_of(cache_obj, struct dzn_cached_blob, base);
591 struct dzn_cached_dxil_shader_header *info =
592 (struct dzn_cached_dxil_shader_header *)(cached_blob->data);
593 info->stage = stage;
594 info->size = bc->BytecodeLength;
595 memcpy(info->data, bc->pShaderBytecode, bc->BytecodeLength);
596
597 cache_obj = vk_pipeline_cache_add_object(cache, cache_obj);
598 vk_pipeline_cache_object_unref(cache->base.device, cache_obj);
599 }
600
601 struct dzn_cached_gfx_pipeline_header {
602 uint32_t stages : 30;
603 uint32_t needs_draw_sysvals : 1;
604 uint32_t rast_disabled_from_missing_position : 1;
605 uint32_t input_count;
606 };
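/* Cached graphics pipelines are serialized as: this header, padding up to
 * alignof(D3D12_INPUT_ELEMENT_DESC), `input_count` input element descs, then
 * one SHA1 DXIL-shader hash per bit set in `stages`. The lookup/add helpers
 * below read and write exactly that layout. */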
607
608 static VkResult
609 dzn_pipeline_cache_lookup_gfx_pipeline(struct dzn_graphics_pipeline *pipeline,
610 struct vk_pipeline_cache *cache,
611 const uint8_t *pipeline_hash,
612 bool *cache_hit)
613 {
614 *cache_hit = false;
615
616 if (!cache)
617 return VK_SUCCESS;
618
619 struct vk_pipeline_cache_object *cache_obj = NULL;
620
621 cache_obj =
622 vk_pipeline_cache_lookup_object(cache, pipeline_hash, SHA1_DIGEST_LENGTH,
623 &dzn_cached_blob_ops,
624 NULL);
625 if (!cache_obj)
626 return VK_SUCCESS;
627
628 struct dzn_cached_blob *cached_blob =
629 container_of(cache_obj, struct dzn_cached_blob, base);
630 D3D12_PIPELINE_STATE_STREAM_DESC *stream_desc =
631 &pipeline->templates.stream_desc;
632
633 const struct dzn_cached_gfx_pipeline_header *info =
634 (const struct dzn_cached_gfx_pipeline_header *)(cached_blob->data);
635 size_t offset = ALIGN_POT(sizeof(*info), alignof(D3D12_INPUT_ELEMENT_DESC));
636
637 assert(cached_blob->size >= sizeof(*info));
638
639 if (info->input_count > 0) {
640 const D3D12_INPUT_ELEMENT_DESC *inputs =
641 (const D3D12_INPUT_ELEMENT_DESC *)((uint8_t *)cached_blob->data + offset);
642
643 assert(cached_blob->size >= offset + sizeof(*inputs) * info->input_count);
644
645 memcpy(pipeline->templates.inputs, inputs,
646 sizeof(*inputs) * info->input_count);
647 d3d12_gfx_pipeline_state_stream_new_desc(stream_desc, INPUT_LAYOUT, D3D12_INPUT_LAYOUT_DESC, desc);
648 desc->pInputElementDescs = pipeline->templates.inputs;
649 desc->NumElements = info->input_count;
650 offset += sizeof(*inputs) * info->input_count;
651 }
652
653 assert(cached_blob->size == offset + util_bitcount(info->stages) * SHA1_DIGEST_LENGTH);
654
655 u_foreach_bit(s, info->stages) {
656 uint8_t *dxil_hash = (uint8_t *)cached_blob->data + offset;
657 gl_shader_stage stage;
658
659 D3D12_SHADER_BYTECODE *slot =
660 dzn_pipeline_get_gfx_shader_slot(stream_desc, s);
661
662 VkResult ret =
663 dzn_pipeline_cache_lookup_dxil_shader(cache, dxil_hash, &stage, slot);
664 if (ret != VK_SUCCESS)
665 return ret;
666
667 assert(stage == s);
668 offset += SHA1_DIGEST_LENGTH;
669 }
670
671 pipeline->rast_disabled_from_missing_position = info->rast_disabled_from_missing_position;
672 pipeline->needs_draw_sysvals = info->needs_draw_sysvals;
673
674 *cache_hit = true;
675
676 vk_pipeline_cache_object_unref(cache->base.device, cache_obj);
677 return VK_SUCCESS;
678 }
679
680 static void
681 dzn_pipeline_cache_add_gfx_pipeline(struct dzn_graphics_pipeline *pipeline,
682 struct vk_pipeline_cache *cache,
683 uint32_t vertex_input_count,
684 const uint8_t *pipeline_hash,
685 const uint8_t *const *dxil_hashes)
686 {
687 size_t offset =
688 ALIGN_POT(sizeof(struct dzn_cached_gfx_pipeline_header), alignof(D3D12_INPUT_ELEMENT_DESC)) +
689 (sizeof(D3D12_INPUT_ELEMENT_DESC) * vertex_input_count);
690 uint32_t stages = 0;
691
692 for (uint32_t i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
693 if (pipeline->templates.shaders[i].bc) {
694 stages |= BITFIELD_BIT(i);
695 offset += SHA1_DIGEST_LENGTH;
696 }
697 }
698
699 struct vk_pipeline_cache_object *cache_obj =
700 dzn_cached_blob_create(cache->base.device, pipeline_hash, NULL, offset);
701 if (!cache_obj)
702 return;
703
704 struct dzn_cached_blob *cached_blob =
705 container_of(cache_obj, struct dzn_cached_blob, base);
706
707 offset = 0;
708 struct dzn_cached_gfx_pipeline_header *info =
709 (struct dzn_cached_gfx_pipeline_header *)(cached_blob->data);
710
711 info->input_count = vertex_input_count;
712 info->stages = stages;
713 info->needs_draw_sysvals = pipeline->needs_draw_sysvals;
714 info->rast_disabled_from_missing_position = pipeline->rast_disabled_from_missing_position;
715
716 offset = ALIGN_POT(offset + sizeof(*info), alignof(D3D12_INPUT_ELEMENT_DESC));
717
718 D3D12_INPUT_ELEMENT_DESC *inputs =
719 (D3D12_INPUT_ELEMENT_DESC *)((uint8_t *)cached_blob->data + offset);
720 memcpy(inputs, pipeline->templates.inputs,
721 sizeof(*inputs) * vertex_input_count);
722 offset += sizeof(*inputs) * vertex_input_count;
723
724 u_foreach_bit(s, stages) {
725 uint8_t *dxil_hash = (uint8_t *)cached_blob->data + offset;
726
727 memcpy(dxil_hash, dxil_hashes[s], SHA1_DIGEST_LENGTH);
728 offset += SHA1_DIGEST_LENGTH;
729 }
730
731 cache_obj = vk_pipeline_cache_add_object(cache, cache_obj);
732 vk_pipeline_cache_object_unref(cache->base.device, cache_obj);
733 }
734
735 static void
736 dzn_graphics_pipeline_hash_attribs(D3D12_INPUT_ELEMENT_DESC *attribs,
737 enum pipe_format *vi_conversions,
738 uint8_t *result)
739 {
740 struct mesa_sha1 ctx;
741
742 _mesa_sha1_init(&ctx);
743 _mesa_sha1_update(&ctx, attribs, sizeof(*attribs) * MAX_VERTEX_GENERIC_ATTRIBS);
744 _mesa_sha1_update(&ctx, vi_conversions, sizeof(*vi_conversions) * MAX_VERTEX_GENERIC_ATTRIBS);
745 _mesa_sha1_final(&ctx, result);
746 }
747
748 static VkResult
749 dzn_graphics_pipeline_compile_shaders(struct dzn_device *device,
750 struct dzn_graphics_pipeline *pipeline,
751 struct vk_pipeline_cache *cache,
752 const struct dzn_pipeline_layout *layout,
753 D3D12_PIPELINE_STATE_STREAM_DESC *out,
754 D3D12_INPUT_ELEMENT_DESC *attribs,
755 enum pipe_format *vi_conversions,
756 const VkGraphicsPipelineCreateInfo *info)
757 {
758 struct dzn_physical_device *pdev =
759 container_of(device->vk.physical, struct dzn_physical_device, vk);
760 const VkPipelineViewportStateCreateInfo *vp_info =
761 info->pRasterizationState->rasterizerDiscardEnable ?
762 NULL : info->pViewportState;
763 struct {
764 const VkPipelineShaderStageCreateInfo *info;
765 uint8_t spirv_hash[SHA1_DIGEST_LENGTH];
766 uint8_t dxil_hash[SHA1_DIGEST_LENGTH];
767 uint8_t nir_hash[SHA1_DIGEST_LENGTH];
768 uint8_t link_hashes[2][SHA1_DIGEST_LENGTH];
769 } stages[MESA_VULKAN_SHADER_STAGES] = { 0 };
770 const uint8_t *dxil_hashes[MESA_VULKAN_SHADER_STAGES] = { 0 };
771 uint8_t attribs_hash[SHA1_DIGEST_LENGTH];
772 uint8_t pipeline_hash[SHA1_DIGEST_LENGTH];
773 gl_shader_stage last_raster_stage = MESA_SHADER_NONE;
774 uint32_t active_stage_mask = 0;
775 VkResult ret;
776
777 /* First step: collect stage info in a table indexed by gl_shader_stage
778 * so we can iterate over stages in pipeline order or reverse pipeline
779 * order.
780 */
781 for (uint32_t i = 0; i < info->stageCount; i++) {
782 gl_shader_stage stage =
783 vk_to_mesa_shader_stage(info->pStages[i].stage);
784
785 assert(stage <= MESA_SHADER_FRAGMENT);
786
787 if ((stage == MESA_SHADER_VERTEX ||
788 stage == MESA_SHADER_TESS_EVAL ||
789 stage == MESA_SHADER_GEOMETRY) &&
790 last_raster_stage < stage)
791 last_raster_stage = stage;
792
793 if (stage == MESA_SHADER_FRAGMENT &&
794 info->pRasterizationState &&
795 (info->pRasterizationState->rasterizerDiscardEnable ||
796 info->pRasterizationState->cullMode == VK_CULL_MODE_FRONT_AND_BACK)) {
797 /* Disable rasterization (AKA leave fragment shader NULL) when
798 * front+back culling or discard is set.
799 */
800 continue;
801 }
802
803 stages[stage].info = &info->pStages[i];
804 active_stage_mask |= BITFIELD_BIT(stage);
805 }
806
807 pipeline->use_gs_for_polygon_mode_point =
808 info->pRasterizationState &&
809 info->pRasterizationState->polygonMode == VK_POLYGON_MODE_POINT &&
810 !(active_stage_mask & (1 << MESA_SHADER_GEOMETRY));
811 if (pipeline->use_gs_for_polygon_mode_point)
812 last_raster_stage = MESA_SHADER_GEOMETRY;
813
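/* Figure out which viewports need their Y and/or Z coordinates flipped when
 * mapping Vulkan viewport conventions onto D3D12: a positive viewport height
 * requests a Y flip, minDepth > maxDepth requests a Z flip. With dynamic
 * viewport state the decision can't be made here, so a conditional flip is
 * compiled into the shader instead. */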
814 enum dxil_spirv_yz_flip_mode yz_flip_mode = DXIL_SPIRV_YZ_FLIP_NONE;
815 uint16_t y_flip_mask = 0, z_flip_mask = 0;
816 bool lower_view_index =
817 !pipeline->multiview.native_view_instancing &&
818 pipeline->multiview.view_mask > 1;
819
820 if (pipeline->vp.dynamic) {
821 yz_flip_mode = DXIL_SPIRV_YZ_FLIP_CONDITIONAL;
822 } else if (vp_info) {
823 for (uint32_t i = 0; vp_info->pViewports && i < vp_info->viewportCount; i++) {
824 if (vp_info->pViewports[i].height > 0)
825 y_flip_mask |= BITFIELD_BIT(i);
826
827 if (vp_info->pViewports[i].minDepth > vp_info->pViewports[i].maxDepth)
828 z_flip_mask |= BITFIELD_BIT(i);
829 }
830
831 if (y_flip_mask && z_flip_mask)
832 yz_flip_mode = DXIL_SPIRV_YZ_FLIP_UNCONDITIONAL;
833 else if (z_flip_mask)
834 yz_flip_mode = DXIL_SPIRV_Z_FLIP_UNCONDITIONAL;
835 else if (y_flip_mask)
836 yz_flip_mode = DXIL_SPIRV_Y_FLIP_UNCONDITIONAL;
837 }
838
839 bool force_sample_rate_shading =
840 !info->pRasterizationState->rasterizerDiscardEnable &&
841 info->pMultisampleState &&
842 info->pMultisampleState->sampleShadingEnable;
843
844 if (cache) {
845 dzn_graphics_pipeline_hash_attribs(attribs, vi_conversions, attribs_hash);
846
847 struct mesa_sha1 pipeline_hash_ctx;
848
849 _mesa_sha1_init(&pipeline_hash_ctx);
850 _mesa_sha1_update(&pipeline_hash_ctx, &device->bindless, sizeof(device->bindless));
851 _mesa_sha1_update(&pipeline_hash_ctx, attribs_hash, sizeof(attribs_hash));
852 _mesa_sha1_update(&pipeline_hash_ctx, &yz_flip_mode, sizeof(yz_flip_mode));
853 _mesa_sha1_update(&pipeline_hash_ctx, &y_flip_mask, sizeof(y_flip_mask));
854 _mesa_sha1_update(&pipeline_hash_ctx, &z_flip_mask, sizeof(z_flip_mask));
855 _mesa_sha1_update(&pipeline_hash_ctx, &force_sample_rate_shading, sizeof(force_sample_rate_shading));
856 _mesa_sha1_update(&pipeline_hash_ctx, &lower_view_index, sizeof(lower_view_index));
857 _mesa_sha1_update(&pipeline_hash_ctx, &pipeline->use_gs_for_polygon_mode_point, sizeof(pipeline->use_gs_for_polygon_mode_point));
858
859 u_foreach_bit(stage, active_stage_mask) {
860 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo *subgroup_size =
861 (const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo *)
862 vk_find_struct_const(stages[stage].info->pNext, PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO);
863 enum gl_subgroup_size subgroup_enum = subgroup_size && subgroup_size->requiredSubgroupSize >= 8 ?
864 subgroup_size->requiredSubgroupSize : SUBGROUP_SIZE_FULL_SUBGROUPS;
865
866 vk_pipeline_hash_shader_stage(pipeline->base.flags, stages[stage].info, NULL, stages[stage].spirv_hash);
867 _mesa_sha1_update(&pipeline_hash_ctx, &subgroup_enum, sizeof(subgroup_enum));
868 _mesa_sha1_update(&pipeline_hash_ctx, stages[stage].spirv_hash, sizeof(stages[stage].spirv_hash));
869 _mesa_sha1_update(&pipeline_hash_ctx, layout->stages[stage].hash, sizeof(layout->stages[stage].hash));
870 }
871 _mesa_sha1_final(&pipeline_hash_ctx, pipeline_hash);
872
873 bool cache_hit;
874 ret = dzn_pipeline_cache_lookup_gfx_pipeline(pipeline, cache, pipeline_hash,
875 &cache_hit);
876 if (ret != VK_SUCCESS)
877 return ret;
878
879 if (cache_hit)
880 return VK_SUCCESS;
881 }
882
883 /* Second step: get NIR shaders for all stages. */
884 nir_shader_compiler_options nir_opts;
885 unsigned supported_bit_sizes = (pdev->options4.Native16BitShaderOpsSupported ? 16 : 0) | 32 | 64;
886 dxil_get_nir_compiler_options(&nir_opts, dzn_get_shader_model(pdev), supported_bit_sizes, supported_bit_sizes);
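/* Note: D3D12's vertex ID doesn't include the base/first vertex the way
 * Vulkan's VertexIndex does (background assumption, not restated elsewhere in
 * this file), which is why base-vertex handling is lowered in NIR and why
 * vertex shaders that read it end up with needs_draw_sysvals set, feeding the
 * values through the sysval CBV or native extended command info. */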
887 nir_opts.lower_base_vertex = true;
888 u_foreach_bit(stage, active_stage_mask) {
889 struct mesa_sha1 nir_hash_ctx;
890
891 if (cache) {
892 _mesa_sha1_init(&nir_hash_ctx);
893 _mesa_sha1_update(&nir_hash_ctx, &device->bindless, sizeof(device->bindless));
894 if (stage != MESA_SHADER_FRAGMENT) {
895 _mesa_sha1_update(&nir_hash_ctx, &lower_view_index, sizeof(lower_view_index));
896 _mesa_sha1_update(&nir_hash_ctx, &force_sample_rate_shading, sizeof(force_sample_rate_shading));
897 }
898 if (stage == MESA_SHADER_VERTEX)
899 _mesa_sha1_update(&nir_hash_ctx, attribs_hash, sizeof(attribs_hash));
900 if (stage == last_raster_stage) {
901 _mesa_sha1_update(&nir_hash_ctx, &yz_flip_mode, sizeof(yz_flip_mode));
902 _mesa_sha1_update(&nir_hash_ctx, &y_flip_mask, sizeof(y_flip_mask));
903 _mesa_sha1_update(&nir_hash_ctx, &z_flip_mask, sizeof(z_flip_mask));
904 _mesa_sha1_update(&nir_hash_ctx, &lower_view_index, sizeof(lower_view_index));
905 }
906 _mesa_sha1_update(&nir_hash_ctx, stages[stage].spirv_hash, sizeof(stages[stage].spirv_hash));
907 _mesa_sha1_final(&nir_hash_ctx, stages[stage].nir_hash);
908 }
909
910 struct dzn_nir_options options = {
911 .yz_flip_mode = stage == last_raster_stage ? yz_flip_mode : DXIL_SPIRV_YZ_FLIP_NONE,
912 .y_flip_mask = y_flip_mask,
913 .z_flip_mask = z_flip_mask,
914 .force_sample_rate_shading = stage == MESA_SHADER_FRAGMENT ? force_sample_rate_shading : false,
915 .lower_view_index = lower_view_index,
916 .lower_view_index_to_rt_layer = stage == last_raster_stage ? lower_view_index : false,
917 .vi_conversions = vi_conversions,
918 .nir_opts = &nir_opts,
919 };
920
921 struct dxil_spirv_metadata metadata = { 0 };
922 ret = dzn_pipeline_get_nir_shader(device, layout,
923 cache, stages[stage].nir_hash,
924 pipeline->base.flags,
925 stages[stage].info, stage,
926 &options, &metadata,
927 &pipeline->templates.shaders[stage].nir);
928 if (ret != VK_SUCCESS)
929 return ret;
930
931 if (stage == MESA_SHADER_VERTEX)
932 pipeline->needs_draw_sysvals = metadata.needs_draw_sysvals;
933 }
934
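/* D3D12 has no point fill mode (translate_polygon_mode() maps
 * VK_POLYGON_MODE_POINT to solid), so point polygon mode is emulated by
 * injecting a geometry shader that turns incoming triangles into points,
 * then running the YZ-flip lowering on that GS. */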
935 if (pipeline->use_gs_for_polygon_mode_point) {
936 /* TODO: Cache; handle TES */
937 struct dzn_nir_point_gs_info gs_info = {
938 .cull_mode = info->pRasterizationState->cullMode,
939 .front_ccw = info->pRasterizationState->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE,
940 .depth_bias = info->pRasterizationState->depthBiasEnable,
941 .depth_bias_dynamic = pipeline->zsa.dynamic_depth_bias,
942 .ds_fmt = pipeline->zsa.ds_fmt,
943 .constant_depth_bias = info->pRasterizationState->depthBiasConstantFactor,
944 .slope_scaled_depth_bias = info->pRasterizationState->depthBiasSlopeFactor,
945 .depth_bias_clamp = info->pRasterizationState->depthBiasClamp,
946 .runtime_data_cbv = {
947 .register_space = DZN_REGISTER_SPACE_SYSVALS,
948 .base_shader_register = 0,
949 }
950 };
951 pipeline->templates.shaders[MESA_SHADER_GEOMETRY].nir =
952 dzn_nir_polygon_point_mode_gs(pipeline->templates.shaders[MESA_SHADER_VERTEX].nir,
953 &gs_info);
954
955 struct dxil_spirv_runtime_conf conf = {
956 .runtime_data_cbv = {
957 .register_space = DZN_REGISTER_SPACE_SYSVALS,
958 .base_shader_register = 0,
959 },
960 .yz_flip = {
961 .mode = yz_flip_mode,
962 .y_mask = y_flip_mask,
963 .z_mask = z_flip_mask,
964 },
965 };
966
967 bool requires_runtime_data;
968 NIR_PASS_V(pipeline->templates.shaders[MESA_SHADER_GEOMETRY].nir, dxil_spirv_nir_lower_yz_flip,
969 &conf, &requires_runtime_data);
970
971 active_stage_mask |= (1 << MESA_SHADER_GEOMETRY);
972 memcpy(stages[MESA_SHADER_GEOMETRY].spirv_hash, stages[MESA_SHADER_VERTEX].spirv_hash, SHA1_DIGEST_LENGTH);
973
974 if ((active_stage_mask & (1 << MESA_SHADER_FRAGMENT)) &&
975 BITSET_TEST(pipeline->templates.shaders[MESA_SHADER_FRAGMENT].nir->info.system_values_read, SYSTEM_VALUE_FRONT_FACE))
976 NIR_PASS_V(pipeline->templates.shaders[MESA_SHADER_FRAGMENT].nir, dxil_nir_forward_front_face);
977 }
978
979 /* Third step: link those NIR shaders. We iterate in reverse order
980 * so we can eliminate outputs that are never read by the next stage.
981 */
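/* Example: for a VS+FS pipeline, link_mask starts as VS|FS; the first
 * iteration links FS against VS, the second links VS against nothing. The
 * SPIR-V hash of each link partner is recorded in link_hashes so that the
 * DXIL cache key also captures what a shader was linked with. */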
982 uint32_t link_mask = active_stage_mask;
983 while (link_mask != 0) {
984 gl_shader_stage stage = util_last_bit(link_mask) - 1;
985 link_mask &= ~BITFIELD_BIT(stage);
986 gl_shader_stage prev_stage = util_last_bit(link_mask) - 1;
987
988 struct dxil_spirv_runtime_conf conf = {
989 .runtime_data_cbv = {
990 .register_space = DZN_REGISTER_SPACE_SYSVALS,
991 .base_shader_register = 0,
992 }};
993
994 assert(pipeline->templates.shaders[stage].nir);
995 struct dxil_spirv_metadata metadata = { 0 };
996 dxil_spirv_nir_link(pipeline->templates.shaders[stage].nir,
997 prev_stage != MESA_SHADER_NONE ?
998 pipeline->templates.shaders[prev_stage].nir : NULL,
999 &conf, &metadata);
1000
1001 if (prev_stage != MESA_SHADER_NONE) {
1002 memcpy(stages[stage].link_hashes[0], stages[prev_stage].spirv_hash, SHA1_DIGEST_LENGTH);
1003 memcpy(stages[prev_stage].link_hashes[1], stages[stage].spirv_hash, SHA1_DIGEST_LENGTH);
1004 }
1005 }
1006
1007 u_foreach_bit(stage, active_stage_mask) {
1008 uint8_t bindings_hash[SHA1_DIGEST_LENGTH];
1009
1010 NIR_PASS_V(pipeline->templates.shaders[stage].nir, adjust_var_bindings, device, layout,
1011 cache ? bindings_hash : NULL);
1012
1013 if (cache) {
1014 struct mesa_sha1 dxil_hash_ctx;
1015
1016 _mesa_sha1_init(&dxil_hash_ctx);
1017 _mesa_sha1_update(&dxil_hash_ctx, stages[stage].nir_hash, sizeof(stages[stage].nir_hash));
1018 _mesa_sha1_update(&dxil_hash_ctx, stages[stage].spirv_hash, sizeof(stages[stage].spirv_hash));
1019 _mesa_sha1_update(&dxil_hash_ctx, stages[stage].link_hashes[0], sizeof(stages[stage].link_hashes[0]));
1020 _mesa_sha1_update(&dxil_hash_ctx, stages[stage].link_hashes[1], sizeof(stages[stage].link_hashes[1]));
1021 _mesa_sha1_update(&dxil_hash_ctx, bindings_hash, sizeof(bindings_hash));
1022 _mesa_sha1_final(&dxil_hash_ctx, stages[stage].dxil_hash);
1023 dxil_hashes[stage] = stages[stage].dxil_hash;
1024
1025 gl_shader_stage cached_stage;
1026 D3D12_SHADER_BYTECODE bc;
1027 ret = dzn_pipeline_cache_lookup_dxil_shader(cache, stages[stage].dxil_hash, &cached_stage, &bc);
1028 if (ret != VK_SUCCESS)
1029 return ret;
1030
1031 if (cached_stage != MESA_SHADER_NONE) {
1032 assert(cached_stage == stage);
1033 D3D12_SHADER_BYTECODE *slot =
1034 dzn_pipeline_get_gfx_shader_slot(out, stage);
1035 *slot = bc;
1036 pipeline->templates.shaders[stage].bc = slot;
1037 }
1038 }
1039 }
1040
1041 uint32_t vert_input_count = 0;
1042 if (pipeline->templates.shaders[MESA_SHADER_VERTEX].nir) {
1043 /* Now, declare one D3D12_INPUT_ELEMENT_DESC per VS input variable, so
1044 * we can handle location overlaps properly.
1045 */
1046 nir_foreach_shader_in_variable(var, pipeline->templates.shaders[MESA_SHADER_VERTEX].nir) {
1047 assert(var->data.location >= VERT_ATTRIB_GENERIC0);
1048 unsigned loc = var->data.location - VERT_ATTRIB_GENERIC0;
1049 assert(vert_input_count < D3D12_VS_INPUT_REGISTER_COUNT);
1050 assert(loc < MAX_VERTEX_GENERIC_ATTRIBS);
1051
1052 pipeline->templates.inputs[vert_input_count] = attribs[loc];
1053 pipeline->templates.inputs[vert_input_count].SemanticIndex = vert_input_count;
1054 var->data.driver_location = vert_input_count++;
1055 }
1056
1057 if (vert_input_count > 0) {
1058 d3d12_gfx_pipeline_state_stream_new_desc(out, INPUT_LAYOUT, D3D12_INPUT_LAYOUT_DESC, desc);
1059 desc->pInputElementDescs = pipeline->templates.inputs;
1060 desc->NumElements = vert_input_count;
1061 }
1062 }
1063
1064 /* Last step: translate NIR shaders into DXIL modules */
1065 u_foreach_bit(stage, active_stage_mask) {
1066 gl_shader_stage prev_stage =
1067 util_last_bit(active_stage_mask & BITFIELD_MASK(stage)) - 1;
1068 uint32_t prev_stage_output_clip_size = 0;
1069 if (stage == MESA_SHADER_FRAGMENT) {
1070 /* Disable rasterization if the last geometry stage doesn't
1071 * write the position.
1072 */
1073 if (prev_stage == MESA_SHADER_NONE ||
1074 !(pipeline->templates.shaders[prev_stage].nir->info.outputs_written & VARYING_BIT_POS)) {
1075 pipeline->rast_disabled_from_missing_position = true;
1076 /* Clear a cache hit if there was one. */
1077 pipeline->templates.shaders[stage].bc = NULL;
1078 continue;
1079 }
1080 } else if (prev_stage != MESA_SHADER_NONE) {
1081 prev_stage_output_clip_size = pipeline->templates.shaders[prev_stage].nir->info.clip_distance_array_size;
1082 }
1083
1084 /* Cache hit, we can skip the compilation. */
1085 if (pipeline->templates.shaders[stage].bc)
1086 continue;
1087
1088 D3D12_SHADER_BYTECODE *slot =
1089 dzn_pipeline_get_gfx_shader_slot(out, stage);
1090
1091 ret = dzn_pipeline_compile_shader(device, pipeline->templates.shaders[stage].nir, prev_stage_output_clip_size, slot);
1092 if (ret != VK_SUCCESS)
1093 return ret;
1094
1095 pipeline->templates.shaders[stage].bc = slot;
1096
1097 if (cache)
1098 dzn_pipeline_cache_add_dxil_shader(cache, stages[stage].dxil_hash, stage, slot);
1099 }
1100
1101 if (cache)
1102 dzn_pipeline_cache_add_gfx_pipeline(pipeline, cache, vert_input_count, pipeline_hash,
1103 dxil_hashes);
1104
1105 return VK_SUCCESS;
1106 }
1107
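/* Some Vulkan vertex formats have no direct DXGI equivalent (the *SCALED
 * formats and most 10.10.10.2 layouts). Those are fetched as raw int/uint
 * data instead, and the original format is recorded in vi_conversions so
 * dxil_nir_lower_vs_vertex_conversion() can re-expand them in the vertex
 * shader. */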
1108 VkFormat
1109 dzn_graphics_pipeline_patch_vi_format(VkFormat format)
1110 {
1111 switch (format) {
1112 case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
1113 case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
1114 case VK_FORMAT_A2R10G10B10_SSCALED_PACK32:
1115 case VK_FORMAT_A2R10G10B10_USCALED_PACK32:
1116 case VK_FORMAT_A2B10G10R10_SNORM_PACK32:
1117 case VK_FORMAT_A2B10G10R10_SSCALED_PACK32:
1118 case VK_FORMAT_A2B10G10R10_USCALED_PACK32:
1119 return VK_FORMAT_R32_UINT;
1120 case VK_FORMAT_R8G8B8A8_SSCALED:
1121 return VK_FORMAT_R8G8B8A8_SINT;
1122 case VK_FORMAT_R8G8B8A8_USCALED:
1123 return VK_FORMAT_R8G8B8A8_UINT;
1124 case VK_FORMAT_R16G16B16A16_USCALED:
1125 return VK_FORMAT_R16G16B16A16_UINT;
1126 case VK_FORMAT_R16G16B16A16_SSCALED:
1127 return VK_FORMAT_R16G16B16A16_SINT;
1128 default:
1129 return format;
1130 }
1131 }
1132
1133 static VkResult
1134 dzn_graphics_pipeline_translate_vi(struct dzn_graphics_pipeline *pipeline,
1135 const VkGraphicsPipelineCreateInfo *in,
1136 D3D12_INPUT_ELEMENT_DESC *inputs,
1137 enum pipe_format *vi_conversions)
1138 {
1139 const VkPipelineVertexInputStateCreateInfo *in_vi =
1140 in->pVertexInputState;
1141 const VkPipelineVertexInputDivisorStateCreateInfoEXT *divisors =
1142 (const VkPipelineVertexInputDivisorStateCreateInfoEXT *)
1143 vk_find_struct_const(in_vi, PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
1144
1145 if (!in_vi->vertexAttributeDescriptionCount)
1146 return VK_SUCCESS;
1147
1148 D3D12_INPUT_CLASSIFICATION slot_class[MAX_VBS];
1149
1150 pipeline->vb.count = 0;
1151 for (uint32_t i = 0; i < in_vi->vertexBindingDescriptionCount; i++) {
1152 const struct VkVertexInputBindingDescription *bdesc =
1153 &in_vi->pVertexBindingDescriptions[i];
1154
1155 pipeline->vb.count = MAX2(pipeline->vb.count, bdesc->binding + 1);
1156 pipeline->vb.strides[bdesc->binding] = bdesc->stride;
1157 if (bdesc->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE) {
1158 slot_class[bdesc->binding] = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA;
1159 } else {
1160 assert(bdesc->inputRate == VK_VERTEX_INPUT_RATE_VERTEX);
1161 slot_class[bdesc->binding] = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;
1162 }
1163 }
1164
1165 for (uint32_t i = 0; i < in_vi->vertexAttributeDescriptionCount; i++) {
1166 const VkVertexInputAttributeDescription *attr =
1167 &in_vi->pVertexAttributeDescriptions[i];
1168 const VkVertexInputBindingDivisorDescriptionEXT *divisor = NULL;
1169
1170 if (slot_class[attr->binding] == D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA &&
1171 divisors) {
1172 for (uint32_t d = 0; d < divisors->vertexBindingDivisorCount; d++) {
1173 if (attr->binding == divisors->pVertexBindingDivisors[d].binding) {
1174 divisor = &divisors->pVertexBindingDivisors[d];
1175 break;
1176 }
1177 }
1178 }
1179
1180 VkFormat patched_format = dzn_graphics_pipeline_patch_vi_format(attr->format);
1181 if (patched_format != attr->format)
1182 vi_conversions[attr->location] = vk_format_to_pipe_format(attr->format);
1183
1184 /* nir_to_dxil() names all vertex inputs as TEXCOORDx */
1185 inputs[attr->location] = (D3D12_INPUT_ELEMENT_DESC) {
1186 .SemanticName = "TEXCOORD",
1187 .Format = dzn_buffer_get_dxgi_format(patched_format),
1188 .InputSlot = attr->binding,
1189 .InputSlotClass = slot_class[attr->binding],
1190 .InstanceDataStepRate =
1191 divisor ? divisor->divisor :
1192 slot_class[attr->binding] == D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA ? 1 : 0,
1193 .AlignedByteOffset = attr->offset,
1194 };
1195 }
1196
1197 return VK_SUCCESS;
1198 }
1199
1200 static D3D12_PRIMITIVE_TOPOLOGY_TYPE
1201 to_prim_topology_type(VkPrimitiveTopology in)
1202 {
1203 switch (in) {
1204 case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
1205 return D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT;
1206 case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
1207 case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
1208 case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
1209 case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
1210 return D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
1211 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
1212 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
1213 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
1214 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
1215 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
1216 return D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
1217 case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
1218 return D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH;
1219 default: unreachable("Invalid primitive topology");
1220 }
1221 }
1222
1223 static D3D12_PRIMITIVE_TOPOLOGY
1224 to_prim_topology(VkPrimitiveTopology in, unsigned patch_control_points, bool support_triangle_fan)
1225 {
1226 switch (in) {
1227 case VK_PRIMITIVE_TOPOLOGY_POINT_LIST: return D3D_PRIMITIVE_TOPOLOGY_POINTLIST;
1228 case VK_PRIMITIVE_TOPOLOGY_LINE_LIST: return D3D_PRIMITIVE_TOPOLOGY_LINELIST;
1229 case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP: return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP;
1230 case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ;
1231 case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ;
1232 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
1233 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP;
1234 /* Triangle fans are emulated using an intermediate index buffer. */
1235 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN: return support_triangle_fan ?
1236 D3D_PRIMITIVE_TOPOLOGY_TRIANGLEFAN : D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
1237 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ;
1238 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ;
1239 case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
1240 assert(patch_control_points);
1241 return (D3D12_PRIMITIVE_TOPOLOGY)(D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST + patch_control_points - 1);
1242 default: unreachable("Invalid primitive topology");
1243 }
1244 }
1245
1246 static VkResult
1247 dzn_graphics_pipeline_translate_ia(struct dzn_device *device,
1248 struct dzn_graphics_pipeline *pipeline,
1249 D3D12_PIPELINE_STATE_STREAM_DESC *out,
1250 const VkGraphicsPipelineCreateInfo *in)
1251 {
1252 struct dzn_physical_device *pdev =
1253 container_of(device->vk.physical, struct dzn_physical_device, vk);
1254 const VkPipelineInputAssemblyStateCreateInfo *in_ia =
1255 in->pInputAssemblyState;
1256 bool has_tes = false;
1257 for (uint32_t i = 0; i < in->stageCount; i++) {
1258 if (in->pStages[i].stage == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT ||
1259 in->pStages[i].stage == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) {
1260 has_tes = true;
1261 break;
1262 }
1263 }
1264 const VkPipelineTessellationStateCreateInfo *in_tes =
1265 has_tes ? in->pTessellationState : NULL;
1266 VkResult ret = VK_SUCCESS;
1267
1268 d3d12_gfx_pipeline_state_stream_new_desc(out, PRIMITIVE_TOPOLOGY, D3D12_PRIMITIVE_TOPOLOGY_TYPE, prim_top_type);
1269 *prim_top_type = to_prim_topology_type(in_ia->topology);
1270 pipeline->ia.triangle_fan = in_ia->topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN && !pdev->options15.TriangleFanSupported;
1271 pipeline->ia.topology =
1272 to_prim_topology(in_ia->topology, in_tes ? in_tes->patchControlPoints : 0,
1273 pdev->options15.TriangleFanSupported);
1274
1275 if (in_ia->primitiveRestartEnable) {
1276 d3d12_gfx_pipeline_state_stream_new_desc(out, IB_STRIP_CUT_VALUE, D3D12_INDEX_BUFFER_STRIP_CUT_VALUE, ib_strip_cut);
1277 pipeline->templates.desc_offsets.ib_strip_cut =
1278 (uintptr_t)ib_strip_cut - (uintptr_t)out->pPipelineStateSubobjectStream;
1279 *ib_strip_cut = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED;
1280 ret = dzn_graphics_pipeline_prepare_for_variants(device, pipeline);
1281 }
1282
1283 return ret;
1284 }
1285
1286 static D3D12_FILL_MODE
1287 translate_polygon_mode(VkPolygonMode in)
1288 {
1289 switch (in) {
1290 case VK_POLYGON_MODE_FILL: return D3D12_FILL_MODE_SOLID;
1291 case VK_POLYGON_MODE_LINE: return D3D12_FILL_MODE_WIREFRAME;
1292 case VK_POLYGON_MODE_POINT:
1293 /* Point fill is handled by the point-mode GS path (use_gs_for_polygon_mode_point) */
1294 return D3D12_FILL_MODE_SOLID;
1295 default: unreachable("Unsupported polygon mode");
1296 }
1297 }
1298
1299 static D3D12_CULL_MODE
1300 translate_cull_mode(VkCullModeFlags in)
1301 {
1302 switch (in) {
1303 case VK_CULL_MODE_NONE: return D3D12_CULL_MODE_NONE;
1304 case VK_CULL_MODE_FRONT_BIT: return D3D12_CULL_MODE_FRONT;
1305 case VK_CULL_MODE_BACK_BIT: return D3D12_CULL_MODE_BACK;
1306 /* Front+back face culling is equivalent to 'rasterization disabled' */
1307 case VK_CULL_MODE_FRONT_AND_BACK: return D3D12_CULL_MODE_NONE;
1308 default: unreachable("Unsupported cull mode");
1309 }
1310 }
1311
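/* The base D3D12_RASTERIZER_DESC DepthBias field is an INT, unlike Vulkan's
 * float depthBiasConstantFactor, so the value is clamped to the int32 range
 * (and truncated) when the float-based RASTERIZER1/RASTERIZER2 sub-objects
 * aren't available. */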
1312 static int32_t
1313 translate_depth_bias(double depth_bias)
1314 {
1315 if (depth_bias > INT32_MAX)
1316 return INT32_MAX;
1317 else if (depth_bias < INT32_MIN)
1318 return INT32_MIN;
1319
1320 return depth_bias;
1321 }
1322
1323 static void
1324 dzn_graphics_pipeline_translate_rast(struct dzn_device *device,
1325 struct dzn_graphics_pipeline *pipeline,
1326 D3D12_PIPELINE_STATE_STREAM_DESC *out,
1327 const VkGraphicsPipelineCreateInfo *in)
1328 {
1329 struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk);
1330 const VkPipelineRasterizationStateCreateInfo *in_rast =
1331 in->pRasterizationState;
1332 const VkPipelineViewportStateCreateInfo *in_vp =
1333 in_rast->rasterizerDiscardEnable ? NULL : in->pViewportState;
1334 const VkPipelineMultisampleStateCreateInfo *in_ms =
1335 in_rast->rasterizerDiscardEnable ? NULL : in->pMultisampleState;
1336
1337 if (in_vp) {
1338 pipeline->vp.count = in_vp->viewportCount;
1339 if (in_vp->pViewports) {
1340 for (uint32_t i = 0; in_vp->pViewports && i < in_vp->viewportCount; i++)
1341 dzn_translate_viewport(&pipeline->vp.desc[i], &in_vp->pViewports[i]);
1342 }
1343
1344 pipeline->scissor.count = in_vp->scissorCount;
1345 if (in_vp->pScissors) {
1346 for (uint32_t i = 0; i < in_vp->scissorCount; i++)
1347 dzn_translate_rect(&pipeline->scissor.desc[i], &in_vp->pScissors[i]);
1348 }
1349 }
1350
1351 if (pdev->options19.NarrowQuadrilateralLinesSupported) {
1352 assert(pdev->options16.DynamicDepthBiasSupported);
1353 d3d12_gfx_pipeline_state_stream_new_desc(out, RASTERIZER2, D3D12_RASTERIZER_DESC2, desc);
1354 pipeline->templates.desc_offsets.rast =
1355 (uintptr_t)desc - (uintptr_t)out->pPipelineStateSubobjectStream;
1356 desc->DepthClipEnable = !in_rast->depthClampEnable;
1357 desc->FillMode = translate_polygon_mode(in_rast->polygonMode);
1358 desc->CullMode = translate_cull_mode(in_rast->cullMode);
1359 desc->FrontCounterClockwise =
1360 in_rast->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE;
1361 if (in_rast->depthBiasEnable) {
1362 desc->DepthBias = in_rast->depthBiasConstantFactor;
1363 desc->SlopeScaledDepthBias = in_rast->depthBiasSlopeFactor;
1364 desc->DepthBiasClamp = in_rast->depthBiasClamp;
1365 }
1366 desc->LineRasterizationMode = D3D12_LINE_RASTERIZATION_MODE_QUADRILATERAL_NARROW;
1367 } else {
1368 static_assert(sizeof(D3D12_RASTERIZER_DESC) == sizeof(D3D12_RASTERIZER_DESC1), "Casting between these");
1369 D3D12_PIPELINE_STATE_SUBOBJECT_TYPE rast_type = pdev->options16.DynamicDepthBiasSupported ?
1370 D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_RASTERIZER1 :
1371 D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_RASTERIZER;
1372 d3d12_pipeline_state_stream_new_desc(out, MAX_GFX_PIPELINE_STATE_STREAM_SIZE, rast_type, D3D12_RASTERIZER_DESC, desc);
1373 pipeline->templates.desc_offsets.rast =
1374 (uintptr_t)desc - (uintptr_t)out->pPipelineStateSubobjectStream;
1375 desc->DepthClipEnable = !in_rast->depthClampEnable;
1376 desc->FillMode = translate_polygon_mode(in_rast->polygonMode);
1377 desc->CullMode = translate_cull_mode(in_rast->cullMode);
1378 desc->FrontCounterClockwise =
1379 in_rast->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE;
1380 if (in_rast->depthBiasEnable) {
1381 if (rast_type == D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_RASTERIZER1)
1382 ((D3D12_RASTERIZER_DESC1 *)desc)->DepthBias = in_rast->depthBiasConstantFactor;
1383 else
1384 desc->DepthBias = translate_depth_bias(in_rast->depthBiasConstantFactor);
1385 desc->SlopeScaledDepthBias = in_rast->depthBiasSlopeFactor;
1386 desc->DepthBiasClamp = in_rast->depthBiasClamp;
1387 }
1388
1389 /* The Vulkan conformance tests use different reference rasterizers for single-sampled
1390 * and multi-sampled lines. The single-sampled lines can be bresenham lines, but multi-
1391 * sampled need to be quadrilateral lines. This still isn't *quite* sufficient, because
1392 * D3D only supports a line width of 1.4 (per spec), but Vulkan requires us to support
1393 * 1.0 (and without claiming wide lines, that's all we can support).
1394 */
1395 if (in_ms && in_ms->rasterizationSamples > 1)
1396 desc->MultisampleEnable = true;
1397 }
1398
1399 assert(in_rast->lineWidth == 1.0f);
1400 }
1401
1402 static void
1403 dzn_graphics_pipeline_translate_ms(struct dzn_graphics_pipeline *pipeline,
1404 D3D12_PIPELINE_STATE_STREAM_DESC *out,
1405 const VkGraphicsPipelineCreateInfo *in)
1406 {
1407 const VkPipelineRasterizationStateCreateInfo *in_rast =
1408 in->pRasterizationState;
1409 const VkPipelineMultisampleStateCreateInfo *in_ms =
1410 in_rast->rasterizerDiscardEnable ? NULL : in->pMultisampleState;
1411
1412 if (!in_ms)
1413 return;
1414
1415 /* TODO: minSampleShading (use VRS), alphaToOneEnable */
1416 d3d12_gfx_pipeline_state_stream_new_desc(out, SAMPLE_DESC, DXGI_SAMPLE_DESC, desc);
1417    desc->Count = in_ms->rasterizationSamples; /* in_ms checked above */
1418 desc->Quality = 0;
1419
1420 if (!in_ms->pSampleMask)
1421 return;
1422
1423 d3d12_gfx_pipeline_state_stream_new_desc(out, SAMPLE_MASK, UINT, mask);
1424 *mask = *in_ms->pSampleMask;
1425 }
1426
1427 static D3D12_STENCIL_OP
1428 translate_stencil_op(VkStencilOp in)
1429 {
1430 switch (in) {
1431 case VK_STENCIL_OP_KEEP: return D3D12_STENCIL_OP_KEEP;
1432 case VK_STENCIL_OP_ZERO: return D3D12_STENCIL_OP_ZERO;
1433 case VK_STENCIL_OP_REPLACE: return D3D12_STENCIL_OP_REPLACE;
1434 case VK_STENCIL_OP_INCREMENT_AND_CLAMP: return D3D12_STENCIL_OP_INCR_SAT;
1435 case VK_STENCIL_OP_DECREMENT_AND_CLAMP: return D3D12_STENCIL_OP_DECR_SAT;
1436 case VK_STENCIL_OP_INCREMENT_AND_WRAP: return D3D12_STENCIL_OP_INCR;
1437 case VK_STENCIL_OP_DECREMENT_AND_WRAP: return D3D12_STENCIL_OP_DECR;
1438 case VK_STENCIL_OP_INVERT: return D3D12_STENCIL_OP_INVERT;
1439 default: unreachable("Invalid stencil op");
1440 }
1441 }
1442
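/* Work out, per face, the effective stencil compare/write masks and whether the
 * stencil reference value can influence the result. These values are stashed in
 * pipeline->zsa.stencil_test so that command-buffer recording and pipeline
 * variants can later reconcile Vulkan's per-face state with what D3D12 exposes.
 */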
1443 static void
1444 translate_stencil_test(struct dzn_graphics_pipeline *pipeline,
1445 D3D12_DEPTH_STENCIL_DESC2 *out,
1446 const VkGraphicsPipelineCreateInfo *in)
1447 {
1448 const VkPipelineDepthStencilStateCreateInfo *in_zsa =
1449 in->pDepthStencilState;
1450
1451 bool front_test_uses_ref =
1452 !(in->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT) &&
1453 in_zsa->front.compareOp != VK_COMPARE_OP_NEVER &&
1454 in_zsa->front.compareOp != VK_COMPARE_OP_ALWAYS &&
1455 (pipeline->zsa.stencil_test.dynamic_compare_mask ||
1456 in_zsa->front.compareMask != 0);
1457 bool back_test_uses_ref =
1458 !(in->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT) &&
1459 in_zsa->back.compareOp != VK_COMPARE_OP_NEVER &&
1460 in_zsa->back.compareOp != VK_COMPARE_OP_ALWAYS &&
1461 (pipeline->zsa.stencil_test.dynamic_compare_mask ||
1462 in_zsa->back.compareMask != 0);
1463
1464 if (front_test_uses_ref && pipeline->zsa.stencil_test.dynamic_compare_mask)
1465 pipeline->zsa.stencil_test.front.compare_mask = UINT32_MAX;
1466 else if (front_test_uses_ref)
1467 pipeline->zsa.stencil_test.front.compare_mask = in_zsa->front.compareMask;
1468 else
1469 pipeline->zsa.stencil_test.front.compare_mask = 0;
1470
1471 if (back_test_uses_ref && pipeline->zsa.stencil_test.dynamic_compare_mask)
1472 pipeline->zsa.stencil_test.back.compare_mask = UINT32_MAX;
1473 else if (back_test_uses_ref)
1474 pipeline->zsa.stencil_test.back.compare_mask = in_zsa->back.compareMask;
1475 else
1476 pipeline->zsa.stencil_test.back.compare_mask = 0;
1477
1478 bool back_wr_uses_ref =
1479 !(in->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT) &&
1480 ((in_zsa->back.compareOp != VK_COMPARE_OP_ALWAYS &&
1481 in_zsa->back.failOp == VK_STENCIL_OP_REPLACE) ||
1482 (in_zsa->back.compareOp != VK_COMPARE_OP_NEVER &&
1483 (!in_zsa->depthTestEnable || in_zsa->depthCompareOp != VK_COMPARE_OP_NEVER) &&
1484 in_zsa->back.passOp == VK_STENCIL_OP_REPLACE) ||
1485 (in_zsa->depthTestEnable &&
1486 in_zsa->depthCompareOp != VK_COMPARE_OP_ALWAYS &&
1487 in_zsa->back.depthFailOp == VK_STENCIL_OP_REPLACE));
1488 bool front_wr_uses_ref =
1489 !(in->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT) &&
1490 ((in_zsa->front.compareOp != VK_COMPARE_OP_ALWAYS &&
1491 in_zsa->front.failOp == VK_STENCIL_OP_REPLACE) ||
1492 (in_zsa->front.compareOp != VK_COMPARE_OP_NEVER &&
1493 (!in_zsa->depthTestEnable || in_zsa->depthCompareOp != VK_COMPARE_OP_NEVER) &&
1494 in_zsa->front.passOp == VK_STENCIL_OP_REPLACE) ||
1495 (in_zsa->depthTestEnable &&
1496 in_zsa->depthCompareOp != VK_COMPARE_OP_ALWAYS &&
1497 in_zsa->front.depthFailOp == VK_STENCIL_OP_REPLACE));
1498
1499 pipeline->zsa.stencil_test.front.write_mask =
1500 (pipeline->zsa.stencil_test.dynamic_write_mask ||
1501 (in->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT)) ?
1502 0 : in_zsa->front.writeMask;
1503 pipeline->zsa.stencil_test.back.write_mask =
1504 (pipeline->zsa.stencil_test.dynamic_write_mask ||
1505 (in->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT)) ?
1506 0 : in_zsa->back.writeMask;
1507
1508 pipeline->zsa.stencil_test.front.uses_ref = front_test_uses_ref || front_wr_uses_ref;
1509 pipeline->zsa.stencil_test.back.uses_ref = back_test_uses_ref || back_wr_uses_ref;
1510
1511 pipeline->zsa.stencil_test.front.ref =
1512 pipeline->zsa.stencil_test.dynamic_ref ? 0 : in_zsa->front.reference;
1513 pipeline->zsa.stencil_test.back.ref =
1514 pipeline->zsa.stencil_test.dynamic_ref ? 0 : in_zsa->back.reference;
1515
1516 out->FrontFace.StencilReadMask = pipeline->zsa.stencil_test.front.compare_mask;
1517 out->BackFace.StencilReadMask = pipeline->zsa.stencil_test.back.compare_mask;
1518 out->FrontFace.StencilWriteMask = pipeline->zsa.stencil_test.front.write_mask;
1519 out->BackFace.StencilWriteMask = pipeline->zsa.stencil_test.back.write_mask;
1520 }
1521
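/* Translate depth/stencil state. A DEPTH_STENCIL2 subobject is emitted when the
 * device reports IndependentFrontAndBackStencilRefMaskSupported; otherwise the
 * state is folded into DEPTH_STENCIL1, which only has a single pair of stencil
 * read/write masks shared by both faces.
 */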
1522 static void
1523 dzn_graphics_pipeline_translate_zsa(struct dzn_device *device,
1524 struct dzn_graphics_pipeline *pipeline,
1525 D3D12_PIPELINE_STATE_STREAM_DESC *out,
1526 const VkGraphicsPipelineCreateInfo *in)
1527 {
1528 struct dzn_physical_device *pdev =
1529 container_of(device->vk.physical, struct dzn_physical_device, vk);
1530
1531 const VkPipelineRasterizationStateCreateInfo *in_rast =
1532 in->pRasterizationState;
1533 const VkPipelineDepthStencilStateCreateInfo *in_zsa =
1534 in_rast->rasterizerDiscardEnable ? NULL : in->pDepthStencilState;
1535 const VkPipelineRenderingCreateInfo *ri = vk_find_struct_const(in, PIPELINE_RENDERING_CREATE_INFO);
1536
1537 if (!in_zsa ||
1538 in_rast->cullMode == VK_CULL_MODE_FRONT_AND_BACK) {
1539 /* Ensure depth is disabled if the rasterizer should be disabled / everything culled */
1540 if (pdev->options14.IndependentFrontAndBackStencilRefMaskSupported) {
1541 d3d12_gfx_pipeline_state_stream_new_desc(out, DEPTH_STENCIL2, D3D12_DEPTH_STENCIL_DESC2, stream_desc);
1542 pipeline->templates.desc_offsets.ds = (uintptr_t)stream_desc - (uintptr_t)out->pPipelineStateSubobjectStream;
1543 memset(stream_desc, 0, sizeof(*stream_desc));
1544 } else {
1545 d3d12_gfx_pipeline_state_stream_new_desc(out, DEPTH_STENCIL1, D3D12_DEPTH_STENCIL_DESC1, stream_desc);
1546 pipeline->templates.desc_offsets.ds = (uintptr_t)stream_desc - (uintptr_t)out->pPipelineStateSubobjectStream;
1547 memset(stream_desc, 0, sizeof(*stream_desc));
1548 }
1549 return;
1550 }
1551
1552 D3D12_DEPTH_STENCIL_DESC2 desc;
1553 memset(&desc, 0, sizeof(desc));
1554
1555 bool has_no_depth = ri && ri->depthAttachmentFormat == VK_FORMAT_UNDEFINED;
1556 bool has_no_stencil = ri && ri->stencilAttachmentFormat == VK_FORMAT_UNDEFINED;
1557
1558 desc.DepthEnable = !has_no_depth &&
1559 (in_zsa->depthTestEnable || in_zsa->depthBoundsTestEnable);
1560 if (desc.DepthEnable) {
1561 desc.DepthWriteMask =
1562 in_zsa->depthWriteEnable ?
1563 D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO;
1564 desc.DepthFunc =
1565 in_zsa->depthTestEnable ?
1566 dzn_translate_compare_op(in_zsa->depthCompareOp) :
1567 D3D12_COMPARISON_FUNC_ALWAYS;
1568 }
1569 pipeline->zsa.depth_bounds.enable = in_zsa->depthBoundsTestEnable;
1570 pipeline->zsa.depth_bounds.min = in_zsa->minDepthBounds;
1571 pipeline->zsa.depth_bounds.max = in_zsa->maxDepthBounds;
1572 desc.DepthBoundsTestEnable = in_zsa->depthBoundsTestEnable;
1573 desc.StencilEnable = in_zsa->stencilTestEnable && !has_no_stencil;
1574 if (desc.StencilEnable) {
1575 desc.FrontFace.StencilFailOp = translate_stencil_op(in_zsa->front.failOp);
1576 desc.FrontFace.StencilDepthFailOp = translate_stencil_op(in_zsa->front.depthFailOp);
1577 desc.FrontFace.StencilPassOp = translate_stencil_op(in_zsa->front.passOp);
1578 desc.FrontFace.StencilFunc = dzn_translate_compare_op(in_zsa->front.compareOp);
1579 desc.BackFace.StencilFailOp = translate_stencil_op(in_zsa->back.failOp);
1580 desc.BackFace.StencilDepthFailOp = translate_stencil_op(in_zsa->back.depthFailOp);
1581 desc.BackFace.StencilPassOp = translate_stencil_op(in_zsa->back.passOp);
1582 desc.BackFace.StencilFunc = dzn_translate_compare_op(in_zsa->back.compareOp);
1583
1584 pipeline->zsa.stencil_test.enable = true;
1585
1586 translate_stencil_test(pipeline, &desc, in);
1587 }
1588
1589 if (pdev->options14.IndependentFrontAndBackStencilRefMaskSupported) {
1590 d3d12_gfx_pipeline_state_stream_new_desc(out, DEPTH_STENCIL2, D3D12_DEPTH_STENCIL_DESC2, stream_desc);
1591 pipeline->templates.desc_offsets.ds =
1592 (uintptr_t)stream_desc - (uintptr_t)out->pPipelineStateSubobjectStream;
1593 *stream_desc = desc;
1594 } else {
1595 d3d12_gfx_pipeline_state_stream_new_desc(out, DEPTH_STENCIL1, D3D12_DEPTH_STENCIL_DESC1, stream_desc);
1596 pipeline->templates.desc_offsets.ds =
1597 (uintptr_t)stream_desc - (uintptr_t)out->pPipelineStateSubobjectStream;
1598
1599 stream_desc->DepthEnable = desc.DepthEnable;
1600 stream_desc->DepthWriteMask = desc.DepthWriteMask;
1601 stream_desc->DepthFunc = desc.DepthFunc;
1602 stream_desc->DepthBoundsTestEnable = desc.DepthBoundsTestEnable;
1603 stream_desc->StencilEnable = desc.StencilEnable;
1604 stream_desc->FrontFace.StencilFailOp = desc.FrontFace.StencilFailOp;
1605 stream_desc->FrontFace.StencilDepthFailOp = desc.FrontFace.StencilDepthFailOp;
1606 stream_desc->FrontFace.StencilPassOp = desc.FrontFace.StencilPassOp;
1607 stream_desc->FrontFace.StencilFunc = desc.FrontFace.StencilFunc;
1608 stream_desc->BackFace.StencilFailOp = desc.BackFace.StencilFailOp;
1609 stream_desc->BackFace.StencilDepthFailOp = desc.BackFace.StencilDepthFailOp;
1610 stream_desc->BackFace.StencilPassOp = desc.BackFace.StencilPassOp;
1611 stream_desc->BackFace.StencilFunc = desc.BackFace.StencilFunc;
1612
1613 /* No support for independent front/back, just pick front (if set, else back) */
1614 stream_desc->StencilReadMask = desc.FrontFace.StencilReadMask ? desc.FrontFace.StencilReadMask : desc.BackFace.StencilReadMask;
1615 stream_desc->StencilWriteMask = desc.FrontFace.StencilWriteMask ? desc.FrontFace.StencilWriteMask : desc.BackFace.StencilWriteMask;
1616 }
1617 }
1618
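/* Map VkBlendFactor to D3D12_BLEND. D3D12 does not accept color factors in the
 * alpha blend slots, so color factors are swapped for their alpha equivalents
 * when is_alpha is set. Constant color/alpha factors only use the dedicated
 * ALPHA_FACTOR values when options13.AlphaBlendFactorSupported; otherwise they
 * fall back to the regular BLEND_FACTOR values.
 */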
1619 static D3D12_BLEND
1620 translate_blend_factor(VkBlendFactor in, bool is_alpha, bool support_alpha_blend_factor)
1621 {
1622 switch (in) {
1623 case VK_BLEND_FACTOR_ZERO: return D3D12_BLEND_ZERO;
1624 case VK_BLEND_FACTOR_ONE: return D3D12_BLEND_ONE;
1625 case VK_BLEND_FACTOR_SRC_COLOR:
1626 return is_alpha ? D3D12_BLEND_SRC_ALPHA : D3D12_BLEND_SRC_COLOR;
1627 case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
1628 return is_alpha ? D3D12_BLEND_INV_SRC_ALPHA : D3D12_BLEND_INV_SRC_COLOR;
1629 case VK_BLEND_FACTOR_DST_COLOR:
1630 return is_alpha ? D3D12_BLEND_DEST_ALPHA : D3D12_BLEND_DEST_COLOR;
1631 case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
1632 return is_alpha ? D3D12_BLEND_INV_DEST_ALPHA : D3D12_BLEND_INV_DEST_COLOR;
1633 case VK_BLEND_FACTOR_SRC_ALPHA: return D3D12_BLEND_SRC_ALPHA;
1634 case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: return D3D12_BLEND_INV_SRC_ALPHA;
1635 case VK_BLEND_FACTOR_DST_ALPHA: return D3D12_BLEND_DEST_ALPHA;
1636 case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: return D3D12_BLEND_INV_DEST_ALPHA;
1637 case VK_BLEND_FACTOR_CONSTANT_COLOR:
1638 return is_alpha && support_alpha_blend_factor ? D3D12_BLEND_ALPHA_FACTOR : D3D12_BLEND_BLEND_FACTOR;
1639 case VK_BLEND_FACTOR_CONSTANT_ALPHA:
1640 return support_alpha_blend_factor ? D3D12_BLEND_ALPHA_FACTOR : D3D12_BLEND_BLEND_FACTOR;
1641 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
1642 return is_alpha && support_alpha_blend_factor ? D3D12_BLEND_INV_ALPHA_FACTOR : D3D12_BLEND_INV_BLEND_FACTOR;
1643 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
1644 return support_alpha_blend_factor ? D3D12_BLEND_INV_ALPHA_FACTOR : D3D12_BLEND_INV_BLEND_FACTOR;
1645 case VK_BLEND_FACTOR_SRC1_COLOR:
1646 return is_alpha ? D3D12_BLEND_SRC1_ALPHA : D3D12_BLEND_SRC1_COLOR;
1647 case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
1648 return is_alpha ? D3D12_BLEND_INV_SRC1_ALPHA : D3D12_BLEND_INV_SRC1_COLOR;
1649 case VK_BLEND_FACTOR_SRC1_ALPHA: return D3D12_BLEND_SRC1_ALPHA;
1650 case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA: return D3D12_BLEND_INV_SRC1_ALPHA;
1651 case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE: return D3D12_BLEND_SRC_ALPHA_SAT;
1652 default: unreachable("Invalid blend factor");
1653 }
1654 }
1655
1656 static D3D12_BLEND_OP
1657 translate_blend_op(VkBlendOp in)
1658 {
1659 switch (in) {
1660 case VK_BLEND_OP_ADD: return D3D12_BLEND_OP_ADD;
1661 case VK_BLEND_OP_SUBTRACT: return D3D12_BLEND_OP_SUBTRACT;
1662 case VK_BLEND_OP_REVERSE_SUBTRACT: return D3D12_BLEND_OP_REV_SUBTRACT;
1663 case VK_BLEND_OP_MIN: return D3D12_BLEND_OP_MIN;
1664 case VK_BLEND_OP_MAX: return D3D12_BLEND_OP_MAX;
1665 default: unreachable("Invalid blend op");
1666 }
1667 }
1668
1669 static D3D12_LOGIC_OP
1670 translate_logic_op(VkLogicOp in)
1671 {
1672 switch (in) {
1673 case VK_LOGIC_OP_CLEAR: return D3D12_LOGIC_OP_CLEAR;
1674 case VK_LOGIC_OP_AND: return D3D12_LOGIC_OP_AND;
1675 case VK_LOGIC_OP_AND_REVERSE: return D3D12_LOGIC_OP_AND_REVERSE;
1676 case VK_LOGIC_OP_COPY: return D3D12_LOGIC_OP_COPY;
1677 case VK_LOGIC_OP_AND_INVERTED: return D3D12_LOGIC_OP_AND_INVERTED;
1678 case VK_LOGIC_OP_NO_OP: return D3D12_LOGIC_OP_NOOP;
1679 case VK_LOGIC_OP_XOR: return D3D12_LOGIC_OP_XOR;
1680 case VK_LOGIC_OP_OR: return D3D12_LOGIC_OP_OR;
1681 case VK_LOGIC_OP_NOR: return D3D12_LOGIC_OP_NOR;
1682 case VK_LOGIC_OP_EQUIVALENT: return D3D12_LOGIC_OP_EQUIV;
1683 case VK_LOGIC_OP_INVERT: return D3D12_LOGIC_OP_INVERT;
1684 case VK_LOGIC_OP_OR_REVERSE: return D3D12_LOGIC_OP_OR_REVERSE;
1685 case VK_LOGIC_OP_COPY_INVERTED: return D3D12_LOGIC_OP_COPY_INVERTED;
1686 case VK_LOGIC_OP_OR_INVERTED: return D3D12_LOGIC_OP_OR_INVERTED;
1687 case VK_LOGIC_OP_NAND: return D3D12_LOGIC_OP_NAND;
1688 case VK_LOGIC_OP_SET: return D3D12_LOGIC_OP_SET;
1689 default: unreachable("Invalid logic op");
1690 }
1691 }
1692
1693 static void
1694 dzn_graphics_pipeline_translate_blend(struct dzn_graphics_pipeline *pipeline,
1695 D3D12_PIPELINE_STATE_STREAM_DESC *out,
1696 const VkGraphicsPipelineCreateInfo *in)
1697 {
1698 const VkPipelineRasterizationStateCreateInfo *in_rast =
1699 in->pRasterizationState;
1700 const VkPipelineColorBlendStateCreateInfo *in_blend =
1701 in_rast->rasterizerDiscardEnable ? NULL : in->pColorBlendState;
1702 const VkPipelineMultisampleStateCreateInfo *in_ms =
1703 in_rast->rasterizerDiscardEnable ? NULL : in->pMultisampleState;
1704
1705 if (!in_blend || !in_ms)
1706 return;
1707
1708 struct dzn_device *device =
1709 container_of(pipeline->base.base.device, struct dzn_device, vk);
1710 struct dzn_physical_device *pdev =
1711 container_of(device->vk.physical, struct dzn_physical_device, vk);
1712 bool support_alpha_blend_factor = pdev->options13.AlphaBlendFactorSupported;
1713
1714 d3d12_gfx_pipeline_state_stream_new_desc(out, BLEND, D3D12_BLEND_DESC, desc);
1715 D3D12_LOGIC_OP logicop =
1716 in_blend->logicOpEnable ?
1717 translate_logic_op(in_blend->logicOp) : D3D12_LOGIC_OP_NOOP;
1718 desc->AlphaToCoverageEnable = in_ms->alphaToCoverageEnable;
1719 memcpy(pipeline->blend.constants, in_blend->blendConstants,
1720 sizeof(pipeline->blend.constants));
1721
1722 for (uint32_t i = 0; i < in_blend->attachmentCount; i++) {
1723 if (i > 0 &&
1724 memcmp(&in_blend->pAttachments[i - 1], &in_blend->pAttachments[i],
1725 sizeof(*in_blend->pAttachments)) != 0)
1726 desc->IndependentBlendEnable = true;
1727
1728 desc->RenderTarget[i].BlendEnable =
1729 in_blend->pAttachments[i].blendEnable;
1730 desc->RenderTarget[i].RenderTargetWriteMask =
1731 in_blend->pAttachments[i].colorWriteMask;
1732
1733 if (in_blend->logicOpEnable) {
1734 desc->RenderTarget[i].LogicOpEnable = true;
1735 desc->RenderTarget[i].LogicOp = logicop;
1736 } else {
1737 desc->RenderTarget[i].SrcBlend =
1738 translate_blend_factor(in_blend->pAttachments[i].srcColorBlendFactor, false, support_alpha_blend_factor);
1739 desc->RenderTarget[i].DestBlend =
1740 translate_blend_factor(in_blend->pAttachments[i].dstColorBlendFactor, false, support_alpha_blend_factor);
1741 desc->RenderTarget[i].BlendOp =
1742 translate_blend_op(in_blend->pAttachments[i].colorBlendOp);
1743 desc->RenderTarget[i].SrcBlendAlpha =
1744 translate_blend_factor(in_blend->pAttachments[i].srcAlphaBlendFactor, true, support_alpha_blend_factor);
1745 desc->RenderTarget[i].DestBlendAlpha =
1746 translate_blend_factor(in_blend->pAttachments[i].dstAlphaBlendFactor, true, support_alpha_blend_factor);
1747 desc->RenderTarget[i].BlendOpAlpha =
1748 translate_blend_op(in_blend->pAttachments[i].alphaBlendOp);
1749 }
1750 }
1751 }
1752
1753
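/* Common init for graphics and compute pipelines: copy the root-parameter and
 * descriptor-set layout information from the pipeline layout, take a reference
 * on the root signature, and emit the ROOT_SIGNATURE subobject as the first
 * entry of the pipeline state stream.
 */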
1754 static void
1755 dzn_pipeline_init(struct dzn_pipeline *pipeline,
1756 struct dzn_device *device,
1757 VkPipelineBindPoint type,
1758 VkPipelineCreateFlags2KHR flags,
1759 struct dzn_pipeline_layout *layout,
1760 D3D12_PIPELINE_STATE_STREAM_DESC *stream_desc)
1761 {
1762 pipeline->type = type;
1763 pipeline->flags = flags;
1764 pipeline->root.sets_param_count = layout->root.sets_param_count;
1765 pipeline->root.sysval_cbv_param_idx = layout->root.sysval_cbv_param_idx;
1766 pipeline->root.push_constant_cbv_param_idx = layout->root.push_constant_cbv_param_idx;
1767 pipeline->root.dynamic_buffer_bindless_param_idx = layout->root.dynamic_buffer_bindless_param_idx;
1768 STATIC_ASSERT(sizeof(pipeline->root.type) == sizeof(layout->root.type));
1769 memcpy(pipeline->root.type, layout->root.type, sizeof(pipeline->root.type));
1770 pipeline->root.sig = layout->root.sig;
1771 ID3D12RootSignature_AddRef(pipeline->root.sig);
1772
1773 STATIC_ASSERT(sizeof(layout->desc_count) == sizeof(pipeline->desc_count));
1774 memcpy(pipeline->desc_count, layout->desc_count, sizeof(pipeline->desc_count));
1775
1776 STATIC_ASSERT(sizeof(layout->sets) == sizeof(pipeline->sets));
1777 memcpy(pipeline->sets, layout->sets, sizeof(pipeline->sets));
1778 pipeline->set_count = layout->set_count;
1779 pipeline->dynamic_buffer_count = layout->dynamic_buffer_count;
1780 vk_object_base_init(&device->vk, &pipeline->base, VK_OBJECT_TYPE_PIPELINE);
1781
1782 ASSERTED uint32_t max_streamsz =
1783 type == VK_PIPELINE_BIND_POINT_GRAPHICS ?
1784 MAX_GFX_PIPELINE_STATE_STREAM_SIZE :
1785 MAX_COMPUTE_PIPELINE_STATE_STREAM_SIZE;
1786
1787 d3d12_pipeline_state_stream_new_desc_abbrev(stream_desc, max_streamsz, ROOT_SIGNATURE,
1788 ID3D12RootSignature *, root_sig);
1789 *root_sig = pipeline->root.sig;
1790 }
1791
1792 static void
1793 dzn_pipeline_finish(struct dzn_pipeline *pipeline)
1794 {
1795 if (pipeline->state)
1796 ID3D12PipelineState_Release(pipeline->state);
1797 if (pipeline->root.sig)
1798 ID3D12RootSignature_Release(pipeline->root.sig);
1799
1800 vk_object_base_finish(&pipeline->base);
1801 }
1802
1803 static void dzn_graphics_pipeline_delete_variant(struct hash_entry *he)
1804 {
1805 struct dzn_graphics_pipeline_variant *variant = he->data;
1806
1807 if (variant->state)
1808 ID3D12PipelineState_Release(variant->state);
1809 }
1810
1811 static void dzn_graphics_pipeline_delete_cmd_sig(struct hash_entry *he)
1812 {
1813 ID3D12CommandSignature_Release((ID3D12CommandSignature *)he->data);
1814 }
1815
1816 static void
1817 dzn_graphics_pipeline_cleanup_nir_shaders(struct dzn_graphics_pipeline *pipeline)
1818 {
1819 for (uint32_t i = 0; i < ARRAY_SIZE(pipeline->templates.shaders); i++) {
1820 ralloc_free(pipeline->templates.shaders[i].nir);
1821 pipeline->templates.shaders[i].nir = NULL;
1822 }
1823 }
1824
1825 static void
1826 dzn_graphics_pipeline_cleanup_dxil_shaders(struct dzn_graphics_pipeline *pipeline)
1827 {
1828 for (uint32_t i = 0; i < ARRAY_SIZE(pipeline->templates.shaders); i++) {
1829 if (pipeline->templates.shaders[i].bc) {
1830 free((void *)pipeline->templates.shaders[i].bc->pShaderBytecode);
1831 pipeline->templates.shaders[i].bc = NULL;
1832 }
1833 }
1834 }
1835
1836 static void
1837 dzn_graphics_pipeline_destroy(struct dzn_graphics_pipeline *pipeline,
1838 const VkAllocationCallbacks *alloc)
1839 {
1840 if (!pipeline)
1841 return;
1842
1843 _mesa_hash_table_destroy(pipeline->variants,
1844 dzn_graphics_pipeline_delete_variant);
1845
1846 dzn_graphics_pipeline_cleanup_nir_shaders(pipeline);
1847 dzn_graphics_pipeline_cleanup_dxil_shaders(pipeline);
1848
1849 for (uint32_t i = 0; i < ARRAY_SIZE(pipeline->indirect_cmd_sigs); i++) {
1850 if (pipeline->indirect_cmd_sigs[i])
1851 ID3D12CommandSignature_Release(pipeline->indirect_cmd_sigs[i]);
1852 }
1853 _mesa_hash_table_destroy(pipeline->custom_stride_cmd_sigs,
1854 dzn_graphics_pipeline_delete_cmd_sig);
1855
1856 dzn_pipeline_finish(&pipeline->base);
1857 vk_free2(&pipeline->base.base.device->alloc, alloc, pipeline);
1858 }
1859
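/* Create a graphics pipeline: translate fixed-function state into pipeline
 * state stream subobjects, compile (or load from the cache) the shader stages,
 * then create the ID3D12PipelineState. When dynamic state requires pipeline
 * variants, PSO creation is deferred to dzn_graphics_pipeline_get_state().
 */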
1860 static VkResult
1861 dzn_graphics_pipeline_create(struct dzn_device *device,
1862 VkPipelineCache cache,
1863 const VkGraphicsPipelineCreateInfo *pCreateInfo,
1864 const VkAllocationCallbacks *pAllocator,
1865 VkPipeline *out)
1866 {
1867 struct dzn_physical_device *pdev =
1868 container_of(device->vk.physical, struct dzn_physical_device, vk);
1869 const VkPipelineRenderingCreateInfo *ri = (const VkPipelineRenderingCreateInfo *)
1870 vk_find_struct_const(pCreateInfo, PIPELINE_RENDERING_CREATE_INFO);
1871 VK_FROM_HANDLE(vk_pipeline_cache, pcache, cache);
1872 VK_FROM_HANDLE(vk_render_pass, pass, pCreateInfo->renderPass);
1873 VK_FROM_HANDLE(dzn_pipeline_layout, layout, pCreateInfo->layout);
1874 uint32_t color_count = 0;
1875 VkFormat color_fmts[MAX_RTS] = { 0 };
1876 VkFormat zs_fmt = VK_FORMAT_UNDEFINED;
1877 VkResult ret;
1878 HRESULT hres = 0;
1879 D3D12_VIEW_INSTANCE_LOCATION vi_locs[D3D12_MAX_VIEW_INSTANCE_COUNT];
1880
1881 struct dzn_graphics_pipeline *pipeline =
1882 vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
1883 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1884 if (!pipeline)
1885 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1886
1887 D3D12_PIPELINE_STATE_STREAM_DESC *stream_desc = &pipeline->templates.stream_desc;
1888 stream_desc->pPipelineStateSubobjectStream = pipeline->templates.stream_buf;
1889
1890 dzn_pipeline_init(&pipeline->base, device,
1891 VK_PIPELINE_BIND_POINT_GRAPHICS,
1892 vk_graphics_pipeline_create_flags(pCreateInfo),
1893 layout, stream_desc);
1894 D3D12_INPUT_ELEMENT_DESC attribs[MAX_VERTEX_GENERIC_ATTRIBS] = { 0 };
1895 enum pipe_format vi_conversions[MAX_VERTEX_GENERIC_ATTRIBS] = { 0 };
1896
1897 ret = dzn_graphics_pipeline_translate_vi(pipeline, pCreateInfo,
1898 attribs, vi_conversions);
1899 if (ret != VK_SUCCESS)
1900 goto out;
1901
1902 d3d12_gfx_pipeline_state_stream_new_desc(stream_desc, FLAGS, D3D12_PIPELINE_STATE_FLAGS, flags);
1903 *flags = D3D12_PIPELINE_STATE_FLAG_NONE;
1904
1905 if (pCreateInfo->pDynamicState) {
1906 for (uint32_t i = 0; i < pCreateInfo->pDynamicState->dynamicStateCount; i++) {
1907 switch (pCreateInfo->pDynamicState->pDynamicStates[i]) {
1908 case VK_DYNAMIC_STATE_VIEWPORT:
1909 pipeline->vp.dynamic = true;
1910 break;
1911 case VK_DYNAMIC_STATE_SCISSOR:
1912 pipeline->scissor.dynamic = true;
1913 break;
1914 case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
1915 pipeline->zsa.stencil_test.dynamic_ref = true;
1916 break;
1917 case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
1918 pipeline->zsa.stencil_test.dynamic_compare_mask = true;
1919 ret = dzn_graphics_pipeline_prepare_for_variants(device, pipeline);
1920 if (ret)
1921 goto out;
1922 break;
1923 case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
1924 pipeline->zsa.stencil_test.dynamic_write_mask = true;
1925 ret = dzn_graphics_pipeline_prepare_for_variants(device, pipeline);
1926 if (ret)
1927 goto out;
1928 break;
1929 case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
1930 pipeline->blend.dynamic_constants = true;
1931 break;
1932 case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
1933 pipeline->zsa.depth_bounds.dynamic = true;
1934 break;
1935 case VK_DYNAMIC_STATE_DEPTH_BIAS:
1936 pipeline->zsa.dynamic_depth_bias = true;
1937 if (pdev->options16.DynamicDepthBiasSupported) {
1938 *flags |= D3D12_PIPELINE_STATE_FLAG_DYNAMIC_DEPTH_BIAS;
1939 } else {
1940 ret = dzn_graphics_pipeline_prepare_for_variants(device, pipeline);
1941 if (ret)
1942 goto out;
1943 }
1944 break;
1945 case VK_DYNAMIC_STATE_LINE_WIDTH:
1946          /* Nothing to do since we only support lineWidth = 1.0. */
1947 break;
1948 default: unreachable("Unsupported dynamic state");
1949 }
1950 }
1951 }
1952
1953 ret = dzn_graphics_pipeline_translate_ia(device, pipeline, stream_desc, pCreateInfo);
1954 if (ret)
1955 goto out;
1956
1957 dzn_graphics_pipeline_translate_rast(device, pipeline, stream_desc, pCreateInfo);
1958 dzn_graphics_pipeline_translate_ms(pipeline, stream_desc, pCreateInfo);
1959 dzn_graphics_pipeline_translate_zsa(device, pipeline, stream_desc, pCreateInfo);
1960 dzn_graphics_pipeline_translate_blend(pipeline, stream_desc, pCreateInfo);
1961
1962 unsigned view_mask = 0;
1963 if (pass) {
1964 const struct vk_subpass *subpass = &pass->subpasses[pCreateInfo->subpass];
1965 color_count = subpass->color_count;
1966 for (uint32_t i = 0; i < subpass->color_count; i++) {
1967 uint32_t idx = subpass->color_attachments[i].attachment;
1968
1969 if (idx == VK_ATTACHMENT_UNUSED) continue;
1970
1971 const struct vk_render_pass_attachment *attachment =
1972 &pass->attachments[idx];
1973
1974 color_fmts[i] = attachment->format;
1975 }
1976
1977 if (subpass->depth_stencil_attachment &&
1978 subpass->depth_stencil_attachment->attachment != VK_ATTACHMENT_UNUSED) {
1979 const struct vk_render_pass_attachment *attachment =
1980 &pass->attachments[subpass->depth_stencil_attachment->attachment];
1981
1982 zs_fmt = attachment->format;
1983 }
1984
1985 view_mask = subpass->view_mask;
1986 } else if (ri) {
1987 color_count = ri->colorAttachmentCount;
1988 memcpy(color_fmts, ri->pColorAttachmentFormats,
1989 sizeof(color_fmts[0]) * color_count);
1990 if (ri->depthAttachmentFormat != VK_FORMAT_UNDEFINED)
1991 zs_fmt = ri->depthAttachmentFormat;
1992 else if (ri->stencilAttachmentFormat != VK_FORMAT_UNDEFINED)
1993 zs_fmt = ri->stencilAttachmentFormat;
1994
1995 view_mask = ri->viewMask;
1996 }
1997
1998 if (color_count > 0) {
1999 d3d12_gfx_pipeline_state_stream_new_desc(stream_desc, RENDER_TARGET_FORMATS, struct D3D12_RT_FORMAT_ARRAY, rts);
2000 rts->NumRenderTargets = color_count;
2001 for (uint32_t i = 0; i < color_count; i++) {
2002 rts->RTFormats[i] =
2003 dzn_image_get_dxgi_format(pdev, color_fmts[i],
2004 VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
2005 VK_IMAGE_ASPECT_COLOR_BIT);
2006 }
2007 }
2008
2009 if (zs_fmt != VK_FORMAT_UNDEFINED) {
2010 d3d12_gfx_pipeline_state_stream_new_desc(stream_desc, DEPTH_STENCIL_FORMAT, DXGI_FORMAT, ds_fmt);
2011 *ds_fmt =
2012 dzn_image_get_dxgi_format(pdev, zs_fmt,
2013 VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
2014 VK_IMAGE_ASPECT_DEPTH_BIT |
2015 VK_IMAGE_ASPECT_STENCIL_BIT);
2016 pipeline->zsa.ds_fmt = *ds_fmt;
2017 }
2018
2019 pipeline->multiview.view_mask = MAX2(view_mask, 1);
2020 if (view_mask != 0 && /* Is multiview */
2021 view_mask != 1 && /* Is non-trivially multiview */
2022 (view_mask & ~((1 << D3D12_MAX_VIEW_INSTANCE_COUNT) - 1)) == 0 && /* Uses only views 0 thru 3 */
2023 pdev->options3.ViewInstancingTier > D3D12_VIEW_INSTANCING_TIER_NOT_SUPPORTED /* Actually supported */) {
2024 d3d12_gfx_pipeline_state_stream_new_desc(stream_desc, VIEW_INSTANCING, D3D12_VIEW_INSTANCING_DESC, vi);
2025 vi->pViewInstanceLocations = vi_locs;
2026 for (uint32_t i = 0; i < D3D12_MAX_VIEW_INSTANCE_COUNT; ++i) {
2027 vi_locs[i].RenderTargetArrayIndex = i;
2028 vi_locs[i].ViewportArrayIndex = 0;
2029 if (view_mask & (1 << i))
2030 vi->ViewInstanceCount = i + 1;
2031 }
2032 vi->Flags = D3D12_VIEW_INSTANCING_FLAG_ENABLE_VIEW_INSTANCE_MASKING;
2033 pipeline->multiview.native_view_instancing = true;
2034 }
2035
2036 ret = dzn_graphics_pipeline_compile_shaders(device, pipeline, pcache,
2037 layout, stream_desc,
2038 attribs, vi_conversions,
2039 pCreateInfo);
2040 if (ret != VK_SUCCESS)
2041 goto out;
2042
2043 /* If we have no position output from a pre-rasterizer stage, we need to make sure that
2044 * depth is disabled, to fully disable the rasterizer. We can only know this after compiling
2045 * or loading the shaders.
2046 */
2047 if (pipeline->rast_disabled_from_missing_position) {
2048 if (pdev->options14.IndependentFrontAndBackStencilRefMaskSupported) {
2049 D3D12_DEPTH_STENCIL_DESC2 *ds = dzn_graphics_pipeline_get_desc(pipeline, pipeline->templates.stream_buf, ds);
2050 if (ds)
2051 ds->DepthEnable = ds->StencilEnable = false;
2052 } else {
2053 D3D12_DEPTH_STENCIL_DESC1 *ds = dzn_graphics_pipeline_get_desc(pipeline, pipeline->templates.stream_buf, ds);
2054 if (ds)
2055 ds->DepthEnable = ds->StencilEnable = false;
2056 }
2057 }
2058
2059 if (!pipeline->variants) {
2060 hres = ID3D12Device4_CreatePipelineState(device->dev, stream_desc,
2061 &IID_ID3D12PipelineState,
2062 (void **)&pipeline->base.state);
2063 if (FAILED(hres)) {
2064 ret = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2065 goto out;
2066 }
2067
2068 dzn_graphics_pipeline_cleanup_dxil_shaders(pipeline);
2069 }
2070
2071 dzn_graphics_pipeline_cleanup_nir_shaders(pipeline);
2072 ret = VK_SUCCESS;
2073
2074 out:
2075 if (ret != VK_SUCCESS)
2076 dzn_graphics_pipeline_destroy(pipeline, pAllocator);
2077 else
2078 *out = dzn_graphics_pipeline_to_handle(pipeline);
2079
2080 return ret;
2081 }
2082
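/* Copy into masked_key only the stencil-related key fields that can actually
 * affect the compiled PSO for this pipeline, so that draws with equivalent
 * dynamic state map to the same cached variant.
 */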
2083 static void
2084 mask_key_for_stencil_state(struct dzn_physical_device *pdev,
2085 struct dzn_graphics_pipeline *pipeline,
2086 const struct dzn_graphics_pipeline_variant_key *key,
2087 struct dzn_graphics_pipeline_variant_key *masked_key)
2088 {
2089 if (pdev->options14.IndependentFrontAndBackStencilRefMaskSupported) {
2090 const D3D12_DEPTH_STENCIL_DESC2 *ds_templ =
2091 dzn_graphics_pipeline_get_desc_template(pipeline, ds);
2092 if (ds_templ && ds_templ->StencilEnable) {
2093 if (ds_templ->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
2094 ds_templ->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS)
2095 masked_key->stencil_test.front.compare_mask = key->stencil_test.front.compare_mask;
2096 if (ds_templ->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
2097 ds_templ->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS)
2098 masked_key->stencil_test.back.compare_mask = key->stencil_test.back.compare_mask;
2099 if (pipeline->zsa.stencil_test.dynamic_write_mask) {
2100 masked_key->stencil_test.front.write_mask = key->stencil_test.front.write_mask;
2101 masked_key->stencil_test.back.write_mask = key->stencil_test.back.write_mask;
2102 }
2103 }
2104 } else {
2105 const D3D12_DEPTH_STENCIL_DESC1 *ds_templ =
2106 dzn_graphics_pipeline_get_desc_template(pipeline, ds);
2107 if (ds_templ && ds_templ->StencilEnable) {
2108 if (ds_templ->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
2109 ds_templ->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS)
2110 masked_key->stencil_test.front.compare_mask = key->stencil_test.front.compare_mask;
2111 if (ds_templ->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
2112 ds_templ->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS)
2113 masked_key->stencil_test.back.compare_mask = key->stencil_test.back.compare_mask;
2114 if (pipeline->zsa.stencil_test.dynamic_write_mask) {
2115 masked_key->stencil_test.front.write_mask = key->stencil_test.front.write_mask;
2116 masked_key->stencil_test.back.write_mask = key->stencil_test.back.write_mask;
2117 }
2118 }
2119 }
2120 }
2121
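/* Patch the depth/stencil subobject of a variant's state stream with the
 * dynamic compare/write masks captured in the masked key. Without independent
 * front/back mask support, front and back are expected to agree (see the
 * asserts below).
 */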
2122 static void
2123 update_stencil_state(struct dzn_physical_device *pdev,
2124 struct dzn_graphics_pipeline *pipeline,
2125 uintptr_t *stream_buf,
2126 const struct dzn_graphics_pipeline_variant_key *masked_key)
2127 {
2128 if (pdev->options14.IndependentFrontAndBackStencilRefMaskSupported) {
2129 D3D12_DEPTH_STENCIL_DESC2 *ds =
2130 dzn_graphics_pipeline_get_desc(pipeline, stream_buf, ds);
2131 if (ds && ds->StencilEnable) {
2132 if (pipeline->zsa.stencil_test.dynamic_compare_mask) {
2133 if (ds->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
2134 ds->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS) {
2135 ds->FrontFace.StencilReadMask = masked_key->stencil_test.front.compare_mask;
2136 }
2137
2138 if (ds->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
2139 ds->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS) {
2140 ds->BackFace.StencilReadMask = masked_key->stencil_test.back.compare_mask;
2141 }
2142 }
2143
2144 if (pipeline->zsa.stencil_test.dynamic_write_mask) {
2145 ds->FrontFace.StencilWriteMask = masked_key->stencil_test.front.write_mask;
2146 ds->BackFace.StencilWriteMask = masked_key->stencil_test.back.write_mask;
2147 }
2148 }
2149 } else {
2150 D3D12_DEPTH_STENCIL_DESC1 *ds =
2151 dzn_graphics_pipeline_get_desc(pipeline, stream_buf, ds);
2152 if (ds && ds->StencilEnable) {
2153 if (pipeline->zsa.stencil_test.dynamic_compare_mask) {
2154 if (ds->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
2155 ds->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS) {
2156 ds->StencilReadMask = masked_key->stencil_test.front.compare_mask;
2157 }
2158
2159 if (ds->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
2160 ds->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS) {
2161 ds->StencilReadMask = masked_key->stencil_test.back.compare_mask;
2162 }
2163
2164 if (ds->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
2165 ds->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS &&
2166 ds->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
2167 ds->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS)
2168 assert(masked_key->stencil_test.front.compare_mask == masked_key->stencil_test.back.compare_mask);
2169 }
2170
2171 if (pipeline->zsa.stencil_test.dynamic_write_mask) {
2172 assert(!masked_key->stencil_test.front.write_mask ||
2173 !masked_key->stencil_test.back.write_mask ||
2174 masked_key->stencil_test.front.write_mask == masked_key->stencil_test.back.write_mask);
2175 ds->StencilWriteMask =
2176 masked_key->stencil_test.front.write_mask |
2177 masked_key->stencil_test.back.write_mask;
2178 }
2179 }
2180 }
2181 }
2182
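/* Return the ID3D12PipelineState matching the dynamic state captured in key,
 * creating it on demand. Pipelines without variant-inducing dynamic state keep
 * a single pre-built PSO; otherwise variants are cached in a hash table keyed
 * on the masked variant key.
 */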
2183 ID3D12PipelineState *
2184 dzn_graphics_pipeline_get_state(struct dzn_graphics_pipeline *pipeline,
2185 const struct dzn_graphics_pipeline_variant_key *key)
2186 {
2187 if (!pipeline->variants)
2188 return pipeline->base.state;
2189
2190 struct dzn_device *device =
2191 container_of(pipeline->base.base.device, struct dzn_device, vk);
2192 struct dzn_physical_device *pdev =
2193 container_of(device->vk.physical, struct dzn_physical_device, vk);
2194
2195 struct dzn_graphics_pipeline_variant_key masked_key = { 0 };
2196
2197 if (dzn_graphics_pipeline_get_desc_template(pipeline, ib_strip_cut))
2198 masked_key.ib_strip_cut = key->ib_strip_cut;
2199
2200 if (!pdev->options16.DynamicDepthBiasSupported &&
2201 dzn_graphics_pipeline_get_desc_template(pipeline, rast) &&
2202 pipeline->zsa.dynamic_depth_bias)
2203 masked_key.depth_bias = key->depth_bias;
2204
2205 mask_key_for_stencil_state(pdev, pipeline, key, &masked_key);
2206
2207 struct hash_entry *he =
2208 _mesa_hash_table_search(pipeline->variants, &masked_key);
2209
2210 struct dzn_graphics_pipeline_variant *variant;
2211
2212 if (!he) {
2213 variant = rzalloc(pipeline->variants, struct dzn_graphics_pipeline_variant);
2214 variant->key = masked_key;
2215
2216 uintptr_t stream_buf[MAX_GFX_PIPELINE_STATE_STREAM_SIZE / sizeof(uintptr_t)];
2217 D3D12_PIPELINE_STATE_STREAM_DESC stream_desc = {
2218 .SizeInBytes = pipeline->templates.stream_desc.SizeInBytes,
2219 .pPipelineStateSubobjectStream = stream_buf,
2220 };
2221
2222 memcpy(stream_buf, pipeline->templates.stream_buf, stream_desc.SizeInBytes);
2223
2224 D3D12_INDEX_BUFFER_STRIP_CUT_VALUE *ib_strip_cut =
2225 dzn_graphics_pipeline_get_desc(pipeline, stream_buf, ib_strip_cut);
2226 if (ib_strip_cut)
2227 *ib_strip_cut = masked_key.ib_strip_cut;
2228
2229 D3D12_RASTERIZER_DESC *rast =
2230 dzn_graphics_pipeline_get_desc(pipeline, stream_buf, rast);
2231 if (!pdev->options16.DynamicDepthBiasSupported && rast && pipeline->zsa.dynamic_depth_bias) {
2232 rast->DepthBias = translate_depth_bias(masked_key.depth_bias.constant_factor);
2233 rast->DepthBiasClamp = masked_key.depth_bias.clamp;
2234 rast->SlopeScaledDepthBias = masked_key.depth_bias.slope_factor;
2235 }
2236
2237 update_stencil_state(pdev, pipeline, stream_buf, &masked_key);
2238
2239 ASSERTED HRESULT hres = ID3D12Device4_CreatePipelineState(device->dev, &stream_desc,
2240 &IID_ID3D12PipelineState,
2241 (void**)(&variant->state));
2242 assert(!FAILED(hres));
2243 he = _mesa_hash_table_insert(pipeline->variants, &variant->key, variant);
2244 assert(he);
2245 } else {
2246 variant = he->data;
2247 }
2248
2249 if (variant->state)
2250 ID3D12PipelineState_AddRef(variant->state);
2251
2252 if (pipeline->base.state)
2253 ID3D12PipelineState_Release(pipeline->base.state);
2254
2255 pipeline->base.state = variant->state;
2256 return variant->state;
2257 }
2258
2259 #define DZN_INDIRECT_CMD_SIG_MAX_ARGS 4
2260
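/* Build (and cache) the command signature used for indirect draws. The argument
 * layout is derived from the key: an optional index-buffer view for
 * triangle-fan emulation, optional root constants for the two 32-bit draw
 * parameters (starting at first_vertex) and for the draw ID, followed by the
 * draw or draw-indexed arguments.
 */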
2261 ID3D12CommandSignature *
2262 dzn_graphics_pipeline_get_indirect_cmd_sig(struct dzn_graphics_pipeline *pipeline,
2263 struct dzn_indirect_draw_cmd_sig_key key)
2264 {
2265 assert(key.value < DZN_NUM_INDIRECT_DRAW_CMD_SIGS);
2266
2267 struct dzn_device *device = container_of(pipeline->base.base.device, struct dzn_device, vk);
2268
2269 uint32_t cmd_arg_count = 0;
2270 D3D12_INDIRECT_ARGUMENT_DESC cmd_args[DZN_INDIRECT_CMD_SIG_MAX_ARGS];
2271 uint32_t stride = 0;
2272
2273 if (key.triangle_fan) {
2274 assert(key.indexed);
2275 cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC) {
2276 .Type = D3D12_INDIRECT_ARGUMENT_TYPE_INDEX_BUFFER_VIEW,
2277 };
2278 stride += sizeof(D3D12_INDEX_BUFFER_VIEW);
2279 }
2280
2281 if (key.draw_params) {
2282 cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC){
2283 .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT,
2284 .Constant = {
2285 .RootParameterIndex = pipeline->base.root.sysval_cbv_param_idx,
2286 .DestOffsetIn32BitValues = offsetof(struct dxil_spirv_vertex_runtime_data, first_vertex) / 4,
2287 .Num32BitValuesToSet = 2,
2288 },
2289 };
2290 stride += sizeof(uint32_t) * 2;
2291 }
2292
2293 if (key.draw_id) {
2294 struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk);
2295 if (pdev->options21.ExecuteIndirectTier >= D3D12_EXECUTE_INDIRECT_TIER_1_1) {
2296 cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC){
2297 .Type = D3D12_INDIRECT_ARGUMENT_TYPE_INCREMENTING_CONSTANT,
2298 .IncrementingConstant = {
2299 .RootParameterIndex = pipeline->base.root.sysval_cbv_param_idx,
2300 .DestOffsetIn32BitValues = offsetof(struct dxil_spirv_vertex_runtime_data, draw_id) / 4,
2301 },
2302 };
2303 } else {
2304 cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC){
2305 .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT,
2306 .Constant = {
2307 .RootParameterIndex = pipeline->base.root.sysval_cbv_param_idx,
2308 .DestOffsetIn32BitValues = offsetof(struct dxil_spirv_vertex_runtime_data, draw_id) / 4,
2309 .Num32BitValuesToSet = 1,
2310 },
2311 };
2312 stride += sizeof(uint32_t);
2313 }
2314 }
2315
2316 cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC) {
2317 .Type = key.indexed ?
2318 D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED :
2319 D3D12_INDIRECT_ARGUMENT_TYPE_DRAW,
2320 };
2321 stride += key.indexed ? sizeof(D3D12_DRAW_INDEXED_ARGUMENTS) :
2322 sizeof(D3D12_DRAW_ARGUMENTS);
2323
2324 assert(cmd_arg_count <= ARRAY_SIZE(cmd_args));
2325 assert(offsetof(struct dxil_spirv_vertex_runtime_data, first_vertex) == 0);
2326 ID3D12CommandSignature *cmdsig = NULL;
2327
2328 if (key.custom_stride == 0 || key.custom_stride == stride)
2329 cmdsig = pipeline->indirect_cmd_sigs[key.value];
2330 else {
2331 if (!pipeline->custom_stride_cmd_sigs) {
2332 pipeline->custom_stride_cmd_sigs =
2333 _mesa_hash_table_create(NULL, gfx_pipeline_cmd_signature_key_hash, gfx_pipeline_cmd_signature_key_equal);
2334 }
2335 struct hash_entry *entry = _mesa_hash_table_search(pipeline->custom_stride_cmd_sigs, &key);
2336 if (entry)
2337 cmdsig = entry->data;
2338 }
2339
2340 if (cmdsig)
2341 return cmdsig;
2342
2343 D3D12_COMMAND_SIGNATURE_DESC cmd_sig_desc = {
2344 .ByteStride = key.custom_stride ? key.custom_stride : stride,
2345 .NumArgumentDescs = cmd_arg_count,
2346 .pArgumentDescs = cmd_args,
2347 };
2348 /* A root signature should be specified iff root params are changing */
2349 ID3D12RootSignature *root_sig = key.draw_id || key.draw_params ?
2350 pipeline->base.root.sig : NULL;
2351 HRESULT hres =
2352 ID3D12Device1_CreateCommandSignature(device->dev, &cmd_sig_desc,
2353 root_sig,
2354 &IID_ID3D12CommandSignature,
2355 (void **)&cmdsig);
2356 if (FAILED(hres))
2357 return NULL;
2358
2359 if (key.custom_stride == 0 || key.custom_stride == stride)
2360 pipeline->indirect_cmd_sigs[key.value] = cmdsig;
2361 else
2362 _mesa_hash_table_insert(pipeline->custom_stride_cmd_sigs, &key, cmdsig);
2363 return cmdsig;
2364 }
2365
2366 VKAPI_ATTR VkResult VKAPI_CALL
2367 dzn_CreateGraphicsPipelines(VkDevice dev,
2368 VkPipelineCache pipelineCache,
2369 uint32_t count,
2370 const VkGraphicsPipelineCreateInfo *pCreateInfos,
2371 const VkAllocationCallbacks *pAllocator,
2372 VkPipeline *pPipelines)
2373 {
2374 VK_FROM_HANDLE(dzn_device, device, dev);
2375 VkResult result = VK_SUCCESS;
2376
2377 unsigned i;
2378 for (i = 0; i < count; i++) {
2379 result = dzn_graphics_pipeline_create(device,
2380 pipelineCache,
2381 &pCreateInfos[i],
2382 pAllocator,
2383 &pPipelines[i]);
2384 if (result != VK_SUCCESS) {
2385 pPipelines[i] = VK_NULL_HANDLE;
2386
2387          /* Bail out on the first error other than VK_PIPELINE_COMPILE_REQUIRED, as it
2388           * is not obvious which error should be reported when two different failures occur.
2389           */
2390 if (result != VK_PIPELINE_COMPILE_REQUIRED)
2391 break;
2392
2393 if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
2394 break;
2395 }
2396 }
2397
2398 for (; i < count; i++)
2399 pPipelines[i] = VK_NULL_HANDLE;
2400
2401 return result;
2402 }
2403
2404 static void
2405 dzn_compute_pipeline_destroy(struct dzn_compute_pipeline *pipeline,
2406 const VkAllocationCallbacks *alloc)
2407 {
2408 if (!pipeline)
2409 return;
2410
2411 if (pipeline->indirect_cmd_sig)
2412 ID3D12CommandSignature_Release(pipeline->indirect_cmd_sig);
2413
2414 dzn_pipeline_finish(&pipeline->base);
2415 vk_free2(&pipeline->base.base.device->alloc, alloc, pipeline);
2416 }
2417
2418 static VkResult
2419 dzn_pipeline_cache_lookup_compute_pipeline(struct vk_pipeline_cache *cache,
2420 uint8_t *pipeline_hash,
2421 D3D12_PIPELINE_STATE_STREAM_DESC *stream_desc,
2422 D3D12_SHADER_BYTECODE *dxil,
2423 bool *cache_hit)
2424 {
2425 *cache_hit = false;
2426
2427 if (!cache)
2428 return VK_SUCCESS;
2429
2430 struct vk_pipeline_cache_object *cache_obj = NULL;
2431
2432 cache_obj =
2433 vk_pipeline_cache_lookup_object(cache, pipeline_hash, SHA1_DIGEST_LENGTH,
2434 &dzn_cached_blob_ops,
2435 NULL);
2436 if (!cache_obj)
2437 return VK_SUCCESS;
2438
2439 struct dzn_cached_blob *cached_blob =
2440 container_of(cache_obj, struct dzn_cached_blob, base);
2441
2442 assert(cached_blob->size == SHA1_DIGEST_LENGTH);
2443
2444 const uint8_t *dxil_hash = cached_blob->data;
2445 gl_shader_stage stage;
2446
2447 VkResult ret =
2448 dzn_pipeline_cache_lookup_dxil_shader(cache, dxil_hash, &stage, dxil);
2449
2450 if (ret != VK_SUCCESS || stage == MESA_SHADER_NONE)
2451 goto out;
2452
2453 assert(stage == MESA_SHADER_COMPUTE);
2454
2455 d3d12_compute_pipeline_state_stream_new_desc(stream_desc, CS, D3D12_SHADER_BYTECODE, slot);
2456 *slot = *dxil;
2457 *cache_hit = true;
2458
2459 out:
2460 vk_pipeline_cache_object_unref(cache->base.device, cache_obj);
2461 return ret;
2462 }
2463
2464 static void
2465 dzn_pipeline_cache_add_compute_pipeline(struct vk_pipeline_cache *cache,
2466 uint8_t *pipeline_hash,
2467 uint8_t *dxil_hash)
2468 {
2469 struct vk_pipeline_cache_object *cache_obj =
2470 dzn_cached_blob_create(cache->base.device, pipeline_hash, NULL, SHA1_DIGEST_LENGTH);
2471 if (!cache_obj)
2472 return;
2473
2474 struct dzn_cached_blob *cached_blob =
2475 container_of(cache_obj, struct dzn_cached_blob, base);
2476
2477 memcpy((void *)cached_blob->data, dxil_hash, SHA1_DIGEST_LENGTH);
2478
2479 cache_obj = vk_pipeline_cache_add_object(cache, cache_obj);
2480 vk_pipeline_cache_object_unref(cache->base.device, cache_obj);
2481 }
2482
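/* Compile the compute shader with two levels of caching: a pipeline-level entry
 * mapping the (bindless mode, SPIR-V, layout) hash to a DXIL hash, and a
 * shader-level entry holding the DXIL bytecode. On a full hit, the CS subobject
 * is filled straight from the cache and SPIR-V->NIR->DXIL translation is
 * skipped.
 */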
2483 static VkResult
2484 dzn_compute_pipeline_compile_shader(struct dzn_device *device,
2485 struct dzn_compute_pipeline *pipeline,
2486 struct vk_pipeline_cache *cache,
2487 const struct dzn_pipeline_layout *layout,
2488 D3D12_PIPELINE_STATE_STREAM_DESC *stream_desc,
2489 D3D12_SHADER_BYTECODE *shader,
2490 const VkComputePipelineCreateInfo *info)
2491 {
2492 struct dzn_physical_device *pdev =
2493 container_of(device->vk.physical, struct dzn_physical_device, vk);
2494 uint8_t spirv_hash[SHA1_DIGEST_LENGTH], pipeline_hash[SHA1_DIGEST_LENGTH], nir_hash[SHA1_DIGEST_LENGTH];
2495 VkResult ret = VK_SUCCESS;
2496 nir_shader *nir = NULL;
2497
2498 if (cache) {
2499 struct mesa_sha1 pipeline_hash_ctx;
2500
2501 _mesa_sha1_init(&pipeline_hash_ctx);
2502 vk_pipeline_hash_shader_stage(pipeline->base.flags, &info->stage, NULL, spirv_hash);
2503 _mesa_sha1_update(&pipeline_hash_ctx, &device->bindless, sizeof(device->bindless));
2504 _mesa_sha1_update(&pipeline_hash_ctx, spirv_hash, sizeof(spirv_hash));
2505 _mesa_sha1_update(&pipeline_hash_ctx, layout->stages[MESA_SHADER_COMPUTE].hash,
2506 sizeof(layout->stages[MESA_SHADER_COMPUTE].hash));
2507 _mesa_sha1_final(&pipeline_hash_ctx, pipeline_hash);
2508
2509 bool cache_hit = false;
2510 ret = dzn_pipeline_cache_lookup_compute_pipeline(cache, pipeline_hash,
2511 stream_desc, shader,
2512 &cache_hit);
2513 if (ret != VK_SUCCESS || cache_hit)
2514 goto out;
2515 }
2516
2517 if (cache) {
2518 struct mesa_sha1 nir_hash_ctx;
2519 _mesa_sha1_init(&nir_hash_ctx);
2520 _mesa_sha1_update(&nir_hash_ctx, &device->bindless, sizeof(device->bindless));
2521 _mesa_sha1_update(&nir_hash_ctx, spirv_hash, sizeof(spirv_hash));
2522 _mesa_sha1_final(&nir_hash_ctx, nir_hash);
2523 }
2524 nir_shader_compiler_options nir_opts;
2525 const unsigned supported_bit_sizes = 16 | 32 | 64;
2526 dxil_get_nir_compiler_options(&nir_opts, dzn_get_shader_model(pdev), supported_bit_sizes, supported_bit_sizes);
2527 struct dzn_nir_options options = {
2528 .nir_opts = &nir_opts,
2529 };
2530 struct dxil_spirv_metadata metadata = { 0 };
2531 ret = dzn_pipeline_get_nir_shader(device, layout, cache, nir_hash,
2532 pipeline->base.flags, &info->stage,
2533 MESA_SHADER_COMPUTE,
2534 &options, &metadata, &nir);
2535 if (ret != VK_SUCCESS)
2536 return ret;
2537
2538 uint8_t bindings_hash[SHA1_DIGEST_LENGTH], dxil_hash[SHA1_DIGEST_LENGTH];
2539
2540 NIR_PASS_V(nir, adjust_var_bindings, device, layout, cache ? bindings_hash : NULL);
2541
2542 if (cache) {
2543 struct mesa_sha1 dxil_hash_ctx;
2544
2545 _mesa_sha1_init(&dxil_hash_ctx);
2546 _mesa_sha1_update(&dxil_hash_ctx, nir_hash, sizeof(nir_hash));
2547 _mesa_sha1_update(&dxil_hash_ctx, spirv_hash, sizeof(spirv_hash));
2548 _mesa_sha1_update(&dxil_hash_ctx, bindings_hash, sizeof(bindings_hash));
2549 _mesa_sha1_final(&dxil_hash_ctx, dxil_hash);
2550
2551 gl_shader_stage stage;
2552
2553 ret = dzn_pipeline_cache_lookup_dxil_shader(cache, dxil_hash, &stage, shader);
2554 if (ret != VK_SUCCESS)
2555 goto out;
2556
2557 if (stage != MESA_SHADER_NONE) {
2558 assert(stage == MESA_SHADER_COMPUTE);
2559 d3d12_compute_pipeline_state_stream_new_desc(stream_desc, CS, D3D12_SHADER_BYTECODE, cs);
2560 *cs = *shader;
2561 dzn_pipeline_cache_add_compute_pipeline(cache, pipeline_hash, dxil_hash);
2562 goto out;
2563 }
2564 }
2565
2566 ret = dzn_pipeline_compile_shader(device, nir, 0, shader);
2567 if (ret != VK_SUCCESS)
2568 goto out;
2569
2570 d3d12_compute_pipeline_state_stream_new_desc(stream_desc, CS, D3D12_SHADER_BYTECODE, cs);
2571 *cs = *shader;
2572
2573 if (cache) {
2574 dzn_pipeline_cache_add_dxil_shader(cache, dxil_hash, MESA_SHADER_COMPUTE, shader);
2575 dzn_pipeline_cache_add_compute_pipeline(cache, pipeline_hash, dxil_hash);
2576 }
2577
2578 out:
2579 ralloc_free(nir);
2580 return ret;
2581 }
2582
2583 static VkResult
2584 dzn_compute_pipeline_create(struct dzn_device *device,
2585 VkPipelineCache cache,
2586 const VkComputePipelineCreateInfo *pCreateInfo,
2587 const VkAllocationCallbacks *pAllocator,
2588 VkPipeline *out)
2589 {
2590 VK_FROM_HANDLE(dzn_pipeline_layout, layout, pCreateInfo->layout);
2591 VK_FROM_HANDLE(vk_pipeline_cache, pcache, cache);
2592
2593 struct dzn_compute_pipeline *pipeline =
2594 vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
2595 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2596 if (!pipeline)
2597 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2598
2599 uintptr_t state_buf[MAX_COMPUTE_PIPELINE_STATE_STREAM_SIZE / sizeof(uintptr_t)];
2600 D3D12_PIPELINE_STATE_STREAM_DESC stream_desc = {
2601 .pPipelineStateSubobjectStream = state_buf,
2602 };
2603
2604 dzn_pipeline_init(&pipeline->base, device,
2605 VK_PIPELINE_BIND_POINT_COMPUTE,
2606 vk_compute_pipeline_create_flags(pCreateInfo),
2607 layout, &stream_desc);
2608
2609 D3D12_SHADER_BYTECODE shader = { 0 };
2610 VkResult ret =
2611 dzn_compute_pipeline_compile_shader(device, pipeline, pcache, layout,
2612 &stream_desc, &shader, pCreateInfo);
2613 if (ret != VK_SUCCESS)
2614 goto out;
2615
2616 if (FAILED(ID3D12Device4_CreatePipelineState(device->dev, &stream_desc,
2617 &IID_ID3D12PipelineState,
2618 (void **)&pipeline->base.state)))
2619 ret = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2620
2621 out:
2622 free((void *)shader.pShaderBytecode);
2623 if (ret != VK_SUCCESS)
2624 dzn_compute_pipeline_destroy(pipeline, pAllocator);
2625 else
2626 *out = dzn_compute_pipeline_to_handle(pipeline);
2627
2628 return ret;
2629 }
2630
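/* Lazily create the command signature for indirect dispatch: three root
 * constants written to the sysval root-parameter slot (presumably feeding the
 * workgroup-count sysval), followed by the dispatch arguments.
 */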
2631 ID3D12CommandSignature *
2632 dzn_compute_pipeline_get_indirect_cmd_sig(struct dzn_compute_pipeline *pipeline)
2633 {
2634 if (pipeline->indirect_cmd_sig)
2635 return pipeline->indirect_cmd_sig;
2636
2637 struct dzn_device *device =
2638 container_of(pipeline->base.base.device, struct dzn_device, vk);
2639
2640 D3D12_INDIRECT_ARGUMENT_DESC indirect_dispatch_args[] = {
2641 {
2642 .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT,
2643 .Constant = {
2644 .RootParameterIndex = pipeline->base.root.sysval_cbv_param_idx,
2645 .DestOffsetIn32BitValues = 0,
2646 .Num32BitValuesToSet = 3,
2647 },
2648 },
2649 {
2650 .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH,
2651 },
2652 };
2653
2654 D3D12_COMMAND_SIGNATURE_DESC indirect_dispatch_desc = {
2655 .ByteStride = sizeof(D3D12_DISPATCH_ARGUMENTS) * 2,
2656 .NumArgumentDescs = ARRAY_SIZE(indirect_dispatch_args),
2657 .pArgumentDescs = indirect_dispatch_args,
2658 };
2659
2660 HRESULT hres =
2661 ID3D12Device1_CreateCommandSignature(device->dev, &indirect_dispatch_desc,
2662 pipeline->base.root.sig,
2663 &IID_ID3D12CommandSignature,
2664 (void **)&pipeline->indirect_cmd_sig);
2665 if (FAILED(hres))
2666 return NULL;
2667
2668 return pipeline->indirect_cmd_sig;
2669 }
2670
2671 VKAPI_ATTR VkResult VKAPI_CALL
2672 dzn_CreateComputePipelines(VkDevice dev,
2673 VkPipelineCache pipelineCache,
2674 uint32_t count,
2675 const VkComputePipelineCreateInfo *pCreateInfos,
2676 const VkAllocationCallbacks *pAllocator,
2677 VkPipeline *pPipelines)
2678 {
2679 VK_FROM_HANDLE(dzn_device, device, dev);
2680 VkResult result = VK_SUCCESS;
2681
2682 unsigned i;
2683 for (i = 0; i < count; i++) {
2684 result = dzn_compute_pipeline_create(device,
2685 pipelineCache,
2686 &pCreateInfos[i],
2687 pAllocator,
2688 &pPipelines[i]);
2689 if (result != VK_SUCCESS) {
2690 pPipelines[i] = VK_NULL_HANDLE;
2691
2692          /* Bail out on the first error other than VK_PIPELINE_COMPILE_REQUIRED, as it
2693           * is not obvious which error should be reported when two different failures occur.
2694           */
2695 if (result != VK_PIPELINE_COMPILE_REQUIRED)
2696 break;
2697
2698 if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
2699 break;
2700 }
2701 }
2702
2703 for (; i < count; i++)
2704 pPipelines[i] = VK_NULL_HANDLE;
2705
2706 return result;
2707 }
2708
2709 VKAPI_ATTR void VKAPI_CALL
2710 dzn_DestroyPipeline(VkDevice device,
2711 VkPipeline pipeline,
2712 const VkAllocationCallbacks *pAllocator)
2713 {
2714 VK_FROM_HANDLE(dzn_pipeline, pipe, pipeline);
2715
2716 if (!pipe)
2717 return;
2718
2719 if (pipe->type == VK_PIPELINE_BIND_POINT_GRAPHICS) {
2720 struct dzn_graphics_pipeline *gfx = container_of(pipe, struct dzn_graphics_pipeline, base);
2721 dzn_graphics_pipeline_destroy(gfx, pAllocator);
2722 } else {
2723 assert(pipe->type == VK_PIPELINE_BIND_POINT_COMPUTE);
2724 struct dzn_compute_pipeline *compute = container_of(pipe, struct dzn_compute_pipeline, base);
2725 dzn_compute_pipeline_destroy(compute, pAllocator);
2726 }
2727 }
2728