1 /*
2  * Copyright © Microsoft Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "dzn_private.h"
25 
26 #include "spirv/nir_spirv.h"
27 
28 #include "dxil_nir.h"
29 #include "nir_to_dxil.h"
30 #include "dxil_spirv_nir.h"
31 #include "spirv_to_dxil.h"
32 
33 #include "dxil_validator.h"
34 
35 #include "vk_alloc.h"
36 #include "vk_util.h"
37 #include "vk_format.h"
38 #include "vk_pipeline.h"
39 #include "vk_pipeline_cache.h"
40 
41 #include "util/u_debug.h"
42 
43 #define d3d12_pipeline_state_stream_new_desc(__stream, __maxstreamsz, __id, __type, __desc) \
44    __type *__desc; \
45    do { \
46       struct { \
47          D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type; \
48          __type desc; \
49       } *__wrapper; \
50       (__stream)->SizeInBytes = ALIGN_POT((__stream)->SizeInBytes, alignof(void *)); \
51       __wrapper = (void *)((uint8_t *)(__stream)->pPipelineStateSubobjectStream + (__stream)->SizeInBytes); \
52       (__stream)->SizeInBytes += sizeof(*__wrapper); \
53       assert((__stream)->SizeInBytes <= __maxstreamsz); \
54       __wrapper->type = __id; \
55       __desc = &__wrapper->desc; \
56       memset(__desc, 0, sizeof(*__desc)); \
57    } while (0)
58 
59 #define d3d12_pipeline_state_stream_new_desc_abbrev(__stream, __maxstreamsz, __id, __type, __desc) \
60    d3d12_pipeline_state_stream_new_desc(__stream, __maxstreamsz, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_ ## __id, __type, __desc)
61 
62 #define d3d12_gfx_pipeline_state_stream_new_desc(__stream, __id, __type, __desc) \
63    d3d12_pipeline_state_stream_new_desc_abbrev(__stream, MAX_GFX_PIPELINE_STATE_STREAM_SIZE, __id, __type, __desc)
64 
65 #define d3d12_compute_pipeline_state_stream_new_desc(__stream, __id, __type, __desc) \
66    d3d12_pipeline_state_stream_new_desc_abbrev(__stream, MAX_COMPUTE_PIPELINE_STATE_STREAM_SIZE, __id, __type, __desc)
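/* Illustrative usage sketch (mirroring the calls later in this file): adding
 * the vertex-shader bytecode subobject to a graphics pipeline state stream
 * looks like
 *
 *    d3d12_gfx_pipeline_state_stream_new_desc(stream, VS, D3D12_SHADER_BYTECODE, slot);
 *    slot->pShaderBytecode = dxil_code;
 *    slot->BytecodeLength = dxil_size;
 *
 * The macro pointer-aligns the stream, appends a { subobject type, desc }
 * wrapper, zero-initializes the desc and leaves `slot` pointing at it.
 * `dxil_code`/`dxil_size` are placeholder names here.
 */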
67 
68 static bool
69 gfx_pipeline_variant_key_equal(const void *a, const void *b)
70 {
71    return !memcmp(a, b, sizeof(struct dzn_graphics_pipeline_variant_key));
72 }
73 
74 static uint32_t
75 gfx_pipeline_variant_key_hash(const void *key)
76 {
77    return _mesa_hash_data(key, sizeof(struct dzn_graphics_pipeline_variant_key));
78 }
79 
80 static bool
81 gfx_pipeline_cmd_signature_key_equal(const void *a, const void *b)
82 {
83    return !memcmp(a, b, sizeof(struct dzn_indirect_draw_cmd_sig_key));
84 }
85 
86 static uint32_t
87 gfx_pipeline_cmd_signature_key_hash(const void *key)
88 {
89    return _mesa_hash_data(key, sizeof(struct dzn_indirect_draw_cmd_sig_key));
90 }
91 
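/* Generic pipeline-cache object: a SHA1 key plus an opaque byte blob. This is
 * used below to store driver-specific binaries (DXIL modules, serialized
 * graphics-pipeline state) in the common vk_pipeline_cache; serialization is
 * simply the raw bytes.
 */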
92 struct dzn_cached_blob {
93    struct vk_pipeline_cache_object base;
94    uint8_t hash[SHA1_DIGEST_LENGTH];
95    const void *data;
96    size_t size;
97 };
98 
99 static bool
100 dzn_cached_blob_serialize(struct vk_pipeline_cache_object *object,
101                           struct blob *blob)
102 {
103    struct dzn_cached_blob *cached_blob =
104       container_of(object, struct dzn_cached_blob, base);
105 
106    blob_write_bytes(blob, cached_blob->data, cached_blob->size);
107    return true;
108 }
109 
110 static void
111 dzn_cached_blob_destroy(struct vk_device *device,
112                         struct vk_pipeline_cache_object *object)
113 {
114    struct dzn_cached_blob *shader =
115       container_of(object, struct dzn_cached_blob, base);
116 
117    vk_free(&device->alloc, shader);
118 }
119 
120 static struct vk_pipeline_cache_object *
121 dzn_cached_blob_create(struct vk_device *device,
122                        const void *hash,
123                        const void *data,
124                        size_t data_size);
125 
126 static struct vk_pipeline_cache_object *
127 dzn_cached_blob_deserialize(struct vk_pipeline_cache *cache,
128                             const void *key_data, size_t key_size,
129                             struct blob_reader *blob)
130 {
131    size_t data_size = blob->end - blob->current;
132    assert(key_size == SHA1_DIGEST_LENGTH);
133 
134    return dzn_cached_blob_create(cache->base.device, key_data,
135                                  blob_read_bytes(blob, data_size), data_size);
136 }
137 
138 const struct vk_pipeline_cache_object_ops dzn_cached_blob_ops = {
139    .serialize = dzn_cached_blob_serialize,
140    .deserialize = dzn_cached_blob_deserialize,
141    .destroy = dzn_cached_blob_destroy,
142 };
143 
144 
145 static struct vk_pipeline_cache_object *
146 dzn_cached_blob_create(struct vk_device *device,
147                        const void *hash,
148                        const void *data,
149                        size_t data_size)
150 {
151    VK_MULTIALLOC(ma);
152    VK_MULTIALLOC_DECL(&ma, struct dzn_cached_blob, blob, 1);
153    VK_MULTIALLOC_DECL(&ma, uint8_t, copy, data_size);
154 
155    if (!vk_multialloc_alloc(&ma, &device->alloc,
156                             VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
157       return NULL;
158 
159    memcpy(blob->hash, hash, sizeof(blob->hash));
160 
161    vk_pipeline_cache_object_init(device, &blob->base,
162                                  &dzn_cached_blob_ops,
163                                  blob->hash, sizeof(blob->hash));
164 
165    if (data)
166       memcpy(copy, data, data_size);
167    blob->data = copy;
168    blob->size = data_size;
169 
170    return &blob->base;
171 }
172 
173 static VkResult
174 dzn_graphics_pipeline_prepare_for_variants(struct dzn_device *device,
175                                            struct dzn_graphics_pipeline *pipeline)
176 {
177    if (pipeline->variants)
178       return VK_SUCCESS;
179 
180    pipeline->variants =
181       _mesa_hash_table_create(NULL,
182                               gfx_pipeline_variant_key_hash,
183                               gfx_pipeline_variant_key_equal);
184    if (!pipeline->variants)
185       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
186 
187    return VK_SUCCESS;
188 }
189 
190 static dxil_spirv_shader_stage
191 to_dxil_shader_stage(VkShaderStageFlagBits in)
192 {
193    switch (in) {
194    case VK_SHADER_STAGE_VERTEX_BIT: return DXIL_SPIRV_SHADER_VERTEX;
195    case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: return DXIL_SPIRV_SHADER_TESS_CTRL;
196    case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: return DXIL_SPIRV_SHADER_TESS_EVAL;
197    case VK_SHADER_STAGE_GEOMETRY_BIT: return DXIL_SPIRV_SHADER_GEOMETRY;
198    case VK_SHADER_STAGE_FRAGMENT_BIT: return DXIL_SPIRV_SHADER_FRAGMENT;
199    case VK_SHADER_STAGE_COMPUTE_BIT: return DXIL_SPIRV_SHADER_COMPUTE;
200    default: unreachable("Unsupported stage");
201    }
202 }
203 
204 struct dzn_nir_options {
205    enum dxil_spirv_yz_flip_mode yz_flip_mode;
206    uint16_t y_flip_mask, z_flip_mask;
207    bool force_sample_rate_shading;
208    bool lower_view_index;
209    bool lower_view_index_to_rt_layer;
210    enum pipe_format *vi_conversions;
211    const nir_shader_compiler_options *nir_opts;
212 };
213 
214 static VkResult
215 dzn_pipeline_get_nir_shader(struct dzn_device *device,
216                             const struct dzn_pipeline_layout *layout,
217                             struct vk_pipeline_cache *cache,
218                             const uint8_t *hash,
219                             VkPipelineCreateFlags2KHR pipeline_flags,
220                             const VkPipelineShaderStageCreateInfo *stage_info,
221                             gl_shader_stage stage,
222                             const struct dzn_nir_options *options,
223                             struct dxil_spirv_metadata *metadata,
224                             nir_shader **nir)
225 {
226    if (cache) {
227       *nir = vk_pipeline_cache_lookup_nir(cache, hash, SHA1_DIGEST_LENGTH,
228                                           options->nir_opts, NULL, NULL);
229       if (*nir) {
230          /* This bit is explicitly added to the info before caching, since the sysval
231           * load wouldn't actually be present in the shader for info gathering to set it. */
232          if ((*nir)->info.stage == MESA_SHADER_VERTEX &&
233              BITSET_TEST((*nir)->info.system_values_read, SYSTEM_VALUE_FIRST_VERTEX))
234             metadata->needs_draw_sysvals = true;
235          return VK_SUCCESS;
236       }
237    }
238 
239    struct dzn_physical_device *pdev =
240       container_of(device->vk.physical, struct dzn_physical_device, vk);
241    const struct spirv_to_nir_options *spirv_opts = dxil_spirv_nir_get_spirv_options();
242 
243    VkResult result =
244       vk_pipeline_shader_stage_to_nir(&device->vk, pipeline_flags, stage_info,
245                                       spirv_opts, options->nir_opts, NULL, nir);
246    if (result != VK_SUCCESS)
247       return result;
248 
249    struct dxil_spirv_runtime_conf conf = {
250       .runtime_data_cbv = {
251          .register_space = DZN_REGISTER_SPACE_SYSVALS,
252          .base_shader_register = 0,
253       },
254       .push_constant_cbv = {
255          .register_space = DZN_REGISTER_SPACE_PUSH_CONSTANT,
256          .base_shader_register = 0,
257       },
258       .first_vertex_and_base_instance_mode = pdev->options21.ExtendedCommandInfoSupported ?
259             DXIL_SPIRV_SYSVAL_TYPE_NATIVE : DXIL_SPIRV_SYSVAL_TYPE_RUNTIME_DATA,
260       .workgroup_id_mode = DXIL_SPIRV_SYSVAL_TYPE_RUNTIME_DATA,
261       .yz_flip = {
262          .mode = options->yz_flip_mode,
263          .y_mask = options->y_flip_mask,
264          .z_mask = options->z_flip_mask,
265       },
266       .declared_read_only_images_as_srvs = !device->bindless,
267       .inferred_read_only_images_as_srvs = !device->bindless,
268       .force_sample_rate_shading = options->force_sample_rate_shading,
269       .lower_view_index = options->lower_view_index,
270       .lower_view_index_to_rt_layer = options->lower_view_index_to_rt_layer,
271       .shader_model_max = dzn_get_shader_model(pdev),
272    };
273 
274    dxil_spirv_nir_passes(*nir, &conf, metadata);
275 
276    if (stage == MESA_SHADER_VERTEX) {
277       bool needs_conv = false;
278       for (uint32_t i = 0; i < MAX_VERTEX_GENERIC_ATTRIBS; i++) {
279          if (options->vi_conversions[i] != PIPE_FORMAT_NONE)
280             needs_conv = true;
281       }
282 
283       if (needs_conv)
284          NIR_PASS_V(*nir, dxil_nir_lower_vs_vertex_conversion, options->vi_conversions);
285    }
286 
287    if (cache) {
288       /* Cache this additional metadata */
289       if (metadata->needs_draw_sysvals)
290          BITSET_SET((*nir)->info.system_values_read, SYSTEM_VALUE_FIRST_VERTEX);
291       vk_pipeline_cache_add_nir(cache, hash, SHA1_DIGEST_LENGTH, *nir);
292    }
293 
294    return VK_SUCCESS;
295 }
296 
297 static bool
298 adjust_resource_index_binding(struct nir_builder *builder,
299                               nir_intrinsic_instr *intrin,
300                               void *cb_data)
301 {
302    if (intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
303       return false;
304 
305    const struct dzn_pipeline_layout *layout = cb_data;
306    unsigned set = nir_intrinsic_desc_set(intrin);
307    unsigned binding = nir_intrinsic_binding(intrin);
308 
309    if (set >= layout->set_count ||
310        binding >= layout->binding_translation[set].binding_count)
311       return false;
312 
313    binding = layout->binding_translation[set].base_reg[binding];
314    nir_intrinsic_set_binding(intrin, binding);
315 
316    return true;
317 }
318 
319 static void
320 adjust_to_bindless_cb(struct dxil_spirv_binding_remapping *inout, void *context)
321 {
322    const struct dzn_pipeline_layout *layout = context;
323    assert(inout->descriptor_set < layout->set_count);
324    uint32_t new_binding = layout->binding_translation[inout->descriptor_set].base_reg[inout->binding];
325    switch (layout->binding_translation[inout->descriptor_set].binding_class[inout->binding]) {
326    case DZN_PIPELINE_BINDING_DYNAMIC_BUFFER:
327       inout->descriptor_set = layout->set_count;
328       FALLTHROUGH;
329    case DZN_PIPELINE_BINDING_STATIC_SAMPLER:
330       if (inout->is_sampler) {
331          inout->descriptor_set = ~0;
332          break;
333       }
334       FALLTHROUGH;
335    case DZN_PIPELINE_BINDING_NORMAL:
336       inout->binding = new_binding;
337       break;
338    default:
339       unreachable("Invalid binding type");
340    }
341 }
342 
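/* Remap the set/binding pairs on shader variables (and on
 * vulkan_resource_index intrinsics in the non-bindless case) to the register
 * numbers assigned by the pipeline layout, optionally hashing the resulting
 * mapping so it can contribute to the DXIL cache key. In bindless mode this
 * runs dxil_spirv_nir_lower_bindless() with adjust_to_bindless_cb() as the
 * remapping callback.
 */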
343 static bool
344 adjust_var_bindings(nir_shader *shader,
345                     struct dzn_device *device,
346                     const struct dzn_pipeline_layout *layout,
347                     uint8_t *bindings_hash)
348 {
349    uint32_t modes = nir_var_image | nir_var_uniform | nir_var_mem_ubo | nir_var_mem_ssbo;
350    struct mesa_sha1 bindings_hash_ctx;
351 
352    if (bindings_hash)
353       _mesa_sha1_init(&bindings_hash_ctx);
354 
355    nir_foreach_variable_with_modes(var, shader, modes) {
356       if (var->data.mode == nir_var_uniform) {
357          const struct glsl_type *type = glsl_without_array(var->type);
358 
359          if (!glsl_type_is_sampler(type) && !glsl_type_is_texture(type))
360             continue;
361       }
362 
363       unsigned s = var->data.descriptor_set, b = var->data.binding;
364 
365       if (s >= layout->set_count)
366          continue;
367 
368       assert(b < layout->binding_translation[s].binding_count);
369       if (!device->bindless)
370          var->data.binding = layout->binding_translation[s].base_reg[b];
371 
372       if (bindings_hash) {
373          _mesa_sha1_update(&bindings_hash_ctx, &s, sizeof(s));
374          _mesa_sha1_update(&bindings_hash_ctx, &b, sizeof(b));
375          _mesa_sha1_update(&bindings_hash_ctx, &var->data.binding, sizeof(var->data.binding));
376       }
377    }
378 
379    if (bindings_hash)
380       _mesa_sha1_final(&bindings_hash_ctx, bindings_hash);
381 
382    if (device->bindless) {
383       struct dxil_spirv_nir_lower_bindless_options options = {
384          .dynamic_buffer_binding = layout->dynamic_buffer_count ? layout->set_count : ~0,
385          .num_descriptor_sets = layout->set_count,
386          .callback_context = (void *)layout,
387          .remap_binding = adjust_to_bindless_cb
388       };
389       bool ret = dxil_spirv_nir_lower_bindless(shader, &options);
390       /* We skipped remapping variable bindings in the hashing loop, but if there's static
391       /* We skipped remapping variable bindings in the hashing loop, but if there are static
392        * samplers still declared, we need to remap those now. */
393          assert(glsl_type_is_sampler(glsl_without_array(var->type)));
394          var->data.binding = layout->binding_translation[var->data.descriptor_set].base_reg[var->data.binding];
395       }
396       return ret;
397    } else {
398       return nir_shader_intrinsics_pass(shader, adjust_resource_index_binding,
399                                           nir_metadata_all, (void *)layout);
400    }
401 }
402 
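/* D3D_SHADER_MODEL packs major.minor as 0xMm (6.8 == 0x68) while the DXIL
 * enum uses 0xM000m (SHADER_MODEL_6_8 == 0x60008); the shift/mask below
 * converts one into the other, e.g. 0x68 -> ((0x68 & 0xf0) << 12) | (0x68 & 0xf)
 * == 0x60008. The static_asserts validate this math.
 */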
403 enum dxil_shader_model
404 dzn_get_shader_model(const struct dzn_physical_device *pdev)
405 {
406    static_assert(D3D_SHADER_MODEL_6_0 == 0x60 && SHADER_MODEL_6_0 == 0x60000, "Validating math below");
407    static_assert(D3D_SHADER_MODEL_6_8 == 0x68 && SHADER_MODEL_6_8 == 0x60008, "Validating math below");
408    return ((pdev->shader_model & 0xf0) << 12) | (pdev->shader_model & 0xf);
409 }
410 
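/* Translate a lowered NIR shader into a DXIL module. On Windows the module is
 * also run through the DXIL validator (and disassembled when DZN_DEBUG_DXIL
 * is set); validation failures are fatal unless DZN_DEBUG_EXPERIMENTAL is set.
 */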
411 static VkResult
412 dzn_pipeline_compile_shader(struct dzn_device *device,
413                             nir_shader *nir,
414                             uint32_t input_clip_size,
415                             D3D12_SHADER_BYTECODE *slot)
416 {
417    struct dzn_instance *instance =
418       container_of(device->vk.physical->instance, struct dzn_instance, vk);
419    struct dzn_physical_device *pdev =
420       container_of(device->vk.physical, struct dzn_physical_device, vk);
421    struct nir_to_dxil_options opts = {
422       .environment = DXIL_ENVIRONMENT_VULKAN,
423       .lower_int16 = !pdev->options4.Native16BitShaderOpsSupported &&
424       /* Don't lower 16-bit types if they can only come from min-precision */
425          (device->vk.enabled_extensions.KHR_shader_float16_int8 ||
426           device->vk.enabled_features.shaderFloat16 ||
427           device->vk.enabled_features.shaderInt16),
428       .shader_model_max = dzn_get_shader_model(pdev),
429       .input_clip_size = input_clip_size,
430 #ifdef _WIN32
431       .validator_version_max = dxil_get_validator_version(instance->dxil_validator),
432 #endif
433    };
434    struct blob dxil_blob;
435    VkResult result = VK_SUCCESS;
436 
437    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
438    if (instance->debug_flags & DZN_DEBUG_NIR)
439       nir_print_shader(nir, stderr);
440 
441    if (nir_to_dxil(nir, &opts, NULL, &dxil_blob)) {
442       blob_finish_get_buffer(&dxil_blob, (void **)&slot->pShaderBytecode,
443                              (size_t *)&slot->BytecodeLength);
444    } else {
445       result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
446    }
447 
448    if (dxil_blob.allocated)
449       blob_finish(&dxil_blob);
450 
451    if (result != VK_SUCCESS)
452       return result;
453 
454 #ifdef _WIN32
455    char *err;
456    bool res = dxil_validate_module(instance->dxil_validator,
457                                    (void *)slot->pShaderBytecode,
458                                    slot->BytecodeLength, &err);
459 
460    if (instance->debug_flags & DZN_DEBUG_DXIL) {
461       char *disasm = dxil_disasm_module(instance->dxil_validator,
462                                         (void *)slot->pShaderBytecode,
463                                         slot->BytecodeLength);
464       if (disasm) {
465          fprintf(stderr,
466                  "== BEGIN SHADER ============================================\n"
467                  "%s\n"
468                  "== END SHADER ==============================================\n",
469                   disasm);
470          ralloc_free(disasm);
471       }
472    }
473 
474    if (!res && !(instance->debug_flags & DZN_DEBUG_EXPERIMENTAL)) {
475       if (err) {
476          mesa_loge(
477                "== VALIDATION ERROR =============================================\n"
478                "%s\n"
479                "== END ==========================================================\n",
480                err);
481          ralloc_free(err);
482       }
483       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
484    }
485 #endif
486 
487    return VK_SUCCESS;
488 }
489 
490 static D3D12_SHADER_BYTECODE *
491 dzn_pipeline_get_gfx_shader_slot(D3D12_PIPELINE_STATE_STREAM_DESC *stream,
492                                  gl_shader_stage in)
493 {
494    switch (in) {
495    case MESA_SHADER_VERTEX: {
496       d3d12_gfx_pipeline_state_stream_new_desc(stream, VS, D3D12_SHADER_BYTECODE, desc);
497       return desc;
498    }
499    case MESA_SHADER_TESS_CTRL: {
500       d3d12_gfx_pipeline_state_stream_new_desc(stream, HS, D3D12_SHADER_BYTECODE, desc);
501       return desc;
502    }
503    case MESA_SHADER_TESS_EVAL: {
504       d3d12_gfx_pipeline_state_stream_new_desc(stream, DS, D3D12_SHADER_BYTECODE, desc);
505       return desc;
506    }
507    case MESA_SHADER_GEOMETRY: {
508       d3d12_gfx_pipeline_state_stream_new_desc(stream, GS, D3D12_SHADER_BYTECODE, desc);
509       return desc;
510    }
511    case MESA_SHADER_FRAGMENT: {
512       d3d12_gfx_pipeline_state_stream_new_desc(stream, PS, D3D12_SHADER_BYTECODE, desc);
513       return desc;
514    }
515    default: unreachable("Unsupported stage");
516    }
517 }
518 
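/* Cache-entry layout for a compiled DXIL shader: this header immediately
 * followed by `size` bytes of DXIL bytecode, stored as a dzn_cached_blob
 * keyed by the shader's DXIL hash.
 */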
519 struct dzn_cached_dxil_shader_header {
520    gl_shader_stage stage;
521    size_t size;
522    uint8_t data[0];
523 };
524 
525 static VkResult
526 dzn_pipeline_cache_lookup_dxil_shader(struct vk_pipeline_cache *cache,
527                                       const uint8_t *dxil_hash,
528                                       gl_shader_stage *stage,
529                                       D3D12_SHADER_BYTECODE *bc)
530 {
531    *stage = MESA_SHADER_NONE;
532 
533    if (!cache)
534       return VK_SUCCESS;
535 
536    struct vk_pipeline_cache_object *cache_obj = NULL;
537 
538    cache_obj =
539       vk_pipeline_cache_lookup_object(cache, dxil_hash, SHA1_DIGEST_LENGTH,
540                                       &dzn_cached_blob_ops,
541                                       NULL);
542    if (!cache_obj)
543       return VK_SUCCESS;
544 
545    struct dzn_cached_blob *cached_blob =
546       container_of(cache_obj, struct dzn_cached_blob, base);
547    VkResult ret = VK_SUCCESS;
548 
549    assert(sizeof(struct dzn_cached_dxil_shader_header) <= cached_blob->size);
550 
551    const struct dzn_cached_dxil_shader_header *info =
552       (struct dzn_cached_dxil_shader_header *)(cached_blob->data);
553 
554    assert(sizeof(struct dzn_cached_dxil_shader_header) + info->size <= cached_blob->size);
555    assert(info->stage > MESA_SHADER_NONE && info->stage < MESA_VULKAN_SHADER_STAGES);
556    assert(info->size > 0);
557 
558    void *code = malloc(info->size);
559    if (!code) {
560       ret = vk_error(cache->base.device, VK_ERROR_OUT_OF_HOST_MEMORY);
561       goto out;
562    }
563 
564    memcpy(code, info->data, info->size);
565 
566    bc->pShaderBytecode = code;
567    bc->BytecodeLength = info->size;
568    *stage = info->stage;
569 
570 out:
571    vk_pipeline_cache_object_unref(cache->base.device, cache_obj);
572    return ret;
573 }
574 
575 static void
576 dzn_pipeline_cache_add_dxil_shader(struct vk_pipeline_cache *cache,
577                                    const uint8_t *dxil_hash,
578                                    gl_shader_stage stage,
579                                    const D3D12_SHADER_BYTECODE *bc)
580 {
581    size_t size = sizeof(struct dzn_cached_dxil_shader_header) +
582                  bc->BytecodeLength;
583 
584    struct vk_pipeline_cache_object *cache_obj =
585       dzn_cached_blob_create(cache->base.device, dxil_hash, NULL, size);
586    if (!cache_obj)
587       return;
588 
589    struct dzn_cached_blob *cached_blob =
590       container_of(cache_obj, struct dzn_cached_blob, base);
591    struct dzn_cached_dxil_shader_header *info =
592       (struct dzn_cached_dxil_shader_header *)(cached_blob->data);
593    info->stage = stage;
594    info->size = bc->BytecodeLength;
595    memcpy(info->data, bc->pShaderBytecode, bc->BytecodeLength);
596 
597    cache_obj = vk_pipeline_cache_add_object(cache, cache_obj);
598    vk_pipeline_cache_object_unref(cache->base.device, cache_obj);
599 }
600 
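/* Cache-entry layout for a whole graphics pipeline: this header, padded to
 * alignof(D3D12_INPUT_ELEMENT_DESC), then `input_count` input-element
 * descriptions, then one SHA1 DXIL hash per bit set in `stages`.
 */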
601 struct dzn_cached_gfx_pipeline_header {
602    uint32_t stages : 30;
603    uint32_t needs_draw_sysvals : 1;
604    uint32_t rast_disabled_from_missing_position : 1;
605    uint32_t input_count;
606 };
607 
608 static VkResult
609 dzn_pipeline_cache_lookup_gfx_pipeline(struct dzn_graphics_pipeline *pipeline,
610                                        struct vk_pipeline_cache *cache,
611                                        const uint8_t *pipeline_hash,
612                                        bool *cache_hit)
613 {
614    *cache_hit = false;
615 
616    if (!cache)
617       return VK_SUCCESS;
618 
619    struct vk_pipeline_cache_object *cache_obj = NULL;
620 
621    cache_obj =
622       vk_pipeline_cache_lookup_object(cache, pipeline_hash, SHA1_DIGEST_LENGTH,
623                                       &dzn_cached_blob_ops,
624                                       NULL);
625    if (!cache_obj)
626       return VK_SUCCESS;
627 
628    struct dzn_cached_blob *cached_blob =
629       container_of(cache_obj, struct dzn_cached_blob, base);
630    D3D12_PIPELINE_STATE_STREAM_DESC *stream_desc =
631       &pipeline->templates.stream_desc;
632 
633    const struct dzn_cached_gfx_pipeline_header *info =
634       (const struct dzn_cached_gfx_pipeline_header *)(cached_blob->data);
635    size_t offset = ALIGN_POT(sizeof(*info), alignof(D3D12_INPUT_ELEMENT_DESC));
636 
637    assert(cached_blob->size >= sizeof(*info));
638 
639    if (info->input_count > 0) {
640       const D3D12_INPUT_ELEMENT_DESC *inputs =
641          (const D3D12_INPUT_ELEMENT_DESC *)((uint8_t *)cached_blob->data + offset);
642 
643       assert(cached_blob->size >= offset + sizeof(*inputs) * info->input_count);
644 
645       memcpy(pipeline->templates.inputs, inputs,
646              sizeof(*inputs) * info->input_count);
647       d3d12_gfx_pipeline_state_stream_new_desc(stream_desc, INPUT_LAYOUT, D3D12_INPUT_LAYOUT_DESC, desc);
648       desc->pInputElementDescs = pipeline->templates.inputs;
649       desc->NumElements = info->input_count;
650       offset += sizeof(*inputs) * info->input_count;
651    }
652 
653    assert(cached_blob->size == offset + util_bitcount(info->stages) * SHA1_DIGEST_LENGTH);
654 
655    u_foreach_bit(s, info->stages) {
656       uint8_t *dxil_hash = (uint8_t *)cached_blob->data + offset;
657       gl_shader_stage stage;
658 
659       D3D12_SHADER_BYTECODE *slot =
660          dzn_pipeline_get_gfx_shader_slot(stream_desc, s);
661 
662       VkResult ret =
663          dzn_pipeline_cache_lookup_dxil_shader(cache, dxil_hash, &stage, slot);
664       if (ret != VK_SUCCESS)
665          return ret;
666 
667       assert(stage == s);
668       offset += SHA1_DIGEST_LENGTH;
669    }
670 
671    pipeline->rast_disabled_from_missing_position = info->rast_disabled_from_missing_position;
672    pipeline->needs_draw_sysvals = info->needs_draw_sysvals;
673 
674    *cache_hit = true;
675 
676    vk_pipeline_cache_object_unref(cache->base.device, cache_obj);
677    return VK_SUCCESS;
678 }
679 
680 static void
681 dzn_pipeline_cache_add_gfx_pipeline(struct dzn_graphics_pipeline *pipeline,
682                                     struct vk_pipeline_cache *cache,
683                                     uint32_t vertex_input_count,
684                                     const uint8_t *pipeline_hash,
685                                     const uint8_t *const *dxil_hashes)
686 {
687    size_t offset =
688       ALIGN_POT(sizeof(struct dzn_cached_gfx_pipeline_header), alignof(D3D12_INPUT_ELEMENT_DESC)) +
689       (sizeof(D3D12_INPUT_ELEMENT_DESC) * vertex_input_count);
690    uint32_t stages = 0;
691 
692    for (uint32_t i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
693       if (pipeline->templates.shaders[i].bc) {
694          stages |= BITFIELD_BIT(i);
695          offset += SHA1_DIGEST_LENGTH;
696       }
697    }
698 
699    struct vk_pipeline_cache_object *cache_obj =
700       dzn_cached_blob_create(cache->base.device, pipeline_hash, NULL, offset);
701    if (!cache_obj)
702       return;
703 
704    struct dzn_cached_blob *cached_blob =
705       container_of(cache_obj, struct dzn_cached_blob, base);
706 
707    offset = 0;
708    struct dzn_cached_gfx_pipeline_header *info =
709       (struct dzn_cached_gfx_pipeline_header *)(cached_blob->data);
710 
711    info->input_count = vertex_input_count;
712    info->stages = stages;
713    info->needs_draw_sysvals = pipeline->needs_draw_sysvals;
714    info->rast_disabled_from_missing_position = pipeline->rast_disabled_from_missing_position;
715 
716    offset = ALIGN_POT(offset + sizeof(*info), alignof(D3D12_INPUT_ELEMENT_DESC));
717 
718    D3D12_INPUT_ELEMENT_DESC *inputs =
719       (D3D12_INPUT_ELEMENT_DESC *)((uint8_t *)cached_blob->data + offset);
720    memcpy(inputs, pipeline->templates.inputs,
721           sizeof(*inputs) * vertex_input_count);
722    offset += sizeof(*inputs) * vertex_input_count;
723 
724    u_foreach_bit(s, stages) {
725       uint8_t *dxil_hash = (uint8_t *)cached_blob->data + offset;
726 
727       memcpy(dxil_hash, dxil_hashes[s], SHA1_DIGEST_LENGTH);
728       offset += SHA1_DIGEST_LENGTH;
729    }
730 
731    cache_obj = vk_pipeline_cache_add_object(cache, cache_obj);
732    vk_pipeline_cache_object_unref(cache->base.device, cache_obj);
733 }
734 
735 static void
736 dzn_graphics_pipeline_hash_attribs(D3D12_INPUT_ELEMENT_DESC *attribs,
737                                    enum pipe_format *vi_conversions,
738                                    uint8_t *result)
739 {
740    struct mesa_sha1 ctx;
741 
742    _mesa_sha1_init(&ctx);
743    _mesa_sha1_update(&ctx, attribs, sizeof(*attribs) * MAX_VERTEX_GENERIC_ATTRIBS);
744    _mesa_sha1_update(&ctx, vi_conversions, sizeof(*vi_conversions) * MAX_VERTEX_GENERIC_ATTRIBS);
745    _mesa_sha1_final(&ctx, result);
746 }
747 
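/* Compile all shader stages of a graphics pipeline: collect per-stage info,
 * try the pipeline cache, get NIR for each stage, link stages in reverse
 * order, adjust resource bindings, then translate each stage to DXIL
 * (re-using cached DXIL modules where possible).
 */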
748 static VkResult
749 dzn_graphics_pipeline_compile_shaders(struct dzn_device *device,
750                                       struct dzn_graphics_pipeline *pipeline,
751                                       struct vk_pipeline_cache *cache,
752                                       const struct dzn_pipeline_layout *layout,
753                                       D3D12_PIPELINE_STATE_STREAM_DESC *out,
754                                       D3D12_INPUT_ELEMENT_DESC *attribs,
755                                       enum pipe_format *vi_conversions,
756                                       const VkGraphicsPipelineCreateInfo *info)
757 {
758    struct dzn_physical_device *pdev =
759       container_of(device->vk.physical, struct dzn_physical_device, vk);
760    const VkPipelineViewportStateCreateInfo *vp_info =
761       info->pRasterizationState->rasterizerDiscardEnable ?
762       NULL : info->pViewportState;
763    struct {
764       const VkPipelineShaderStageCreateInfo *info;
765       uint8_t spirv_hash[SHA1_DIGEST_LENGTH];
766       uint8_t dxil_hash[SHA1_DIGEST_LENGTH];
767       uint8_t nir_hash[SHA1_DIGEST_LENGTH];
768       uint8_t link_hashes[2][SHA1_DIGEST_LENGTH];
769    } stages[MESA_VULKAN_SHADER_STAGES] = { 0 };
770    const uint8_t *dxil_hashes[MESA_VULKAN_SHADER_STAGES] = { 0 };
771    uint8_t attribs_hash[SHA1_DIGEST_LENGTH];
772    uint8_t pipeline_hash[SHA1_DIGEST_LENGTH];
773    gl_shader_stage last_raster_stage = MESA_SHADER_NONE;
774    uint32_t active_stage_mask = 0;
775    VkResult ret;
776 
777    /* First step: collect stage info in a table indexed by gl_shader_stage
778     * so we can iterate over stages in pipeline order or reverse pipeline
779     * order.
780     */
781    for (uint32_t i = 0; i < info->stageCount; i++) {
782       gl_shader_stage stage =
783          vk_to_mesa_shader_stage(info->pStages[i].stage);
784 
785       assert(stage <= MESA_SHADER_FRAGMENT);
786 
787       if ((stage == MESA_SHADER_VERTEX ||
788            stage == MESA_SHADER_TESS_EVAL ||
789            stage == MESA_SHADER_GEOMETRY) &&
790           last_raster_stage < stage)
791          last_raster_stage = stage;
792 
793       if (stage == MESA_SHADER_FRAGMENT &&
794           info->pRasterizationState &&
795           (info->pRasterizationState->rasterizerDiscardEnable ||
796            info->pRasterizationState->cullMode == VK_CULL_MODE_FRONT_AND_BACK)) {
797          /* Disable rasterization (AKA leave fragment shader NULL) when
798           * front+back culling or discard is set.
799           */
800          continue;
801       }
802 
803       stages[stage].info = &info->pStages[i];
804       active_stage_mask |= BITFIELD_BIT(stage);
805    }
806 
807    pipeline->use_gs_for_polygon_mode_point =
808       info->pRasterizationState &&
809       info->pRasterizationState->polygonMode == VK_POLYGON_MODE_POINT &&
810       !(active_stage_mask & (1 << MESA_SHADER_GEOMETRY));
811    if (pipeline->use_gs_for_polygon_mode_point)
812       last_raster_stage = MESA_SHADER_GEOMETRY;
813 
814    enum dxil_spirv_yz_flip_mode yz_flip_mode = DXIL_SPIRV_YZ_FLIP_NONE;
815    uint16_t y_flip_mask = 0, z_flip_mask = 0;
816    bool lower_view_index =
817       !pipeline->multiview.native_view_instancing &&
818       pipeline->multiview.view_mask > 1;
819 
820    if (pipeline->vp.dynamic) {
821       yz_flip_mode = DXIL_SPIRV_YZ_FLIP_CONDITIONAL;
822    } else if (vp_info) {
823       for (uint32_t i = 0; vp_info->pViewports && i < vp_info->viewportCount; i++) {
824          if (vp_info->pViewports[i].height > 0)
825             y_flip_mask |= BITFIELD_BIT(i);
826 
827          if (vp_info->pViewports[i].minDepth > vp_info->pViewports[i].maxDepth)
828             z_flip_mask |= BITFIELD_BIT(i);
829       }
830 
831       if (y_flip_mask && z_flip_mask)
832          yz_flip_mode = DXIL_SPIRV_YZ_FLIP_UNCONDITIONAL;
833       else if (z_flip_mask)
834          yz_flip_mode = DXIL_SPIRV_Z_FLIP_UNCONDITIONAL;
835       else if (y_flip_mask)
836          yz_flip_mode = DXIL_SPIRV_Y_FLIP_UNCONDITIONAL;
837    }
838 
839    bool force_sample_rate_shading =
840       !info->pRasterizationState->rasterizerDiscardEnable &&
841       info->pMultisampleState &&
842       info->pMultisampleState->sampleShadingEnable;
843 
844    if (cache) {
845       dzn_graphics_pipeline_hash_attribs(attribs, vi_conversions, attribs_hash);
846 
847       struct mesa_sha1 pipeline_hash_ctx;
848 
849       _mesa_sha1_init(&pipeline_hash_ctx);
850       _mesa_sha1_update(&pipeline_hash_ctx, &device->bindless, sizeof(device->bindless));
851       _mesa_sha1_update(&pipeline_hash_ctx, attribs_hash, sizeof(attribs_hash));
852       _mesa_sha1_update(&pipeline_hash_ctx, &yz_flip_mode, sizeof(yz_flip_mode));
853       _mesa_sha1_update(&pipeline_hash_ctx, &y_flip_mask, sizeof(y_flip_mask));
854       _mesa_sha1_update(&pipeline_hash_ctx, &z_flip_mask, sizeof(z_flip_mask));
855       _mesa_sha1_update(&pipeline_hash_ctx, &force_sample_rate_shading, sizeof(force_sample_rate_shading));
856       _mesa_sha1_update(&pipeline_hash_ctx, &lower_view_index, sizeof(lower_view_index));
857       _mesa_sha1_update(&pipeline_hash_ctx, &pipeline->use_gs_for_polygon_mode_point, sizeof(pipeline->use_gs_for_polygon_mode_point));
858 
859       u_foreach_bit(stage, active_stage_mask) {
860          const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo *subgroup_size =
861             (const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo *)
862             vk_find_struct_const(stages[stage].info->pNext, PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO);
863          enum gl_subgroup_size subgroup_enum = subgroup_size && subgroup_size->requiredSubgroupSize >= 8 ?
864             subgroup_size->requiredSubgroupSize : SUBGROUP_SIZE_FULL_SUBGROUPS;
865 
866          vk_pipeline_hash_shader_stage(pipeline->base.flags, stages[stage].info, NULL, stages[stage].spirv_hash);
867          _mesa_sha1_update(&pipeline_hash_ctx, &subgroup_enum, sizeof(subgroup_enum));
868          _mesa_sha1_update(&pipeline_hash_ctx, stages[stage].spirv_hash, sizeof(stages[stage].spirv_hash));
869          _mesa_sha1_update(&pipeline_hash_ctx, layout->stages[stage].hash, sizeof(layout->stages[stage].hash));
870       }
871       _mesa_sha1_final(&pipeline_hash_ctx, pipeline_hash);
872 
873       bool cache_hit;
874       ret = dzn_pipeline_cache_lookup_gfx_pipeline(pipeline, cache, pipeline_hash,
875                                                    &cache_hit);
876       if (ret != VK_SUCCESS)
877          return ret;
878 
879       if (cache_hit)
880          return VK_SUCCESS;
881    }
882 
883    /* Second step: get NIR shaders for all stages. */
884    nir_shader_compiler_options nir_opts;
885    unsigned supported_bit_sizes = (pdev->options4.Native16BitShaderOpsSupported ? 16 : 0) | 32 | 64;
886    dxil_get_nir_compiler_options(&nir_opts, dzn_get_shader_model(pdev), supported_bit_sizes, supported_bit_sizes);
887    nir_opts.lower_base_vertex = true;
888    u_foreach_bit(stage, active_stage_mask) {
889       struct mesa_sha1 nir_hash_ctx;
890 
891       if (cache) {
892          _mesa_sha1_init(&nir_hash_ctx);
893          _mesa_sha1_update(&nir_hash_ctx, &device->bindless, sizeof(device->bindless));
894          if (stage != MESA_SHADER_FRAGMENT) {
895             _mesa_sha1_update(&nir_hash_ctx, &lower_view_index, sizeof(lower_view_index));
896             _mesa_sha1_update(&nir_hash_ctx, &force_sample_rate_shading, sizeof(force_sample_rate_shading));
897          }
898          if (stage == MESA_SHADER_VERTEX)
899             _mesa_sha1_update(&nir_hash_ctx, attribs_hash, sizeof(attribs_hash));
900          if (stage == last_raster_stage) {
901             _mesa_sha1_update(&nir_hash_ctx, &yz_flip_mode, sizeof(yz_flip_mode));
902             _mesa_sha1_update(&nir_hash_ctx, &y_flip_mask, sizeof(y_flip_mask));
903             _mesa_sha1_update(&nir_hash_ctx, &z_flip_mask, sizeof(z_flip_mask));
904             _mesa_sha1_update(&nir_hash_ctx, &lower_view_index, sizeof(lower_view_index));
905          }
906          _mesa_sha1_update(&nir_hash_ctx, stages[stage].spirv_hash, sizeof(stages[stage].spirv_hash));
907          _mesa_sha1_final(&nir_hash_ctx, stages[stage].nir_hash);
908       }
909 
910       struct dzn_nir_options options = {
911          .yz_flip_mode = stage == last_raster_stage ? yz_flip_mode : DXIL_SPIRV_YZ_FLIP_NONE,
912          .y_flip_mask = y_flip_mask,
913          .z_flip_mask = z_flip_mask,
914          .force_sample_rate_shading = stage == MESA_SHADER_FRAGMENT ? force_sample_rate_shading : false,
915          .lower_view_index = lower_view_index,
916          .lower_view_index_to_rt_layer = stage == last_raster_stage ? lower_view_index : false,
917          .vi_conversions = vi_conversions,
918          .nir_opts = &nir_opts,
919       };
920 
921       struct dxil_spirv_metadata metadata = { 0 };
922       ret = dzn_pipeline_get_nir_shader(device, layout,
923                                         cache, stages[stage].nir_hash,
924                                         pipeline->base.flags,
925                                         stages[stage].info, stage,
926                                         &options, &metadata,
927                                         &pipeline->templates.shaders[stage].nir);
928       if (ret != VK_SUCCESS)
929          return ret;
930 
931       if (stage == MESA_SHADER_VERTEX)
932          pipeline->needs_draw_sysvals = metadata.needs_draw_sysvals;
933    }
934 
935    if (pipeline->use_gs_for_polygon_mode_point) {
936       /* TODO: Cache; handle TES */
937       struct dzn_nir_point_gs_info gs_info = {
938          .cull_mode = info->pRasterizationState->cullMode,
939          .front_ccw = info->pRasterizationState->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE,
940          .depth_bias = info->pRasterizationState->depthBiasEnable,
941          .depth_bias_dynamic = pipeline->zsa.dynamic_depth_bias,
942          .ds_fmt = pipeline->zsa.ds_fmt,
943          .constant_depth_bias = info->pRasterizationState->depthBiasConstantFactor,
944          .slope_scaled_depth_bias = info->pRasterizationState->depthBiasSlopeFactor,
945          .depth_bias_clamp = info->pRasterizationState->depthBiasClamp,
946          .runtime_data_cbv = {
947             .register_space = DZN_REGISTER_SPACE_SYSVALS,
948             .base_shader_register = 0,
949          }
950       };
951       pipeline->templates.shaders[MESA_SHADER_GEOMETRY].nir =
952          dzn_nir_polygon_point_mode_gs(pipeline->templates.shaders[MESA_SHADER_VERTEX].nir,
953                                        &gs_info);
954 
955       struct dxil_spirv_runtime_conf conf = {
956          .runtime_data_cbv = {
957             .register_space = DZN_REGISTER_SPACE_SYSVALS,
958             .base_shader_register = 0,
959          },
960          .yz_flip = {
961             .mode = yz_flip_mode,
962             .y_mask = y_flip_mask,
963             .z_mask = z_flip_mask,
964          },
965       };
966 
967       bool requires_runtime_data;
968       NIR_PASS_V(pipeline->templates.shaders[MESA_SHADER_GEOMETRY].nir, dxil_spirv_nir_lower_yz_flip,
969                  &conf, &requires_runtime_data);
970 
971       active_stage_mask |= (1 << MESA_SHADER_GEOMETRY);
972       memcpy(stages[MESA_SHADER_GEOMETRY].spirv_hash, stages[MESA_SHADER_VERTEX].spirv_hash, SHA1_DIGEST_LENGTH);
973 
974       if ((active_stage_mask & (1 << MESA_SHADER_FRAGMENT)) &&
975           BITSET_TEST(pipeline->templates.shaders[MESA_SHADER_FRAGMENT].nir->info.system_values_read, SYSTEM_VALUE_FRONT_FACE))
976          NIR_PASS_V(pipeline->templates.shaders[MESA_SHADER_FRAGMENT].nir, dxil_nir_forward_front_face);
977    }
978 
979    /* Third step: link those NIR shaders. We iterate in reverse order
980     * so we can eliminate outputs that are never read by the next stage.
981     */
982    uint32_t link_mask = active_stage_mask;
983    while (link_mask != 0) {
984       gl_shader_stage stage = util_last_bit(link_mask) - 1;
985       link_mask &= ~BITFIELD_BIT(stage);
986       gl_shader_stage prev_stage = util_last_bit(link_mask) - 1;
987 
988       struct dxil_spirv_runtime_conf conf = {
989          .runtime_data_cbv = {
990             .register_space = DZN_REGISTER_SPACE_SYSVALS,
991             .base_shader_register = 0,
992       }};
993 
994       assert(pipeline->templates.shaders[stage].nir);
995       struct dxil_spirv_metadata metadata = { 0 };
996       dxil_spirv_nir_link(pipeline->templates.shaders[stage].nir,
997                           prev_stage != MESA_SHADER_NONE ?
998                           pipeline->templates.shaders[prev_stage].nir : NULL,
999                           &conf, &metadata);
1000 
1001       if (prev_stage != MESA_SHADER_NONE) {
1002          memcpy(stages[stage].link_hashes[0], stages[prev_stage].spirv_hash, SHA1_DIGEST_LENGTH);
1003          memcpy(stages[prev_stage].link_hashes[1], stages[stage].spirv_hash, SHA1_DIGEST_LENGTH);
1004       }
1005    }
1006 
1007    u_foreach_bit(stage, active_stage_mask) {
1008       uint8_t bindings_hash[SHA1_DIGEST_LENGTH];
1009 
1010       NIR_PASS_V(pipeline->templates.shaders[stage].nir, adjust_var_bindings, device, layout,
1011                  cache ? bindings_hash : NULL);
1012 
1013       if (cache) {
1014          struct mesa_sha1 dxil_hash_ctx;
1015 
1016          _mesa_sha1_init(&dxil_hash_ctx);
1017          _mesa_sha1_update(&dxil_hash_ctx, stages[stage].nir_hash, sizeof(stages[stage].nir_hash));
1018          _mesa_sha1_update(&dxil_hash_ctx, stages[stage].spirv_hash, sizeof(stages[stage].spirv_hash));
1019          _mesa_sha1_update(&dxil_hash_ctx, stages[stage].link_hashes[0], sizeof(stages[stage].link_hashes[0]));
1020          _mesa_sha1_update(&dxil_hash_ctx, stages[stage].link_hashes[1], sizeof(stages[stage].link_hashes[1]));
1021          _mesa_sha1_update(&dxil_hash_ctx, bindings_hash, sizeof(bindings_hash));
1022          _mesa_sha1_final(&dxil_hash_ctx, stages[stage].dxil_hash);
1023          dxil_hashes[stage] = stages[stage].dxil_hash;
1024 
1025          gl_shader_stage cached_stage;
1026          D3D12_SHADER_BYTECODE bc;
1027          ret = dzn_pipeline_cache_lookup_dxil_shader(cache, stages[stage].dxil_hash, &cached_stage, &bc);
1028          if (ret != VK_SUCCESS)
1029             return ret;
1030 
1031          if (cached_stage != MESA_SHADER_NONE) {
1032             assert(cached_stage == stage);
1033             D3D12_SHADER_BYTECODE *slot =
1034                dzn_pipeline_get_gfx_shader_slot(out, stage);
1035             *slot = bc;
1036             pipeline->templates.shaders[stage].bc = slot;
1037          }
1038       }
1039    }
1040 
1041    uint32_t vert_input_count = 0;
1042    if (pipeline->templates.shaders[MESA_SHADER_VERTEX].nir) {
1043       /* Now, declare one D3D12_INPUT_ELEMENT_DESC per VS input variable, so
1044        * we can handle location overlaps properly.
1045        */
1046       nir_foreach_shader_in_variable(var, pipeline->templates.shaders[MESA_SHADER_VERTEX].nir) {
1047          assert(var->data.location >= VERT_ATTRIB_GENERIC0);
1048          unsigned loc = var->data.location - VERT_ATTRIB_GENERIC0;
1049          assert(vert_input_count < D3D12_VS_INPUT_REGISTER_COUNT);
1050          assert(loc < MAX_VERTEX_GENERIC_ATTRIBS);
1051 
1052          pipeline->templates.inputs[vert_input_count] = attribs[loc];
1053          pipeline->templates.inputs[vert_input_count].SemanticIndex = vert_input_count;
1054          var->data.driver_location = vert_input_count++;
1055       }
1056 
1057       if (vert_input_count > 0) {
1058          d3d12_gfx_pipeline_state_stream_new_desc(out, INPUT_LAYOUT, D3D12_INPUT_LAYOUT_DESC, desc);
1059          desc->pInputElementDescs = pipeline->templates.inputs;
1060          desc->NumElements = vert_input_count;
1061       }
1062    }
1063 
1064    /* Last step: translate NIR shaders into DXIL modules */
1065    u_foreach_bit(stage, active_stage_mask) {
1066       gl_shader_stage prev_stage =
1067          util_last_bit(active_stage_mask & BITFIELD_MASK(stage)) - 1;
1068       uint32_t prev_stage_output_clip_size = 0;
1069       if (stage == MESA_SHADER_FRAGMENT) {
1070          /* Disable rasterization if the last geometry stage doesn't
1071           * write the position.
1072           */
1073          if (prev_stage == MESA_SHADER_NONE ||
1074              !(pipeline->templates.shaders[prev_stage].nir->info.outputs_written & VARYING_BIT_POS)) {
1075             pipeline->rast_disabled_from_missing_position = true;
1076             /* Clear a cache hit if there was one. */
1077             pipeline->templates.shaders[stage].bc = NULL;
1078             continue;
1079          }
1080       } else if (prev_stage != MESA_SHADER_NONE) {
1081          prev_stage_output_clip_size = pipeline->templates.shaders[prev_stage].nir->info.clip_distance_array_size;
1082       }
1083 
1084       /* Cache hit, we can skip the compilation. */
1085       if (pipeline->templates.shaders[stage].bc)
1086          continue;
1087 
1088       D3D12_SHADER_BYTECODE *slot =
1089          dzn_pipeline_get_gfx_shader_slot(out, stage);
1090 
1091       ret = dzn_pipeline_compile_shader(device, pipeline->templates.shaders[stage].nir, prev_stage_output_clip_size, slot);
1092       if (ret != VK_SUCCESS)
1093          return ret;
1094 
1095       pipeline->templates.shaders[stage].bc = slot;
1096 
1097       if (cache)
1098          dzn_pipeline_cache_add_dxil_shader(cache, stages[stage].dxil_hash, stage, slot);
1099    }
1100 
1101    if (cache)
1102       dzn_pipeline_cache_add_gfx_pipeline(pipeline, cache, vert_input_count, pipeline_hash,
1103                                           dxil_hashes);
1104 
1105    return VK_SUCCESS;
1106 }
1107 
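/* Replace vertex formats that have no direct DXGI equivalent (the SCALED
 * formats and most A2R10G10B10/A2B10G10R10 packed variants) with a compatible
 * format the input assembler can fetch; the actual conversion is then done in
 * the vertex shader via dxil_nir_lower_vs_vertex_conversion() (see
 * vi_conversions in dzn_graphics_pipeline_translate_vi()).
 */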
1108 VkFormat
1109 dzn_graphics_pipeline_patch_vi_format(VkFormat format)
1110 {
1111    switch (format) {
1112    case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
1113    case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
1114    case VK_FORMAT_A2R10G10B10_SSCALED_PACK32:
1115    case VK_FORMAT_A2R10G10B10_USCALED_PACK32:
1116    case VK_FORMAT_A2B10G10R10_SNORM_PACK32:
1117    case VK_FORMAT_A2B10G10R10_SSCALED_PACK32:
1118    case VK_FORMAT_A2B10G10R10_USCALED_PACK32:
1119       return VK_FORMAT_R32_UINT;
1120    case VK_FORMAT_R8G8B8A8_SSCALED:
1121       return VK_FORMAT_R8G8B8A8_SINT;
1122    case VK_FORMAT_R8G8B8A8_USCALED:
1123       return VK_FORMAT_R8G8B8A8_UINT;
1124    case VK_FORMAT_R16G16B16A16_USCALED:
1125       return VK_FORMAT_R16G16B16A16_UINT;
1126    case VK_FORMAT_R16G16B16A16_SSCALED:
1127       return VK_FORMAT_R16G16B16A16_SINT;
1128    default:
1129       return format;
1130    }
1131 }
1132 
1133 static VkResult
1134 dzn_graphics_pipeline_translate_vi(struct dzn_graphics_pipeline *pipeline,
1135                                    const VkGraphicsPipelineCreateInfo *in,
1136                                    D3D12_INPUT_ELEMENT_DESC *inputs,
1137                                    enum pipe_format *vi_conversions)
1138 {
1139    const VkPipelineVertexInputStateCreateInfo *in_vi =
1140       in->pVertexInputState;
1141    const VkPipelineVertexInputDivisorStateCreateInfoEXT *divisors =
1142       (const VkPipelineVertexInputDivisorStateCreateInfoEXT *)
1143       vk_find_struct_const(in_vi, PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
1144 
1145    if (!in_vi->vertexAttributeDescriptionCount)
1146       return VK_SUCCESS;
1147 
1148    D3D12_INPUT_CLASSIFICATION slot_class[MAX_VBS];
1149 
1150    pipeline->vb.count = 0;
1151    for (uint32_t i = 0; i < in_vi->vertexBindingDescriptionCount; i++) {
1152       const struct VkVertexInputBindingDescription *bdesc =
1153          &in_vi->pVertexBindingDescriptions[i];
1154 
1155       pipeline->vb.count = MAX2(pipeline->vb.count, bdesc->binding + 1);
1156       pipeline->vb.strides[bdesc->binding] = bdesc->stride;
1157       if (bdesc->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE) {
1158          slot_class[bdesc->binding] = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA;
1159       } else {
1160          assert(bdesc->inputRate == VK_VERTEX_INPUT_RATE_VERTEX);
1161          slot_class[bdesc->binding] = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;
1162       }
1163    }
1164 
1165    for (uint32_t i = 0; i < in_vi->vertexAttributeDescriptionCount; i++) {
1166       const VkVertexInputAttributeDescription *attr =
1167          &in_vi->pVertexAttributeDescriptions[i];
1168       const VkVertexInputBindingDivisorDescriptionEXT *divisor = NULL;
1169 
1170       if (slot_class[attr->binding] == D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA &&
1171           divisors) {
1172          for (uint32_t d = 0; d < divisors->vertexBindingDivisorCount; d++) {
1173             if (attr->binding == divisors->pVertexBindingDivisors[d].binding) {
1174                divisor = &divisors->pVertexBindingDivisors[d];
1175                break;
1176             }
1177          }
1178       }
1179 
1180       VkFormat patched_format = dzn_graphics_pipeline_patch_vi_format(attr->format);
1181       if (patched_format != attr->format)
1182          vi_conversions[attr->location] = vk_format_to_pipe_format(attr->format);
1183 
1184       /* nir_to_dxil() names all vertex inputs as TEXCOORDx */
1185       inputs[attr->location] = (D3D12_INPUT_ELEMENT_DESC) {
1186          .SemanticName = "TEXCOORD",
1187          .Format = dzn_buffer_get_dxgi_format(patched_format),
1188          .InputSlot = attr->binding,
1189          .InputSlotClass = slot_class[attr->binding],
1190          .InstanceDataStepRate =
1191             divisor ? divisor->divisor :
1192             slot_class[attr->binding] == D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA ? 1 : 0,
1193          .AlignedByteOffset = attr->offset,
1194       };
1195    }
1196 
1197    return VK_SUCCESS;
1198 }
1199 
1200 static D3D12_PRIMITIVE_TOPOLOGY_TYPE
1201 to_prim_topology_type(VkPrimitiveTopology in)
1202 {
1203    switch (in) {
1204    case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
1205       return D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT;
1206    case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
1207    case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
1208    case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
1209    case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
1210       return D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
1211    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
1212    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
1213    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
1214    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
1215    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
1216       return D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
1217    case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
1218       return D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH;
1219    default: unreachable("Invalid primitive topology");
1220    }
1221 }
1222 
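/* Map a Vulkan topology to a D3D primitive topology. Patch lists encode the
 * control-point count in the enum value itself, e.g. 3 control points map to
 * D3D_PRIMITIVE_TOPOLOGY_3_CONTROL_POINT_PATCHLIST
 * (D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST + 2).
 */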
1223 static D3D12_PRIMITIVE_TOPOLOGY
1224 to_prim_topology(VkPrimitiveTopology in, unsigned patch_control_points, bool support_triangle_fan)
1225 {
1226    switch (in) {
1227    case VK_PRIMITIVE_TOPOLOGY_POINT_LIST: return D3D_PRIMITIVE_TOPOLOGY_POINTLIST;
1228    case VK_PRIMITIVE_TOPOLOGY_LINE_LIST: return D3D_PRIMITIVE_TOPOLOGY_LINELIST;
1229    case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP: return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP;
1230    case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ;
1231    case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ;
1232    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
1233    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP;
1234    /* Triangle fans are emulated using an intermediate index buffer. */
1235    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN: return support_triangle_fan ?
1236       D3D_PRIMITIVE_TOPOLOGY_TRIANGLEFAN : D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
1237    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ;
1238    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ;
1239    case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
1240       assert(patch_control_points);
1241       return (D3D12_PRIMITIVE_TOPOLOGY)(D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST + patch_control_points - 1);
1242    default: unreachable("Invalid primitive topology");
1243    }
1244 }
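
/*
 * Illustrative examples (inputs assumed): the patch-list enums are contiguous,
 * one per control-point count, starting at
 * D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST, so
 *
 *    to_prim_topology(VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, 3, false)
 *       == D3D_PRIMITIVE_TOPOLOGY_3_CONTROL_POINT_PATCHLIST
 *
 * Similarly, on devices without native triangle-fan support,
 *
 *    to_prim_topology(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN, 0, false)
 *       == D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST
 *
 * and the fan is rewritten into a triangle list through the intermediate
 * index buffer mentioned above.
 */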
1245 
1246 static VkResult
1247 dzn_graphics_pipeline_translate_ia(struct dzn_device *device,
1248                                    struct dzn_graphics_pipeline *pipeline,
1249                                    D3D12_PIPELINE_STATE_STREAM_DESC *out,
1250                                    const VkGraphicsPipelineCreateInfo *in)
1251 {
1252    struct dzn_physical_device *pdev =
1253       container_of(device->vk.physical, struct dzn_physical_device, vk);
1254    const VkPipelineInputAssemblyStateCreateInfo *in_ia =
1255       in->pInputAssemblyState;
1256    bool has_tes = false;
1257    for (uint32_t i = 0; i < in->stageCount; i++) {
1258       if (in->pStages[i].stage == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT ||
1259           in->pStages[i].stage == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) {
1260          has_tes = true;
1261          break;
1262       }
1263    }
1264    const VkPipelineTessellationStateCreateInfo *in_tes =
1265       has_tes ? in->pTessellationState : NULL;
1266    VkResult ret = VK_SUCCESS;
1267 
1268    d3d12_gfx_pipeline_state_stream_new_desc(out, PRIMITIVE_TOPOLOGY, D3D12_PRIMITIVE_TOPOLOGY_TYPE, prim_top_type);
1269    *prim_top_type = to_prim_topology_type(in_ia->topology);
1270    pipeline->ia.triangle_fan = in_ia->topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN && !pdev->options15.TriangleFanSupported;
1271    pipeline->ia.topology =
1272       to_prim_topology(in_ia->topology, in_tes ? in_tes->patchControlPoints : 0,
1273                        pdev->options15.TriangleFanSupported);
1274 
1275    if (in_ia->primitiveRestartEnable) {
1276       d3d12_gfx_pipeline_state_stream_new_desc(out, IB_STRIP_CUT_VALUE, D3D12_INDEX_BUFFER_STRIP_CUT_VALUE, ib_strip_cut);
1277       pipeline->templates.desc_offsets.ib_strip_cut =
1278          (uintptr_t)ib_strip_cut - (uintptr_t)out->pPipelineStateSubobjectStream;
1279       *ib_strip_cut = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED;
1280       ret = dzn_graphics_pipeline_prepare_for_variants(device, pipeline);
1281    }
1282 
1283    return ret;
1284 }
1285 
1286 static D3D12_FILL_MODE
1287 translate_polygon_mode(VkPolygonMode in)
1288 {
1289    switch (in) {
1290    case VK_POLYGON_MODE_FILL: return D3D12_FILL_MODE_SOLID;
1291    case VK_POLYGON_MODE_LINE: return D3D12_FILL_MODE_WIREFRAME;
1292    case VK_POLYGON_MODE_POINT:
1293       /* This is handled elsewhere */
1294       return D3D12_FILL_MODE_SOLID;
1295    default: unreachable("Unsupported polygon mode");
1296    }
1297 }
1298 
1299 static D3D12_CULL_MODE
1300 translate_cull_mode(VkCullModeFlags in)
1301 {
1302    switch (in) {
1303    case VK_CULL_MODE_NONE: return D3D12_CULL_MODE_NONE;
1304    case VK_CULL_MODE_FRONT_BIT: return D3D12_CULL_MODE_FRONT;
1305    case VK_CULL_MODE_BACK_BIT: return D3D12_CULL_MODE_BACK;
1306    /* Front+back face culling is equivalent to 'rasterization disabled' */
1307    case VK_CULL_MODE_FRONT_AND_BACK: return D3D12_CULL_MODE_NONE;
1308    default: unreachable("Unsupported cull mode");
1309    }
1310 }
1311 
1312 static int32_t
1313 translate_depth_bias(double depth_bias)
1314 {
1315    if (depth_bias > INT32_MAX)
1316       return INT32_MAX;
1317    else if (depth_bias < INT32_MIN)
1318       return INT32_MIN;
1319 
1320    return depth_bias;
1321 }
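
/*
 * Illustrative values (assumed for the example): the legacy
 * D3D12_RASTERIZER_DESC.DepthBias field is a 32-bit integer while Vulkan's
 * depthBiasConstantFactor is a float, hence the clamping above, e.g.
 *
 *    translate_depth_bias(16.0)   ->  16
 *    translate_depth_bias(1e12)   ->  INT32_MAX
 *    translate_depth_bias(-1e12)  ->  INT32_MIN
 *
 * The RASTERIZER1/RASTERIZER2 paths below keep the float value as-is instead.
 */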
1322 
1323 static void
1324 dzn_graphics_pipeline_translate_rast(struct dzn_device *device,
1325                                      struct dzn_graphics_pipeline *pipeline,
1326                                      D3D12_PIPELINE_STATE_STREAM_DESC *out,
1327                                      const VkGraphicsPipelineCreateInfo *in)
1328 {
1329    struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk);
1330    const VkPipelineRasterizationStateCreateInfo *in_rast =
1331       in->pRasterizationState;
1332    const VkPipelineViewportStateCreateInfo *in_vp =
1333       in_rast->rasterizerDiscardEnable ? NULL : in->pViewportState;
1334    const VkPipelineMultisampleStateCreateInfo *in_ms =
1335       in_rast->rasterizerDiscardEnable ? NULL : in->pMultisampleState;
1336 
1337    if (in_vp) {
1338       pipeline->vp.count = in_vp->viewportCount;
1339       if (in_vp->pViewports) {
1340          for (uint32_t i = 0; i < in_vp->viewportCount; i++)
1341             dzn_translate_viewport(&pipeline->vp.desc[i], &in_vp->pViewports[i]);
1342       }
1343 
1344       pipeline->scissor.count = in_vp->scissorCount;
1345       if (in_vp->pScissors) {
1346          for (uint32_t i = 0; i < in_vp->scissorCount; i++)
1347             dzn_translate_rect(&pipeline->scissor.desc[i], &in_vp->pScissors[i]);
1348       }
1349    }
1350 
1351    if (pdev->options19.NarrowQuadrilateralLinesSupported) {
1352       assert(pdev->options16.DynamicDepthBiasSupported);
1353       d3d12_gfx_pipeline_state_stream_new_desc(out, RASTERIZER2, D3D12_RASTERIZER_DESC2, desc);
1354       pipeline->templates.desc_offsets.rast =
1355          (uintptr_t)desc - (uintptr_t)out->pPipelineStateSubobjectStream;
1356       desc->DepthClipEnable = !in_rast->depthClampEnable;
1357       desc->FillMode = translate_polygon_mode(in_rast->polygonMode);
1358       desc->CullMode = translate_cull_mode(in_rast->cullMode);
1359       desc->FrontCounterClockwise =
1360          in_rast->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE;
1361       if (in_rast->depthBiasEnable) {
1362          desc->DepthBias = in_rast->depthBiasConstantFactor;
1363          desc->SlopeScaledDepthBias = in_rast->depthBiasSlopeFactor;
1364          desc->DepthBiasClamp = in_rast->depthBiasClamp;
1365       }
1366       desc->LineRasterizationMode = D3D12_LINE_RASTERIZATION_MODE_QUADRILATERAL_NARROW;
1367    } else {
1368       static_assert(sizeof(D3D12_RASTERIZER_DESC) == sizeof(D3D12_RASTERIZER_DESC1), "Casting between these");
1369       D3D12_PIPELINE_STATE_SUBOBJECT_TYPE rast_type = pdev->options16.DynamicDepthBiasSupported ?
1370          D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_RASTERIZER1 :
1371          D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_RASTERIZER;
1372       d3d12_pipeline_state_stream_new_desc(out, MAX_GFX_PIPELINE_STATE_STREAM_SIZE, rast_type, D3D12_RASTERIZER_DESC, desc);
1373       pipeline->templates.desc_offsets.rast =
1374          (uintptr_t)desc - (uintptr_t)out->pPipelineStateSubobjectStream;
1375       desc->DepthClipEnable = !in_rast->depthClampEnable;
1376       desc->FillMode = translate_polygon_mode(in_rast->polygonMode);
1377       desc->CullMode = translate_cull_mode(in_rast->cullMode);
1378       desc->FrontCounterClockwise =
1379          in_rast->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE;
1380       if (in_rast->depthBiasEnable) {
1381          if (rast_type == D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_RASTERIZER1)
1382             ((D3D12_RASTERIZER_DESC1 *)desc)->DepthBias = in_rast->depthBiasConstantFactor;
1383          else
1384             desc->DepthBias = translate_depth_bias(in_rast->depthBiasConstantFactor);
1385          desc->SlopeScaledDepthBias = in_rast->depthBiasSlopeFactor;
1386          desc->DepthBiasClamp = in_rast->depthBiasClamp;
1387       }
1388 
1389       /* The Vulkan conformance tests use different reference rasterizers for single-sampled
1390        * and multi-sampled lines. The single-sampled lines can be bresenham lines, but multi-
1391        * sampled need to be quadrilateral lines. This still isn't *quite* sufficient, because
1392        * D3D only supports a line width of 1.4 (per spec), but Vulkan requires us to support
1393        * 1.0 (and without claiming wide lines, that's all we can support).
1394        */
1395       if (in_ms && in_ms->rasterizationSamples > 1)
1396          desc->MultisampleEnable = true;
1397    }
1398 
1399    assert(in_rast->lineWidth == 1.0f);
1400 }
1401 
1402 static void
1403 dzn_graphics_pipeline_translate_ms(struct dzn_graphics_pipeline *pipeline,
1404                                    D3D12_PIPELINE_STATE_STREAM_DESC *out,
1405                                    const VkGraphicsPipelineCreateInfo *in)
1406 {
1407    const VkPipelineRasterizationStateCreateInfo *in_rast =
1408       in->pRasterizationState;
1409    const VkPipelineMultisampleStateCreateInfo *in_ms =
1410       in_rast->rasterizerDiscardEnable ? NULL : in->pMultisampleState;
1411 
1412    if (!in_ms)
1413       return;
1414 
1415    /* TODO: minSampleShading (use VRS), alphaToOneEnable */
1416    d3d12_gfx_pipeline_state_stream_new_desc(out, SAMPLE_DESC, DXGI_SAMPLE_DESC, desc);
1417    desc->Count = in_ms->rasterizationSamples;
1418    desc->Quality = 0;
1419 
1420    if (!in_ms->pSampleMask)
1421       return;
1422 
1423    d3d12_gfx_pipeline_state_stream_new_desc(out, SAMPLE_MASK, UINT, mask);
1424    *mask = *in_ms->pSampleMask;
1425 }
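
/*
 * Illustrative example (values assumed): a pipeline created with
 * rasterizationSamples = VK_SAMPLE_COUNT_4_BIT and a pSampleMask of
 * (VkSampleMask[]) { 0x5 } adds two subobjects to the stream, roughly
 *
 *    DXGI_SAMPLE_DESC sample_desc = { .Count = 4, .Quality = 0 };
 *    UINT sample_mask = 0x5;   // only samples 0 and 2 get written
 *
 * When pSampleMask is NULL the SAMPLE_MASK subobject is omitted and D3D12
 * falls back to its default of all samples enabled.
 */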
1426 
1427 static D3D12_STENCIL_OP
1428 translate_stencil_op(VkStencilOp in)
1429 {
1430    switch (in) {
1431    case VK_STENCIL_OP_KEEP: return D3D12_STENCIL_OP_KEEP;
1432    case VK_STENCIL_OP_ZERO: return D3D12_STENCIL_OP_ZERO;
1433    case VK_STENCIL_OP_REPLACE: return D3D12_STENCIL_OP_REPLACE;
1434    case VK_STENCIL_OP_INCREMENT_AND_CLAMP: return D3D12_STENCIL_OP_INCR_SAT;
1435    case VK_STENCIL_OP_DECREMENT_AND_CLAMP: return D3D12_STENCIL_OP_DECR_SAT;
1436    case VK_STENCIL_OP_INCREMENT_AND_WRAP: return D3D12_STENCIL_OP_INCR;
1437    case VK_STENCIL_OP_DECREMENT_AND_WRAP: return D3D12_STENCIL_OP_DECR;
1438    case VK_STENCIL_OP_INVERT: return D3D12_STENCIL_OP_INVERT;
1439    default: unreachable("Invalid stencil op");
1440    }
1441 }
1442 
1443 static void
1444 translate_stencil_test(struct dzn_graphics_pipeline *pipeline,
1445                        D3D12_DEPTH_STENCIL_DESC2 *out,
1446                        const VkGraphicsPipelineCreateInfo *in)
1447 {
1448    const VkPipelineDepthStencilStateCreateInfo *in_zsa =
1449       in->pDepthStencilState;
1450 
1451    bool front_test_uses_ref =
1452       !(in->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT) &&
1453       in_zsa->front.compareOp != VK_COMPARE_OP_NEVER &&
1454       in_zsa->front.compareOp != VK_COMPARE_OP_ALWAYS &&
1455       (pipeline->zsa.stencil_test.dynamic_compare_mask ||
1456        in_zsa->front.compareMask != 0);
1457    bool back_test_uses_ref =
1458       !(in->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT) &&
1459       in_zsa->back.compareOp != VK_COMPARE_OP_NEVER &&
1460       in_zsa->back.compareOp != VK_COMPARE_OP_ALWAYS &&
1461       (pipeline->zsa.stencil_test.dynamic_compare_mask ||
1462        in_zsa->back.compareMask != 0);
1463 
1464    if (front_test_uses_ref && pipeline->zsa.stencil_test.dynamic_compare_mask)
1465       pipeline->zsa.stencil_test.front.compare_mask = UINT32_MAX;
1466    else if (front_test_uses_ref)
1467       pipeline->zsa.stencil_test.front.compare_mask = in_zsa->front.compareMask;
1468    else
1469       pipeline->zsa.stencil_test.front.compare_mask = 0;
1470 
1471    if (back_test_uses_ref && pipeline->zsa.stencil_test.dynamic_compare_mask)
1472       pipeline->zsa.stencil_test.back.compare_mask = UINT32_MAX;
1473    else if (back_test_uses_ref)
1474       pipeline->zsa.stencil_test.back.compare_mask = in_zsa->back.compareMask;
1475    else
1476       pipeline->zsa.stencil_test.back.compare_mask = 0;
1477 
1478    bool back_wr_uses_ref =
1479       !(in->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT) &&
1480       ((in_zsa->back.compareOp != VK_COMPARE_OP_ALWAYS &&
1481         in_zsa->back.failOp == VK_STENCIL_OP_REPLACE) ||
1482        (in_zsa->back.compareOp != VK_COMPARE_OP_NEVER &&
1483         (!in_zsa->depthTestEnable || in_zsa->depthCompareOp != VK_COMPARE_OP_NEVER) &&
1484         in_zsa->back.passOp == VK_STENCIL_OP_REPLACE) ||
1485        (in_zsa->depthTestEnable &&
1486         in_zsa->depthCompareOp != VK_COMPARE_OP_ALWAYS &&
1487         in_zsa->back.depthFailOp == VK_STENCIL_OP_REPLACE));
1488    bool front_wr_uses_ref =
1489       !(in->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT) &&
1490       ((in_zsa->front.compareOp != VK_COMPARE_OP_ALWAYS &&
1491         in_zsa->front.failOp == VK_STENCIL_OP_REPLACE) ||
1492        (in_zsa->front.compareOp != VK_COMPARE_OP_NEVER &&
1493         (!in_zsa->depthTestEnable || in_zsa->depthCompareOp != VK_COMPARE_OP_NEVER) &&
1494         in_zsa->front.passOp == VK_STENCIL_OP_REPLACE) ||
1495        (in_zsa->depthTestEnable &&
1496         in_zsa->depthCompareOp != VK_COMPARE_OP_ALWAYS &&
1497         in_zsa->front.depthFailOp == VK_STENCIL_OP_REPLACE));
1498 
1499    pipeline->zsa.stencil_test.front.write_mask =
1500       (pipeline->zsa.stencil_test.dynamic_write_mask ||
1501        (in->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT)) ?
1502       0 : in_zsa->front.writeMask;
1503    pipeline->zsa.stencil_test.back.write_mask =
1504       (pipeline->zsa.stencil_test.dynamic_write_mask ||
1505        (in->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT)) ?
1506       0 : in_zsa->back.writeMask;
1507 
1508    pipeline->zsa.stencil_test.front.uses_ref = front_test_uses_ref || front_wr_uses_ref;
1509    pipeline->zsa.stencil_test.back.uses_ref = back_test_uses_ref || back_wr_uses_ref;
1510 
1511    pipeline->zsa.stencil_test.front.ref =
1512       pipeline->zsa.stencil_test.dynamic_ref ? 0 : in_zsa->front.reference;
1513    pipeline->zsa.stencil_test.back.ref =
1514       pipeline->zsa.stencil_test.dynamic_ref ? 0 : in_zsa->back.reference;
1515 
1516    out->FrontFace.StencilReadMask = pipeline->zsa.stencil_test.front.compare_mask;
1517    out->BackFace.StencilReadMask = pipeline->zsa.stencil_test.back.compare_mask;
1518    out->FrontFace.StencilWriteMask = pipeline->zsa.stencil_test.front.write_mask;
1519    out->BackFace.StencilWriteMask = pipeline->zsa.stencil_test.back.write_mask;
1520 }
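
/*
 * Illustrative example (state assumed): with
 * VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK enabled and a front face using
 * VK_COMPARE_OP_EQUAL on a non-culled pipeline, the template cannot bake the
 * application's mask, so the code above records
 *
 *    pipeline->zsa.stencil_test.front.compare_mask = UINT32_MAX;
 *
 * and the real mask is patched into a pipeline variant at draw time (see
 * update_stencil_state() and dzn_graphics_pipeline_get_state() below). A face
 * that is culled, or whose compareOp is NEVER/ALWAYS, gets a mask of 0 since
 * the reference value can never influence the result there.
 */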
1521 
1522 static void
1523 dzn_graphics_pipeline_translate_zsa(struct dzn_device *device,
1524                                     struct dzn_graphics_pipeline *pipeline,
1525                                     D3D12_PIPELINE_STATE_STREAM_DESC *out,
1526                                     const VkGraphicsPipelineCreateInfo *in)
1527 {
1528    struct dzn_physical_device *pdev =
1529       container_of(device->vk.physical, struct dzn_physical_device, vk);
1530 
1531    const VkPipelineRasterizationStateCreateInfo *in_rast =
1532       in->pRasterizationState;
1533    const VkPipelineDepthStencilStateCreateInfo *in_zsa =
1534       in_rast->rasterizerDiscardEnable ? NULL : in->pDepthStencilState;
1535    const VkPipelineRenderingCreateInfo *ri = vk_find_struct_const(in, PIPELINE_RENDERING_CREATE_INFO);
1536 
1537    if (!in_zsa ||
1538        in_rast->cullMode == VK_CULL_MODE_FRONT_AND_BACK) {
1539       /* Ensure depth is disabled if the rasterizer should be disabled / everything culled */
1540       if (pdev->options14.IndependentFrontAndBackStencilRefMaskSupported) {
1541          d3d12_gfx_pipeline_state_stream_new_desc(out, DEPTH_STENCIL2, D3D12_DEPTH_STENCIL_DESC2, stream_desc);
1542          pipeline->templates.desc_offsets.ds = (uintptr_t)stream_desc - (uintptr_t)out->pPipelineStateSubobjectStream;
1543          memset(stream_desc, 0, sizeof(*stream_desc));
1544       } else {
1545          d3d12_gfx_pipeline_state_stream_new_desc(out, DEPTH_STENCIL1, D3D12_DEPTH_STENCIL_DESC1, stream_desc);
1546          pipeline->templates.desc_offsets.ds = (uintptr_t)stream_desc - (uintptr_t)out->pPipelineStateSubobjectStream;
1547          memset(stream_desc, 0, sizeof(*stream_desc));
1548       }
1549       return;
1550    }
1551 
1552    D3D12_DEPTH_STENCIL_DESC2 desc;
1553    memset(&desc, 0, sizeof(desc));
1554 
1555    bool has_no_depth = ri && ri->depthAttachmentFormat == VK_FORMAT_UNDEFINED;
1556    bool has_no_stencil = ri && ri->stencilAttachmentFormat == VK_FORMAT_UNDEFINED;
1557 
1558    desc.DepthEnable = !has_no_depth &&
1559       (in_zsa->depthTestEnable || in_zsa->depthBoundsTestEnable);
1560    if (desc.DepthEnable) {
1561       desc.DepthWriteMask =
1562          in_zsa->depthWriteEnable ?
1563          D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO;
1564       desc.DepthFunc =
1565          in_zsa->depthTestEnable ?
1566          dzn_translate_compare_op(in_zsa->depthCompareOp) :
1567          D3D12_COMPARISON_FUNC_ALWAYS;
1568    }
1569    pipeline->zsa.depth_bounds.enable = in_zsa->depthBoundsTestEnable;
1570    pipeline->zsa.depth_bounds.min = in_zsa->minDepthBounds;
1571    pipeline->zsa.depth_bounds.max = in_zsa->maxDepthBounds;
1572    desc.DepthBoundsTestEnable = in_zsa->depthBoundsTestEnable;
1573    desc.StencilEnable = in_zsa->stencilTestEnable && !has_no_stencil;
1574    if (desc.StencilEnable) {
1575       desc.FrontFace.StencilFailOp = translate_stencil_op(in_zsa->front.failOp);
1576       desc.FrontFace.StencilDepthFailOp = translate_stencil_op(in_zsa->front.depthFailOp);
1577       desc.FrontFace.StencilPassOp = translate_stencil_op(in_zsa->front.passOp);
1578       desc.FrontFace.StencilFunc = dzn_translate_compare_op(in_zsa->front.compareOp);
1579       desc.BackFace.StencilFailOp = translate_stencil_op(in_zsa->back.failOp);
1580       desc.BackFace.StencilDepthFailOp = translate_stencil_op(in_zsa->back.depthFailOp);
1581       desc.BackFace.StencilPassOp = translate_stencil_op(in_zsa->back.passOp);
1582       desc.BackFace.StencilFunc = dzn_translate_compare_op(in_zsa->back.compareOp);
1583 
1584       pipeline->zsa.stencil_test.enable = true;
1585 
1586       translate_stencil_test(pipeline, &desc, in);
1587    }
1588 
1589    if (pdev->options14.IndependentFrontAndBackStencilRefMaskSupported) {
1590       d3d12_gfx_pipeline_state_stream_new_desc(out, DEPTH_STENCIL2, D3D12_DEPTH_STENCIL_DESC2, stream_desc);
1591       pipeline->templates.desc_offsets.ds =
1592          (uintptr_t)stream_desc - (uintptr_t)out->pPipelineStateSubobjectStream;
1593       *stream_desc = desc;
1594    } else {
1595       d3d12_gfx_pipeline_state_stream_new_desc(out, DEPTH_STENCIL1, D3D12_DEPTH_STENCIL_DESC1, stream_desc);
1596       pipeline->templates.desc_offsets.ds =
1597          (uintptr_t)stream_desc - (uintptr_t)out->pPipelineStateSubobjectStream;
1598 
1599       stream_desc->DepthEnable = desc.DepthEnable;
1600       stream_desc->DepthWriteMask = desc.DepthWriteMask;
1601       stream_desc->DepthFunc = desc.DepthFunc;
1602       stream_desc->DepthBoundsTestEnable = desc.DepthBoundsTestEnable;
1603       stream_desc->StencilEnable = desc.StencilEnable;
1604       stream_desc->FrontFace.StencilFailOp = desc.FrontFace.StencilFailOp;
1605       stream_desc->FrontFace.StencilDepthFailOp = desc.FrontFace.StencilDepthFailOp;
1606       stream_desc->FrontFace.StencilPassOp = desc.FrontFace.StencilPassOp;
1607       stream_desc->FrontFace.StencilFunc = desc.FrontFace.StencilFunc;
1608       stream_desc->BackFace.StencilFailOp = desc.BackFace.StencilFailOp;
1609       stream_desc->BackFace.StencilDepthFailOp = desc.BackFace.StencilDepthFailOp;
1610       stream_desc->BackFace.StencilPassOp = desc.BackFace.StencilPassOp;
1611       stream_desc->BackFace.StencilFunc = desc.BackFace.StencilFunc;
1612 
1613       /* No support for independent front/back, just pick front (if set, else back) */
1614       stream_desc->StencilReadMask = desc.FrontFace.StencilReadMask ? desc.FrontFace.StencilReadMask : desc.BackFace.StencilReadMask;
1615       stream_desc->StencilWriteMask = desc.FrontFace.StencilWriteMask ? desc.FrontFace.StencilWriteMask : desc.BackFace.StencilWriteMask;
1616    }
1617 }
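
/*
 * Illustrative example (masks assumed): on devices without
 * IndependentFrontAndBackStencilRefMaskSupported, the DESC1 path above only
 * has a single read/write mask pair, so with front.compareMask = 0xf0 and
 * back.compareMask = 0x0f the fallback picks
 *
 *    stream_desc->StencilReadMask = 0xf0;   // front wins when non-zero
 *
 * which only matches Vulkan semantics when the application keeps both faces
 * consistent (or one of them is culled or unused).
 */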
1618 
1619 static D3D12_BLEND
1620 translate_blend_factor(VkBlendFactor in, bool is_alpha, bool support_alpha_blend_factor)
1621 {
1622    switch (in) {
1623    case VK_BLEND_FACTOR_ZERO: return D3D12_BLEND_ZERO;
1624    case VK_BLEND_FACTOR_ONE: return D3D12_BLEND_ONE;
1625    case VK_BLEND_FACTOR_SRC_COLOR:
1626       return is_alpha ? D3D12_BLEND_SRC_ALPHA : D3D12_BLEND_SRC_COLOR;
1627    case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
1628       return is_alpha ? D3D12_BLEND_INV_SRC_ALPHA : D3D12_BLEND_INV_SRC_COLOR;
1629    case VK_BLEND_FACTOR_DST_COLOR:
1630       return is_alpha ? D3D12_BLEND_DEST_ALPHA : D3D12_BLEND_DEST_COLOR;
1631    case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
1632       return is_alpha ? D3D12_BLEND_INV_DEST_ALPHA : D3D12_BLEND_INV_DEST_COLOR;
1633    case VK_BLEND_FACTOR_SRC_ALPHA: return D3D12_BLEND_SRC_ALPHA;
1634    case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: return D3D12_BLEND_INV_SRC_ALPHA;
1635    case VK_BLEND_FACTOR_DST_ALPHA: return D3D12_BLEND_DEST_ALPHA;
1636    case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: return D3D12_BLEND_INV_DEST_ALPHA;
1637    case VK_BLEND_FACTOR_CONSTANT_COLOR:
1638       return is_alpha && support_alpha_blend_factor ? D3D12_BLEND_ALPHA_FACTOR : D3D12_BLEND_BLEND_FACTOR;
1639    case VK_BLEND_FACTOR_CONSTANT_ALPHA:
1640       return support_alpha_blend_factor ? D3D12_BLEND_ALPHA_FACTOR : D3D12_BLEND_BLEND_FACTOR;
1641    case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
1642       return is_alpha && support_alpha_blend_factor ? D3D12_BLEND_INV_ALPHA_FACTOR : D3D12_BLEND_INV_BLEND_FACTOR;
1643    case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
1644       return support_alpha_blend_factor ? D3D12_BLEND_INV_ALPHA_FACTOR : D3D12_BLEND_INV_BLEND_FACTOR;
1645    case VK_BLEND_FACTOR_SRC1_COLOR:
1646       return is_alpha ? D3D12_BLEND_SRC1_ALPHA : D3D12_BLEND_SRC1_COLOR;
1647    case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
1648       return is_alpha ? D3D12_BLEND_INV_SRC1_ALPHA : D3D12_BLEND_INV_SRC1_COLOR;
1649    case VK_BLEND_FACTOR_SRC1_ALPHA: return D3D12_BLEND_SRC1_ALPHA;
1650    case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA: return D3D12_BLEND_INV_SRC1_ALPHA;
1651    case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE: return D3D12_BLEND_SRC_ALPHA_SAT;
1652    default: unreachable("Invalid blend factor");
1653    }
1654 }
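
/*
 * Illustrative examples (inputs assumed): D3D12 does not accept color-only
 * factors in the alpha blend slots, so the helper substitutes the alpha
 * equivalent when is_alpha is set:
 *
 *    translate_blend_factor(VK_BLEND_FACTOR_SRC_COLOR, false, false) == D3D12_BLEND_SRC_COLOR
 *    translate_blend_factor(VK_BLEND_FACTOR_SRC_COLOR, true, false)  == D3D12_BLEND_SRC_ALPHA
 *
 * Constant-color factors follow the same pattern, falling back to
 * D3D12_BLEND_BLEND_FACTOR when ALPHA_FACTOR isn't supported, in which case
 * correct rendering relies on the color and alpha blend constants matching.
 */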
1655 
1656 static D3D12_BLEND_OP
1657 translate_blend_op(VkBlendOp in)
1658 {
1659    switch (in) {
1660    case VK_BLEND_OP_ADD: return D3D12_BLEND_OP_ADD;
1661    case VK_BLEND_OP_SUBTRACT: return D3D12_BLEND_OP_SUBTRACT;
1662    case VK_BLEND_OP_REVERSE_SUBTRACT: return D3D12_BLEND_OP_REV_SUBTRACT;
1663    case VK_BLEND_OP_MIN: return D3D12_BLEND_OP_MIN;
1664    case VK_BLEND_OP_MAX: return D3D12_BLEND_OP_MAX;
1665    default: unreachable("Invalid blend op");
1666    }
1667 }
1668 
1669 static D3D12_LOGIC_OP
1670 translate_logic_op(VkLogicOp in)
1671 {
1672    switch (in) {
1673    case VK_LOGIC_OP_CLEAR: return D3D12_LOGIC_OP_CLEAR;
1674    case VK_LOGIC_OP_AND: return D3D12_LOGIC_OP_AND;
1675    case VK_LOGIC_OP_AND_REVERSE: return D3D12_LOGIC_OP_AND_REVERSE;
1676    case VK_LOGIC_OP_COPY: return D3D12_LOGIC_OP_COPY;
1677    case VK_LOGIC_OP_AND_INVERTED: return D3D12_LOGIC_OP_AND_INVERTED;
1678    case VK_LOGIC_OP_NO_OP: return D3D12_LOGIC_OP_NOOP;
1679    case VK_LOGIC_OP_XOR: return D3D12_LOGIC_OP_XOR;
1680    case VK_LOGIC_OP_OR: return D3D12_LOGIC_OP_OR;
1681    case VK_LOGIC_OP_NOR: return D3D12_LOGIC_OP_NOR;
1682    case VK_LOGIC_OP_EQUIVALENT: return D3D12_LOGIC_OP_EQUIV;
1683    case VK_LOGIC_OP_INVERT: return D3D12_LOGIC_OP_INVERT;
1684    case VK_LOGIC_OP_OR_REVERSE: return D3D12_LOGIC_OP_OR_REVERSE;
1685    case VK_LOGIC_OP_COPY_INVERTED: return D3D12_LOGIC_OP_COPY_INVERTED;
1686    case VK_LOGIC_OP_OR_INVERTED: return D3D12_LOGIC_OP_OR_INVERTED;
1687    case VK_LOGIC_OP_NAND: return D3D12_LOGIC_OP_NAND;
1688    case VK_LOGIC_OP_SET: return D3D12_LOGIC_OP_SET;
1689    default: unreachable("Invalid logic op");
1690    }
1691 }
1692 
1693 static void
1694 dzn_graphics_pipeline_translate_blend(struct dzn_graphics_pipeline *pipeline,
1695                                       D3D12_PIPELINE_STATE_STREAM_DESC *out,
1696                                       const VkGraphicsPipelineCreateInfo *in)
1697 {
1698    const VkPipelineRasterizationStateCreateInfo *in_rast =
1699       in->pRasterizationState;
1700    const VkPipelineColorBlendStateCreateInfo *in_blend =
1701       in_rast->rasterizerDiscardEnable ? NULL : in->pColorBlendState;
1702    const VkPipelineMultisampleStateCreateInfo *in_ms =
1703       in_rast->rasterizerDiscardEnable ? NULL : in->pMultisampleState;
1704 
1705    if (!in_blend || !in_ms)
1706       return;
1707 
1708    struct dzn_device *device =
1709       container_of(pipeline->base.base.device, struct dzn_device, vk);
1710    struct dzn_physical_device *pdev =
1711       container_of(device->vk.physical, struct dzn_physical_device, vk);
1712    bool support_alpha_blend_factor = pdev->options13.AlphaBlendFactorSupported;
1713 
1714    d3d12_gfx_pipeline_state_stream_new_desc(out, BLEND, D3D12_BLEND_DESC, desc);
1715    D3D12_LOGIC_OP logicop =
1716       in_blend->logicOpEnable ?
1717       translate_logic_op(in_blend->logicOp) : D3D12_LOGIC_OP_NOOP;
1718    desc->AlphaToCoverageEnable = in_ms->alphaToCoverageEnable;
1719    memcpy(pipeline->blend.constants, in_blend->blendConstants,
1720           sizeof(pipeline->blend.constants));
1721 
1722    for (uint32_t i = 0; i < in_blend->attachmentCount; i++) {
1723       if (i > 0 &&
1724           memcmp(&in_blend->pAttachments[i - 1], &in_blend->pAttachments[i],
1725                  sizeof(*in_blend->pAttachments)) != 0)
1726          desc->IndependentBlendEnable = true;
1727 
1728       desc->RenderTarget[i].BlendEnable =
1729          in_blend->pAttachments[i].blendEnable;
1730       desc->RenderTarget[i].RenderTargetWriteMask =
1731          in_blend->pAttachments[i].colorWriteMask;
1732 
1733       if (in_blend->logicOpEnable) {
1734          desc->RenderTarget[i].LogicOpEnable = true;
1735          desc->RenderTarget[i].LogicOp = logicop;
1736       } else {
1737          desc->RenderTarget[i].SrcBlend =
1738             translate_blend_factor(in_blend->pAttachments[i].srcColorBlendFactor, false, support_alpha_blend_factor);
1739          desc->RenderTarget[i].DestBlend =
1740             translate_blend_factor(in_blend->pAttachments[i].dstColorBlendFactor, false, support_alpha_blend_factor);
1741          desc->RenderTarget[i].BlendOp =
1742             translate_blend_op(in_blend->pAttachments[i].colorBlendOp);
1743          desc->RenderTarget[i].SrcBlendAlpha =
1744             translate_blend_factor(in_blend->pAttachments[i].srcAlphaBlendFactor, true, support_alpha_blend_factor);
1745          desc->RenderTarget[i].DestBlendAlpha =
1746             translate_blend_factor(in_blend->pAttachments[i].dstAlphaBlendFactor, true, support_alpha_blend_factor);
1747          desc->RenderTarget[i].BlendOpAlpha =
1748             translate_blend_op(in_blend->pAttachments[i].alphaBlendOp);
1749       }
1750    }
1751 }
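
/*
 * Illustrative example (attachment state assumed): given
 *
 *    VkPipelineColorBlendAttachmentState atts[2] = {
 *       { .blendEnable = VK_TRUE,
 *         .srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA,
 *         .dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
 *         .colorBlendOp = VK_BLEND_OP_ADD,
 *         .colorWriteMask = 0xf },
 *       { .blendEnable = VK_FALSE, .colorWriteMask = 0xf },
 *    };
 *
 * the two entries differ under memcmp(), so the loop above sets
 * IndependentBlendEnable and each RenderTarget[i] keeps its own state. If all
 * pAttachments[] entries were identical the flag would stay false and D3D12
 * would replicate RenderTarget[0] across every bound target.
 */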
1752 
1753 
1754 static void
1755 dzn_pipeline_init(struct dzn_pipeline *pipeline,
1756                   struct dzn_device *device,
1757                   VkPipelineBindPoint type,
1758                   VkPipelineCreateFlags2KHR flags,
1759                   struct dzn_pipeline_layout *layout,
1760                   D3D12_PIPELINE_STATE_STREAM_DESC *stream_desc)
1761 {
1762    pipeline->type = type;
1763    pipeline->flags = flags;
1764    pipeline->root.sets_param_count = layout->root.sets_param_count;
1765    pipeline->root.sysval_cbv_param_idx = layout->root.sysval_cbv_param_idx;
1766    pipeline->root.push_constant_cbv_param_idx = layout->root.push_constant_cbv_param_idx;
1767    pipeline->root.dynamic_buffer_bindless_param_idx = layout->root.dynamic_buffer_bindless_param_idx;
1768    STATIC_ASSERT(sizeof(pipeline->root.type) == sizeof(layout->root.type));
1769    memcpy(pipeline->root.type, layout->root.type, sizeof(pipeline->root.type));
1770    pipeline->root.sig = layout->root.sig;
1771    ID3D12RootSignature_AddRef(pipeline->root.sig);
1772 
1773    STATIC_ASSERT(sizeof(layout->desc_count) == sizeof(pipeline->desc_count));
1774    memcpy(pipeline->desc_count, layout->desc_count, sizeof(pipeline->desc_count));
1775 
1776    STATIC_ASSERT(sizeof(layout->sets) == sizeof(pipeline->sets));
1777    memcpy(pipeline->sets, layout->sets, sizeof(pipeline->sets));
1778    pipeline->set_count = layout->set_count;
1779    pipeline->dynamic_buffer_count = layout->dynamic_buffer_count;
1780    vk_object_base_init(&device->vk, &pipeline->base, VK_OBJECT_TYPE_PIPELINE);
1781 
1782    ASSERTED uint32_t max_streamsz =
1783       type == VK_PIPELINE_BIND_POINT_GRAPHICS ?
1784       MAX_GFX_PIPELINE_STATE_STREAM_SIZE :
1785       MAX_COMPUTE_PIPELINE_STATE_STREAM_SIZE;
1786 
1787    d3d12_pipeline_state_stream_new_desc_abbrev(stream_desc, max_streamsz, ROOT_SIGNATURE,
1788                                                ID3D12RootSignature *, root_sig);
1789    *root_sig = pipeline->root.sig;
1790 }
1791 
1792 static void
1793 dzn_pipeline_finish(struct dzn_pipeline *pipeline)
1794 {
1795    if (pipeline->state)
1796       ID3D12PipelineState_Release(pipeline->state);
1797    if (pipeline->root.sig)
1798       ID3D12RootSignature_Release(pipeline->root.sig);
1799 
1800    vk_object_base_finish(&pipeline->base);
1801 }
1802 
1803 static void dzn_graphics_pipeline_delete_variant(struct hash_entry *he)
1804 {
1805    struct dzn_graphics_pipeline_variant *variant = he->data;
1806 
1807    if (variant->state)
1808       ID3D12PipelineState_Release(variant->state);
1809 }
1810 
1811 static void dzn_graphics_pipeline_delete_cmd_sig(struct hash_entry *he)
1812 {
1813    ID3D12CommandSignature_Release((ID3D12CommandSignature *)he->data);
1814 }
1815 
1816 static void
1817 dzn_graphics_pipeline_cleanup_nir_shaders(struct dzn_graphics_pipeline *pipeline)
1818 {
1819    for (uint32_t i = 0; i < ARRAY_SIZE(pipeline->templates.shaders); i++) {
1820       ralloc_free(pipeline->templates.shaders[i].nir);
1821       pipeline->templates.shaders[i].nir = NULL;
1822    }
1823 }
1824 
1825 static void
1826 dzn_graphics_pipeline_cleanup_dxil_shaders(struct dzn_graphics_pipeline *pipeline)
1827 {
1828    for (uint32_t i = 0; i < ARRAY_SIZE(pipeline->templates.shaders); i++) {
1829       if (pipeline->templates.shaders[i].bc) {
1830          free((void *)pipeline->templates.shaders[i].bc->pShaderBytecode);
1831          pipeline->templates.shaders[i].bc = NULL;
1832       }
1833    }
1834 }
1835 
1836 static void
1837 dzn_graphics_pipeline_destroy(struct dzn_graphics_pipeline *pipeline,
1838                               const VkAllocationCallbacks *alloc)
1839 {
1840    if (!pipeline)
1841       return;
1842 
1843    _mesa_hash_table_destroy(pipeline->variants,
1844                             dzn_graphics_pipeline_delete_variant);
1845 
1846    dzn_graphics_pipeline_cleanup_nir_shaders(pipeline);
1847    dzn_graphics_pipeline_cleanup_dxil_shaders(pipeline);
1848 
1849    for (uint32_t i = 0; i < ARRAY_SIZE(pipeline->indirect_cmd_sigs); i++) {
1850       if (pipeline->indirect_cmd_sigs[i])
1851          ID3D12CommandSignature_Release(pipeline->indirect_cmd_sigs[i]);
1852    }
1853    _mesa_hash_table_destroy(pipeline->custom_stride_cmd_sigs,
1854                             dzn_graphics_pipeline_delete_cmd_sig);
1855 
1856    dzn_pipeline_finish(&pipeline->base);
1857    vk_free2(&pipeline->base.base.device->alloc, alloc, pipeline);
1858 }
1859 
1860 static VkResult
1861 dzn_graphics_pipeline_create(struct dzn_device *device,
1862                              VkPipelineCache cache,
1863                              const VkGraphicsPipelineCreateInfo *pCreateInfo,
1864                              const VkAllocationCallbacks *pAllocator,
1865                              VkPipeline *out)
1866 {
1867    struct dzn_physical_device *pdev =
1868       container_of(device->vk.physical, struct dzn_physical_device, vk);
1869    const VkPipelineRenderingCreateInfo *ri = (const VkPipelineRenderingCreateInfo *)
1870       vk_find_struct_const(pCreateInfo, PIPELINE_RENDERING_CREATE_INFO);
1871    VK_FROM_HANDLE(vk_pipeline_cache, pcache, cache);
1872    VK_FROM_HANDLE(vk_render_pass, pass, pCreateInfo->renderPass);
1873    VK_FROM_HANDLE(dzn_pipeline_layout, layout, pCreateInfo->layout);
1874    uint32_t color_count = 0;
1875    VkFormat color_fmts[MAX_RTS] = { 0 };
1876    VkFormat zs_fmt = VK_FORMAT_UNDEFINED;
1877    VkResult ret;
1878    HRESULT hres = 0;
1879    D3D12_VIEW_INSTANCE_LOCATION vi_locs[D3D12_MAX_VIEW_INSTANCE_COUNT];
1880 
1881    struct dzn_graphics_pipeline *pipeline =
1882       vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
1883                  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1884    if (!pipeline)
1885       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1886 
1887    D3D12_PIPELINE_STATE_STREAM_DESC *stream_desc = &pipeline->templates.stream_desc;
1888    stream_desc->pPipelineStateSubobjectStream = pipeline->templates.stream_buf;
1889 
1890    dzn_pipeline_init(&pipeline->base, device,
1891                      VK_PIPELINE_BIND_POINT_GRAPHICS,
1892                      vk_graphics_pipeline_create_flags(pCreateInfo),
1893                      layout, stream_desc);
1894    D3D12_INPUT_ELEMENT_DESC attribs[MAX_VERTEX_GENERIC_ATTRIBS] = { 0 };
1895    enum pipe_format vi_conversions[MAX_VERTEX_GENERIC_ATTRIBS] = { 0 };
1896 
1897    ret = dzn_graphics_pipeline_translate_vi(pipeline, pCreateInfo,
1898                                             attribs, vi_conversions);
1899    if (ret != VK_SUCCESS)
1900       goto out;
1901 
1902    d3d12_gfx_pipeline_state_stream_new_desc(stream_desc, FLAGS, D3D12_PIPELINE_STATE_FLAGS, flags);
1903    *flags = D3D12_PIPELINE_STATE_FLAG_NONE;
1904 
1905    if (pCreateInfo->pDynamicState) {
1906       for (uint32_t i = 0; i < pCreateInfo->pDynamicState->dynamicStateCount; i++) {
1907          switch (pCreateInfo->pDynamicState->pDynamicStates[i]) {
1908          case VK_DYNAMIC_STATE_VIEWPORT:
1909             pipeline->vp.dynamic = true;
1910             break;
1911          case VK_DYNAMIC_STATE_SCISSOR:
1912             pipeline->scissor.dynamic = true;
1913             break;
1914          case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
1915             pipeline->zsa.stencil_test.dynamic_ref = true;
1916             break;
1917          case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
1918             pipeline->zsa.stencil_test.dynamic_compare_mask = true;
1919             ret = dzn_graphics_pipeline_prepare_for_variants(device, pipeline);
1920             if (ret)
1921                goto out;
1922             break;
1923          case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
1924             pipeline->zsa.stencil_test.dynamic_write_mask = true;
1925             ret = dzn_graphics_pipeline_prepare_for_variants(device, pipeline);
1926             if (ret)
1927                goto out;
1928             break;
1929          case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
1930             pipeline->blend.dynamic_constants = true;
1931             break;
1932          case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
1933             pipeline->zsa.depth_bounds.dynamic = true;
1934             break;
1935          case VK_DYNAMIC_STATE_DEPTH_BIAS:
1936             pipeline->zsa.dynamic_depth_bias = true;
1937             if (pdev->options16.DynamicDepthBiasSupported) {
1938                *flags |= D3D12_PIPELINE_STATE_FLAG_DYNAMIC_DEPTH_BIAS;
1939             } else {
1940                ret = dzn_graphics_pipeline_prepare_for_variants(device, pipeline);
1941                if (ret)
1942                   goto out;
1943             }
1944             break;
1945          case VK_DYNAMIC_STATE_LINE_WIDTH:
1946             /* Nothing to do since we just support lineWidth = 1. */
1947             break;
1948          default: unreachable("Unsupported dynamic state");
1949          }
1950       }
1951    }
1952 
1953    ret = dzn_graphics_pipeline_translate_ia(device, pipeline, stream_desc, pCreateInfo);
1954    if (ret)
1955       goto out;
1956 
1957    dzn_graphics_pipeline_translate_rast(device, pipeline, stream_desc, pCreateInfo);
1958    dzn_graphics_pipeline_translate_ms(pipeline, stream_desc, pCreateInfo);
1959    dzn_graphics_pipeline_translate_zsa(device, pipeline, stream_desc, pCreateInfo);
1960    dzn_graphics_pipeline_translate_blend(pipeline, stream_desc, pCreateInfo);
1961 
1962    unsigned view_mask = 0;
1963    if (pass) {
1964       const struct vk_subpass *subpass = &pass->subpasses[pCreateInfo->subpass];
1965       color_count = subpass->color_count;
1966       for (uint32_t i = 0; i < subpass->color_count; i++) {
1967          uint32_t idx = subpass->color_attachments[i].attachment;
1968 
1969          if (idx == VK_ATTACHMENT_UNUSED) continue;
1970 
1971          const struct vk_render_pass_attachment *attachment =
1972             &pass->attachments[idx];
1973 
1974          color_fmts[i] = attachment->format;
1975       }
1976 
1977       if (subpass->depth_stencil_attachment &&
1978           subpass->depth_stencil_attachment->attachment != VK_ATTACHMENT_UNUSED) {
1979          const struct vk_render_pass_attachment *attachment =
1980             &pass->attachments[subpass->depth_stencil_attachment->attachment];
1981 
1982          zs_fmt = attachment->format;
1983       }
1984 
1985       view_mask = subpass->view_mask;
1986    } else if (ri) {
1987       color_count = ri->colorAttachmentCount;
1988       memcpy(color_fmts, ri->pColorAttachmentFormats,
1989              sizeof(color_fmts[0]) * color_count);
1990       if (ri->depthAttachmentFormat != VK_FORMAT_UNDEFINED)
1991          zs_fmt = ri->depthAttachmentFormat;
1992       else if (ri->stencilAttachmentFormat != VK_FORMAT_UNDEFINED)
1993          zs_fmt = ri->stencilAttachmentFormat;
1994 
1995       view_mask = ri->viewMask;
1996    }
1997 
1998    if (color_count > 0) {
1999       d3d12_gfx_pipeline_state_stream_new_desc(stream_desc, RENDER_TARGET_FORMATS, struct D3D12_RT_FORMAT_ARRAY, rts);
2000       rts->NumRenderTargets = color_count;
2001       for (uint32_t i = 0; i < color_count; i++) {
2002          rts->RTFormats[i] =
2003             dzn_image_get_dxgi_format(pdev, color_fmts[i],
2004                                       VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
2005                                       VK_IMAGE_ASPECT_COLOR_BIT);
2006       }
2007    }
2008 
2009    if (zs_fmt != VK_FORMAT_UNDEFINED) {
2010       d3d12_gfx_pipeline_state_stream_new_desc(stream_desc, DEPTH_STENCIL_FORMAT, DXGI_FORMAT, ds_fmt);
2011       *ds_fmt =
2012          dzn_image_get_dxgi_format(pdev, zs_fmt,
2013                                    VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
2014                                    VK_IMAGE_ASPECT_DEPTH_BIT |
2015                                    VK_IMAGE_ASPECT_STENCIL_BIT);
2016       pipeline->zsa.ds_fmt = *ds_fmt;
2017    }
2018 
2019    pipeline->multiview.view_mask = MAX2(view_mask, 1);
2020    if (view_mask != 0 && /* Is multiview */
2021        view_mask != 1 && /* Is non-trivially multiview */
2022        (view_mask & ~((1 << D3D12_MAX_VIEW_INSTANCE_COUNT) - 1)) == 0 && /* Uses only views 0 thru 3 */
2023        pdev->options3.ViewInstancingTier > D3D12_VIEW_INSTANCING_TIER_NOT_SUPPORTED /* Actually supported */) {
2024       d3d12_gfx_pipeline_state_stream_new_desc(stream_desc, VIEW_INSTANCING, D3D12_VIEW_INSTANCING_DESC, vi);
2025       vi->pViewInstanceLocations = vi_locs;
2026       for (uint32_t i = 0; i < D3D12_MAX_VIEW_INSTANCE_COUNT; ++i) {
2027          vi_locs[i].RenderTargetArrayIndex = i;
2028          vi_locs[i].ViewportArrayIndex = 0;
2029          if (view_mask & (1 << i))
2030             vi->ViewInstanceCount = i + 1;
2031       }
2032       vi->Flags = D3D12_VIEW_INSTANCING_FLAG_ENABLE_VIEW_INSTANCE_MASKING;
2033       pipeline->multiview.native_view_instancing = true;
2034    }
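
   /*
    * Illustrative example (values assumed): a viewMask of 0b0101 with
    * ViewInstancingTier >= TIER_1 yields ViewInstanceCount = 3 (highest set
    * bit + 1), per-instance RenderTargetArrayIndex values of 0/1/2 and
    * view-instance masking enabled so the cleared view 1 is skipped at draw
    * time. If the tier is NOT_SUPPORTED, or the mask references views beyond
    * D3D12_MAX_VIEW_INSTANCE_COUNT, the subobject is omitted,
    * native_view_instancing stays false, and multiview has to be emulated
    * outside of view instancing (the view_mask recorded above is kept around
    * for that purpose).
    */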
2035 
2036    ret = dzn_graphics_pipeline_compile_shaders(device, pipeline, pcache,
2037                                                layout, stream_desc,
2038                                                attribs, vi_conversions,
2039                                                pCreateInfo);
2040    if (ret != VK_SUCCESS)
2041       goto out;
2042 
2043    /* If we have no position output from a pre-rasterizer stage, we need to make sure that
2044     * depth is disabled, to fully disable the rasterizer. We can only know this after compiling
2045     * or loading the shaders.
2046     */
2047    if (pipeline->rast_disabled_from_missing_position) {
2048       if (pdev->options14.IndependentFrontAndBackStencilRefMaskSupported) {
2049          D3D12_DEPTH_STENCIL_DESC2 *ds = dzn_graphics_pipeline_get_desc(pipeline, pipeline->templates.stream_buf, ds);
2050          if (ds)
2051             ds->DepthEnable = ds->StencilEnable = false;
2052       } else {
2053          D3D12_DEPTH_STENCIL_DESC1 *ds = dzn_graphics_pipeline_get_desc(pipeline, pipeline->templates.stream_buf, ds);
2054          if (ds)
2055             ds->DepthEnable = ds->StencilEnable = false;
2056       }
2057    }
2058 
2059    if (!pipeline->variants) {
2060       hres = ID3D12Device4_CreatePipelineState(device->dev, stream_desc,
2061                                                &IID_ID3D12PipelineState,
2062                                                (void **)&pipeline->base.state);
2063       if (FAILED(hres)) {
2064          ret = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2065          goto out;
2066       }
2067 
2068       dzn_graphics_pipeline_cleanup_dxil_shaders(pipeline);
2069    }
2070 
2071    dzn_graphics_pipeline_cleanup_nir_shaders(pipeline);
2072    ret = VK_SUCCESS;
2073 
2074 out:
2075    if (ret != VK_SUCCESS)
2076       dzn_graphics_pipeline_destroy(pipeline, pAllocator);
2077    else
2078       *out = dzn_graphics_pipeline_to_handle(pipeline);
2079 
2080    return ret;
2081 }
2082 
2083 static void
2084 mask_key_for_stencil_state(struct dzn_physical_device *pdev,
2085                            struct dzn_graphics_pipeline *pipeline,
2086                            const struct dzn_graphics_pipeline_variant_key *key,
2087                            struct dzn_graphics_pipeline_variant_key *masked_key)
2088 {
2089    if (pdev->options14.IndependentFrontAndBackStencilRefMaskSupported) {
2090       const D3D12_DEPTH_STENCIL_DESC2 *ds_templ =
2091          dzn_graphics_pipeline_get_desc_template(pipeline, ds);
2092       if (ds_templ && ds_templ->StencilEnable) {
2093          if (ds_templ->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
2094              ds_templ->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS)
2095             masked_key->stencil_test.front.compare_mask = key->stencil_test.front.compare_mask;
2096          if (ds_templ->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
2097              ds_templ->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS)
2098             masked_key->stencil_test.back.compare_mask = key->stencil_test.back.compare_mask;
2099          if (pipeline->zsa.stencil_test.dynamic_write_mask) {
2100             masked_key->stencil_test.front.write_mask = key->stencil_test.front.write_mask;
2101             masked_key->stencil_test.back.write_mask = key->stencil_test.back.write_mask;
2102          }
2103       }
2104    } else {
2105       const D3D12_DEPTH_STENCIL_DESC1 *ds_templ =
2106          dzn_graphics_pipeline_get_desc_template(pipeline, ds);
2107       if (ds_templ && ds_templ->StencilEnable) {
2108          if (ds_templ->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
2109              ds_templ->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS)
2110             masked_key->stencil_test.front.compare_mask = key->stencil_test.front.compare_mask;
2111          if (ds_templ->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
2112              ds_templ->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS)
2113             masked_key->stencil_test.back.compare_mask = key->stencil_test.back.compare_mask;
2114          if (pipeline->zsa.stencil_test.dynamic_write_mask) {
2115             masked_key->stencil_test.front.write_mask = key->stencil_test.front.write_mask;
2116             masked_key->stencil_test.back.write_mask = key->stencil_test.back.write_mask;
2117          }
2118       }
2119    }
2120 }
2121 
2122 static void
2123 update_stencil_state(struct dzn_physical_device *pdev,
2124                      struct dzn_graphics_pipeline *pipeline,
2125                      uintptr_t *stream_buf,
2126                      const struct dzn_graphics_pipeline_variant_key *masked_key)
2127 {
2128    if (pdev->options14.IndependentFrontAndBackStencilRefMaskSupported) {
2129       D3D12_DEPTH_STENCIL_DESC2 *ds =
2130          dzn_graphics_pipeline_get_desc(pipeline, stream_buf, ds);
2131       if (ds && ds->StencilEnable) {
2132          if (pipeline->zsa.stencil_test.dynamic_compare_mask) {
2133             if (ds->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
2134                   ds->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS) {
2135                ds->FrontFace.StencilReadMask = masked_key->stencil_test.front.compare_mask;
2136             }
2137 
2138             if (ds->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
2139                   ds->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS) {
2140                ds->BackFace.StencilReadMask = masked_key->stencil_test.back.compare_mask;
2141             }
2142          }
2143 
2144          if (pipeline->zsa.stencil_test.dynamic_write_mask) {
2145             ds->FrontFace.StencilWriteMask = masked_key->stencil_test.front.write_mask;
2146             ds->BackFace.StencilWriteMask = masked_key->stencil_test.back.write_mask;
2147          }
2148       }
2149    } else {
2150       D3D12_DEPTH_STENCIL_DESC1 *ds =
2151          dzn_graphics_pipeline_get_desc(pipeline, stream_buf, ds);
2152       if (ds && ds->StencilEnable) {
2153          if (pipeline->zsa.stencil_test.dynamic_compare_mask) {
2154             if (ds->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
2155                   ds->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS) {
2156                ds->StencilReadMask = masked_key->stencil_test.front.compare_mask;
2157             }
2158 
2159             if (ds->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
2160                   ds->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS) {
2161                ds->StencilReadMask = masked_key->stencil_test.back.compare_mask;
2162             }
2163 
2164             if (ds->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
2165                   ds->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS &&
2166                   ds->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
2167                   ds->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS)
2168                assert(masked_key->stencil_test.front.compare_mask == masked_key->stencil_test.back.compare_mask);
2169          }
2170 
2171          if (pipeline->zsa.stencil_test.dynamic_write_mask) {
2172             assert(!masked_key->stencil_test.front.write_mask ||
2173                      !masked_key->stencil_test.back.write_mask ||
2174                      masked_key->stencil_test.front.write_mask == masked_key->stencil_test.back.write_mask);
2175             ds->StencilWriteMask =
2176                masked_key->stencil_test.front.write_mask |
2177                masked_key->stencil_test.back.write_mask;
2178          }
2179       }
2180    }
2181 }
2182 
2183 ID3D12PipelineState *
2184 dzn_graphics_pipeline_get_state(struct dzn_graphics_pipeline *pipeline,
2185                                 const struct dzn_graphics_pipeline_variant_key *key)
2186 {
2187    if (!pipeline->variants)
2188       return pipeline->base.state;
2189 
2190    struct dzn_device *device =
2191       container_of(pipeline->base.base.device, struct dzn_device, vk);
2192    struct dzn_physical_device *pdev =
2193       container_of(device->vk.physical, struct dzn_physical_device, vk);
2194 
2195    struct dzn_graphics_pipeline_variant_key masked_key = { 0 };
2196 
2197    if (dzn_graphics_pipeline_get_desc_template(pipeline, ib_strip_cut))
2198       masked_key.ib_strip_cut = key->ib_strip_cut;
2199 
2200    if (!pdev->options16.DynamicDepthBiasSupported &&
2201        dzn_graphics_pipeline_get_desc_template(pipeline, rast) &&
2202        pipeline->zsa.dynamic_depth_bias)
2203       masked_key.depth_bias = key->depth_bias;
2204 
2205    mask_key_for_stencil_state(pdev, pipeline, key, &masked_key);
2206 
2207    struct hash_entry *he =
2208       _mesa_hash_table_search(pipeline->variants, &masked_key);
2209 
2210    struct dzn_graphics_pipeline_variant *variant;
2211 
2212    if (!he) {
2213       variant = rzalloc(pipeline->variants, struct dzn_graphics_pipeline_variant);
2214       variant->key = masked_key;
2215 
2216       uintptr_t stream_buf[MAX_GFX_PIPELINE_STATE_STREAM_SIZE / sizeof(uintptr_t)];
2217       D3D12_PIPELINE_STATE_STREAM_DESC stream_desc = {
2218          .SizeInBytes = pipeline->templates.stream_desc.SizeInBytes,
2219          .pPipelineStateSubobjectStream = stream_buf,
2220       };
2221 
2222       memcpy(stream_buf, pipeline->templates.stream_buf, stream_desc.SizeInBytes);
2223 
2224       D3D12_INDEX_BUFFER_STRIP_CUT_VALUE *ib_strip_cut =
2225          dzn_graphics_pipeline_get_desc(pipeline, stream_buf, ib_strip_cut);
2226       if (ib_strip_cut)
2227          *ib_strip_cut = masked_key.ib_strip_cut;
2228 
2229       D3D12_RASTERIZER_DESC *rast =
2230          dzn_graphics_pipeline_get_desc(pipeline, stream_buf, rast);
2231       if (!pdev->options16.DynamicDepthBiasSupported && rast && pipeline->zsa.dynamic_depth_bias) {
2232          rast->DepthBias = translate_depth_bias(masked_key.depth_bias.constant_factor);
2233          rast->DepthBiasClamp = masked_key.depth_bias.clamp;
2234          rast->SlopeScaledDepthBias = masked_key.depth_bias.slope_factor;
2235       }
2236 
2237       update_stencil_state(pdev, pipeline, stream_buf, &masked_key);
2238 
2239       ASSERTED HRESULT hres = ID3D12Device4_CreatePipelineState(device->dev, &stream_desc,
2240                                                                 &IID_ID3D12PipelineState,
2241                                                                 (void**)(&variant->state));
2242       assert(!FAILED(hres));
2243       he = _mesa_hash_table_insert(pipeline->variants, &variant->key, variant);
2244       assert(he);
2245    } else {
2246       variant = he->data;
2247    }
2248 
2249    if (variant->state)
2250       ID3D12PipelineState_AddRef(variant->state);
2251 
2252    if (pipeline->base.state)
2253       ID3D12PipelineState_Release(pipeline->base.state);
2254 
2255    pipeline->base.state = variant->state;
2256    return variant->state;
2257 }
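
/*
 * Illustrative usage sketch (the call site and variable names are assumed;
 * the key fields come from the code above): at draw time the command buffer
 * only fills in the dynamic bits of the key and asks for a matching variant:
 *
 *    struct dzn_graphics_pipeline_variant_key key = { 0 };
 *    key.ib_strip_cut = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF;
 *    key.stencil_test.front.compare_mask = dyn_front_compare_mask;
 *    key.stencil_test.back.compare_mask = dyn_back_compare_mask;
 *
 *    ID3D12PipelineState *state =
 *       dzn_graphics_pipeline_get_state(pipeline, &key);
 *    ID3D12GraphicsCommandList_SetPipelineState(cmdlist, state);
 *
 * Fields that the pipeline does not actually consume are masked away before
 * hashing, so keys differing only in unused state map to the same cached
 * variant.
 */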
2258 
2259 #define DZN_INDIRECT_CMD_SIG_MAX_ARGS 4
2260 
2261 ID3D12CommandSignature *
2262 dzn_graphics_pipeline_get_indirect_cmd_sig(struct dzn_graphics_pipeline *pipeline,
2263                                            struct dzn_indirect_draw_cmd_sig_key key)
2264 {
2265    assert(key.value < DZN_NUM_INDIRECT_DRAW_CMD_SIGS);
2266 
2267    struct dzn_device *device = container_of(pipeline->base.base.device, struct dzn_device, vk);
2268 
2269    uint32_t cmd_arg_count = 0;
2270    D3D12_INDIRECT_ARGUMENT_DESC cmd_args[DZN_INDIRECT_CMD_SIG_MAX_ARGS];
2271    uint32_t stride = 0;
2272 
2273    if (key.triangle_fan) {
2274       assert(key.indexed);
2275       cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC) {
2276          .Type = D3D12_INDIRECT_ARGUMENT_TYPE_INDEX_BUFFER_VIEW,
2277       };
2278       stride += sizeof(D3D12_INDEX_BUFFER_VIEW);
2279    }
2280 
2281    if (key.draw_params) {
2282       cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC){
2283          .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT,
2284          .Constant = {
2285             .RootParameterIndex = pipeline->base.root.sysval_cbv_param_idx,
2286             .DestOffsetIn32BitValues = offsetof(struct dxil_spirv_vertex_runtime_data, first_vertex) / 4,
2287             .Num32BitValuesToSet = 2,
2288          },
2289       };
2290       stride += sizeof(uint32_t) * 2;
2291    }
2292 
2293    if (key.draw_id) {
2294       struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk);
2295       if (pdev->options21.ExecuteIndirectTier >= D3D12_EXECUTE_INDIRECT_TIER_1_1) {
2296          cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC){
2297             .Type = D3D12_INDIRECT_ARGUMENT_TYPE_INCREMENTING_CONSTANT,
2298             .IncrementingConstant = {
2299                .RootParameterIndex = pipeline->base.root.sysval_cbv_param_idx,
2300                .DestOffsetIn32BitValues = offsetof(struct dxil_spirv_vertex_runtime_data, draw_id) / 4,
2301             },
2302          };
2303       } else {
2304          cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC){
2305             .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT,
2306             .Constant = {
2307                .RootParameterIndex = pipeline->base.root.sysval_cbv_param_idx,
2308                .DestOffsetIn32BitValues = offsetof(struct dxil_spirv_vertex_runtime_data, draw_id) / 4,
2309                .Num32BitValuesToSet = 1,
2310             },
2311          };
2312          stride += sizeof(uint32_t);
2313       }
2314    }
2315 
2316    cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC) {
2317       .Type = key.indexed ?
2318               D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED :
2319               D3D12_INDIRECT_ARGUMENT_TYPE_DRAW,
2320    };
2321    stride += key.indexed ? sizeof(D3D12_DRAW_INDEXED_ARGUMENTS) :
2322                            sizeof(D3D12_DRAW_ARGUMENTS);
2323 
2324    assert(cmd_arg_count <= ARRAY_SIZE(cmd_args));
2325    assert(offsetof(struct dxil_spirv_vertex_runtime_data, first_vertex) == 0);
2326    ID3D12CommandSignature *cmdsig = NULL;
2327 
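   /* Default-stride signatures live in a fixed per-key array; custom
    * strides go through a lazily-created hash table.
    */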
2328    if (key.custom_stride == 0 || key.custom_stride == stride)
2329       cmdsig = pipeline->indirect_cmd_sigs[key.value];
2330    else {
2331       if (!pipeline->custom_stride_cmd_sigs) {
2332          pipeline->custom_stride_cmd_sigs =
2333             _mesa_hash_table_create(NULL, gfx_pipeline_cmd_signature_key_hash, gfx_pipeline_cmd_signature_key_equal);
2334       }
2335       struct hash_entry *entry = _mesa_hash_table_search(pipeline->custom_stride_cmd_sigs, &key);
2336       if (entry)
2337          cmdsig = entry->data;
2338    }
2339 
2340    if (cmdsig)
2341       return cmdsig;
2342 
2343    D3D12_COMMAND_SIGNATURE_DESC cmd_sig_desc = {
2344       .ByteStride = key.custom_stride ? key.custom_stride : stride,
2345       .NumArgumentDescs = cmd_arg_count,
2346       .pArgumentDescs = cmd_args,
2347    };
2348    /* A root signature should be specified iff root params are changing */
2349    ID3D12RootSignature *root_sig = key.draw_id || key.draw_params ?
2350       pipeline->base.root.sig : NULL;
2351    HRESULT hres =
2352       ID3D12Device1_CreateCommandSignature(device->dev, &cmd_sig_desc,
2353                                            root_sig,
2354                                            &IID_ID3D12CommandSignature,
2355                                            (void **)&cmdsig);
2356    if (FAILED(hres))
2357       return NULL;
2358 
2359    if (key.custom_stride == 0 || key.custom_stride == stride)
2360       pipeline->indirect_cmd_sigs[key.value] = cmdsig;
2361    else
2362       _mesa_hash_table_insert(pipeline->custom_stride_cmd_sigs, &key, cmdsig);
2363    return cmdsig;
2364 }
2365 
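/* vkCreateGraphicsPipelines: create pipelines one by one, stop at the first
 * hard failure, and leave the remaining handles as VK_NULL_HANDLE.
 */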
2366 VKAPI_ATTR VkResult VKAPI_CALL
2367 dzn_CreateGraphicsPipelines(VkDevice dev,
2368                             VkPipelineCache pipelineCache,
2369                             uint32_t count,
2370                             const VkGraphicsPipelineCreateInfo *pCreateInfos,
2371                             const VkAllocationCallbacks *pAllocator,
2372                             VkPipeline *pPipelines)
2373 {
2374    VK_FROM_HANDLE(dzn_device, device, dev);
2375    VkResult result = VK_SUCCESS;
2376 
2377    unsigned i;
2378    for (i = 0; i < count; i++) {
2379       result = dzn_graphics_pipeline_create(device,
2380                                             pipelineCache,
2381                                             &pCreateInfos[i],
2382                                             pAllocator,
2383                                             &pPipelines[i]);
2384       if (result != VK_SUCCESS) {
2385          pPipelines[i] = VK_NULL_HANDLE;
2386 
2387          /* Bail out on the first error != VK_PIPELINE_COMPILE_REQUIRED as it
2388           * is not obvious what error should be reported upon two different failures.
2389           */
2390          if (result != VK_PIPELINE_COMPILE_REQUIRED)
2391             break;
2392 
2393          if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
2394             break;
2395       }
2396    }
2397 
2398    for (; i < count; i++)
2399       pPipelines[i] = VK_NULL_HANDLE;
2400 
2401    return result;
2402 }
2403 
2404 static void
2405 dzn_compute_pipeline_destroy(struct dzn_compute_pipeline *pipeline,
2406                              const VkAllocationCallbacks *alloc)
2407 {
2408    if (!pipeline)
2409       return;
2410 
2411    if (pipeline->indirect_cmd_sig)
2412       ID3D12CommandSignature_Release(pipeline->indirect_cmd_sig);
2413 
2414    dzn_pipeline_finish(&pipeline->base);
2415    vk_free2(&pipeline->base.base.device->alloc, alloc, pipeline);
2416 }
2417 
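/* A cached compute pipeline only stores the SHA1 of its DXIL shader; the
 * shader bytecode itself is fetched from a separate cache entry keyed by
 * that hash and appended to the pipeline state stream on a hit.
 */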
2418 static VkResult
2419 dzn_pipeline_cache_lookup_compute_pipeline(struct vk_pipeline_cache *cache,
2420                                            uint8_t *pipeline_hash,
2421                                            D3D12_PIPELINE_STATE_STREAM_DESC *stream_desc,
2422                                            D3D12_SHADER_BYTECODE *dxil,
2423                                            bool *cache_hit)
2424 {
2425    *cache_hit = false;
2426 
2427    if (!cache)
2428       return VK_SUCCESS;
2429 
2430    struct vk_pipeline_cache_object *cache_obj = NULL;
2431 
2432    cache_obj =
2433       vk_pipeline_cache_lookup_object(cache, pipeline_hash, SHA1_DIGEST_LENGTH,
2434                                       &dzn_cached_blob_ops,
2435                                       NULL);
2436    if (!cache_obj)
2437       return VK_SUCCESS;
2438 
2439    struct dzn_cached_blob *cached_blob =
2440       container_of(cache_obj, struct dzn_cached_blob, base);
2441 
2442    assert(cached_blob->size == SHA1_DIGEST_LENGTH);
2443 
2444    const uint8_t *dxil_hash = cached_blob->data;
2445    gl_shader_stage stage;
2446 
2447    VkResult ret =
2448       dzn_pipeline_cache_lookup_dxil_shader(cache, dxil_hash, &stage, dxil);
2449 
2450    if (ret != VK_SUCCESS || stage == MESA_SHADER_NONE)
2451       goto out;
2452 
2453    assert(stage == MESA_SHADER_COMPUTE);
2454 
2455    d3d12_compute_pipeline_state_stream_new_desc(stream_desc, CS, D3D12_SHADER_BYTECODE, slot);
2456    *slot = *dxil;
2457    *cache_hit = true;
2458 
2459 out:
2460    vk_pipeline_cache_object_unref(cache->base.device, cache_obj);
2461    return ret;
2462 }
2463 
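/* Record a pipeline-hash -> dxil-hash mapping in the pipeline cache. */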
2464 static void
2465 dzn_pipeline_cache_add_compute_pipeline(struct vk_pipeline_cache *cache,
2466                                         uint8_t *pipeline_hash,
2467                                         uint8_t *dxil_hash)
2468 {
2469    struct vk_pipeline_cache_object *cache_obj =
2470       dzn_cached_blob_create(cache->base.device, pipeline_hash, NULL, SHA1_DIGEST_LENGTH);
2471    if (!cache_obj)
2472       return;
2473 
2474    struct dzn_cached_blob *cached_blob =
2475       container_of(cache_obj, struct dzn_cached_blob, base);
2476 
2477    memcpy((void *)cached_blob->data, dxil_hash, SHA1_DIGEST_LENGTH);
2478 
2479    cache_obj = vk_pipeline_cache_add_object(cache, cache_obj);
2480    vk_pipeline_cache_object_unref(cache->base.device, cache_obj);
2481 }
2482 
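/* Compile the compute shader, trying the cache at each level: a full
 * pipeline-hash hit short-circuits everything, a DXIL-hash hit skips the
 * NIR->DXIL step, and a miss compiles from SPIR-V and populates the cache.
 */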
2483 static VkResult
2484 dzn_compute_pipeline_compile_shader(struct dzn_device *device,
2485                                     struct dzn_compute_pipeline *pipeline,
2486                                     struct vk_pipeline_cache *cache,
2487                                     const struct dzn_pipeline_layout *layout,
2488                                     D3D12_PIPELINE_STATE_STREAM_DESC *stream_desc,
2489                                     D3D12_SHADER_BYTECODE *shader,
2490                                     const VkComputePipelineCreateInfo *info)
2491 {
2492    struct dzn_physical_device *pdev =
2493       container_of(device->vk.physical, struct dzn_physical_device, vk);
2494    uint8_t spirv_hash[SHA1_DIGEST_LENGTH], pipeline_hash[SHA1_DIGEST_LENGTH], nir_hash[SHA1_DIGEST_LENGTH];
2495    VkResult ret = VK_SUCCESS;
2496    nir_shader *nir = NULL;
2497 
2498    if (cache) {
2499       struct mesa_sha1 pipeline_hash_ctx;
2500 
2501       _mesa_sha1_init(&pipeline_hash_ctx);
2502       vk_pipeline_hash_shader_stage(pipeline->base.flags, &info->stage, NULL, spirv_hash);
2503       _mesa_sha1_update(&pipeline_hash_ctx, &device->bindless, sizeof(device->bindless));
2504       _mesa_sha1_update(&pipeline_hash_ctx, spirv_hash, sizeof(spirv_hash));
2505       _mesa_sha1_update(&pipeline_hash_ctx, layout->stages[MESA_SHADER_COMPUTE].hash,
2506                         sizeof(layout->stages[MESA_SHADER_COMPUTE].hash));
2507       _mesa_sha1_final(&pipeline_hash_ctx, pipeline_hash);
2508 
2509       bool cache_hit = false;
2510       ret = dzn_pipeline_cache_lookup_compute_pipeline(cache, pipeline_hash,
2511                                                        stream_desc, shader,
2512                                                        &cache_hit);
2513       if (ret != VK_SUCCESS || cache_hit)
2514          goto out;
2515    }
2516 
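   /* Pipeline-level lookup missed: hash the inputs of the SPIR-V->NIR
    * translation so the intermediate NIR can still come from the cache.
    */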
2517    if (cache) {
2518       struct mesa_sha1 nir_hash_ctx;
2519       _mesa_sha1_init(&nir_hash_ctx);
2520       _mesa_sha1_update(&nir_hash_ctx, &device->bindless, sizeof(device->bindless));
2521       _mesa_sha1_update(&nir_hash_ctx, spirv_hash, sizeof(spirv_hash));
2522       _mesa_sha1_final(&nir_hash_ctx, nir_hash);
2523    }
2524    nir_shader_compiler_options nir_opts;
2525    const unsigned supported_bit_sizes = 16 | 32 | 64;
2526    dxil_get_nir_compiler_options(&nir_opts, dzn_get_shader_model(pdev), supported_bit_sizes, supported_bit_sizes);
2527    struct dzn_nir_options options = {
2528       .nir_opts = &nir_opts,
2529    };
2530    struct dxil_spirv_metadata metadata = { 0 };
2531    ret = dzn_pipeline_get_nir_shader(device, layout, cache, nir_hash,
2532                                      pipeline->base.flags, &info->stage,
2533                                      MESA_SHADER_COMPUTE,
2534                                      &options, &metadata, &nir);
2535    if (ret != VK_SUCCESS)
2536       return ret;
2537 
2538    uint8_t bindings_hash[SHA1_DIGEST_LENGTH], dxil_hash[SHA1_DIGEST_LENGTH];
2539 
2540    NIR_PASS_V(nir, adjust_var_bindings, device, layout, cache ? bindings_hash : NULL);
2541 
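   /* Hash everything feeding the NIR->DXIL translation and check for a
    * cached DXIL blob before compiling.
    */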
2542    if (cache) {
2543       struct mesa_sha1 dxil_hash_ctx;
2544 
2545       _mesa_sha1_init(&dxil_hash_ctx);
2546       _mesa_sha1_update(&dxil_hash_ctx, nir_hash, sizeof(nir_hash));
2547       _mesa_sha1_update(&dxil_hash_ctx, spirv_hash, sizeof(spirv_hash));
2548       _mesa_sha1_update(&dxil_hash_ctx, bindings_hash, sizeof(bindings_hash));
2549       _mesa_sha1_final(&dxil_hash_ctx, dxil_hash);
2550 
2551       gl_shader_stage stage;
2552 
2553       ret = dzn_pipeline_cache_lookup_dxil_shader(cache, dxil_hash, &stage, shader);
2554       if (ret != VK_SUCCESS)
2555          goto out;
2556 
2557       if (stage != MESA_SHADER_NONE) {
2558          assert(stage == MESA_SHADER_COMPUTE);
2559          d3d12_compute_pipeline_state_stream_new_desc(stream_desc, CS, D3D12_SHADER_BYTECODE, cs);
2560          *cs = *shader;
2561          dzn_pipeline_cache_add_compute_pipeline(cache, pipeline_hash, dxil_hash);
2562          goto out;
2563       }
2564    }
2565 
2566    ret = dzn_pipeline_compile_shader(device, nir, 0, shader);
2567    if (ret != VK_SUCCESS)
2568       goto out;
2569 
2570    d3d12_compute_pipeline_state_stream_new_desc(stream_desc, CS, D3D12_SHADER_BYTECODE, cs);
2571    *cs = *shader;
2572 
2573    if (cache) {
2574       dzn_pipeline_cache_add_dxil_shader(cache, dxil_hash, MESA_SHADER_COMPUTE, shader);
2575       dzn_pipeline_cache_add_compute_pipeline(cache, pipeline_hash, dxil_hash);
2576    }
2577 
2578 out:
2579    ralloc_free(nir);
2580    return ret;
2581 }
2582 
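/* Build a compute pipeline: initialize the D3D12 pipeline state stream,
 * add the CS bytecode produced by dzn_compute_pipeline_compile_shader(),
 * then create the ID3D12PipelineState.
 */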
2583 static VkResult
2584 dzn_compute_pipeline_create(struct dzn_device *device,
2585                             VkPipelineCache cache,
2586                             const VkComputePipelineCreateInfo *pCreateInfo,
2587                             const VkAllocationCallbacks *pAllocator,
2588                             VkPipeline *out)
2589 {
2590    VK_FROM_HANDLE(dzn_pipeline_layout, layout, pCreateInfo->layout);
2591    VK_FROM_HANDLE(vk_pipeline_cache, pcache, cache);
2592 
2593    struct dzn_compute_pipeline *pipeline =
2594       vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
2595                  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2596    if (!pipeline)
2597       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2598 
2599    uintptr_t state_buf[MAX_COMPUTE_PIPELINE_STATE_STREAM_SIZE / sizeof(uintptr_t)];
2600    D3D12_PIPELINE_STATE_STREAM_DESC stream_desc = {
2601       .pPipelineStateSubobjectStream = state_buf,
2602    };
2603 
2604    dzn_pipeline_init(&pipeline->base, device,
2605                      VK_PIPELINE_BIND_POINT_COMPUTE,
2606                      vk_compute_pipeline_create_flags(pCreateInfo),
2607                      layout, &stream_desc);
2608 
2609    D3D12_SHADER_BYTECODE shader = { 0 };
2610    VkResult ret =
2611       dzn_compute_pipeline_compile_shader(device, pipeline, pcache, layout,
2612                                           &stream_desc, &shader, pCreateInfo);
2613    if (ret != VK_SUCCESS)
2614       goto out;
2615 
2616    if (FAILED(ID3D12Device4_CreatePipelineState(device->dev, &stream_desc,
2617                                                 &IID_ID3D12PipelineState,
2618                                                 (void **)&pipeline->base.state)))
2619       ret = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2620 
2621 out:
2622    free((void *)shader.pShaderBytecode);
2623    if (ret != VK_SUCCESS)
2624       dzn_compute_pipeline_destroy(pipeline, pAllocator);
2625    else
2626       *out = dzn_compute_pipeline_to_handle(pipeline);
2627 
2628    return ret;
2629 }
2630 
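/* Lazily create the command signature used for indirect dispatch: three
 * 32-bit root constants (the dispatch group counts, exposed to the shader
 * through the sysval parameter) followed by the dispatch arguments.
 */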
2631 ID3D12CommandSignature *
2632 dzn_compute_pipeline_get_indirect_cmd_sig(struct dzn_compute_pipeline *pipeline)
2633 {
2634    if (pipeline->indirect_cmd_sig)
2635       return pipeline->indirect_cmd_sig;
2636 
2637    struct dzn_device *device =
2638       container_of(pipeline->base.base.device, struct dzn_device, vk);
2639 
2640    D3D12_INDIRECT_ARGUMENT_DESC indirect_dispatch_args[] = {
2641       {
2642          .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT,
2643          .Constant = {
2644             .RootParameterIndex = pipeline->base.root.sysval_cbv_param_idx,
2645             .DestOffsetIn32BitValues = 0,
2646             .Num32BitValuesToSet = 3,
2647          },
2648       },
2649       {
2650          .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH,
2651       },
2652    };
2653 
2654    D3D12_COMMAND_SIGNATURE_DESC indirect_dispatch_desc = {
2655       .ByteStride = sizeof(D3D12_DISPATCH_ARGUMENTS) * 2,
2656       .NumArgumentDescs = ARRAY_SIZE(indirect_dispatch_args),
2657       .pArgumentDescs = indirect_dispatch_args,
2658    };
2659 
2660    HRESULT hres =
2661       ID3D12Device1_CreateCommandSignature(device->dev, &indirect_dispatch_desc,
2662                                            pipeline->base.root.sig,
2663                                            &IID_ID3D12CommandSignature,
2664                                            (void **)&pipeline->indirect_cmd_sig);
2665    if (FAILED(hres))
2666       return NULL;
2667 
2668    return pipeline->indirect_cmd_sig;
2669 }
2670 
2671 VKAPI_ATTR VkResult VKAPI_CALL
2672 dzn_CreateComputePipelines(VkDevice dev,
2673                            VkPipelineCache pipelineCache,
2674                            uint32_t count,
2675                            const VkComputePipelineCreateInfo *pCreateInfos,
2676                            const VkAllocationCallbacks *pAllocator,
2677                            VkPipeline *pPipelines)
2678 {
2679    VK_FROM_HANDLE(dzn_device, device, dev);
2680    VkResult result = VK_SUCCESS;
2681 
2682    unsigned i;
2683    for (i = 0; i < count; i++) {
2684       result = dzn_compute_pipeline_create(device,
2685                                            pipelineCache,
2686                                            &pCreateInfos[i],
2687                                            pAllocator,
2688                                            &pPipelines[i]);
2689       if (result != VK_SUCCESS) {
2690          pPipelines[i] = VK_NULL_HANDLE;
2691 
2692          /* Bail out on the first error != VK_PIPELINE_COMPILE_REQUIRED as it
2693           * is not obvious what error should be reported upon two different failures.
2694           */
2695          if (result != VK_PIPELINE_COMPILE_REQUIRED)
2696             break;
2697 
2698          if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
2699             break;
2700       }
2701    }
2702 
2703    for (; i < count; i++)
2704       pPipelines[i] = VK_NULL_HANDLE;
2705 
2706    return result;
2707 }
2708 
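/* vkDestroyPipeline: dispatch to the graphics or compute destructor based
 * on the pipeline's bind point.
 */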
2709 VKAPI_ATTR void VKAPI_CALL
2710 dzn_DestroyPipeline(VkDevice device,
2711                     VkPipeline pipeline,
2712                     const VkAllocationCallbacks *pAllocator)
2713 {
2714    VK_FROM_HANDLE(dzn_pipeline, pipe, pipeline);
2715 
2716    if (!pipe)
2717       return;
2718 
2719    if (pipe->type == VK_PIPELINE_BIND_POINT_GRAPHICS) {
2720       struct dzn_graphics_pipeline *gfx = container_of(pipe, struct dzn_graphics_pipeline, base);
2721       dzn_graphics_pipeline_destroy(gfx, pAllocator);
2722    } else {
2723       assert(pipe->type == VK_PIPELINE_BIND_POINT_COMPUTE);
2724       struct dzn_compute_pipeline *compute = container_of(pipe, struct dzn_compute_pipeline, base);
2725       dzn_compute_pipeline_destroy(compute, pAllocator);
2726    }
2727 }
2728