xref: /aosp_15_r20/external/mesa3d/src/microsoft/vulkan/dzn_cmd_buffer.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © Microsoft Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "dzn_private.h"
25 
26 #include "vk_alloc.h"
27 #include "vk_debug_report.h"
28 #include "vk_format.h"
29 #include "vk_util.h"
30 
31 #include "dxil_spirv_nir.h"
32 
/* Execute the given array of queued transition barriers on the command list.
 *
 * D3D12 forbids NOP barriers (StateBefore == StateAfter), which can happen
 * because several Vulkan layouts map to the same D3D12 state.  Non-NOP
 * barriers are accumulated into contiguous runs and flushed in batches; a NOP
 * entry forces a flush of the run preceding it and is skipped.
 *
 * Legacy (non-enhanced) barrier path only.
 */
static void
dzn_cmd_buffer_exec_transition_barriers(struct dzn_cmd_buffer *cmdbuf,
                                        D3D12_RESOURCE_BARRIER *barriers,
                                        uint32_t barrier_count)
{
   assert(!cmdbuf->enhanced_barriers);
   uint32_t flush_count = 0;
   for (uint32_t b = 0; b < barrier_count; b++) {
      assert(barriers[b].Transition.pResource);

      /* some layouts map to the same states, and NOP-barriers are illegal */
      if (barriers[b].Transition.StateBefore == barriers[b].Transition.StateAfter) {
         if (flush_count) {
            /* Flush the run of non-NOP barriers that ended just before b. */
            ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, flush_count,
                                                       &barriers[b - flush_count]);
            flush_count = 0;
         }
      } else {
         flush_count++;
      }
   }

   /* Flush the trailing run, if any. */
   if (flush_count)
      ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, flush_count,
                                                 &barriers[barrier_count - flush_count]);

   /* Set Before = After so we don't execute the same barrier twice. */
   for (uint32_t b = 0; b < barrier_count; b++)
      barriers[b].Transition.StateBefore = barriers[b].Transition.StateAfter;
}
63 
64 static void
dzn_cmd_buffer_flush_transition_barriers(struct dzn_cmd_buffer * cmdbuf,ID3D12Resource * res,uint32_t first_subres,uint32_t subres_count)65 dzn_cmd_buffer_flush_transition_barriers(struct dzn_cmd_buffer *cmdbuf,
66                                          ID3D12Resource *res,
67                                          uint32_t first_subres,
68                                          uint32_t subres_count)
69 {
70    assert(!cmdbuf->enhanced_barriers);
71    struct hash_entry *he =
72       _mesa_hash_table_search(cmdbuf->transition_barriers, res);
73    D3D12_RESOURCE_BARRIER *barriers = he ? he->data : NULL;
74 
75    if (!barriers)
76       return;
77 
78    dzn_cmd_buffer_exec_transition_barriers(cmdbuf, &barriers[first_subres], subres_count);
79 }
80 
enum dzn_queue_transition_flags {
   /* Execute the queued barriers on the command list immediately after
    * queueing them. */
   DZN_QUEUE_TRANSITION_FLUSH = 1 << 0,
   /* The source layout is undefined: for subresources that already have a
    * queued barrier, chain from that barrier's StateAfter instead of the
    * caller-provided "before" state. */
   DZN_QUEUE_TRANSITION_BEFORE_IS_UNDEFINED = 1 << 1,
};
85 
86 static VkResult
dzn_cmd_buffer_queue_transition_barriers(struct dzn_cmd_buffer * cmdbuf,ID3D12Resource * res,uint32_t first_subres,uint32_t subres_count,D3D12_RESOURCE_STATES before,D3D12_RESOURCE_STATES after,uint32_t flags)87 dzn_cmd_buffer_queue_transition_barriers(struct dzn_cmd_buffer *cmdbuf,
88                                          ID3D12Resource *res,
89                                          uint32_t first_subres,
90                                          uint32_t subres_count,
91                                          D3D12_RESOURCE_STATES before,
92                                          D3D12_RESOURCE_STATES after,
93                                          uint32_t flags)
94 {
95    assert(!cmdbuf->enhanced_barriers);
96    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
97    struct hash_entry *he =
98       _mesa_hash_table_search(cmdbuf->transition_barriers, res);
99    struct D3D12_RESOURCE_BARRIER *barriers = he ? he->data : NULL;
100 
101    if (!barriers) {
102       D3D12_RESOURCE_DESC desc = dzn_ID3D12Resource_GetDesc(res);
103       D3D12_FEATURE_DATA_FORMAT_INFO fmt_info = { desc.Format, 0 };
104       ID3D12Device_CheckFeatureSupport(device->dev, D3D12_FEATURE_FORMAT_INFO, &fmt_info, sizeof(fmt_info));
105       uint32_t barrier_count =
106          fmt_info.PlaneCount *
107          desc.MipLevels * desc.DepthOrArraySize;
108 
109       barriers =
110          vk_zalloc(&cmdbuf->vk.pool->alloc, sizeof(*barriers) * barrier_count,
111                    8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
112       if (!barriers)
113          return vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
114 
115       he = _mesa_hash_table_insert(cmdbuf->transition_barriers, res, barriers);
116       if (!he)
117          return vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
118    }
119 
120    for (uint32_t subres = first_subres; subres < first_subres + subres_count; subres++) {
121       if (!barriers[subres].Transition.pResource) {
122          barriers[subres] = (D3D12_RESOURCE_BARRIER) {
123             .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
124             .Flags = 0,
125             .Transition = {
126                .pResource = res,
127                .Subresource = subres,
128                .StateBefore = before,
129                .StateAfter = after,
130             },
131          };
132       } else {
133 	 if (flags & DZN_QUEUE_TRANSITION_BEFORE_IS_UNDEFINED)
134             before = barriers[subres].Transition.StateAfter;
135 
136          assert(barriers[subres].Transition.StateAfter == before ||
137                 barriers[subres].Transition.StateAfter == after);
138          barriers[subres].Transition.StateAfter = after;
139       }
140    }
141 
142    if (flags & DZN_QUEUE_TRANSITION_FLUSH)
143       dzn_cmd_buffer_exec_transition_barriers(cmdbuf, &barriers[first_subres], subres_count);
144 
145    return VK_SUCCESS;
146 }
147 
148 static VkResult
dzn_cmd_buffer_queue_image_range_state_transition(struct dzn_cmd_buffer * cmdbuf,const struct dzn_image * image,const VkImageSubresourceRange * range,D3D12_RESOURCE_STATES before,D3D12_RESOURCE_STATES after,uint32_t flags)149 dzn_cmd_buffer_queue_image_range_state_transition(struct dzn_cmd_buffer *cmdbuf,
150                                                   const struct dzn_image *image,
151                                                   const VkImageSubresourceRange *range,
152                                                   D3D12_RESOURCE_STATES before,
153                                                   D3D12_RESOURCE_STATES after,
154                                                   uint32_t flags)
155 {
156    assert(!cmdbuf->enhanced_barriers);
157    uint32_t first_barrier = 0, barrier_count = 0;
158    VkResult ret = VK_SUCCESS;
159 
160    dzn_foreach_aspect(aspect, range->aspectMask) {
161       uint32_t layer_count = dzn_get_layer_count(image, range);
162       uint32_t level_count = dzn_get_level_count(image, range);
163       for (uint32_t layer = 0; layer < layer_count; layer++) {
164          uint32_t subres = dzn_image_range_get_subresource_index(image, range, aspect, 0, layer);
165          if (!barrier_count) {
166             first_barrier = subres;
167             barrier_count = level_count;
168             continue;
169          } else if (first_barrier + barrier_count == subres) {
170             barrier_count += level_count;
171             continue;
172          }
173 
174          ret = dzn_cmd_buffer_queue_transition_barriers(cmdbuf, image->res,
175                                                         first_barrier, barrier_count,
176                                                         before, after, flags);
177          if (ret != VK_SUCCESS)
178             return ret;
179 
180          barrier_count = 0;
181       }
182 
183       if (barrier_count) {
184          ret = dzn_cmd_buffer_queue_transition_barriers(cmdbuf, image->res,
185                                                         first_barrier, barrier_count,
186                                                         before, after, flags);
187          if (ret != VK_SUCCESS)
188             return ret;
189       }
190    }
191 
192    return VK_SUCCESS;
193 }
194 
195 static VkResult
dzn_cmd_buffer_queue_image_range_layout_transition(struct dzn_cmd_buffer * cmdbuf,const struct dzn_image * image,const VkImageSubresourceRange * range,VkImageLayout old_layout,VkImageLayout new_layout,uint32_t flags)196 dzn_cmd_buffer_queue_image_range_layout_transition(struct dzn_cmd_buffer *cmdbuf,
197                                                    const struct dzn_image *image,
198                                                    const VkImageSubresourceRange *range,
199                                                    VkImageLayout old_layout,
200                                                    VkImageLayout new_layout,
201                                                    uint32_t flags)
202 {
203    assert(!cmdbuf->enhanced_barriers);
204    uint32_t first_barrier = 0, barrier_count = 0;
205    VkResult ret = VK_SUCCESS;
206 
207    if (old_layout == VK_IMAGE_LAYOUT_UNDEFINED)
208       flags |= DZN_QUEUE_TRANSITION_BEFORE_IS_UNDEFINED;
209 
210    dzn_foreach_aspect(aspect, range->aspectMask) {
211       D3D12_RESOURCE_STATES after =
212          dzn_image_layout_to_state(image, new_layout, aspect, cmdbuf->type);
213       D3D12_RESOURCE_STATES before =
214          (old_layout == VK_IMAGE_LAYOUT_UNDEFINED ||
215           old_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) ?
216          D3D12_RESOURCE_STATE_COMMON :
217          dzn_image_layout_to_state(image, old_layout, aspect, cmdbuf->type);
218 
219       uint32_t layer_count = dzn_get_layer_count(image, range);
220       uint32_t level_count = dzn_get_level_count(image, range);
221       for (uint32_t layer = 0; layer < layer_count; layer++) {
222          uint32_t subres = dzn_image_range_get_subresource_index(image, range, aspect, 0, layer);
223          if (!barrier_count) {
224             first_barrier = subres;
225             barrier_count = level_count;
226             continue;
227          } else if (first_barrier + barrier_count == subres) {
228             barrier_count += level_count;
229             continue;
230          }
231 
232          ret = dzn_cmd_buffer_queue_transition_barriers(cmdbuf, image->res,
233                                                         first_barrier, barrier_count,
234                                                         before, after, flags);
235          if (ret != VK_SUCCESS)
236             return ret;
237 
238          barrier_count = 0;
239       }
240 
241       if (barrier_count) {
242          ret = dzn_cmd_buffer_queue_transition_barriers(cmdbuf, image->res,
243                                                         first_barrier, barrier_count,
244                                                         before, after, flags);
245          if (ret != VK_SUCCESS)
246             return ret;
247       }
248    }
249 
250    return VK_SUCCESS;
251 }
252 
253 static void
dzn_cmd_buffer_global_barrier(struct dzn_cmd_buffer * cmdbuf,D3D12_BARRIER_SYNC sync_before,D3D12_BARRIER_SYNC sync_after,D3D12_BARRIER_ACCESS access_before,D3D12_BARRIER_ACCESS access_after)254 dzn_cmd_buffer_global_barrier(struct dzn_cmd_buffer *cmdbuf,
255                               D3D12_BARRIER_SYNC sync_before,
256                               D3D12_BARRIER_SYNC sync_after,
257                               D3D12_BARRIER_ACCESS access_before,
258                               D3D12_BARRIER_ACCESS access_after)
259 {
260    assert(cmdbuf->enhanced_barriers);
261    D3D12_GLOBAL_BARRIER global = {
262       .SyncBefore = sync_before,
263       .SyncAfter = sync_after,
264       .AccessBefore = access_before,
265       .AccessAfter = access_after,
266    };
267    D3D12_BARRIER_GROUP group = {
268       .Type = D3D12_BARRIER_TYPE_GLOBAL,
269       .NumBarriers = 1,
270       .pGlobalBarriers = &global,
271    };
272    ID3D12GraphicsCommandList8_Barrier(cmdbuf->cmdlist8, 1, &group);
273 }
274 
275 static void
dzn_cmd_buffer_buffer_barrier(struct dzn_cmd_buffer * cmdbuf,ID3D12Resource * buf,D3D12_BARRIER_SYNC sync_before,D3D12_BARRIER_SYNC sync_after,D3D12_BARRIER_ACCESS access_before,D3D12_BARRIER_ACCESS access_after)276 dzn_cmd_buffer_buffer_barrier(struct dzn_cmd_buffer *cmdbuf,
277                               ID3D12Resource *buf,
278                               D3D12_BARRIER_SYNC sync_before,
279                               D3D12_BARRIER_SYNC sync_after,
280                               D3D12_BARRIER_ACCESS access_before,
281                               D3D12_BARRIER_ACCESS access_after)
282 {
283    assert(cmdbuf->enhanced_barriers);
284    D3D12_BUFFER_BARRIER buffer = {
285       .SyncBefore = sync_before,
286       .SyncAfter = sync_after,
287       .AccessBefore = access_before,
288       .AccessAfter = access_after,
289       .pResource = buf,
290       .Offset = 0,
291       .Size = UINT64_MAX,
292    };
293    D3D12_BARRIER_GROUP group = {
294       .Type = D3D12_BARRIER_TYPE_BUFFER,
295       .NumBarriers = 1,
296       .pBufferBarriers = &buffer,
297    };
298    ID3D12GraphicsCommandList8_Barrier(cmdbuf->cmdlist8, 1, &group);
299 }
300 
301 static void
dzn_cmd_buffer_image_barrier(struct dzn_cmd_buffer * cmdbuf,const struct dzn_image * image,D3D12_BARRIER_SYNC sync_before,D3D12_BARRIER_SYNC sync_after,D3D12_BARRIER_ACCESS access_before,D3D12_BARRIER_ACCESS access_after,D3D12_BARRIER_LAYOUT layout_before,D3D12_BARRIER_LAYOUT layout_after,const VkImageSubresourceRange * range)302 dzn_cmd_buffer_image_barrier(struct dzn_cmd_buffer *cmdbuf,
303                              const struct dzn_image *image,
304                              D3D12_BARRIER_SYNC sync_before,
305                              D3D12_BARRIER_SYNC sync_after,
306                              D3D12_BARRIER_ACCESS access_before,
307                              D3D12_BARRIER_ACCESS access_after,
308                              D3D12_BARRIER_LAYOUT layout_before,
309                              D3D12_BARRIER_LAYOUT layout_after,
310                              const VkImageSubresourceRange *range)
311 {
312    assert(cmdbuf->enhanced_barriers);
313    uint32_t first_plane = (range->aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) ? 1 : 0;
314    uint32_t plane_count = first_plane == 0 && (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) ? 2 : 1;
315    D3D12_TEXTURE_BARRIER texture = {
316       .SyncBefore = sync_before,
317       .SyncAfter = sync_after,
318       .AccessBefore = access_before,
319       .AccessAfter = access_after,
320       .LayoutBefore = layout_before,
321       .LayoutAfter = layout_after,
322       .Subresources.FirstArraySlice = range->baseArrayLayer,
323       .Subresources.NumArraySlices = dzn_get_layer_count(image, range),
324       .Subresources.IndexOrFirstMipLevel = range->baseMipLevel,
325       .Subresources.NumMipLevels = dzn_get_level_count(image, range),
326       .Subresources.FirstPlane = first_plane,
327       .Subresources.NumPlanes = plane_count,
328       .pResource = image->res,
329    };
330    D3D12_BARRIER_GROUP group = {
331       .Type = D3D12_BARRIER_TYPE_TEXTURE,
332       .NumBarriers = 1,
333       .pTextureBarriers = &texture,
334    };
335    ID3D12GraphicsCommandList8_Barrier(cmdbuf->cmdlist8, 1, &group);
336 }
337 
338 static D3D12_BARRIER_LAYOUT
dzn_cmd_buffer_require_layout(struct dzn_cmd_buffer * cmdbuf,const struct dzn_image * image,VkImageLayout current_layout,D3D12_BARRIER_LAYOUT needed_layout,const VkImageSubresourceRange * range)339 dzn_cmd_buffer_require_layout(struct dzn_cmd_buffer *cmdbuf,
340                               const struct dzn_image *image,
341                               VkImageLayout current_layout,
342                               D3D12_BARRIER_LAYOUT needed_layout,
343                               const VkImageSubresourceRange *range)
344 {
345    assert(cmdbuf->enhanced_barriers);
346    /* We shouldn't need these fixups on a subresource range which includes depth and stencil,
347       where one is read-only and the other is writable */
348    if (range->aspectMask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
349       assert(current_layout != VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL &&
350              current_layout != VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL);
351    }
352 
353    /* Nothing needs to be done for these, the appropriate sync/access was already handled */
354    if (image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS)
355       return needed_layout;
356 
357    D3D12_BARRIER_LAYOUT current_d3d_layout = dzn_vk_layout_to_d3d_layout(current_layout, cmdbuf->type, range->aspectMask);
358    if (current_d3d_layout != needed_layout) {
359       dzn_cmd_buffer_image_barrier(cmdbuf, image,
360                                    D3D12_BARRIER_SYNC_ALL, D3D12_BARRIER_SYNC_ALL,
361                                    D3D12_BARRIER_ACCESS_COMMON, D3D12_BARRIER_ACCESS_COMMON,
362                                    current_d3d_layout, needed_layout, range);
363    }
364    return current_d3d_layout;
365 }
366 
367 static void
dzn_cmd_buffer_restore_layout(struct dzn_cmd_buffer * cmdbuf,const struct dzn_image * image,D3D12_BARRIER_SYNC sync,D3D12_BARRIER_ACCESS access,D3D12_BARRIER_LAYOUT needed_layout,D3D12_BARRIER_LAYOUT restore_layout,const VkImageSubresourceRange * range)368 dzn_cmd_buffer_restore_layout(struct dzn_cmd_buffer *cmdbuf,
369                               const struct dzn_image *image,
370                               D3D12_BARRIER_SYNC sync,
371                               D3D12_BARRIER_ACCESS access,
372                               D3D12_BARRIER_LAYOUT needed_layout,
373                               D3D12_BARRIER_LAYOUT restore_layout,
374                               const VkImageSubresourceRange *range)
375 {
376    if (needed_layout != restore_layout) {
377       dzn_cmd_buffer_image_barrier(cmdbuf, image,
378                                    sync, D3D12_BARRIER_SYNC_COPY,
379                                    access, D3D12_BARRIER_ACCESS_COMMON,
380                                    needed_layout, restore_layout, range);
381    }
382 }
383 
384 static void
dzn_cmd_buffer_destroy(struct vk_command_buffer * cbuf)385 dzn_cmd_buffer_destroy(struct vk_command_buffer *cbuf)
386 {
387    if (!cbuf)
388       return;
389 
390    struct dzn_cmd_buffer *cmdbuf = container_of(cbuf, struct dzn_cmd_buffer, vk);
391 
392    if (cmdbuf->cmdlist)
393       ID3D12GraphicsCommandList1_Release(cmdbuf->cmdlist);
394 
395    if (cmdbuf->cmdlist8)
396       ID3D12GraphicsCommandList8_Release(cmdbuf->cmdlist8);
397 
398    if (cmdbuf->cmdlist9)
399       ID3D12GraphicsCommandList9_Release(cmdbuf->cmdlist9);
400 
401    if (cmdbuf->cmdalloc)
402       ID3D12CommandAllocator_Release(cmdbuf->cmdalloc);
403 
404    for (uint32_t bucket = 0; bucket < DZN_INTERNAL_BUF_BUCKET_COUNT; ++bucket) {
405       list_for_each_entry_safe(struct dzn_internal_resource, res, &cmdbuf->internal_bufs[bucket], link) {
406          list_del(&res->link);
407          ID3D12Resource_Release(res->res);
408          vk_free(&cbuf->pool->alloc, res);
409       }
410    }
411 
412    dzn_descriptor_heap_pool_finish(&cmdbuf->cbv_srv_uav_pool);
413    dzn_descriptor_heap_pool_finish(&cmdbuf->sampler_pool);
414    dzn_descriptor_heap_pool_finish(&cmdbuf->rtvs.pool);
415    dzn_descriptor_heap_pool_finish(&cmdbuf->dsvs.pool);
416    util_dynarray_fini(&cmdbuf->events.signal);
417    util_dynarray_fini(&cmdbuf->queries.reset);
418    util_dynarray_fini(&cmdbuf->queries.signal);
419 
420    if (cmdbuf->rtvs.ht) {
421       hash_table_foreach(cmdbuf->rtvs.ht, he)
422          vk_free(&cbuf->pool->alloc, he->data);
423       _mesa_hash_table_destroy(cmdbuf->rtvs.ht, NULL);
424    }
425 
426    if (cmdbuf->dsvs.ht) {
427       hash_table_foreach(cmdbuf->dsvs.ht, he)
428          vk_free(&cbuf->pool->alloc, he->data);
429       _mesa_hash_table_destroy(cmdbuf->dsvs.ht, NULL);
430    }
431 
432    if (cmdbuf->events.ht)
433       _mesa_hash_table_destroy(cmdbuf->events.ht, NULL);
434 
435    if (cmdbuf->queries.ht) {
436       hash_table_foreach(cmdbuf->queries.ht, he) {
437          struct dzn_cmd_buffer_query_pool_state *qpstate = he->data;
438          util_dynarray_fini(&qpstate->reset);
439          util_dynarray_fini(&qpstate->collect);
440          util_dynarray_fini(&qpstate->signal);
441          util_dynarray_fini(&qpstate->zero);
442          vk_free(&cbuf->pool->alloc, he->data);
443       }
444       _mesa_hash_table_destroy(cmdbuf->queries.ht, NULL);
445    }
446 
447    if (cmdbuf->transition_barriers) {
448       hash_table_foreach(cmdbuf->transition_barriers, he)
449          vk_free(&cbuf->pool->alloc, he->data);
450       _mesa_hash_table_destroy(cmdbuf->transition_barriers, NULL);
451    }
452 
453    vk_command_buffer_finish(&cmdbuf->vk);
454    vk_free(&cbuf->pool->alloc, cmdbuf);
455 }
456 
457 static void
dzn_cmd_buffer_reset(struct vk_command_buffer * cbuf,VkCommandBufferResetFlags flags)458 dzn_cmd_buffer_reset(struct vk_command_buffer *cbuf, VkCommandBufferResetFlags flags)
459 {
460    struct dzn_cmd_buffer *cmdbuf = container_of(cbuf, struct dzn_cmd_buffer, vk);
461 
462    /* Reset the state */
463    memset(&cmdbuf->state, 0, sizeof(cmdbuf->state));
464    cmdbuf->state.multiview.num_views = 1;
465    cmdbuf->state.multiview.view_mask = 1;
466 
467    /* TODO: Return resources to the pool */
468    for (uint32_t bucket = 0; bucket < DZN_INTERNAL_BUF_BUCKET_COUNT; ++bucket) {
469       list_for_each_entry_safe(struct dzn_internal_resource, res, &cmdbuf->internal_bufs[bucket], link) {
470          list_del(&res->link);
471          ID3D12Resource_Release(res->res);
472          vk_free(&cmdbuf->vk.pool->alloc, res);
473       }
474    }
475    cmdbuf->cur_upload_buf = NULL;
476 
477    util_dynarray_clear(&cmdbuf->events.signal);
478    util_dynarray_clear(&cmdbuf->queries.reset);
479    util_dynarray_clear(&cmdbuf->queries.signal);
480    hash_table_foreach(cmdbuf->rtvs.ht, he)
481       vk_free(&cmdbuf->vk.pool->alloc, he->data);
482    _mesa_hash_table_clear(cmdbuf->rtvs.ht, NULL);
483    cmdbuf->null_rtv.ptr = 0;
484    dzn_descriptor_heap_pool_reset(&cmdbuf->rtvs.pool);
485    hash_table_foreach(cmdbuf->dsvs.ht, he)
486       vk_free(&cmdbuf->vk.pool->alloc, he->data);
487    _mesa_hash_table_clear(cmdbuf->dsvs.ht, NULL);
488    hash_table_foreach(cmdbuf->queries.ht, he) {
489       struct dzn_cmd_buffer_query_pool_state *qpstate = he->data;
490       util_dynarray_fini(&qpstate->reset);
491       util_dynarray_fini(&qpstate->collect);
492       util_dynarray_fini(&qpstate->signal);
493       util_dynarray_fini(&qpstate->zero);
494       vk_free(&cmdbuf->vk.pool->alloc, he->data);
495    }
496    _mesa_hash_table_clear(cmdbuf->queries.ht, NULL);
497    _mesa_hash_table_clear(cmdbuf->events.ht, NULL);
498    hash_table_foreach(cmdbuf->transition_barriers, he)
499       vk_free(&cmdbuf->vk.pool->alloc, he->data);
500    _mesa_hash_table_clear(cmdbuf->transition_barriers, NULL);
501    dzn_descriptor_heap_pool_reset(&cmdbuf->dsvs.pool);
502    dzn_descriptor_heap_pool_reset(&cmdbuf->cbv_srv_uav_pool);
503    dzn_descriptor_heap_pool_reset(&cmdbuf->sampler_pool);
504 
505    if (cmdbuf->vk.state == MESA_VK_COMMAND_BUFFER_STATE_RECORDING &&
506        cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY)
507       ID3D12GraphicsCommandList1_Close(cmdbuf->cmdlist);
508 
509    vk_command_buffer_reset(&cmdbuf->vk);
510 
511    if (cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY)
512       ID3D12CommandAllocator_Reset(cmdbuf->cmdalloc);
513 }
514 
515 static uint32_t
dzn_cmd_buffer_rtv_key_hash_function(const void * key)516 dzn_cmd_buffer_rtv_key_hash_function(const void *key)
517 {
518    return _mesa_hash_data(key, sizeof(struct dzn_cmd_buffer_rtv_key));
519 }
520 
521 static bool
dzn_cmd_buffer_rtv_key_equals_function(const void * a,const void * b)522 dzn_cmd_buffer_rtv_key_equals_function(const void *a, const void *b)
523 {
524    return memcmp(a, b, sizeof(struct dzn_cmd_buffer_rtv_key)) == 0;
525 }
526 
527 static uint32_t
dzn_cmd_buffer_dsv_key_hash_function(const void * key)528 dzn_cmd_buffer_dsv_key_hash_function(const void *key)
529 {
530    return _mesa_hash_data(key, sizeof(struct dzn_cmd_buffer_dsv_key));
531 }
532 
533 static bool
dzn_cmd_buffer_dsv_key_equals_function(const void * a,const void * b)534 dzn_cmd_buffer_dsv_key_equals_function(const void *a, const void *b)
535 {
536    return memcmp(a, b, sizeof(struct dzn_cmd_buffer_dsv_key)) == 0;
537 }
538 
/* Dispatch table used by the common vk_command_buffer code to destroy and
 * reset dzn command buffers. */
static const struct vk_command_buffer_ops cmd_buffer_ops = {
   .destroy = dzn_cmd_buffer_destroy,
   .reset = dzn_cmd_buffer_reset,
};
543 
/* Per command-list type, the set of D3D12 barrier sync bits that are legal
 * on that queue type; cached into cmdbuf->valid_sync at creation so barrier
 * translation can mask out bits the queue doesn't support. */
static const D3D12_BARRIER_SYNC cmd_buffer_valid_sync[] = {
   [D3D12_COMMAND_LIST_TYPE_DIRECT] = ~(D3D12_BARRIER_SYNC_VIDEO_DECODE |
                                        D3D12_BARRIER_SYNC_VIDEO_PROCESS |
                                        D3D12_BARRIER_SYNC_VIDEO_ENCODE),
   [D3D12_COMMAND_LIST_TYPE_COMPUTE] = (D3D12_BARRIER_SYNC_ALL |
                                        D3D12_BARRIER_SYNC_COMPUTE_SHADING |
                                        D3D12_BARRIER_SYNC_RAYTRACING |
                                        D3D12_BARRIER_SYNC_COPY |
                                        D3D12_BARRIER_SYNC_EXECUTE_INDIRECT |
                                        D3D12_BARRIER_SYNC_PREDICATION |
                                        D3D12_BARRIER_SYNC_ALL_SHADING |
                                        D3D12_BARRIER_SYNC_NON_PIXEL_SHADING |
                                        D3D12_BARRIER_SYNC_EMIT_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO |
                                        D3D12_BARRIER_SYNC_CLEAR_UNORDERED_ACCESS_VIEW |
                                        D3D12_BARRIER_SYNC_BUILD_RAYTRACING_ACCELERATION_STRUCTURE |
                                        D3D12_BARRIER_SYNC_COPY_RAYTRACING_ACCELERATION_STRUCTURE),
   [D3D12_COMMAND_LIST_TYPE_COPY] = D3D12_BARRIER_SYNC_ALL | D3D12_BARRIER_SYNC_COPY
};
/* Per command-list type, the set of D3D12 barrier access bits that are legal
 * on that queue type; cached into cmdbuf->valid_access at creation (companion
 * to cmd_buffer_valid_sync above). */
static const D3D12_BARRIER_ACCESS cmd_buffer_valid_access[] = {
   [D3D12_COMMAND_LIST_TYPE_DIRECT] = ~(D3D12_BARRIER_ACCESS_VIDEO_DECODE_READ |
                                        D3D12_BARRIER_ACCESS_VIDEO_DECODE_WRITE |
                                        D3D12_BARRIER_ACCESS_VIDEO_PROCESS_READ |
                                        D3D12_BARRIER_ACCESS_VIDEO_PROCESS_WRITE |
                                        D3D12_BARRIER_ACCESS_VIDEO_ENCODE_READ |
                                        D3D12_BARRIER_ACCESS_VIDEO_ENCODE_WRITE),
   [D3D12_COMMAND_LIST_TYPE_COMPUTE] = (D3D12_BARRIER_ACCESS_CONSTANT_BUFFER |
                                        D3D12_BARRIER_ACCESS_UNORDERED_ACCESS |
                                        D3D12_BARRIER_ACCESS_SHADER_RESOURCE |
                                        D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT |
                                        D3D12_BARRIER_ACCESS_PREDICATION |
                                        D3D12_BARRIER_ACCESS_COPY_DEST |
                                        D3D12_BARRIER_ACCESS_COPY_SOURCE |
                                        D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ |
                                        D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_WRITE),
   [D3D12_COMMAND_LIST_TYPE_COPY] = D3D12_BARRIER_ACCESS_COPY_SOURCE | D3D12_BARRIER_ACCESS_COPY_DEST,
};
580 
581 static VkResult
dzn_cmd_buffer_create(const VkCommandBufferAllocateInfo * info,VkCommandBuffer * out)582 dzn_cmd_buffer_create(const VkCommandBufferAllocateInfo *info,
583                       VkCommandBuffer *out)
584 {
585    VK_FROM_HANDLE(vk_command_pool, pool, info->commandPool);
586    struct dzn_device *device = container_of(pool->base.device, struct dzn_device, vk);
587    struct dzn_physical_device *pdev =
588       container_of(device->vk.physical, struct dzn_physical_device, vk);
589 
590    assert(pool->queue_family_index < pdev->queue_family_count);
591 
592    D3D12_COMMAND_LIST_TYPE type =
593       pdev->queue_families[pool->queue_family_index].desc.Type;
594 
595    struct dzn_cmd_buffer *cmdbuf =
596       vk_zalloc(&pool->alloc, sizeof(*cmdbuf), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
597    if (!cmdbuf)
598       return vk_error(pool->base.device, VK_ERROR_OUT_OF_HOST_MEMORY);
599 
600    VkResult result =
601       vk_command_buffer_init(pool, &cmdbuf->vk, &cmd_buffer_ops, info->level);
602    if (result != VK_SUCCESS) {
603       vk_free(&pool->alloc, cmdbuf);
604       return result;
605    }
606 
607    memset(&cmdbuf->state, 0, sizeof(cmdbuf->state));
608    cmdbuf->state.multiview.num_views = 1;
609    cmdbuf->state.multiview.view_mask = 1;
610    for (uint32_t bucket = 0; bucket < DZN_INTERNAL_BUF_BUCKET_COUNT; ++bucket)
611       list_inithead(&cmdbuf->internal_bufs[bucket]);
612    util_dynarray_init(&cmdbuf->events.signal, NULL);
613    util_dynarray_init(&cmdbuf->queries.reset, NULL);
614    util_dynarray_init(&cmdbuf->queries.signal, NULL);
615    dzn_descriptor_heap_pool_init(&cmdbuf->rtvs.pool, device,
616                                  D3D12_DESCRIPTOR_HEAP_TYPE_RTV,
617                                  false, &pool->alloc);
618    dzn_descriptor_heap_pool_init(&cmdbuf->dsvs.pool, device,
619                                  D3D12_DESCRIPTOR_HEAP_TYPE_DSV,
620                                  false, &pool->alloc);
621    dzn_descriptor_heap_pool_init(&cmdbuf->cbv_srv_uav_pool, device,
622                                  D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
623                                  true, &pool->alloc);
624    dzn_descriptor_heap_pool_init(&cmdbuf->sampler_pool, device,
625                                  D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
626                                  true, &pool->alloc);
627 
628    cmdbuf->events.ht =
629       _mesa_pointer_hash_table_create(NULL);
630    cmdbuf->queries.ht =
631       _mesa_pointer_hash_table_create(NULL);
632    cmdbuf->transition_barriers =
633       _mesa_pointer_hash_table_create(NULL);
634    cmdbuf->rtvs.ht =
635       _mesa_hash_table_create(NULL,
636                               dzn_cmd_buffer_rtv_key_hash_function,
637                               dzn_cmd_buffer_rtv_key_equals_function);
638    cmdbuf->dsvs.ht =
639       _mesa_hash_table_create(NULL,
640                               dzn_cmd_buffer_dsv_key_hash_function,
641                               dzn_cmd_buffer_dsv_key_equals_function);
642    if (!cmdbuf->events.ht || !cmdbuf->queries.ht ||
643        !cmdbuf->transition_barriers ||
644        !cmdbuf->rtvs.ht || !cmdbuf->dsvs.ht) {
645       result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
646       goto out;
647    }
648 
649    if (cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
650       if (FAILED(ID3D12Device1_CreateCommandAllocator(device->dev, type,
651                                                       &IID_ID3D12CommandAllocator,
652                                                       (void **)&cmdbuf->cmdalloc))) {
653          result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
654          goto out;
655       }
656 
657       if (FAILED(ID3D12Device4_CreateCommandList1(device->dev, 0, type,
658                                                   D3D12_COMMAND_LIST_FLAG_NONE,
659                                                   &IID_ID3D12GraphicsCommandList1,
660                                                   (void **)&cmdbuf->cmdlist))) {
661          result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
662          goto out;
663       }
664 
665       (void)ID3D12GraphicsCommandList_QueryInterface(cmdbuf->cmdlist, &IID_ID3D12GraphicsCommandList8, (void **)&cmdbuf->cmdlist8);
666       (void)ID3D12GraphicsCommandList_QueryInterface(cmdbuf->cmdlist, &IID_ID3D12GraphicsCommandList9, (void **)&cmdbuf->cmdlist9);
667    }
668 
669    cmdbuf->type = type;
670    cmdbuf->valid_sync = cmd_buffer_valid_sync[type];
671    cmdbuf->valid_access = cmd_buffer_valid_access[type];
672    cmdbuf->enhanced_barriers = pdev->options12.EnhancedBarriersSupported;
673 
674 out:
675    if (result != VK_SUCCESS)
676       dzn_cmd_buffer_destroy(&cmdbuf->vk);
677    else
678       *out = dzn_cmd_buffer_to_handle(cmdbuf);
679 
680    return result;
681 }
682 
683 VKAPI_ATTR VkResult VKAPI_CALL
dzn_AllocateCommandBuffers(VkDevice device,const VkCommandBufferAllocateInfo * pAllocateInfo,VkCommandBuffer * pCommandBuffers)684 dzn_AllocateCommandBuffers(VkDevice device,
685                            const VkCommandBufferAllocateInfo *pAllocateInfo,
686                            VkCommandBuffer *pCommandBuffers)
687 {
688    VK_FROM_HANDLE(dzn_device, dev, device);
689    VkResult result = VK_SUCCESS;
690    uint32_t i;
691 
692    for (i = 0; i < pAllocateInfo->commandBufferCount; i++) {
693       result = dzn_cmd_buffer_create(pAllocateInfo,
694                                      &pCommandBuffers[i]);
695       if (result != VK_SUCCESS)
696          break;
697    }
698 
699    if (result != VK_SUCCESS) {
700       dev->vk.dispatch_table.FreeCommandBuffers(device, pAllocateInfo->commandPool,
701                                                 i, pCommandBuffers);
702       for (i = 0; i < pAllocateInfo->commandBufferCount; i++)
703          pCommandBuffers[i] = VK_NULL_HANDLE;
704    }
705 
706    return result;
707 }
708 
709 VKAPI_ATTR VkResult VKAPI_CALL
dzn_BeginCommandBuffer(VkCommandBuffer commandBuffer,const VkCommandBufferBeginInfo * info)710 dzn_BeginCommandBuffer(VkCommandBuffer commandBuffer,
711                        const VkCommandBufferBeginInfo *info)
712 {
713    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
714    vk_command_buffer_begin(&cmdbuf->vk, info);
715    if (cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY)
716       ID3D12GraphicsCommandList1_Reset(cmdbuf->cmdlist, cmdbuf->cmdalloc, NULL);
717    return vk_command_buffer_get_record_result(&cmdbuf->vk);
718 }
719 
720 static void
dzn_cmd_buffer_gather_events(struct dzn_cmd_buffer * cmdbuf)721 dzn_cmd_buffer_gather_events(struct dzn_cmd_buffer *cmdbuf)
722 {
723    if (vk_command_buffer_has_error(&cmdbuf->vk))
724       goto out;
725 
726    hash_table_foreach(cmdbuf->events.ht, he) {
727       enum dzn_event_state state = (uintptr_t)he->data;
728 
729       struct dzn_cmd_event_signal signal = { (struct dzn_event *)he->key, state == DZN_EVENT_STATE_SET };
730       struct dzn_cmd_event_signal *entry =
731          util_dynarray_grow(&cmdbuf->events.signal, struct dzn_cmd_event_signal, 1);
732 
733       if (!entry) {
734          vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
735          break;
736       }
737 
738       *entry = signal;
739    }
740 
741 out:
742    _mesa_hash_table_clear(cmdbuf->events.ht, NULL);
743 }
744 
745 static VkResult
dzn_cmd_buffer_dynbitset_reserve(struct dzn_cmd_buffer * cmdbuf,struct util_dynarray * array,uint32_t bit)746 dzn_cmd_buffer_dynbitset_reserve(struct dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, uint32_t bit)
747 {
748 
749    if (bit < util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS)
750       return VK_SUCCESS;
751 
752    unsigned old_sz = array->size;
753    void *ptr = util_dynarray_grow(array, BITSET_WORD, (bit + BITSET_WORDBITS) / BITSET_WORDBITS);
754    if (!ptr)
755       return vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
756 
757    memset(ptr, 0, array->size - old_sz);
758    return VK_SUCCESS;
759 }
760 
761 static bool
dzn_cmd_buffer_dynbitset_test(struct util_dynarray * array,uint32_t bit)762 dzn_cmd_buffer_dynbitset_test(struct util_dynarray *array, uint32_t bit)
763 {
764    uint32_t nbits = util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS;
765 
766    if (bit < nbits)
767       return BITSET_TEST(util_dynarray_element(array, BITSET_WORD, 0), bit);
768 
769    return false;
770 }
771 
772 static VkResult
dzn_cmd_buffer_dynbitset_set(struct dzn_cmd_buffer * cmdbuf,struct util_dynarray * array,uint32_t bit)773 dzn_cmd_buffer_dynbitset_set(struct dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, uint32_t bit)
774 {
775    VkResult result = dzn_cmd_buffer_dynbitset_reserve(cmdbuf, array, bit);
776    if (result != VK_SUCCESS)
777       return result;
778 
779    BITSET_SET(util_dynarray_element(array, BITSET_WORD, 0), bit);
780    return VK_SUCCESS;
781 }
782 
783 static void
dzn_cmd_buffer_dynbitset_clear(struct dzn_cmd_buffer * cmdbuf,struct util_dynarray * array,uint32_t bit)784 dzn_cmd_buffer_dynbitset_clear(struct dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, uint32_t bit)
785 {
786    if (bit >= util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS)
787       return;
788 
789    BITSET_CLEAR(util_dynarray_element(array, BITSET_WORD, 0), bit);
790 }
791 
792 static VkResult
dzn_cmd_buffer_dynbitset_set_range(struct dzn_cmd_buffer * cmdbuf,struct util_dynarray * array,uint32_t bit,uint32_t count)793 dzn_cmd_buffer_dynbitset_set_range(struct dzn_cmd_buffer *cmdbuf,
794                                    struct util_dynarray *array,
795                                    uint32_t bit, uint32_t count)
796 {
797    VkResult result = dzn_cmd_buffer_dynbitset_reserve(cmdbuf, array, bit + count - 1);
798    if (result != VK_SUCCESS)
799       return result;
800 
801    BITSET_SET_RANGE(util_dynarray_element(array, BITSET_WORD, 0), bit, bit + count - 1);
802    return VK_SUCCESS;
803 }
804 
805 static void
dzn_cmd_buffer_dynbitset_clear_range(struct dzn_cmd_buffer * cmdbuf,struct util_dynarray * array,uint32_t bit,uint32_t count)806 dzn_cmd_buffer_dynbitset_clear_range(struct dzn_cmd_buffer *cmdbuf,
807                                      struct util_dynarray *array,
808                                      uint32_t bit, uint32_t count)
809 {
810    uint32_t nbits = util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS;
811 
812    if (!nbits)
813       return;
814 
815    uint32_t end = MIN2(bit + count, nbits) - 1;
816 
817    while (bit <= end) {
818       uint32_t subcount = MIN2(end + 1 - bit, 32 - (bit % 32));
819       BITSET_CLEAR_RANGE(util_dynarray_element(array, BITSET_WORD, 0), bit, bit + subcount - 1);
820       bit += subcount;
821    }
822 }
823 
824 static struct dzn_cmd_buffer_query_pool_state *
dzn_cmd_buffer_create_query_pool_state(struct dzn_cmd_buffer * cmdbuf)825 dzn_cmd_buffer_create_query_pool_state(struct dzn_cmd_buffer *cmdbuf)
826 {
827    struct dzn_cmd_buffer_query_pool_state *state =
828       vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*state),
829                8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
830    if (!state) {
831       vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
832       return NULL;
833    }
834 
835    util_dynarray_init(&state->reset, NULL);
836    util_dynarray_init(&state->collect, NULL);
837    util_dynarray_init(&state->signal, NULL);
838    util_dynarray_init(&state->zero, NULL);
839    return state;
840 }
841 
842 static void
dzn_cmd_buffer_destroy_query_pool_state(struct dzn_cmd_buffer * cmdbuf,struct dzn_cmd_buffer_query_pool_state * state)843 dzn_cmd_buffer_destroy_query_pool_state(struct dzn_cmd_buffer *cmdbuf,
844                                         struct dzn_cmd_buffer_query_pool_state *state)
845 {
846    util_dynarray_fini(&state->reset);
847    util_dynarray_fini(&state->collect);
848    util_dynarray_fini(&state->signal);
849    util_dynarray_fini(&state->zero);
850    vk_free(&cmdbuf->vk.pool->alloc, state);
851 }
852 
853 static struct dzn_cmd_buffer_query_pool_state *
dzn_cmd_buffer_get_query_pool_state(struct dzn_cmd_buffer * cmdbuf,struct dzn_query_pool * qpool)854 dzn_cmd_buffer_get_query_pool_state(struct dzn_cmd_buffer *cmdbuf,
855                                     struct dzn_query_pool *qpool)
856 {
857    struct dzn_cmd_buffer_query_pool_state *state = NULL;
858    struct hash_entry *he =
859       _mesa_hash_table_search(cmdbuf->queries.ht, qpool);
860 
861    if (!he) {
862       state = dzn_cmd_buffer_create_query_pool_state(cmdbuf);
863       if (!state)
864          return NULL;
865 
866       he = _mesa_hash_table_insert(cmdbuf->queries.ht, qpool, state);
867       if (!he) {
868          dzn_cmd_buffer_destroy_query_pool_state(cmdbuf, state);
869          vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
870          return NULL;
871       }
872    } else {
873       state = he->data;
874    }
875 
876    return state;
877 }
878 
/* Materialize pending query results for [first_query, first_query+query_count)
 * of `qpool` into the pool's collect buffer:
 *  1. resolve heap queries flagged in state->collect into resolve_buffer,
 *  2. overwrite regions flagged in state->zero with zeros (multi-view queries
 *     for views other than the first),
 *  3. copy the whole range resolve_buffer -> collect_buffer,
 *  4. write "all ones" availability markers for every processed query.
 * Processed bits move from collect/zero into state->signal. Emits the
 * required copy barriers around resolve_buffer on both barrier paths.
 */
static VkResult
dzn_cmd_buffer_collect_queries(struct dzn_cmd_buffer *cmdbuf,
                               const struct dzn_query_pool *qpool,
                               struct dzn_cmd_buffer_query_pool_state *state,
                               uint32_t first_query,
                               uint32_t query_count)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   uint32_t nbits_collect = util_dynarray_num_elements(&state->collect, BITSET_WORD) * BITSET_WORDBITS;
   uint32_t nbits_zero = util_dynarray_num_elements(&state->zero, BITSET_WORD) * BITSET_WORDBITS;
   uint32_t start, end;

   /* Nothing was ever flagged for collection or zeroing: no work to do. */
   if (!nbits_collect && !nbits_zero)
      return VK_SUCCESS;

   /* Clamp the requested range to the bits actually tracked so the range
    * walks below never read past the allocated bitset words. */
   query_count = MIN2(query_count, MAX2(nbits_collect, nbits_zero) - first_query);
   nbits_collect = MIN2(first_query + query_count, nbits_collect);
   nbits_zero = MIN2(first_query + query_count, nbits_zero);

   /* Reserve signal-bitset space up front so the set_range calls at the
    * bottom cannot fail mid-way through command emission. */
   VkResult result =
      dzn_cmd_buffer_dynbitset_reserve(cmdbuf, &state->signal, first_query + query_count - 1);
   if (result != VK_SUCCESS)
      return result;

   if (cmdbuf->enhanced_barriers) {
      /* A global barrier is used because both resolve_buffer and collect_buffer might have been
       * copied from recently, and it's not worth the effort to track whether that's true. */
      dzn_cmd_buffer_global_barrier(cmdbuf,
                                    D3D12_BARRIER_SYNC_COPY, D3D12_BARRIER_SYNC_COPY,
                                    D3D12_BARRIER_ACCESS_COPY_SOURCE, D3D12_BARRIER_ACCESS_COPY_DEST);
   } else {
      dzn_cmd_buffer_flush_transition_barriers(cmdbuf, qpool->resolve_buffer, 0, 1);
   }

   /* Resolve the valid query regions into the resolve buffer */
   BITSET_WORD *collect =
      util_dynarray_element(&state->collect, BITSET_WORD, 0);

   /* One ResolveQueryData() call per contiguous run of collect bits. */
   for (start = first_query, end = first_query,
        __bitset_next_range(&start, &end, collect, nbits_collect);
        start < nbits_collect;
        __bitset_next_range(&start, &end, collect, nbits_collect)) {
      ID3D12GraphicsCommandList1_ResolveQueryData(cmdbuf->cmdlist,
                                                  qpool->heap,
                                                  qpool->queries[start].type,
                                                  start, end - start,
                                                  qpool->resolve_buffer,
                                                  qpool->query_size * start);
   }

   /* Zero out sections of the resolve buffer that contain queries for multi-view rendering
    * for views other than the first one. */
   BITSET_WORD *zero =
      util_dynarray_element(&state->zero, BITSET_WORD, 0);
   /* The zero-reference buffer is DZN_QUERY_REFS_SECTION_SIZE bytes, so copy
    * from it in chunks of that many uint64 slots. */
   const uint32_t step = DZN_QUERY_REFS_SECTION_SIZE / sizeof(uint64_t);

   for (start = first_query, end = first_query,
        __bitset_next_range(&start, &end, zero, nbits_zero);
        start < nbits_zero;
        __bitset_next_range(&start, &end, zero, nbits_zero)) {
      uint32_t count = end - start;

      for (unsigned i = 0; i < count; i += step) {
         uint32_t sub_count = MIN2(step, count - i);

         ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist,
                                                     qpool->resolve_buffer,
                                                     dzn_query_pool_get_result_offset(qpool, start + i),
                                                     device->queries.refs,
                                                     DZN_QUERY_REFS_ALL_ZEROS_OFFSET,
                                                     qpool->query_size * sub_count);
      }
   }

   uint32_t offset = dzn_query_pool_get_result_offset(qpool, first_query);
   uint32_t size = dzn_query_pool_get_result_size(qpool, query_count);

   /* resolve_buffer now switches roles: it was a copy/resolve destination
    * above and becomes the copy source for the collect-buffer copy below. */
   if (cmdbuf->enhanced_barriers) {
      dzn_cmd_buffer_buffer_barrier(cmdbuf,
                                    qpool->resolve_buffer,
                                    D3D12_BARRIER_SYNC_COPY, D3D12_BARRIER_SYNC_COPY,
                                    D3D12_BARRIER_ACCESS_COPY_DEST, D3D12_BARRIER_ACCESS_COPY_SOURCE);
   } else {
      dzn_cmd_buffer_queue_transition_barriers(cmdbuf, qpool->resolve_buffer,
                                               0, 1,
                                               D3D12_RESOURCE_STATE_COPY_DEST,
                                               D3D12_RESOURCE_STATE_COPY_SOURCE,
                                               DZN_QUEUE_TRANSITION_FLUSH);
   }

   ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist,
                                               qpool->collect_buffer, offset,
                                               qpool->resolve_buffer, offset,
                                               size);

   /* For every query that was resolved or zeroed, write the "available"
    * marker (all ones) into the collect buffer, then flip its tracking bit
    * from collect/zero to signal. */
   struct query_pass_data {
      struct util_dynarray *dynarray;
      BITSET_WORD *bitset;
      uint32_t count;
   } passes[] = {
      { &state->collect, collect, nbits_collect },
      { &state->zero, zero, nbits_zero }
   };
   for (uint32_t pass = 0; pass < ARRAY_SIZE(passes); ++pass) {
      BITSET_WORD *bitset = passes[pass].bitset;
      uint32_t nbits = passes[pass].count;
      for (start = first_query, end = first_query,
           __bitset_next_range(&start, &end, bitset, nbits);
           start < nbits;
           __bitset_next_range(&start, &end, bitset, nbits)) {
         uint32_t step = DZN_QUERY_REFS_SECTION_SIZE / sizeof(uint64_t);
         uint32_t count = end - start;

         for (unsigned i = 0; i < count; i += step) {
            uint32_t sub_count = MIN2(step, count - i);

            ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist,
                                                        qpool->collect_buffer,
                                                        dzn_query_pool_get_availability_offset(qpool, start + i),
                                                        device->queries.refs,
                                                        DZN_QUERY_REFS_ALL_ONES_OFFSET,
                                                        sizeof(uint64_t) * sub_count);
         }

         dzn_cmd_buffer_dynbitset_set_range(cmdbuf, &state->signal, start, count);
         dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, passes[pass].dynarray, start, count);
      }
   }

   /* Legacy-barrier path: return resolve_buffer to its steady-state
    * COPY_DEST layout for the next collection. Deferred (no FLUSH flag). */
   if (!cmdbuf->enhanced_barriers) {
      dzn_cmd_buffer_queue_transition_barriers(cmdbuf, qpool->resolve_buffer,
                                               0, 1,
                                               D3D12_RESOURCE_STATE_COPY_SOURCE,
                                               D3D12_RESOURCE_STATE_COPY_DEST,
                                               0);
   }
   return VK_SUCCESS;
}
1017 
1018 static VkResult
dzn_cmd_buffer_collect_query_ops(struct dzn_cmd_buffer * cmdbuf,struct dzn_query_pool * qpool,struct util_dynarray * bitset_array,struct util_dynarray * ops_array)1019 dzn_cmd_buffer_collect_query_ops(struct dzn_cmd_buffer *cmdbuf,
1020                                  struct dzn_query_pool *qpool,
1021                                  struct util_dynarray *bitset_array,
1022                                  struct util_dynarray *ops_array)
1023 {
1024    BITSET_WORD *bitset = util_dynarray_element(bitset_array, BITSET_WORD, 0);
1025    uint32_t nbits = util_dynarray_num_elements(bitset_array, BITSET_WORD) * BITSET_WORDBITS;
1026    uint32_t start, end;
1027 
1028    BITSET_FOREACH_RANGE(start, end, bitset, nbits) {
1029       struct dzn_cmd_buffer_query_range range = { qpool, start, end - start };
1030       struct dzn_cmd_buffer_query_range *entry =
1031          util_dynarray_grow(ops_array, struct dzn_cmd_buffer_query_range, 1);
1032 
1033       if (!entry)
1034          return vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
1035 
1036       *entry = range;
1037    }
1038 
1039    return VK_SUCCESS;
1040 }
1041 
1042 static VkResult
dzn_cmd_buffer_gather_queries(struct dzn_cmd_buffer * cmdbuf)1043 dzn_cmd_buffer_gather_queries(struct dzn_cmd_buffer *cmdbuf)
1044 {
1045    hash_table_foreach(cmdbuf->queries.ht, he) {
1046       struct dzn_query_pool *qpool = (struct dzn_query_pool *)he->key;
1047       struct dzn_cmd_buffer_query_pool_state *state = he->data;
1048       VkResult result =
1049          dzn_cmd_buffer_collect_queries(cmdbuf, qpool, state, 0, qpool->query_count);
1050       if (result != VK_SUCCESS)
1051          return result;
1052 
1053       result = dzn_cmd_buffer_collect_query_ops(cmdbuf, qpool, &state->reset, &cmdbuf->queries.reset);
1054       if (result != VK_SUCCESS)
1055          return result;
1056 
1057       result = dzn_cmd_buffer_collect_query_ops(cmdbuf, qpool, &state->signal, &cmdbuf->queries.signal);
1058       if (result != VK_SUCCESS)
1059          return result;
1060    }
1061 
1062    return VK_SUCCESS;
1063 }
1064 
1065 VKAPI_ATTR VkResult VKAPI_CALL
dzn_EndCommandBuffer(VkCommandBuffer commandBuffer)1066 dzn_EndCommandBuffer(VkCommandBuffer commandBuffer)
1067 {
1068    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
1069 
1070    if (cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
1071       dzn_cmd_buffer_gather_events(cmdbuf);
1072       dzn_cmd_buffer_gather_queries(cmdbuf);
1073       HRESULT hres = ID3D12GraphicsCommandList1_Close(cmdbuf->cmdlist);
1074       if (FAILED(hres))
1075          vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
1076    }
1077 
1078    return vk_command_buffer_end(&cmdbuf->vk);
1079 }
1080 
/* Legacy (non-enhanced) barrier implementation of vkCmdPipelineBarrier2.
 * The D3D12 ResourceBarrier API has no notion of execution scopes, so the
 * Vulkan src/dst stage masks are approximated:
 *  - pure execution dependency -> NULL UAV barrier (pipeline flush),
 *  - global memory barrier     -> NULL UAV + NULL aliasing barrier,
 *  - buffer barrier            -> per-resource UAV barrier (storage buffers only),
 *  - image barrier             -> queued layout/state transition.
 */
VKAPI_ATTR void VKAPI_CALL
dzn_CmdPipelineBarrier2(VkCommandBuffer commandBuffer,
                        const VkDependencyInfo *info)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   /* No memory barriers at all means the app only wants an execution
    * dependency. */
   bool execution_barrier =
      !info->memoryBarrierCount &&
      !info->bufferMemoryBarrierCount &&
      !info->imageMemoryBarrierCount;

   if (execution_barrier) {
      /* Execution barrier can be emulated with a NULL UAV barrier (AKA
       * pipeline flush). That's the best we can do with the standard D3D12
       * barrier API.
       */
      D3D12_RESOURCE_BARRIER barrier = {
         .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV,
         .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
         .UAV = { .pResource = NULL },
      };

      ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier);
   }

   /* Global memory barriers can be emulated with NULL UAV/Aliasing barriers.
    * Scopes are not taken into account, but that's inherent to the current
    * D3D12 barrier API.
    */
   if (info->memoryBarrierCount) {
      D3D12_RESOURCE_BARRIER barriers[2] = { 0 };

      barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
      barriers[0].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
      barriers[0].UAV.pResource = NULL;
      barriers[1].Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING;
      barriers[1].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
      barriers[1].Aliasing.pResourceBefore = NULL;
      barriers[1].Aliasing.pResourceAfter = NULL;
      ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 2, barriers);
   }

   for (uint32_t i = 0; i < info->bufferMemoryBarrierCount; i++) {
      VK_FROM_HANDLE(dzn_buffer, buf, info->pBufferMemoryBarriers[i].buffer);
      D3D12_RESOURCE_BARRIER barrier = { 0 };

      /* UAV are used only for storage buffers, skip all other buffers. */
      if (!(buf->usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT))
         continue;

      barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
      barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
      barrier.UAV.pResource = buf->res;
      ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier);
   }

   for (uint32_t i = 0; i < info->imageMemoryBarrierCount; i++) {
      const VkImageMemoryBarrier2 *ibarrier = &info->pImageMemoryBarriers[i];
      const VkImageSubresourceRange *range = &ibarrier->subresourceRange;
      VK_FROM_HANDLE(dzn_image, image, ibarrier->image);

      /* GENERAL maps to a D3D state too weak for depth/stencil writes, so
       * when the access mask shows the app is writing a depth/stencil
       * attachment, promote GENERAL to DEPTH_STENCIL_ATTACHMENT_OPTIMAL on
       * that side of the transition. */
      VkImageLayout old_layout = ibarrier->oldLayout;
      VkImageLayout new_layout = ibarrier->newLayout;
      if ((image->vk.usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) &&
          old_layout == VK_IMAGE_LAYOUT_GENERAL &&
          (ibarrier->srcAccessMask & VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT))
         old_layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
      if ((image->vk.usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) &&
          new_layout == VK_IMAGE_LAYOUT_GENERAL &&
          (ibarrier->dstAccessMask & VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT))
         new_layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
      dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
                                                         old_layout,
                                                         new_layout,
                                                         DZN_QUEUE_TRANSITION_FLUSH);
   }
}
1158 
/* A straightforward translation of the Vulkan sync flags to D3D sync flags.
 *
 * `before` indicates which side of the dependency these flags describe:
 * true for the first (source) scope, false for the second (destination)
 * scope. TOP_OF_PIPE as a destination and BOTTOM_OF_PIPE as a source are
 * widened to SYNC_ALL — presumably because the D3D12 enhanced-barrier
 * validation disallows SYNC_NONE when accesses are attached (NOTE(review):
 * confirm against the enhanced-barriers spec).
 */
static D3D12_BARRIER_SYNC
translate_sync(VkPipelineStageFlags2 flags, bool before)
{
   /* Degenerate scopes that must cover the whole pipeline. */
   if (!before && (flags & VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT))
      return D3D12_BARRIER_SYNC_ALL;
   else if (before && (flags & VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT))
      return D3D12_BARRIER_SYNC_ALL;

   if (flags & (VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT |
                /* Theoretically transfer should be less, but it encompasses blit
                 * (which can be draws) and clears, so bloat it up to everything. */
                VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT |
                VK_PIPELINE_STAGE_2_BLIT_BIT))
      return D3D12_BARRIER_SYNC_ALL;

   /* Otherwise OR together the D3D sync stage for each Vulkan stage bit. */
   D3D12_BARRIER_SYNC ret = D3D12_BARRIER_SYNC_NONE;
   if (flags & (VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT |
                VK_PIPELINE_STAGE_2_INDEX_INPUT_BIT |
                VK_PIPELINE_STAGE_2_VERTEX_ATTRIBUTE_INPUT_BIT))
      ret |= D3D12_BARRIER_SYNC_INDEX_INPUT;
   if (flags & VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT)
      ret |= D3D12_BARRIER_SYNC_VERTEX_SHADING;
   if (flags & (VK_PIPELINE_STAGE_2_TESSELLATION_CONTROL_SHADER_BIT |
                VK_PIPELINE_STAGE_2_TESSELLATION_EVALUATION_SHADER_BIT |
                VK_PIPELINE_STAGE_2_GEOMETRY_SHADER_BIT |
                VK_PIPELINE_STAGE_2_PRE_RASTERIZATION_SHADERS_BIT |
                VK_PIPELINE_STAGE_2_TRANSFORM_FEEDBACK_BIT_EXT |
                VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_EXT |
                VK_PIPELINE_STAGE_2_MESH_SHADER_BIT_EXT))
      ret |= D3D12_BARRIER_SYNC_NON_PIXEL_SHADING;
   if (flags & (VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT |
                VK_PIPELINE_STAGE_2_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR))
      ret |= D3D12_BARRIER_SYNC_PIXEL_SHADING;
   if (flags & (VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT |
                VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT))
      ret |= D3D12_BARRIER_SYNC_DEPTH_STENCIL;
   if (flags & VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT)
      ret |= D3D12_BARRIER_SYNC_RENDER_TARGET;
   if (flags & VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)
      ret |= D3D12_BARRIER_SYNC_COMPUTE_SHADING;
   if (flags & VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT)
      ret |= D3D12_BARRIER_SYNC_DRAW;
   if (flags & VK_PIPELINE_STAGE_2_COPY_BIT)
      ret |= D3D12_BARRIER_SYNC_COPY;
   if (flags & VK_PIPELINE_STAGE_2_RESOLVE_BIT)
      ret |= D3D12_BARRIER_SYNC_RESOLVE;
   /* Vulkan CLEAR covers render-target, depth/stencil and UAV clears. */
   if (flags & VK_PIPELINE_STAGE_2_CLEAR_BIT)
      ret |= D3D12_BARRIER_SYNC_RENDER_TARGET |
             D3D12_BARRIER_SYNC_DEPTH_STENCIL |
             D3D12_BARRIER_SYNC_CLEAR_UNORDERED_ACCESS_VIEW;
   if (flags & VK_PIPELINE_STAGE_2_CONDITIONAL_RENDERING_BIT_EXT)
      ret |= D3D12_BARRIER_SYNC_PREDICATION;
   if (flags & (VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT |
                VK_PIPELINE_STAGE_2_COMMAND_PREPROCESS_BIT_NV))
      ret |= D3D12_BARRIER_SYNC_EXECUTE_INDIRECT;
   if (flags & VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR)
      ret |= D3D12_BARRIER_SYNC_BUILD_RAYTRACING_ACCELERATION_STRUCTURE;
   if (flags & VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR)
      ret |= D3D12_BARRIER_SYNC_RAYTRACING;
   if (flags & VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_COPY_BIT_KHR)
      ret |= D3D12_BARRIER_SYNC_COPY_RAYTRACING_ACCELERATION_STRUCTURE;

   return ret;
}
1224 
/* A straightforward translation of Vulkan access to D3D access.
 *
 * Several Vulkan bits fan out to multiple D3D access bits because one
 * Vulkan access can hit several D3D resource kinds (e.g. TRANSFER_WRITE can
 * target render targets, depth/stencil, UAVs, copy or resolve destinations).
 * The result may be wider than strictly necessary; callers mask it down
 * with valid_access_for_layout() for texture barriers.
 */
static D3D12_BARRIER_ACCESS
translate_access(VkAccessFlags2 flags)
{
   D3D12_BARRIER_ACCESS ret = D3D12_BARRIER_ACCESS_COMMON;
   if (flags & VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT)
      ret |= D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT;
   if (flags & VK_ACCESS_2_INDEX_READ_BIT)
      ret |= D3D12_BARRIER_ACCESS_INDEX_BUFFER;
   if (flags & VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT)
      ret |= D3D12_BARRIER_ACCESS_VERTEX_BUFFER;
   if (flags & VK_ACCESS_2_UNIFORM_READ_BIT)
      ret |= D3D12_BARRIER_ACCESS_CONSTANT_BUFFER;
   if (flags & (VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT |
                VK_ACCESS_2_SHADER_SAMPLED_READ_BIT))
      ret |= D3D12_BARRIER_ACCESS_SHADER_RESOURCE;
   /* Generic SHADER_READ covers CBVs, SRVs and UAV reads. */
   if (flags & VK_ACCESS_2_SHADER_READ_BIT)
      ret |= D3D12_BARRIER_ACCESS_CONSTANT_BUFFER |
             D3D12_BARRIER_ACCESS_SHADER_RESOURCE |
             D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
   if (flags & (VK_ACCESS_2_SHADER_WRITE_BIT |
                VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
                VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT))
      ret |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
   /* Color attachments can also be resolve sources/destinations. */
   if (flags & VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT)
      ret |= D3D12_BARRIER_ACCESS_RENDER_TARGET |
             D3D12_BARRIER_ACCESS_RESOLVE_SOURCE;
   if (flags & VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT)
      ret |= D3D12_BARRIER_ACCESS_RENDER_TARGET |
             D3D12_BARRIER_ACCESS_RESOLVE_DEST;
   if (flags & VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT)
      ret |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ;
   if (flags & VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT)
      ret |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE;
   if (flags & VK_ACCESS_2_TRANSFER_READ_BIT)
      ret |= D3D12_BARRIER_ACCESS_COPY_SOURCE |
             D3D12_BARRIER_ACCESS_RESOLVE_SOURCE;
   /* Transfer writes include blits (draws) and clears, hence the extra
    * render-target/depth-stencil/UAV bits. */
   if (flags & VK_ACCESS_2_TRANSFER_WRITE_BIT)
      ret |= D3D12_BARRIER_ACCESS_RENDER_TARGET |
             D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE |
             D3D12_BARRIER_ACCESS_UNORDERED_ACCESS |
             D3D12_BARRIER_ACCESS_COPY_DEST |
             D3D12_BARRIER_ACCESS_RESOLVE_DEST;
   /* Catch-all MEMORY_READ/WRITE expand to every matching D3D access. */
   if (flags & VK_ACCESS_2_MEMORY_READ_BIT)
      ret |= D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT |
             D3D12_BARRIER_ACCESS_INDEX_BUFFER |
             D3D12_BARRIER_ACCESS_VERTEX_BUFFER |
             D3D12_BARRIER_ACCESS_CONSTANT_BUFFER |
             D3D12_BARRIER_ACCESS_SHADER_RESOURCE |
             D3D12_BARRIER_ACCESS_UNORDERED_ACCESS |
             D3D12_BARRIER_ACCESS_RENDER_TARGET |
             D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ |
             D3D12_BARRIER_ACCESS_COPY_SOURCE |
             D3D12_BARRIER_ACCESS_RESOLVE_SOURCE;
   if (flags & VK_ACCESS_2_MEMORY_WRITE_BIT)
      ret |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS |
             D3D12_BARRIER_ACCESS_RENDER_TARGET |
             D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE |
             D3D12_BARRIER_ACCESS_COPY_DEST |
             D3D12_BARRIER_ACCESS_RESOLVE_DEST;
   if (flags & (VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT |
                VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT |
                VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT))
      ret |= D3D12_BARRIER_ACCESS_STREAM_OUTPUT;
   if (flags & VK_ACCESS_2_CONDITIONAL_RENDERING_READ_BIT_EXT)
      ret |= D3D12_BARRIER_ACCESS_PREDICATION;
   if (flags & VK_ACCESS_2_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT_KHR)
      ret |= D3D12_BARRIER_ACCESS_SHADING_RATE_SOURCE;
   if (flags & VK_ACCESS_2_ACCELERATION_STRUCTURE_READ_BIT_KHR)
      ret |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ;
   if (flags & VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR)
      ret |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_WRITE;
   return ret;
}
1299 
1300 /* For texture barriers, D3D will validate that the access flags used are actually
1301  * things that were valid for the specified layout. Use the mask returned from here
1302  * to scope down the set of app-provided access flags to make validation happy. */
1303 static D3D12_BARRIER_ACCESS
valid_access_for_layout(D3D12_BARRIER_LAYOUT layout)1304 valid_access_for_layout(D3D12_BARRIER_LAYOUT layout)
1305 {
1306    switch (layout) {
1307    case D3D12_BARRIER_LAYOUT_UNDEFINED:
1308       return D3D12_BARRIER_ACCESS_NO_ACCESS;
1309    case D3D12_BARRIER_LAYOUT_COMMON:
1310       return D3D12_BARRIER_ACCESS_SHADER_RESOURCE |
1311              D3D12_BARRIER_ACCESS_COPY_SOURCE |
1312              D3D12_BARRIER_ACCESS_COPY_DEST;
1313    case D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON:
1314    case D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON:
1315       return D3D12_BARRIER_ACCESS_SHADER_RESOURCE |
1316              D3D12_BARRIER_ACCESS_COPY_SOURCE |
1317              D3D12_BARRIER_ACCESS_COPY_DEST |
1318              D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
1319    case D3D12_BARRIER_LAYOUT_GENERIC_READ:
1320    case D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ:
1321       return D3D12_BARRIER_ACCESS_SHADER_RESOURCE |
1322              D3D12_BARRIER_ACCESS_COPY_SOURCE |
1323              D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ |
1324              D3D12_BARRIER_ACCESS_RESOLVE_SOURCE |
1325              D3D12_BARRIER_ACCESS_SHADING_RATE_SOURCE;
1326    case D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_GENERIC_READ:
1327       return D3D12_BARRIER_ACCESS_SHADER_RESOURCE|
1328              D3D12_BARRIER_ACCESS_COPY_SOURCE;
1329    case D3D12_BARRIER_LAYOUT_RENDER_TARGET:
1330       return D3D12_BARRIER_ACCESS_RENDER_TARGET;
1331    case D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS:
1332    case D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_UNORDERED_ACCESS:
1333    case D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_UNORDERED_ACCESS:
1334       return D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
1335    case D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE:
1336       return D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE;
1337    case D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_READ:
1338       return D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ;
1339    case D3D12_BARRIER_LAYOUT_SHADER_RESOURCE:
1340    case D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_SHADER_RESOURCE:
1341    case D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_SHADER_RESOURCE:
1342       return D3D12_BARRIER_ACCESS_SHADER_RESOURCE;
1343    case D3D12_BARRIER_LAYOUT_COPY_SOURCE:
1344    case D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_SOURCE:
1345    case D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_SOURCE:
1346       return D3D12_BARRIER_ACCESS_COPY_SOURCE;
1347    case D3D12_BARRIER_LAYOUT_COPY_DEST:
1348    case D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_DEST:
1349    case D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_DEST:
1350       return D3D12_BARRIER_ACCESS_COPY_DEST;
1351    case D3D12_BARRIER_LAYOUT_RESOLVE_SOURCE:
1352       return D3D12_BARRIER_ACCESS_RESOLVE_SOURCE;
1353    case D3D12_BARRIER_LAYOUT_RESOLVE_DEST:
1354       return D3D12_BARRIER_ACCESS_RESOLVE_DEST;
1355    case D3D12_BARRIER_LAYOUT_SHADING_RATE_SOURCE:
1356       return D3D12_BARRIER_ACCESS_SHADING_RATE_SOURCE;
1357    default:
1358       return D3D12_BARRIER_ACCESS_COMMON;
1359    }
1360 }
1361 
1362 /* Similar to layout -> access, there's access -> sync validation too. D3D
1363  * doesn't like over-synchronizing if you weren't accessing a resource through
1364  * a relevant access bit. */
1365 static D3D12_BARRIER_SYNC
adjust_sync_for_access(D3D12_BARRIER_SYNC in,D3D12_BARRIER_ACCESS access)1366 adjust_sync_for_access(D3D12_BARRIER_SYNC in, D3D12_BARRIER_ACCESS access)
1367 {
1368    /* NO_ACCESS must not add sync */
1369    if (access == D3D12_BARRIER_ACCESS_NO_ACCESS)
1370       return D3D12_BARRIER_SYNC_NONE;
1371    /* SYNC_ALL can be used with any access bits */
1372    if (in == D3D12_BARRIER_SYNC_ALL)
1373       return in;
1374    /* ACCESS_COMMON needs at least one sync bit */
1375    if (access == D3D12_BARRIER_ACCESS_COMMON)
1376       return in == D3D12_BARRIER_SYNC_NONE ? D3D12_BARRIER_SYNC_ALL : in;
1377 
1378    D3D12_BARRIER_SYNC out = D3D12_BARRIER_SYNC_NONE;
1379    if (access & D3D12_BARRIER_ACCESS_VERTEX_BUFFER)
1380       out |= in & (D3D12_BARRIER_SYNC_VERTEX_SHADING |
1381                    D3D12_BARRIER_SYNC_DRAW |
1382                    D3D12_BARRIER_SYNC_ALL_SHADING |
1383                    D3D12_BARRIER_SYNC_NON_PIXEL_SHADING);
1384    if (access & D3D12_BARRIER_ACCESS_CONSTANT_BUFFER)
1385       out |= in & (D3D12_BARRIER_SYNC_VERTEX_SHADING |
1386                    D3D12_BARRIER_SYNC_PIXEL_SHADING |
1387                    D3D12_BARRIER_SYNC_COMPUTE_SHADING |
1388                    D3D12_BARRIER_SYNC_DRAW |
1389                    D3D12_BARRIER_SYNC_ALL_SHADING |
1390                    D3D12_BARRIER_SYNC_NON_PIXEL_SHADING);
1391    if (access & D3D12_BARRIER_ACCESS_INDEX_BUFFER)
1392       out |= in & D3D12_BARRIER_SYNC_INDEX_INPUT;
1393    if (access & D3D12_BARRIER_ACCESS_RENDER_TARGET)
1394       out |= in & D3D12_BARRIER_SYNC_RENDER_TARGET;
1395    if (access & D3D12_BARRIER_ACCESS_UNORDERED_ACCESS)
1396       out |= in & (D3D12_BARRIER_SYNC_VERTEX_SHADING |
1397                    D3D12_BARRIER_SYNC_PIXEL_SHADING |
1398                    D3D12_BARRIER_SYNC_COMPUTE_SHADING |
1399                    D3D12_BARRIER_SYNC_DRAW |
1400                    D3D12_BARRIER_SYNC_ALL_SHADING |
1401                    D3D12_BARRIER_SYNC_NON_PIXEL_SHADING);
1402    if (access & D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE)
1403       out |= in & (D3D12_BARRIER_SYNC_DRAW |
1404                    D3D12_BARRIER_SYNC_DEPTH_STENCIL);
1405    if (access & D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ)
1406       out |= in & (D3D12_BARRIER_SYNC_DRAW |
1407                    D3D12_BARRIER_SYNC_DEPTH_STENCIL);
1408    if (access & D3D12_BARRIER_ACCESS_SHADER_RESOURCE)
1409       out |= in & (D3D12_BARRIER_SYNC_VERTEX_SHADING |
1410                    D3D12_BARRIER_SYNC_PIXEL_SHADING |
1411                    D3D12_BARRIER_SYNC_COMPUTE_SHADING |
1412                    D3D12_BARRIER_SYNC_DRAW |
1413                    D3D12_BARRIER_SYNC_ALL_SHADING |
1414                    D3D12_BARRIER_SYNC_NON_PIXEL_SHADING);
1415    if (access & D3D12_BARRIER_ACCESS_STREAM_OUTPUT)
1416       out |= in & (D3D12_BARRIER_SYNC_VERTEX_SHADING |
1417                    D3D12_BARRIER_SYNC_DRAW |
1418                    D3D12_BARRIER_SYNC_ALL_SHADING |
1419                    D3D12_BARRIER_SYNC_NON_PIXEL_SHADING);
1420    if (access & D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT)
1421       out |= in & (D3D12_BARRIER_SYNC_DRAW |
1422                    D3D12_BARRIER_SYNC_EXECUTE_INDIRECT);
1423    if (access & D3D12_BARRIER_ACCESS_PREDICATION)
1424       out |= in & (D3D12_BARRIER_SYNC_DRAW |
1425                    D3D12_BARRIER_SYNC_EXECUTE_INDIRECT);
1426    if (access & (D3D12_BARRIER_ACCESS_COPY_DEST | D3D12_BARRIER_ACCESS_COPY_SOURCE))
1427       out |= in & D3D12_BARRIER_SYNC_COPY;
1428    if (access & (D3D12_BARRIER_ACCESS_RESOLVE_DEST | D3D12_BARRIER_ACCESS_RESOLVE_SOURCE))
1429       out |= in & D3D12_BARRIER_SYNC_RESOLVE;
1430    if (access & D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ)
1431       out |= in & (D3D12_BARRIER_SYNC_COMPUTE_SHADING |
1432                    D3D12_BARRIER_SYNC_RAYTRACING |
1433                    D3D12_BARRIER_SYNC_ALL_SHADING |
1434                    D3D12_BARRIER_SYNC_BUILD_RAYTRACING_ACCELERATION_STRUCTURE |
1435                    D3D12_BARRIER_SYNC_COPY_RAYTRACING_ACCELERATION_STRUCTURE |
1436                    D3D12_BARRIER_SYNC_EMIT_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO |
1437                    D3D12_BARRIER_SYNC_NON_PIXEL_SHADING);
1438    if (access & D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_WRITE)
1439       out |= in & (D3D12_BARRIER_SYNC_COMPUTE_SHADING |
1440                    D3D12_BARRIER_SYNC_RAYTRACING |
1441                    D3D12_BARRIER_SYNC_ALL_SHADING |
1442                    D3D12_BARRIER_SYNC_BUILD_RAYTRACING_ACCELERATION_STRUCTURE |
1443                    D3D12_BARRIER_SYNC_COPY_RAYTRACING_ACCELERATION_STRUCTURE |
1444                    D3D12_BARRIER_SYNC_NON_PIXEL_SHADING);
1445    if (access & D3D12_BARRIER_ACCESS_SHADING_RATE_SOURCE)
1446       out |= in & (D3D12_BARRIER_SYNC_PIXEL_SHADING |
1447                    D3D12_BARRIER_SYNC_ALL_SHADING);
1448    /* SYNC_NONE means it won't be accessed, so if we can't express the app's original intent
1449     * here, then be conservative and over-sync. */
1450    return out ? out : D3D12_BARRIER_SYNC_ALL;
1451 }
1452 
/* vkCmdPipelineBarrier2 implementation for devices with D3D12 enhanced
 * barriers. Vulkan memory barriers become D3D12 global barriers, buffer
 * barriers map 1:1, and image barriers become texture barriers — except that
 * linear-tiled images are lowered to buffer barriers, and some depth/stencil
 * layouts require the barrier to be split into one barrier per aspect. */
VKAPI_ATTR void VKAPI_CALL
dzn_CmdPipelineBarrier2_enhanced(VkCommandBuffer commandBuffer,
                                 const VkDependencyInfo *info)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   uint32_t num_barrier_groups = 0;
   /* At most one group per D3D12 barrier type: global, buffer, texture. */
   D3D12_BARRIER_GROUP groups[3];

   /* Some input image barriers will expand into 2 outputs, and some will turn into buffer barriers.
    * Do a first pass and count how much we need to allocate. */
   uint32_t num_image_barriers = 0;
   uint32_t num_buffer_barriers = info->bufferMemoryBarrierCount;
   for (uint32_t i = 0; i < info->imageMemoryBarrierCount; ++i) {
      VK_FROM_HANDLE(dzn_image, image, info->pImageMemoryBarriers[i].image);
      /* Mixed read-only/writable depth+stencil layouts need one barrier per
       * aspect, since the two planes end up in different D3D12 layouts. */
      bool need_separate_aspect_barriers =
         info->pImageMemoryBarriers[i].oldLayout == VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL ||
         info->pImageMemoryBarriers[i].oldLayout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL ||
         info->pImageMemoryBarriers[i].newLayout == VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL ||
         info->pImageMemoryBarriers[i].newLayout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL;
      if (image->vk.tiling == VK_IMAGE_TILING_LINEAR)
         ++num_buffer_barriers;
      else
         num_image_barriers += need_separate_aspect_barriers ? 2 : 1;
   }

   /* All three barrier arrays come out of a single host allocation. */
   VK_MULTIALLOC(ma);
   VK_MULTIALLOC_DECL(&ma, D3D12_GLOBAL_BARRIER, global_barriers, info->memoryBarrierCount);
   VK_MULTIALLOC_DECL(&ma, D3D12_BUFFER_BARRIER, buffer_barriers, num_buffer_barriers);
   VK_MULTIALLOC_DECL(&ma, D3D12_TEXTURE_BARRIER, texture_barriers, num_image_barriers);

   /* Nothing to do if the dependency info carried no barriers at all. */
   if (ma.size == 0)
      return;

   if (!vk_multialloc_alloc(&ma, &cmdbuf->vk.pool->alloc,
                            VK_SYSTEM_ALLOCATION_SCOPE_COMMAND)) {
      vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
      return;
   }

   /* Vulkan memory barriers -> D3D12 global barrier group. */
   if (info->memoryBarrierCount) {
      groups[num_barrier_groups].NumBarriers = info->memoryBarrierCount;
      groups[num_barrier_groups].Type = D3D12_BARRIER_TYPE_GLOBAL;
      groups[num_barrier_groups].pGlobalBarriers = global_barriers;
      ++num_barrier_groups;
      for (uint32_t i = 0; i < info->memoryBarrierCount; ++i) {
         global_barriers[i].SyncBefore = translate_sync(info->pMemoryBarriers[i].srcStageMask, true) & cmdbuf->valid_sync;
         global_barriers[i].SyncAfter = translate_sync(info->pMemoryBarriers[i].dstStageMask, false) & cmdbuf->valid_sync;
         /* SYNC_NONE requires ACCESS_NO_ACCESS on the matching side. */
         global_barriers[i].AccessBefore = global_barriers[i].SyncBefore == D3D12_BARRIER_SYNC_NONE ?
            D3D12_BARRIER_ACCESS_NO_ACCESS :
            translate_access(info->pMemoryBarriers[i].srcAccessMask) & cmdbuf->valid_access;
         global_barriers[i].AccessAfter = global_barriers[i].SyncAfter == D3D12_BARRIER_SYNC_NONE ?
            D3D12_BARRIER_ACCESS_NO_ACCESS :
            translate_access(info->pMemoryBarriers[i].dstAccessMask) & cmdbuf->valid_access;

         if ((global_barriers[i].AccessBefore & D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE) &&
             (global_barriers[i].AccessAfter == D3D12_BARRIER_ACCESS_COMMON ||
              global_barriers[i].AccessAfter & ~(D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE | D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ))) {
            /* D3D validates against a global barrier attempting to transition from depth write to something other than depth write,
             * but this is a D3D bug; it's absolutely valid to use a global barrier to transition *multiple* types of accesses.
             * The validation does say that you'd need an image barrier to actually get that kind of transition, which is still correct,
             * so just remove this bit under the assumption that a dedicated image barrier will be submitted to do any necessary work later. */
            global_barriers[i].AccessBefore &= ~D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE;
         }
         /* ACCESS_COMMON (0) before means "everything"; mirror it after. */
         if (global_barriers[i].AccessBefore == D3D12_BARRIER_ACCESS_COMMON)
            global_barriers[i].AccessAfter = D3D12_BARRIER_ACCESS_COMMON;
         global_barriers[i].SyncBefore = adjust_sync_for_access(global_barriers[i].SyncBefore, global_barriers[i].AccessBefore);
         global_barriers[i].SyncAfter = adjust_sync_for_access(global_barriers[i].SyncAfter, global_barriers[i].AccessAfter);
      }
   }

   /* Vulkan buffer barriers -> D3D12 buffer barrier group. Entries past
    * bufferMemoryBarrierCount are filled by the linear-image loop below. */
   if (num_buffer_barriers) {
      groups[num_barrier_groups].NumBarriers = num_buffer_barriers;
      groups[num_barrier_groups].Type = D3D12_BARRIER_TYPE_BUFFER;
      groups[num_barrier_groups].pBufferBarriers = buffer_barriers;
      ++num_barrier_groups;
      for (uint32_t i = 0; i < info->bufferMemoryBarrierCount; ++i) {
         VK_FROM_HANDLE(dzn_buffer, buf, info->pBufferMemoryBarriers[i].buffer);
         buffer_barriers[i].SyncBefore = translate_sync(info->pBufferMemoryBarriers[i].srcStageMask, true) & cmdbuf->valid_sync;
         buffer_barriers[i].SyncAfter = translate_sync(info->pBufferMemoryBarriers[i].dstStageMask, false) & cmdbuf->valid_sync;
         buffer_barriers[i].AccessBefore = buffer_barriers[i].SyncBefore == D3D12_BARRIER_SYNC_NONE ?
            D3D12_BARRIER_ACCESS_NO_ACCESS :
            translate_access(info->pBufferMemoryBarriers[i].srcAccessMask) & cmdbuf->valid_access & buf->valid_access;
         buffer_barriers[i].AccessAfter = buffer_barriers[i].SyncAfter == D3D12_BARRIER_SYNC_NONE ?
            D3D12_BARRIER_ACCESS_NO_ACCESS :
            translate_access(info->pBufferMemoryBarriers[i].dstAccessMask) & cmdbuf->valid_access & buf->valid_access;
         buffer_barriers[i].SyncBefore = adjust_sync_for_access(buffer_barriers[i].SyncBefore, buffer_barriers[i].AccessBefore);
         buffer_barriers[i].SyncAfter = adjust_sync_for_access(buffer_barriers[i].SyncAfter, buffer_barriers[i].AccessAfter);
         buffer_barriers[i].pResource = buf->res;
         /* Vulkan offset/size are ignored; barrier the whole resource. */
         buffer_barriers[i].Offset = 0;
         buffer_barriers[i].Size = UINT64_MAX;
      }
   }

   if (num_image_barriers) {
      groups[num_barrier_groups].Type = D3D12_BARRIER_TYPE_TEXTURE;
      groups[num_barrier_groups].pTextureBarriers = texture_barriers;
      groups[num_barrier_groups].NumBarriers = num_image_barriers;
      ++num_barrier_groups;
   }

   /* Second pass over image barriers: fill texture barriers, and append
    * linear-image barriers after the real buffer barriers. */
   uint32_t tbar = 0;
   uint32_t bbar = info->bufferMemoryBarrierCount;
   for (uint32_t i = 0; i < info->imageMemoryBarrierCount; ++i) {
      VK_FROM_HANDLE(dzn_image, image, info->pImageMemoryBarriers[i].image);

      if (image->vk.tiling == VK_IMAGE_TILING_LINEAR) {
         /* Barriers on linear images turn into buffer barriers */
         buffer_barriers[bbar].SyncBefore = translate_sync(info->pImageMemoryBarriers[i].srcStageMask, true) & cmdbuf->valid_sync;
         buffer_barriers[bbar].SyncAfter = translate_sync(info->pImageMemoryBarriers[i].dstStageMask, false) & cmdbuf->valid_sync;
         buffer_barriers[bbar].AccessBefore = buffer_barriers[bbar].SyncBefore == D3D12_BARRIER_SYNC_NONE ?
            D3D12_BARRIER_ACCESS_NO_ACCESS :
            translate_access(info->pImageMemoryBarriers[i].srcAccessMask) & cmdbuf->valid_access & image->valid_access;
         buffer_barriers[bbar].AccessAfter = buffer_barriers[bbar].SyncAfter == D3D12_BARRIER_SYNC_NONE ?
            D3D12_BARRIER_ACCESS_NO_ACCESS :
            translate_access(info->pImageMemoryBarriers[i].dstAccessMask) & cmdbuf->valid_access & image->valid_access;
         buffer_barriers[bbar].SyncBefore = adjust_sync_for_access(buffer_barriers[bbar].SyncBefore, buffer_barriers[bbar].AccessBefore);
         buffer_barriers[bbar].SyncAfter = adjust_sync_for_access(buffer_barriers[bbar].SyncAfter, buffer_barriers[bbar].AccessAfter);
         buffer_barriers[bbar].pResource = image->res;
         buffer_barriers[bbar].Offset = 0;
         buffer_barriers[bbar].Size = UINT64_MAX;
         ++bbar;
         continue;
      }

      const VkImageSubresourceRange *range = &info->pImageMemoryBarriers[i].subresourceRange;
      const bool simultaneous_access = image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS;

      /* Same predicate as the counting pass above. */
      bool need_separate_aspect_barriers =
         info->pImageMemoryBarriers[i].oldLayout == VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL ||
         info->pImageMemoryBarriers[i].oldLayout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL ||
         info->pImageMemoryBarriers[i].newLayout == VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL ||
         info->pImageMemoryBarriers[i].newLayout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL;
      uint32_t num_aspects = need_separate_aspect_barriers ? 2 : 1;
      /* When splitting, aspect 0 is color-or-depth and aspect 1 is stencil;
       * otherwise a single barrier covers the whole aspect mask. */
      VkImageAspectFlags aspect_0_mask = need_separate_aspect_barriers ?
         (VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT) : VK_IMAGE_ASPECT_FLAG_BITS_MAX_ENUM;
      VkImageAspectFlags aspects[] = {
         range->aspectMask & aspect_0_mask,
         range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT,
      };

      for (uint32_t aspect_idx = 0; aspect_idx < num_aspects; ++aspect_idx) {
         VkImageAspectFlags aspect = aspects[aspect_idx];
         texture_barriers[tbar].SyncBefore = translate_sync(info->pImageMemoryBarriers[i].srcStageMask, true) & cmdbuf->valid_sync;
         texture_barriers[tbar].SyncAfter = translate_sync(info->pImageMemoryBarriers[i].dstStageMask, false) & cmdbuf->valid_sync;
         const bool queue_ownership_transfer = info->pImageMemoryBarriers[i].srcQueueFamilyIndex != info->pImageMemoryBarriers[i].dstQueueFamilyIndex;
         D3D12_BARRIER_ACCESS layout_before_valid_access = ~0;
         D3D12_BARRIER_ACCESS layout_after_valid_access = ~0;
         if (simultaneous_access) {
            /* Simultaneous access textures never perform layout transitions, and can do any type of access from COMMON layout */
            texture_barriers[tbar].LayoutAfter = texture_barriers[tbar].LayoutBefore = D3D12_BARRIER_LAYOUT_UNDEFINED;
         } else if (queue_ownership_transfer) {
            /* For an ownership transfer, force the foreign layout to COMMON and the matching sync/access to NONE */
            assert(info->pImageMemoryBarriers[i].srcQueueFamilyIndex != VK_QUEUE_FAMILY_IGNORED);
            assert(info->pImageMemoryBarriers[i].dstQueueFamilyIndex != VK_QUEUE_FAMILY_IGNORED);
            const bool is_release = info->pImageMemoryBarriers[i].srcQueueFamilyIndex == cmdbuf->vk.pool->queue_family_index;
            const bool is_acquire = info->pImageMemoryBarriers[i].dstQueueFamilyIndex == cmdbuf->vk.pool->queue_family_index;
            assert(is_release ^ is_acquire);
            texture_barriers[tbar].LayoutBefore = is_acquire ?
               D3D12_BARRIER_LAYOUT_COMMON : dzn_vk_layout_to_d3d_layout(info->pImageMemoryBarriers[i].oldLayout, cmdbuf->type, aspect);
            texture_barriers[tbar].LayoutAfter = is_release ?
               D3D12_BARRIER_LAYOUT_COMMON : dzn_vk_layout_to_d3d_layout(info->pImageMemoryBarriers[i].newLayout, cmdbuf->type, aspect);
            if (is_acquire) {
               texture_barriers[tbar].SyncBefore = D3D12_BARRIER_SYNC_NONE;
               texture_barriers[tbar].AccessBefore = D3D12_BARRIER_ACCESS_NO_ACCESS;
               layout_after_valid_access = valid_access_for_layout(texture_barriers[tbar].LayoutAfter);
            } else {
               texture_barriers[tbar].SyncAfter = D3D12_BARRIER_SYNC_NONE;
               texture_barriers[tbar].AccessAfter = D3D12_BARRIER_ACCESS_NO_ACCESS;
               layout_before_valid_access = valid_access_for_layout(texture_barriers[tbar].LayoutBefore);
            }
         } else {
            texture_barriers[tbar].LayoutBefore = dzn_vk_layout_to_d3d_layout(info->pImageMemoryBarriers[i].oldLayout, cmdbuf->type, aspect);
            texture_barriers[tbar].LayoutAfter = dzn_vk_layout_to_d3d_layout(info->pImageMemoryBarriers[i].newLayout, cmdbuf->type, aspect);
            layout_before_valid_access = valid_access_for_layout(texture_barriers[tbar].LayoutBefore);
            layout_after_valid_access = valid_access_for_layout(texture_barriers[tbar].LayoutAfter);
         }

         /* Scope accesses to what the command buffer, the image and the
          * layout each allow. The queue-transfer branches above may have
          * already pinned one side to NO_ACCESS; those assignments are
          * overwritten here only when the sync isn't NONE, which matches the
          * values they set. */
         texture_barriers[tbar].AccessBefore = texture_barriers[tbar].SyncBefore == D3D12_BARRIER_SYNC_NONE ||
                                                texture_barriers[tbar].LayoutBefore == D3D12_BARRIER_LAYOUT_UNDEFINED ?
            D3D12_BARRIER_ACCESS_NO_ACCESS :
            translate_access(info->pImageMemoryBarriers[i].srcAccessMask) &
               cmdbuf->valid_access & image->valid_access & layout_before_valid_access;
         texture_barriers[tbar].AccessAfter = texture_barriers[tbar].SyncAfter == D3D12_BARRIER_SYNC_NONE ?
            D3D12_BARRIER_ACCESS_NO_ACCESS :
            translate_access(info->pImageMemoryBarriers[i].dstAccessMask) &
               cmdbuf->valid_access & image->valid_access & layout_after_valid_access;

         texture_barriers[tbar].SyncBefore = adjust_sync_for_access(texture_barriers[tbar].SyncBefore, texture_barriers[tbar].AccessBefore);
         texture_barriers[tbar].SyncAfter = adjust_sync_for_access(texture_barriers[tbar].SyncAfter, texture_barriers[tbar].AccessAfter);
         texture_barriers[tbar].Subresources.FirstArraySlice = range->baseArrayLayer;
         texture_barriers[tbar].Subresources.NumArraySlices = dzn_get_layer_count(image, range);
         texture_barriers[tbar].Subresources.IndexOrFirstMipLevel = range->baseMipLevel;
         texture_barriers[tbar].Subresources.NumMipLevels = dzn_get_level_count(image, range);
         texture_barriers[tbar].Subresources.FirstPlane = aspect_idx;
         texture_barriers[tbar].Subresources.NumPlanes = util_bitcount(aspect);
         texture_barriers[tbar].pResource = image->res;
         texture_barriers[tbar].Flags = D3D12_TEXTURE_BARRIER_FLAG_NONE;
         /* Transitioning out of UNDEFINED means the contents are discardable. */
         if (texture_barriers[tbar].LayoutBefore == D3D12_BARRIER_LAYOUT_UNDEFINED)
            texture_barriers[tbar].Flags |= D3D12_TEXTURE_BARRIER_FLAG_DISCARD;
         ++tbar;
      }
   }
   assert(bbar == num_buffer_barriers);
   assert(tbar == num_image_barriers);

   ID3D12GraphicsCommandList8_Barrier(cmdbuf->cmdlist8, num_barrier_groups, groups);

   /* NOTE(review): assumes global_barriers is the head of the multialloc
    * block (it's the first VK_MULTIALLOC_DECL above), so freeing it releases
    * all three arrays — confirm against vk_alloc.h. */
   vk_free(&cmdbuf->vk.pool->alloc, global_barriers);
}
1663 
1664 static D3D12_CPU_DESCRIPTOR_HANDLE
dzn_cmd_buffer_get_dsv(struct dzn_cmd_buffer * cmdbuf,const struct dzn_image * image,const D3D12_DEPTH_STENCIL_VIEW_DESC * desc)1665 dzn_cmd_buffer_get_dsv(struct dzn_cmd_buffer *cmdbuf,
1666                        const struct dzn_image *image,
1667                        const D3D12_DEPTH_STENCIL_VIEW_DESC *desc)
1668 {
1669    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
1670    struct dzn_cmd_buffer_dsv_key key = { image, *desc };
1671    struct hash_entry *he = _mesa_hash_table_search(cmdbuf->dsvs.ht, &key);
1672    struct dzn_cmd_buffer_dsv_entry *dsve;
1673 
1674    if (!he) {
1675       struct dzn_descriptor_heap *heap;
1676       uint32_t slot;
1677 
1678       // TODO: error handling
1679       dsve = vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*dsve), 8,
1680                       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1681       dsve->key = key;
1682       dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->dsvs.pool, device, 1, &heap, &slot);
1683       dsve->handle = dzn_descriptor_heap_get_cpu_handle(heap, slot);
1684       ID3D12Device1_CreateDepthStencilView(device->dev, image->res, desc, dsve->handle);
1685       _mesa_hash_table_insert(cmdbuf->dsvs.ht, &dsve->key, dsve);
1686    } else {
1687       dsve = he->data;
1688    }
1689 
1690    return dsve->handle;
1691 }
1692 
1693 static D3D12_CPU_DESCRIPTOR_HANDLE
dzn_cmd_buffer_get_rtv(struct dzn_cmd_buffer * cmdbuf,const struct dzn_image * image,const D3D12_RENDER_TARGET_VIEW_DESC * desc)1694 dzn_cmd_buffer_get_rtv(struct dzn_cmd_buffer *cmdbuf,
1695                        const struct dzn_image *image,
1696                        const D3D12_RENDER_TARGET_VIEW_DESC *desc)
1697 {
1698    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
1699    struct dzn_cmd_buffer_rtv_key key = { image, *desc };
1700    struct hash_entry *he = _mesa_hash_table_search(cmdbuf->rtvs.ht, &key);
1701    struct dzn_cmd_buffer_rtv_entry *rtve;
1702 
1703    if (!he) {
1704       struct dzn_descriptor_heap *heap;
1705       uint32_t slot;
1706 
1707       // TODO: error handling
1708       rtve = vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*rtve), 8,
1709                       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1710       rtve->key = key;
1711       dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->rtvs.pool, device, 1, &heap, &slot);
1712       rtve->handle = dzn_descriptor_heap_get_cpu_handle(heap, slot);
1713       ID3D12Device1_CreateRenderTargetView(device->dev, image->res, desc, rtve->handle);
1714       he = _mesa_hash_table_insert(cmdbuf->rtvs.ht, &rtve->key, rtve);
1715    } else {
1716       rtve = he->data;
1717    }
1718 
1719    return rtve->handle;
1720 }
1721 
1722 static D3D12_CPU_DESCRIPTOR_HANDLE
dzn_cmd_buffer_get_null_rtv(struct dzn_cmd_buffer * cmdbuf)1723 dzn_cmd_buffer_get_null_rtv(struct dzn_cmd_buffer *cmdbuf)
1724 {
1725    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
1726 
1727    if (!cmdbuf->null_rtv.ptr) {
1728       struct dzn_descriptor_heap *heap;
1729       uint32_t slot;
1730       dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->rtvs.pool, device, 1, &heap, &slot);
1731       cmdbuf->null_rtv = dzn_descriptor_heap_get_cpu_handle(heap, slot);
1732 
1733       D3D12_RENDER_TARGET_VIEW_DESC desc = { 0 };
1734       desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
1735       desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
1736       desc.Texture2D.MipSlice = 0;
1737       desc.Texture2D.PlaneSlice = 0;
1738 
1739       ID3D12Device1_CreateRenderTargetView(device->dev, NULL, &desc, cmdbuf->null_rtv);
1740    }
1741 
1742    return cmdbuf->null_rtv;
1743 }
1744 
1745 static D3D12_HEAP_TYPE
heap_type_for_bucket(enum dzn_internal_buf_bucket bucket)1746 heap_type_for_bucket(enum dzn_internal_buf_bucket bucket)
1747 {
1748    switch (bucket) {
1749    case DZN_INTERNAL_BUF_UPLOAD: return D3D12_HEAP_TYPE_UPLOAD;
1750    case DZN_INTERNAL_BUF_DEFAULT: return D3D12_HEAP_TYPE_DEFAULT;
1751    default: unreachable("Invalid value");
1752    }
1753 }
1754 
1755 static VkResult
dzn_cmd_buffer_alloc_internal_buf(struct dzn_cmd_buffer * cmdbuf,uint32_t size,enum dzn_internal_buf_bucket bucket,D3D12_RESOURCE_STATES init_state,uint64_t align,ID3D12Resource ** out,uint64_t * offset)1756 dzn_cmd_buffer_alloc_internal_buf(struct dzn_cmd_buffer *cmdbuf,
1757                                   uint32_t size,
1758                                   enum dzn_internal_buf_bucket bucket,
1759                                   D3D12_RESOURCE_STATES init_state,
1760                                   uint64_t align,
1761                                   ID3D12Resource **out,
1762                                   uint64_t *offset)
1763 {
1764    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
1765    ID3D12Resource *res;
1766    *out = NULL;
1767    D3D12_HEAP_TYPE heap_type = heap_type_for_bucket(bucket);
1768 
1769    if (bucket == DZN_INTERNAL_BUF_UPLOAD && cmdbuf->cur_upload_buf) {
1770       uint64_t new_offset = ALIGN_POT(cmdbuf->cur_upload_buf_offset, align);
1771       if (cmdbuf->cur_upload_buf->size >= size + new_offset) {
1772          cmdbuf->cur_upload_buf_offset = new_offset + size;
1773          *out = cmdbuf->cur_upload_buf->res;
1774          *offset = new_offset;
1775          return VK_SUCCESS;
1776       }
1777       cmdbuf->cur_upload_buf = NULL;
1778       cmdbuf->cur_upload_buf_offset = 0;
1779    }
1780 
1781    uint32_t alloc_size = size;
1782    if (bucket == DZN_INTERNAL_BUF_UPLOAD)
1783       /* Walk through a 4MB upload buffer */
1784       alloc_size = ALIGN_POT(size, 4 * 1024 * 1024);
1785    else
1786       /* Align size on 64k (the default alignment) */
1787       alloc_size = ALIGN_POT(size, 64 * 1024);
1788 
1789    D3D12_HEAP_PROPERTIES hprops = dzn_ID3D12Device4_GetCustomHeapProperties(device->dev, 0, heap_type);
1790    D3D12_RESOURCE_DESC rdesc = {
1791       .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
1792       .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
1793       .Width = alloc_size,
1794       .Height = 1,
1795       .DepthOrArraySize = 1,
1796       .MipLevels = 1,
1797       .Format = DXGI_FORMAT_UNKNOWN,
1798       .SampleDesc = { .Count = 1, .Quality = 0 },
1799       .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
1800       .Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS,
1801    };
1802 
1803    HRESULT hres =
1804       ID3D12Device1_CreateCommittedResource(device->dev, &hprops,
1805                                             D3D12_HEAP_FLAG_NONE, &rdesc,
1806                                             init_state, NULL,
1807                                             &IID_ID3D12Resource,
1808                                             (void **)&res);
1809    if (FAILED(hres)) {
1810       return vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_DEVICE_MEMORY);
1811    }
1812 
1813    struct dzn_internal_resource *entry =
1814       vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*entry), 8,
1815                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1816    if (!entry) {
1817       ID3D12Resource_Release(res);
1818       return vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_DEVICE_MEMORY);
1819    }
1820 
1821    entry->res = res;
1822    entry->size = alloc_size;
1823    list_addtail(&entry->link, &cmdbuf->internal_bufs[bucket]);
1824    *out = entry->res;
1825    if (offset)
1826       *offset = 0;
1827    if (bucket == DZN_INTERNAL_BUF_UPLOAD) {
1828       cmdbuf->cur_upload_buf = entry;
1829       cmdbuf->cur_upload_buf_offset = size;
1830    }
1831    return VK_SUCCESS;
1832 }
1833 
/* Clear arbitrary rects of a color image by filling a CPU-visible upload
 * buffer with the repeated clear pattern and recording buffer->image
 * copies, instead of using ClearRenderTargetView(). This is the fallback
 * used when the clear value can't be passed to an RTV clear losslessly
 * (see the clear_with_cpy logic in dzn_cmd_buffer_clear_attachment()).
 * Only single-mip-level ranges are supported (asserted below).
 */
static void
dzn_cmd_buffer_clear_rects_with_copy(struct dzn_cmd_buffer *cmdbuf,
                                     const struct dzn_image *image,
                                     VkImageLayout layout,
                                     const VkClearColorValue *color,
                                     const VkImageSubresourceRange *range,
                                     uint32_t rect_count, D3D12_RECT *rects)
{
   enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format);
   uint32_t blksize = util_format_get_blocksize(pfmt);
   /* Pattern chunk: x3 so even block sizes that don't divide the 256-byte
    * pitch alignment (e.g. 12 bytes) can fit a whole fill_step. */
   uint8_t buf[D3D12_TEXTURE_DATA_PITCH_ALIGNMENT * 3] = { 0 };
   uint32_t raw[4] = { 0 };

   assert(blksize <= sizeof(raw));
   assert(!(sizeof(buf) % blksize));

   /* Encode the clear color as a single raw texel in the image format. */
   util_format_write_4(pfmt, color, 0, raw, 0, 0, 0, 1, 1);

   /* Smallest multiple of the D3D12 row-pitch alignment that is also a
    * multiple of the block size, so rows can be produced by repeating a
    * fixed-size chunk. */
   uint32_t fill_step = D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;
   while (fill_step % blksize)
      fill_step += D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;

   uint32_t max_w = u_minify(image->vk.extent.width, range->baseMipLevel);
   uint32_t max_h = u_minify(image->vk.extent.height, range->baseMipLevel);
   uint32_t row_pitch = ALIGN_NPOT(max_w * blksize, fill_step);
   uint32_t res_size = max_h * row_pitch;

   assert(fill_step <= sizeof(buf));

   /* Replicate the raw texel across one fill_step worth of bytes. */
   for (uint32_t i = 0; i < fill_step; i += blksize)
      memcpy(&buf[i], raw, blksize);

   ID3D12Resource *src_res;
   uint64_t src_offset;

   VkResult result =
      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, res_size,
                                        DZN_INTERNAL_BUF_UPLOAD,
                                        D3D12_RESOURCE_STATE_GENERIC_READ,
                                        D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT,
                                        &src_res,
                                        &src_offset);
   if (result != VK_SUCCESS)
      return;

   assert(!(res_size % fill_step));

   /* Fill the whole staging area with the repeated clear pattern. */
   uint8_t *cpu_ptr;
   ID3D12Resource_Map(src_res, 0, NULL, (void **)&cpu_ptr);
   cpu_ptr += src_offset;
   for (uint32_t i = 0; i < res_size; i += fill_step)
      memcpy(&cpu_ptr[i], buf, fill_step);

   ID3D12Resource_Unmap(src_res, 0, NULL);

   D3D12_TEXTURE_COPY_LOCATION src_loc = {
      .pResource = src_res,
      .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
      .PlacedFootprint = {
         .Offset = src_offset,
         .Footprint = {
            .Width = max_w,
            .Height = max_h,
            .Depth = 1,
            .RowPitch = (UINT)ALIGN_NPOT(max_w * blksize, fill_step),
         },
      },
   };

   /* Legacy barriers: move the range to TRANSFER_DST for the copies.
    * With enhanced barriers no transition is recorded here — presumably
    * handled by the caller; TODO confirm. */
   if (!cmdbuf->enhanced_barriers) {
      dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
                                                         layout,
                                                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                                         DZN_QUEUE_TRANSITION_FLUSH);
   }

   /* The staging buffer was sized for a single mip level. */
   assert(dzn_get_level_count(image, range) == 1);
   uint32_t layer_count = dzn_get_layer_count(image, range);

   dzn_foreach_aspect(aspect, range->aspectMask) {
      VkImageSubresourceLayers subres = {
         .aspectMask = (VkImageAspectFlags)aspect,
         .mipLevel = range->baseMipLevel,
         .baseArrayLayer = range->baseArrayLayer,
         .layerCount = layer_count,
      };

      for (uint32_t layer = 0; layer < layer_count; layer++) {
         D3D12_TEXTURE_COPY_LOCATION dst_loc =
            dzn_image_get_copy_loc(image, &subres, aspect, layer);

         /* If the destination resolves to a placed footprint itself,
          * reuse its format so the raw copy matches. */
         src_loc.PlacedFootprint.Footprint.Format =
            dst_loc.Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT ?
            dst_loc.PlacedFootprint.Footprint.Format :
            image->desc.Format;

         for (uint32_t r = 0; r < rect_count; r++) {
            /* The staging data is uniform, so always read from its
             * origin; only the rect extent matters. */
            D3D12_BOX src_box = {
               .left = 0,
               .top = 0,
               .front = 0,
               .right = (UINT)(rects[r].right - rects[r].left),
               .bottom = (UINT)(rects[r].bottom - rects[r].top),
               .back = 1,
            };

            ID3D12GraphicsCommandList1_CopyTextureRegion(cmdbuf->cmdlist,
                                                         &dst_loc,
                                                         rects[r].left,
                                                         rects[r].top, 0,
                                                         &src_loc,
                                                         &src_box);
         }
      }
   }

   /* Restore the caller-visible layout (legacy barriers only). */
   if (!cmdbuf->enhanced_barriers) {
      dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
                                                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                                         layout,
                                                         DZN_QUEUE_TRANSITION_FLUSH);
   }
}
1957 
1958 static VkClearColorValue
adjust_clear_color(struct dzn_physical_device * pdev,VkFormat format,const VkClearColorValue * col)1959 adjust_clear_color(struct dzn_physical_device *pdev,
1960                    VkFormat format, const VkClearColorValue *col)
1961 {
1962    VkClearColorValue out = *col;
1963 
1964    // D3D12 doesn't support bgra4, so we map it to rgba4 and swizzle things
1965    // manually where it matters, like here, in the clear path.
1966    if (format == VK_FORMAT_B4G4R4A4_UNORM_PACK16) {
1967       if (pdev->support_a4b4g4r4) {
1968          DZN_SWAP(float, out.float32[0], out.float32[2]);
1969       } else {
1970          DZN_SWAP(float, out.float32[0], out.float32[1]);
1971          DZN_SWAP(float, out.float32[2], out.float32[3]);
1972       }
1973    }
1974 
1975    return out;
1976 }
1977 
/* Clear whole subresource ranges of a color image with buffer->image
 * copies instead of RTV clears. Used by dzn_cmd_buffer_clear_color()
 * when the image can't be bound as a render target, the command list
 * type can't record RTV clears, or the clear value can't be expressed
 * losslessly as floats.
 */
static void
dzn_cmd_buffer_clear_ranges_with_copy(struct dzn_cmd_buffer *cmdbuf,
                                      const struct dzn_image *image,
                                      VkImageLayout layout,
                                      const VkClearColorValue *color,
                                      uint32_t range_count,
                                      const VkImageSubresourceRange *ranges)
{
   enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format);
   uint32_t blksize = util_format_get_blocksize(pfmt);
   /* Pattern chunk: x3 so block sizes that don't divide the 256-byte
    * pitch alignment (e.g. 12 bytes) can still fit a whole fill_step. */
   uint8_t buf[D3D12_TEXTURE_DATA_PITCH_ALIGNMENT * 3] = { 0 };
   uint32_t raw[4] = { 0 };

   assert(blksize <= sizeof(raw));
   assert(!(sizeof(buf) % blksize));

   /* Encode the clear color as a single raw texel in the image format. */
   util_format_write_4(pfmt, color, 0, raw, 0, 0, 0, 1, 1);

   /* Smallest multiple of the D3D12 row-pitch alignment that is also a
    * multiple of the block size. */
   uint32_t fill_step = D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;
   while (fill_step % blksize)
      fill_step += D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;

   /* Size the staging buffer for the biggest base mip among all ranges;
    * smaller mips simply use a prefix of it. */
   uint32_t res_size = 0;
   for (uint32_t r = 0; r < range_count; r++) {
      uint32_t w = u_minify(image->vk.extent.width, ranges[r].baseMipLevel);
      uint32_t h = u_minify(image->vk.extent.height, ranges[r].baseMipLevel);
      uint32_t d = u_minify(image->vk.extent.depth, ranges[r].baseMipLevel);
      uint32_t row_pitch = ALIGN_NPOT(w * blksize, fill_step);

      res_size = MAX2(res_size, h * d * row_pitch);
   }

   assert(fill_step <= sizeof(buf));

   /* Replicate the raw texel across one fill_step worth of bytes. */
   for (uint32_t i = 0; i < fill_step; i += blksize)
      memcpy(&buf[i], raw, blksize);

   ID3D12Resource *src_res;
   uint64_t src_offset;

   VkResult result =
      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, res_size,
                                        DZN_INTERNAL_BUF_UPLOAD,
                                        D3D12_RESOURCE_STATE_GENERIC_READ,
                                        D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT,
                                        &src_res,
                                        &src_offset);
   if (result != VK_SUCCESS)
      return;

   assert(!(res_size % fill_step));

   /* Fill the staging area with the repeated clear pattern. */
   uint8_t *cpu_ptr;
   ID3D12Resource_Map(src_res, 0, NULL, (void **)&cpu_ptr);
   cpu_ptr += src_offset;
   for (uint32_t i = 0; i < res_size; i += fill_step)
      memcpy(&cpu_ptr[i], buf, fill_step);

   ID3D12Resource_Unmap(src_res, 0, NULL);

   /* Footprint extent/format are filled in per-subresource below. */
   D3D12_TEXTURE_COPY_LOCATION src_loc = {
      .pResource = src_res,
      .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
      .PlacedFootprint = {
         .Offset = src_offset,
      },
   };

   for (uint32_t r = 0; r < range_count; r++) {
      uint32_t level_count = dzn_get_level_count(image, &ranges[r]);
      uint32_t layer_count = dzn_get_layer_count(image, &ranges[r]);

      /* Legacy barriers: move the range to TRANSFER_DST for the copies. */
      if (!cmdbuf->enhanced_barriers) {
         dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &ranges[r],
                                                            layout,
                                                            VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                                            DZN_QUEUE_TRANSITION_FLUSH);
      }

      dzn_foreach_aspect(aspect, ranges[r].aspectMask) {
         for (uint32_t lvl = 0; lvl < level_count; lvl++) {
            uint32_t w = u_minify(image->vk.extent.width, ranges[r].baseMipLevel + lvl);
            uint32_t h = u_minify(image->vk.extent.height, ranges[r].baseMipLevel + lvl);
            uint32_t d = u_minify(image->vk.extent.depth, ranges[r].baseMipLevel + lvl);
            VkImageSubresourceLayers subres = {
               .aspectMask = (VkImageAspectFlags)aspect,
               .mipLevel = ranges[r].baseMipLevel + lvl,
               .baseArrayLayer = ranges[r].baseArrayLayer,
               .layerCount = layer_count,
            };

            for (uint32_t layer = 0; layer < layer_count; layer++) {
               D3D12_TEXTURE_COPY_LOCATION dst_loc =
                  dzn_image_get_copy_loc(image, &subres, aspect, layer);

               /* If the destination resolves to a placed footprint
                * itself, reuse its format so the raw copy matches. */
               src_loc.PlacedFootprint.Footprint.Format =
                  dst_loc.Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT ?
                  dst_loc.PlacedFootprint.Footprint.Format :
                  image->desc.Format;
               src_loc.PlacedFootprint.Footprint.Width = w;
               src_loc.PlacedFootprint.Footprint.Height = h;
               src_loc.PlacedFootprint.Footprint.Depth = d;
               src_loc.PlacedFootprint.Footprint.RowPitch =
                  ALIGN_NPOT(w * blksize, fill_step);
               D3D12_BOX src_box = {
                  .left = 0,
                  .top = 0,
                  .front = 0,
                  .right = w,
                  .bottom = h,
                  .back = d,
               };

               ID3D12GraphicsCommandList1_CopyTextureRegion(cmdbuf->cmdlist, &dst_loc, 0, 0, 0,
                                                  &src_loc, &src_box);

            }
         }
      }

      /* Restore the caller-visible layout (legacy barriers only). */
      if (!cmdbuf->enhanced_barriers) {
         dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &ranges[r],
                                                            VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                                            layout,
                                                            DZN_QUEUE_TRANSITION_FLUSH);
      }
   }
}
2106 
/* Clear one attachment view (vkCmdClearAttachments path): depth/stencil
 * views go through ClearDepthStencilView(), color views through
 * ClearRenderTargetView() — unless the integer clear value can't be
 * represented exactly as floats, in which case a copy-based clear is used.
 * layer_count may be VK_REMAINING_ARRAY_LAYERS.
 */
static void
dzn_cmd_buffer_clear_attachment(struct dzn_cmd_buffer *cmdbuf,
                                struct dzn_image_view *view,
                                VkImageLayout layout,
                                const VkClearValue *value,
                                VkImageAspectFlags aspects,
                                uint32_t base_layer,
                                uint32_t layer_count,
                                uint32_t rect_count,
                                D3D12_RECT *rects)
{
   struct dzn_image *image =
      container_of(view->vk.image, struct dzn_image, vk);
   struct dzn_physical_device *pdev =
      container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk);

   /* Resolve the view-relative layer window into an image subresource
    * range (single mip level: the view's base level). */
   VkImageSubresourceRange range = {
      .aspectMask = aspects,
      .baseMipLevel = view->vk.base_mip_level,
      .levelCount = 1,
      .baseArrayLayer = view->vk.base_array_layer + base_layer,
      .layerCount = layer_count == VK_REMAINING_ARRAY_LAYERS ?
                    view->vk.layer_count - base_layer : layer_count,
   };

   layer_count = vk_image_subresource_layer_count(&image->vk, &range);
   D3D12_BARRIER_LAYOUT restore_layout = D3D12_BARRIER_LAYOUT_COMMON;

   if (vk_format_is_depth_or_stencil(view->vk.format)) {
      D3D12_CLEAR_FLAGS flags = (D3D12_CLEAR_FLAGS)0;

      if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
         flags |= D3D12_CLEAR_FLAG_DEPTH;
      if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT)
         flags |= D3D12_CLEAR_FLAG_STENCIL;

      if (flags != 0) {
         /* Bring the range to a DS-writable layout, remembering what to
          * restore afterwards (enhanced-barriers path). */
         if (cmdbuf->enhanced_barriers) {
            restore_layout = dzn_cmd_buffer_require_layout(cmdbuf, image,
                                                           layout, D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE,
                                                           &range);
         } else {
            dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range,
                                                               layout,
                                                               VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
                                                               DZN_QUEUE_TRANSITION_FLUSH);
         }

         D3D12_DEPTH_STENCIL_VIEW_DESC desc = dzn_image_get_dsv_desc(image, &range, 0);
         D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_dsv(cmdbuf, image, &desc);
         ID3D12GraphicsCommandList1_ClearDepthStencilView(cmdbuf->cmdlist, handle, flags,
                                                value->depthStencil.depth,
                                                value->depthStencil.stencil,
                                                rect_count, rects);

         /* Put the range back in its caller-visible layout. */
         if (cmdbuf->enhanced_barriers) {
            dzn_cmd_buffer_restore_layout(cmdbuf, image,
                                          D3D12_BARRIER_SYNC_DEPTH_STENCIL, D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE,
                                          D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE, restore_layout,
                                          &range);
         } else {
            dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range,
                                                               VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
                                                               layout,
                                                               DZN_QUEUE_TRANSITION_FLUSH);
         }
      }
   } else if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
      VkClearColorValue color = adjust_clear_color(pdev, view->vk.format, &value->color);
      bool clear_with_cpy = false;
      float vals[4];

      /* ClearRenderTargetView() takes floats; if an int/uint clear value
       * doesn't round-trip through float exactly, fall back to the
       * copy-based clear. */
      if (vk_format_is_sint(view->vk.format)) {
         for (uint32_t i = 0; i < 4; i++) {
            vals[i] = color.int32[i];
            if (color.int32[i] != (int32_t)vals[i]) {
               clear_with_cpy = true;
               break;
            }
         }
      } else if (vk_format_is_uint(view->vk.format)) {
         for (uint32_t i = 0; i < 4; i++) {
            vals[i] = color.uint32[i];
            if (color.uint32[i] != (uint32_t)vals[i]) {
               clear_with_cpy = true;
               break;
            }
         }
      } else {
         for (uint32_t i = 0; i < 4; i++)
            vals[i] = color.float32[i];
      }

      if (clear_with_cpy) {
         /* NOTE(review): the un-swizzled value->color and a hard-coded
          * COLOR_ATTACHMENT_OPTIMAL layout are passed here, not `color`/
          * `layout` — presumably because the copy path encodes the raw
          * format itself and the attachment is in that layout within the
          * render pass; confirm against callers. */
         dzn_cmd_buffer_clear_rects_with_copy(cmdbuf, image,
                                              VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                                              &value->color,
                                              &range, rect_count, rects);
      } else {
         /* Bring the range to RENDER_TARGET layout for the RTV clear. */
         if (cmdbuf->enhanced_barriers) {
            restore_layout = dzn_cmd_buffer_require_layout(cmdbuf, image,
                                                           layout, D3D12_BARRIER_LAYOUT_RENDER_TARGET,
                                                           &range);
         } else {
            dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range,
                                                               layout,
                                                               VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                                                               DZN_QUEUE_TRANSITION_FLUSH);
         }

         D3D12_RENDER_TARGET_VIEW_DESC desc = dzn_image_get_rtv_desc(image, &range, 0);
         D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_rtv(cmdbuf, image, &desc);
         ID3D12GraphicsCommandList1_ClearRenderTargetView(cmdbuf->cmdlist, handle, vals, rect_count, rects);

         /* Put the range back in its caller-visible layout. */
         if (cmdbuf->enhanced_barriers) {
            dzn_cmd_buffer_restore_layout(cmdbuf, image,
                                          D3D12_BARRIER_SYNC_RENDER_TARGET, D3D12_BARRIER_ACCESS_RENDER_TARGET,
                                          D3D12_BARRIER_LAYOUT_RENDER_TARGET, restore_layout,
                                          &range);
         } else {
            dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range,
                                                               VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                                                               layout,
                                                               DZN_QUEUE_TRANSITION_FLUSH);
         }
      }
   }
}
2235 
/* Implements vkCmdClearColorImage: clear the given subresource ranges
 * via ClearRenderTargetView() when possible, otherwise fall back to the
 * copy-based path (image not renderable, non-direct command list, or an
 * int/uint clear value that can't round-trip through float).
 */
static void
dzn_cmd_buffer_clear_color(struct dzn_cmd_buffer *cmdbuf,
                           const struct dzn_image *image,
                           VkImageLayout layout,
                           const VkClearColorValue *col,
                           uint32_t range_count,
                           const VkImageSubresourceRange *ranges)
{
   struct dzn_physical_device *pdev =
      container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk);
   /* RTV clears need a render-target-capable resource and a direct
    * command list; otherwise clear through copies. */
   if (!(image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) ||
       cmdbuf->type != D3D12_COMMAND_LIST_TYPE_DIRECT) {
      dzn_cmd_buffer_clear_ranges_with_copy(cmdbuf, image, layout, col, range_count, ranges);
      return;
   }

   VkClearColorValue color = adjust_clear_color(pdev, image->vk.format, col);
   float clear_vals[4];

   enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format);
   D3D12_BARRIER_LAYOUT restore_layout = D3D12_BARRIER_LAYOUT_COMMON;

   /* ClearRenderTargetView() takes floats; integer values that don't
    * survive the float round-trip must use the copy path. */
   if (util_format_is_pure_sint(pfmt)) {
      for (uint32_t c = 0; c < ARRAY_SIZE(clear_vals); c++) {
         clear_vals[c] = color.int32[c];
         if (color.int32[c] != (int32_t)clear_vals[c]) {
            dzn_cmd_buffer_clear_ranges_with_copy(cmdbuf, image, layout, col, range_count, ranges);
            return;
         }
      }
   } else if (util_format_is_pure_uint(pfmt)) {
      for (uint32_t c = 0; c < ARRAY_SIZE(clear_vals); c++) {
         clear_vals[c] = color.uint32[c];
         if (color.uint32[c] != (uint32_t)clear_vals[c]) {
            dzn_cmd_buffer_clear_ranges_with_copy(cmdbuf, image, layout, col, range_count, ranges);
            return;
         }
      }
   } else {
      memcpy(clear_vals, color.float32, sizeof(clear_vals));
   }

   for (uint32_t r = 0; r < range_count; r++) {
      const VkImageSubresourceRange *range = &ranges[r];
      uint32_t level_count = dzn_get_level_count(image, range);

      /* Bring the range to RENDER_TARGET layout for the clears. */
      if (cmdbuf->enhanced_barriers) {
         restore_layout = dzn_cmd_buffer_require_layout(cmdbuf, image,
                                                        layout, D3D12_BARRIER_LAYOUT_RENDER_TARGET,
                                                        range);
      } else {
         dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
                                                            layout,
                                                            VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                                                            DZN_QUEUE_TRANSITION_FLUSH);
      }

      for (uint32_t lvl = 0; lvl < level_count; lvl++) {
         VkImageSubresourceRange view_range = *range;

         /* 3D images have no array layers; the RTV covers all depth
          * slices of the level instead. */
         if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
            view_range.baseArrayLayer = 0;
            view_range.layerCount = u_minify(image->vk.extent.depth, range->baseMipLevel + lvl);
         }

         D3D12_RENDER_TARGET_VIEW_DESC desc = dzn_image_get_rtv_desc(image, &view_range, lvl);
         D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_rtv(cmdbuf, image, &desc);
         ID3D12GraphicsCommandList1_ClearRenderTargetView(cmdbuf->cmdlist, handle, clear_vals, 0, NULL);
      }

      /* Put the range back in its caller-visible layout. */
      if (cmdbuf->enhanced_barriers) {
         dzn_cmd_buffer_restore_layout(cmdbuf, image,
                                       D3D12_BARRIER_SYNC_RENDER_TARGET, D3D12_BARRIER_ACCESS_RENDER_TARGET,
                                       D3D12_BARRIER_LAYOUT_RENDER_TARGET, restore_layout,
                                       range);
      } else {
         dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
                                                            VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                                                            layout,
                                                            DZN_QUEUE_TRANSITION_FLUSH);
      }
   }
}
2319 
/* Implements vkCmdClearDepthStencilImage: clear the requested depth
 * and/or stencil aspects of each range, one mip level at a time, through
 * ClearDepthStencilView(). The image must have been created with the
 * depth-stencil resource flag (asserted).
 */
static void
dzn_cmd_buffer_clear_zs(struct dzn_cmd_buffer *cmdbuf,
                        const struct dzn_image *image,
                        VkImageLayout layout,
                        const VkClearDepthStencilValue *zs,
                        uint32_t range_count,
                        const VkImageSubresourceRange *ranges)
{
   assert(image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL);

   for (uint32_t r = 0; r < range_count; r++) {
      const VkImageSubresourceRange *range = &ranges[r];
      uint32_t level_count = dzn_get_level_count(image, range);

      /* Translate the requested aspects into D3D12 clear flags. */
      D3D12_CLEAR_FLAGS flags = (D3D12_CLEAR_FLAGS)0;
      D3D12_BARRIER_LAYOUT restore_layout = D3D12_BARRIER_LAYOUT_COMMON;

      if (range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
         flags |= D3D12_CLEAR_FLAG_DEPTH;
      if (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)
         flags |= D3D12_CLEAR_FLAG_STENCIL;

      /* Bring the range to a DS-writable layout for the clears. */
      if (cmdbuf->enhanced_barriers) {
         restore_layout = dzn_cmd_buffer_require_layout(cmdbuf, image,
                                                        layout, D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE,
                                                        range);
      } else {
         dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
                                                            layout,
                                                            VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
                                                            DZN_QUEUE_TRANSITION_FLUSH);
      }

      /* One DSV clear per mip level; each DSV covers the range's layers. */
      for (uint32_t lvl = 0; lvl < level_count; lvl++) {
         D3D12_DEPTH_STENCIL_VIEW_DESC desc = dzn_image_get_dsv_desc(image, range, lvl);
         D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_dsv(cmdbuf, image, &desc);
         ID3D12GraphicsCommandList1_ClearDepthStencilView(cmdbuf->cmdlist,
                                                          handle, flags,
                                                          zs->depth,
                                                          zs->stencil,
                                                          0, NULL);
      }

      /* Put the range back in its caller-visible layout. */
      if (cmdbuf->enhanced_barriers) {
         dzn_cmd_buffer_restore_layout(cmdbuf, image,
                                       D3D12_BARRIER_SYNC_DEPTH_STENCIL, D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE,
                                       D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE, restore_layout,
                                       range);
      } else {
         dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
                                                            VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
                                                            layout,
                                                            DZN_QUEUE_TRANSITION_FLUSH);
      }
   }
}
2376 
/* Record the buffer->image copy for region `r` of a
 * vkCmdCopyBufferToImage2 call, for one aspect and one array layer `l`.
 * Uses a single CopyTextureRegion() when the buffer layout satisfies
 * D3D12's placed-footprint alignment rules, and falls back to a
 * row-by-row (block-row) copy otherwise.
 */
static void
dzn_cmd_buffer_copy_buf2img_region(struct dzn_cmd_buffer *cmdbuf,
                                   const VkCopyBufferToImageInfo2 *info,
                                   uint32_t r,
                                   VkImageAspectFlagBits aspect,
                                   uint32_t l)
{
   VK_FROM_HANDLE(dzn_buffer, src_buffer, info->srcBuffer);
   VK_FROM_HANDLE(dzn_image, dst_image, info->dstImage);
   struct dzn_physical_device *pdev =
      container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk);

   ID3D12GraphicsCommandList1 *cmdlist = cmdbuf->cmdlist;

   VkBufferImageCopy2 region = info->pRegions[r];
   enum pipe_format pfmt = vk_format_to_pipe_format(dst_image->vk.format);
   uint32_t blkh = util_format_get_blockheight(pfmt);
   uint32_t blkd = util_format_get_blockdepth(pfmt);

   /* D3D12 wants block aligned offsets/extent, but vulkan allows the extent
    * to not be block aligned if it's reaching the image boundary, offsets still
    * have to be aligned. Align the image extent to make D3D12 happy.
    */
   dzn_image_align_extent(dst_image, &region.imageExtent);

   D3D12_TEXTURE_COPY_LOCATION dst_img_loc =
      dzn_image_get_copy_loc(dst_image, &region.imageSubresource, aspect, l);
   D3D12_TEXTURE_COPY_LOCATION src_buf_loc =
      dzn_buffer_get_copy_loc(src_buffer, dst_image->vk.format, &region, aspect, l);

   if (dzn_buffer_supports_region_copy(pdev, &src_buf_loc)) {
      /* RowPitch and Offset are properly aligned, we can copy
       * the whole thing in one call.
       */
      D3D12_BOX src_box = {
         .left = 0,
         .top = 0,
         .front = 0,
         .right = region.imageExtent.width,
         .bottom = region.imageExtent.height,
         .back = region.imageExtent.depth,
      };

      ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_img_loc,
                                                   region.imageOffset.x,
                                                   region.imageOffset.y,
                                                   region.imageOffset.z,
                                                   &src_buf_loc, &src_box);
      return;
   }

   /* Copy line-by-line if things are not properly aligned: one copy per
    * block-row (blkh texels tall, blkd deep). left/right are patched in
    * each iteration from the per-line source location. */
   D3D12_BOX src_box = {
      .top = 0,
      .front = 0,
      .bottom = blkh,
      .back = blkd,
   };

   for (uint32_t z = 0; z < region.imageExtent.depth; z += blkd) {
      for (uint32_t y = 0; y < region.imageExtent.height; y += blkh) {
         uint32_t src_x;

         /* Compute a footprint whose row contains the line at (y, z),
          * and the x offset of that line within it. */
         D3D12_TEXTURE_COPY_LOCATION src_buf_line_loc =
            dzn_buffer_get_line_copy_loc(src_buffer, dst_image->vk.format,
                                         &region, &src_buf_loc,
                                         y, z, &src_x);

         src_box.left = src_x;
         src_box.right = src_x + region.imageExtent.width;
         ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist,
                                                      &dst_img_loc,
                                                      region.imageOffset.x,
                                                      region.imageOffset.y + y,
                                                      region.imageOffset.z + z,
                                                      &src_buf_line_loc,
                                                      &src_box);
      }
   }
}
2457 
2458 static void
dzn_cmd_buffer_copy_img2buf_region(struct dzn_cmd_buffer * cmdbuf,const VkCopyImageToBufferInfo2 * info,uint32_t r,VkImageAspectFlagBits aspect,uint32_t l)2459 dzn_cmd_buffer_copy_img2buf_region(struct dzn_cmd_buffer *cmdbuf,
2460                                    const VkCopyImageToBufferInfo2 *info,
2461                                    uint32_t r,
2462                                    VkImageAspectFlagBits aspect,
2463                                    uint32_t l)
2464 {
2465    VK_FROM_HANDLE(dzn_image, src_image, info->srcImage);
2466    VK_FROM_HANDLE(dzn_buffer, dst_buffer, info->dstBuffer);
2467    struct dzn_physical_device *pdev =
2468       container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk);
2469 
2470    ID3D12GraphicsCommandList1 *cmdlist = cmdbuf->cmdlist;
2471 
2472    VkBufferImageCopy2 region = info->pRegions[r];
2473    enum pipe_format pfmt = vk_format_to_pipe_format(src_image->vk.format);
2474    uint32_t blkh = util_format_get_blockheight(pfmt);
2475    uint32_t blkd = util_format_get_blockdepth(pfmt);
2476 
2477    /* D3D12 wants block aligned offsets/extent, but vulkan allows the extent
2478     * to not be block aligned if it's reaching the image boundary, offsets still
2479     * have to be aligned. Align the image extent to make D3D12 happy.
2480     */
2481    dzn_image_align_extent(src_image, &region.imageExtent);
2482 
2483    D3D12_TEXTURE_COPY_LOCATION src_img_loc =
2484       dzn_image_get_copy_loc(src_image, &region.imageSubresource, aspect, l);
2485    D3D12_TEXTURE_COPY_LOCATION dst_buf_loc =
2486       dzn_buffer_get_copy_loc(dst_buffer, src_image->vk.format, &region, aspect, l);
2487 
2488    if (dzn_buffer_supports_region_copy(pdev, &dst_buf_loc)) {
2489       /* RowPitch and Offset are properly aligned on 256 bytes, we can copy
2490        * the whole thing in one call.
2491        */
2492       D3D12_BOX src_box = {
2493          .left = (UINT)region.imageOffset.x,
2494          .top = (UINT)region.imageOffset.y,
2495          .front = (UINT)region.imageOffset.z,
2496          .right = (UINT)(region.imageOffset.x + region.imageExtent.width),
2497          .bottom = (UINT)(region.imageOffset.y + region.imageExtent.height),
2498          .back = (UINT)(region.imageOffset.z + region.imageExtent.depth),
2499       };
2500 
2501       ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_buf_loc,
2502                                                    0, 0, 0, &src_img_loc,
2503                                                    &src_box);
2504       return;
2505    }
2506 
2507    D3D12_BOX src_box = {
2508       .left = (UINT)region.imageOffset.x,
2509       .right = (UINT)(region.imageOffset.x + region.imageExtent.width),
2510    };
2511 
2512    /* Copy line-by-line if things are not properly aligned. */
2513    for (uint32_t z = 0; z < region.imageExtent.depth; z += blkd) {
2514       src_box.front = region.imageOffset.z + z;
2515       src_box.back = src_box.front + blkd;
2516 
2517       for (uint32_t y = 0; y < region.imageExtent.height; y += blkh) {
2518          uint32_t dst_x;
2519 
2520          D3D12_TEXTURE_COPY_LOCATION dst_buf_line_loc =
2521             dzn_buffer_get_line_copy_loc(dst_buffer, src_image->vk.format,
2522                                          &region, &dst_buf_loc,
2523                                          y, z, &dst_x);
2524 
2525          src_box.top = region.imageOffset.y + y;
2526          src_box.bottom = src_box.top + blkh;
2527 
2528          ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist,
2529                                                       &dst_buf_line_loc,
2530                                                       dst_x, 0, 0,
2531                                                       &src_img_loc,
2532                                                       &src_box);
2533       }
2534    }
2535 }
2536 
/* Copy one aspect of layer 'l' of region 'r' from info->srcImage to
 * info->dstImage. When tmp_loc->pResource is NULL the copy goes directly
 * between the two images; otherwise the data is bounced through the staging
 * buffer described by tmp_desc/tmp_loc (used when the two images don't share
 * a compatible copy layout), with copy-source/copy-dest barriers around the
 * intermediate steps.
 */
static void
dzn_cmd_buffer_copy_img_chunk(struct dzn_cmd_buffer *cmdbuf,
                              const VkCopyImageInfo2 *info,
                              D3D12_RESOURCE_DESC *tmp_desc,
                              D3D12_TEXTURE_COPY_LOCATION *tmp_loc,
                              uint32_t r,
                              VkImageAspectFlagBits aspect,
                              uint32_t l)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk);
   VK_FROM_HANDLE(dzn_image, src, info->srcImage);
   VK_FROM_HANDLE(dzn_image, dst, info->dstImage);

   ID3D12Device4 *dev = device->dev;
   ID3D12GraphicsCommandList1 *cmdlist = cmdbuf->cmdlist;

   VkImageCopy2 region = info->pRegions[r];
   /* D3D12 requires block-aligned extents; align like the other copy paths. */
   dzn_image_align_extent(src, &region.extent);

   const VkImageSubresourceLayers *src_subres = &region.srcSubresource;
   const VkImageSubresourceLayers *dst_subres = &region.dstSubresource;
   VkFormat src_format =
      dzn_image_get_plane_format(src->vk.format, aspect);
   VkFormat dst_format =
      dzn_image_get_plane_format(dst->vk.format, aspect);

   /* Per-format block dimensions; src and dst may differ (e.g. compressed
    * <-> uncompressed copies), which is what the staging-buffer footprint
    * rescaling below compensates for.
    */
   enum pipe_format src_pfmt = vk_format_to_pipe_format(src_format);
   uint32_t src_blkw = util_format_get_blockwidth(src_pfmt);
   uint32_t src_blkh = util_format_get_blockheight(src_pfmt);
   uint32_t src_blkd = util_format_get_blockdepth(src_pfmt);
   enum pipe_format dst_pfmt = vk_format_to_pipe_format(dst_format);
   uint32_t dst_blkw = util_format_get_blockwidth(dst_pfmt);
   uint32_t dst_blkh = util_format_get_blockheight(dst_pfmt);
   uint32_t dst_blkd = util_format_get_blockdepth(dst_pfmt);
   uint32_t dst_z = region.dstOffset.z, src_z = region.srcOffset.z;
   uint32_t depth = region.extent.depth;
   uint32_t dst_l = l, src_l = l;

   assert(src_subres->aspectMask == dst_subres->aspectMask);

   /* 2D <-> 3D copies: the layer index on the 2D side maps to a Z slice on
    * the 3D side, and only one slice is copied per call.
    */
   if (src->vk.image_type == VK_IMAGE_TYPE_3D &&
       dst->vk.image_type == VK_IMAGE_TYPE_2D) {
      assert(src_subres->layerCount == 1);
      src_l = 0;
      src_z += l;
      depth = 1;
   } else if (src->vk.image_type == VK_IMAGE_TYPE_2D &&
              dst->vk.image_type == VK_IMAGE_TYPE_3D) {
      assert(dst_subres->layerCount == 1);
      dst_l = 0;
      dst_z += l;
      depth = 1;
   } else {
      assert(src_subres->layerCount == dst_subres->layerCount);
   }

   D3D12_TEXTURE_COPY_LOCATION dst_loc = dzn_image_get_copy_loc(dst, dst_subres, aspect, dst_l);
   D3D12_TEXTURE_COPY_LOCATION src_loc = dzn_image_get_copy_loc(src, src_subres, aspect, src_l);

   D3D12_BOX src_box = {
      .left = (UINT)MAX2(region.srcOffset.x, 0),
      .top = (UINT)MAX2(region.srcOffset.y, 0),
      .front = (UINT)MAX2(src_z, 0),
      .right = (UINT)region.srcOffset.x + region.extent.width,
      .bottom = (UINT)region.srcOffset.y + region.extent.height,
      .back = (UINT)src_z + depth,
   };

   /* No staging buffer provided: the images are copy-compatible, do a direct
    * image-to-image copy and we're done.
    */
   if (!tmp_loc->pResource) {
      ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_loc,
                                                   region.dstOffset.x,
                                                   region.dstOffset.y,
                                                   dst_z, &src_loc,
                                                   &src_box);
      return;
   }

   /* Step 1: describe the staging buffer footprint in terms of the source
    * format and copy the source region into it.
    */
   tmp_desc->Format =
      dzn_image_get_placed_footprint_format(pdev, src->vk.format, aspect);
   tmp_desc->Width = region.extent.width;
   tmp_desc->Height = region.extent.height;

   ID3D12Device1_GetCopyableFootprints(dev, tmp_desc,
                                       0, 1, 0,
                                       &tmp_loc->PlacedFootprint,
                                       NULL, NULL, NULL);

   tmp_loc->PlacedFootprint.Footprint.Depth = depth;

   /* The staging buffer is left in COPY_SOURCE state at the end of each
    * chunk, so any chunk after the first has to transition it back to
    * COPY_DEST before writing to it.
    */
   if (r > 0 || l > 0) {
      if (cmdbuf->enhanced_barriers) {
         dzn_cmd_buffer_buffer_barrier(cmdbuf, tmp_loc->pResource,
                                       D3D12_BARRIER_SYNC_COPY, D3D12_BARRIER_SYNC_COPY,
                                       D3D12_BARRIER_ACCESS_COPY_SOURCE, D3D12_BARRIER_ACCESS_COPY_DEST);
      } else {
         dzn_cmd_buffer_queue_transition_barriers(cmdbuf, tmp_loc->pResource, 0, 1,
                                                  D3D12_RESOURCE_STATE_COPY_SOURCE,
                                                  D3D12_RESOURCE_STATE_COPY_DEST,
                                                  DZN_QUEUE_TRANSITION_FLUSH);
      }
   }

   ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, tmp_loc, 0, 0, 0, &src_loc, &src_box);

   /* Step 2: flip the staging buffer to COPY_SOURCE before reading it back. */
   if (cmdbuf->enhanced_barriers) {
      dzn_cmd_buffer_buffer_barrier(cmdbuf, tmp_loc->pResource,
                                    D3D12_BARRIER_SYNC_COPY, D3D12_BARRIER_SYNC_COPY,
                                    D3D12_BARRIER_ACCESS_COPY_DEST, D3D12_BARRIER_ACCESS_COPY_SOURCE);
   } else {
      dzn_cmd_buffer_queue_transition_barriers(cmdbuf, tmp_loc->pResource, 0, 1,
                                                D3D12_RESOURCE_STATE_COPY_DEST,
                                                D3D12_RESOURCE_STATE_COPY_SOURCE,
                                                DZN_QUEUE_TRANSITION_FLUSH);
   }

   /* Step 3: re-describe the footprint in terms of the destination format,
    * rescaling the extent when the block dimensions differ, then copy from
    * the staging buffer into the destination image.
    */
   tmp_desc->Format =
      dzn_image_get_placed_footprint_format(pdev, dst->vk.format, aspect);
   if (src_blkw != dst_blkw)
      tmp_desc->Width = DIV_ROUND_UP(region.extent.width, src_blkw) * dst_blkw;
   if (src_blkh != dst_blkh)
      tmp_desc->Height = DIV_ROUND_UP(region.extent.height, src_blkh) * dst_blkh;

   ID3D12Device1_GetCopyableFootprints(device->dev, tmp_desc,
                                       0, 1, 0,
                                       &tmp_loc->PlacedFootprint,
                                       NULL, NULL, NULL);

   if (src_blkd != dst_blkd) {
      tmp_loc->PlacedFootprint.Footprint.Depth =
         DIV_ROUND_UP(depth, src_blkd) * dst_blkd;
   } else {
      tmp_loc->PlacedFootprint.Footprint.Depth = region.extent.depth;
   }

   D3D12_BOX tmp_box = {
      .left = 0,
      .top = 0,
      .front = 0,
      .right = tmp_loc->PlacedFootprint.Footprint.Width,
      .bottom = tmp_loc->PlacedFootprint.Footprint.Height,
      .back = tmp_loc->PlacedFootprint.Footprint.Depth,
   };

   ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_loc,
                                                region.dstOffset.x,
                                                region.dstOffset.y,
                                                dst_z,
                                                tmp_loc, &tmp_box);
}
2687 
2688 static void
dzn_cmd_buffer_blit_prepare_src_view(struct dzn_cmd_buffer * cmdbuf,VkImage image,VkImageAspectFlagBits aspect,const VkImageSubresourceLayers * subres,struct dzn_descriptor_heap * heap,uint32_t heap_slot)2689 dzn_cmd_buffer_blit_prepare_src_view(struct dzn_cmd_buffer *cmdbuf,
2690                                      VkImage image,
2691                                      VkImageAspectFlagBits aspect,
2692                                      const VkImageSubresourceLayers *subres,
2693                                      struct dzn_descriptor_heap *heap,
2694                                      uint32_t heap_slot)
2695 {
2696    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
2697    VK_FROM_HANDLE(dzn_image, img, image);
2698    VkImageViewCreateInfo iview_info = {
2699       .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
2700       .image = image,
2701       .format = img->vk.format,
2702       .subresourceRange = {
2703          .aspectMask = (VkImageAspectFlags)aspect,
2704          .baseMipLevel = subres->mipLevel,
2705          .levelCount = 1,
2706          .baseArrayLayer = subres->baseArrayLayer,
2707          .layerCount = subres->layerCount,
2708       },
2709    };
2710 
2711    switch (img->vk.image_type) {
2712    case VK_IMAGE_TYPE_1D:
2713       iview_info.viewType = img->vk.array_layers > 1 ?
2714                             VK_IMAGE_VIEW_TYPE_1D_ARRAY : VK_IMAGE_VIEW_TYPE_1D;
2715       break;
2716    case VK_IMAGE_TYPE_2D:
2717       iview_info.viewType = img->vk.array_layers > 1 ?
2718                             VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D;
2719       break;
2720    case VK_IMAGE_TYPE_3D:
2721       iview_info.viewType = VK_IMAGE_VIEW_TYPE_3D;
2722       break;
2723    default:
2724       unreachable("Invalid type");
2725    }
2726 
2727    struct dzn_image_view iview;
2728    dzn_image_view_init(device, &iview, &iview_info);
2729    dzn_descriptor_heap_write_image_view_desc(device, heap, heap_slot, false, false, &iview);
2730    dzn_image_view_finish(&iview);
2731 
2732    D3D12_GPU_DESCRIPTOR_HANDLE handle =
2733       dzn_descriptor_heap_get_gpu_handle(heap, heap_slot);
2734    ID3D12GraphicsCommandList1_SetGraphicsRootDescriptorTable(cmdbuf->cmdlist, 0, handle);
2735 }
2736 
2737 static void
dzn_cmd_buffer_blit_prepare_dst_view(struct dzn_cmd_buffer * cmdbuf,struct dzn_image * img,VkImageAspectFlagBits aspect,uint32_t level,uint32_t layer,const VkOffset3D * dst_offsets)2738 dzn_cmd_buffer_blit_prepare_dst_view(struct dzn_cmd_buffer *cmdbuf,
2739                                      struct dzn_image *img,
2740                                      VkImageAspectFlagBits aspect,
2741                                      uint32_t level, uint32_t layer,
2742                                      const VkOffset3D *dst_offsets)
2743 {
2744    bool ds = aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);
2745    VkImageSubresourceRange range = {
2746       .aspectMask = (VkImageAspectFlags)aspect,
2747       .baseMipLevel = level,
2748       .levelCount = 1,
2749       .baseArrayLayer = layer,
2750       .layerCount = 1,
2751    };
2752 
2753    if (ds) {
2754       D3D12_DEPTH_STENCIL_VIEW_DESC desc = dzn_image_get_dsv_desc(img, &range, 0);
2755       D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_dsv(cmdbuf, img, &desc);
2756       ID3D12GraphicsCommandList1_OMSetRenderTargets(cmdbuf->cmdlist, 0, NULL, true, &handle);
2757 
2758       if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT) {
2759          const struct dzn_physical_device *pdev = container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk);
2760          if (!pdev->options.PSSpecifiedStencilRefSupported) {
2761             D3D12_RECT clear_rect = {
2762                .left = dst_offsets[0].x,
2763                .right = dst_offsets[1].x,
2764                .top = dst_offsets[0].y,
2765                .bottom = dst_offsets[1].y,
2766             };
2767             ID3D12GraphicsCommandList1_ClearDepthStencilView(cmdbuf->cmdlist, handle, D3D12_CLEAR_FLAG_STENCIL, 0.f, 0, 1, &clear_rect);
2768          }
2769       }
2770    } else {
2771       D3D12_RENDER_TARGET_VIEW_DESC desc = dzn_image_get_rtv_desc(img, &range, 0);
2772       D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_rtv(cmdbuf, img, &desc);
2773       ID3D12GraphicsCommandList1_OMSetRenderTargets(cmdbuf->cmdlist, 1, &handle, false, NULL);
2774    }
2775 }
2776 
2777 static void
dzn_cmd_buffer_blit_set_pipeline(struct dzn_cmd_buffer * cmdbuf,const struct dzn_image * src,const struct dzn_image * dst,VkImageAspectFlagBits aspect,VkFilter filter,enum dzn_blit_resolve_mode resolve_mode,uint32_t stencil_bit)2778 dzn_cmd_buffer_blit_set_pipeline(struct dzn_cmd_buffer *cmdbuf,
2779                                  const struct dzn_image *src,
2780                                  const struct dzn_image *dst,
2781                                  VkImageAspectFlagBits aspect,
2782                                  VkFilter filter,
2783                                  enum dzn_blit_resolve_mode resolve_mode,
2784                                  uint32_t stencil_bit)
2785 {
2786    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
2787    struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk);
2788    assert(pdev->options.PSSpecifiedStencilRefSupported || aspect != VK_IMAGE_ASPECT_STENCIL_BIT || stencil_bit != 0xf);
2789    enum pipe_format pfmt = vk_format_to_pipe_format(dst->vk.format);
2790    VkImageUsageFlags usage =
2791       vk_format_is_depth_or_stencil(dst->vk.format) ?
2792       VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT :
2793       VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
2794    struct dzn_meta_blit_key ctx_key = {
2795       .out_format = dzn_image_get_dxgi_format(pdev, dst->vk.format, usage, aspect),
2796       .samples = (uint32_t)src->vk.samples,
2797       .loc = (uint32_t)(aspect == VK_IMAGE_ASPECT_DEPTH_BIT ?
2798                         FRAG_RESULT_DEPTH :
2799                         aspect == VK_IMAGE_ASPECT_STENCIL_BIT ?
2800                         FRAG_RESULT_STENCIL :
2801                         FRAG_RESULT_DATA0),
2802       .out_type = (uint32_t)(util_format_is_pure_uint(pfmt) ? GLSL_TYPE_UINT :
2803                              util_format_is_pure_sint(pfmt) ? GLSL_TYPE_INT :
2804                              aspect == VK_IMAGE_ASPECT_STENCIL_BIT ? GLSL_TYPE_UINT :
2805                              GLSL_TYPE_FLOAT),
2806       .sampler_dim = (uint32_t)(src->vk.image_type == VK_IMAGE_TYPE_1D ? GLSL_SAMPLER_DIM_1D :
2807                                 src->vk.image_type == VK_IMAGE_TYPE_2D && src->vk.samples == 1 ? GLSL_SAMPLER_DIM_2D :
2808                                 src->vk.image_type == VK_IMAGE_TYPE_2D && src->vk.samples > 1 ? GLSL_SAMPLER_DIM_MS :
2809                                 GLSL_SAMPLER_DIM_3D),
2810       .src_is_array = src->vk.array_layers > 1,
2811       .resolve_mode = resolve_mode,
2812       /* Filter doesn't need to be part of the key if we're not embedding a static sampler */
2813       .linear_filter = filter == VK_FILTER_LINEAR && device->support_static_samplers,
2814       .stencil_bit = stencil_bit,
2815       .padding = 0,
2816    };
2817 
2818    const struct dzn_meta_blit *ctx =
2819       dzn_meta_blits_get_context(device, &ctx_key);
2820    assert(ctx);
2821 
2822    cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
2823    if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].root_sig != ctx->root_sig) {
2824       cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].root_sig = ctx->root_sig;
2825       ID3D12GraphicsCommandList1_SetGraphicsRootSignature(cmdbuf->cmdlist, ctx->root_sig);
2826    }
2827    ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, ctx->pipeline_state);
2828 }
2829 
2830 static void
dzn_cmd_buffer_blit_set_2d_region(struct dzn_cmd_buffer * cmdbuf,const struct dzn_image * src,const VkImageSubresourceLayers * src_subres,const VkOffset3D * src_offsets,const struct dzn_image * dst,const VkImageSubresourceLayers * dst_subres,const VkOffset3D * dst_offsets,bool normalize_src_coords)2831 dzn_cmd_buffer_blit_set_2d_region(struct dzn_cmd_buffer *cmdbuf,
2832                                   const struct dzn_image *src,
2833                                   const VkImageSubresourceLayers *src_subres,
2834                                   const VkOffset3D *src_offsets,
2835                                   const struct dzn_image *dst,
2836                                   const VkImageSubresourceLayers *dst_subres,
2837                                   const VkOffset3D *dst_offsets,
2838                                   bool normalize_src_coords)
2839 {
2840    uint32_t dst_w = u_minify(dst->vk.extent.width, dst_subres->mipLevel);
2841    uint32_t dst_h = u_minify(dst->vk.extent.height, dst_subres->mipLevel);
2842    uint32_t src_w = u_minify(src->vk.extent.width, src_subres->mipLevel);
2843    uint32_t src_h = u_minify(src->vk.extent.height, src_subres->mipLevel);
2844 
2845    float dst_pos[4] = {
2846       (2 * (float)dst_offsets[0].x / (float)dst_w) - 1.0f, -((2 * (float)dst_offsets[0].y / (float)dst_h) - 1.0f),
2847       (2 * (float)dst_offsets[1].x / (float)dst_w) - 1.0f, -((2 * (float)dst_offsets[1].y / (float)dst_h) - 1.0f),
2848    };
2849 
2850    float src_pos[4] = {
2851       (float)src_offsets[0].x, (float)src_offsets[0].y,
2852       (float)src_offsets[1].x, (float)src_offsets[1].y,
2853    };
2854 
2855    if (normalize_src_coords) {
2856       src_pos[0] /= src_w;
2857       src_pos[1] /= src_h;
2858       src_pos[2] /= src_w;
2859       src_pos[3] /= src_h;
2860    }
2861 
2862    float coords[] = {
2863       dst_pos[0], dst_pos[1], src_pos[0], src_pos[1],
2864       dst_pos[2], dst_pos[1], src_pos[2], src_pos[1],
2865       dst_pos[0], dst_pos[3], src_pos[0], src_pos[3],
2866       dst_pos[2], dst_pos[3], src_pos[2], src_pos[3],
2867    };
2868 
2869    ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 1, ARRAY_SIZE(coords), coords, 0);
2870 
2871    D3D12_VIEWPORT vp = {
2872       .TopLeftX = 0,
2873       .TopLeftY = 0,
2874       .Width = (float)dst_w,
2875       .Height = (float)dst_h,
2876       .MinDepth = 0,
2877       .MaxDepth = 1,
2878    };
2879    ID3D12GraphicsCommandList1_RSSetViewports(cmdbuf->cmdlist, 1, &vp);
2880 
2881    D3D12_RECT scissor = {
2882       .left = MIN2(dst_offsets[0].x, dst_offsets[1].x),
2883       .top = MIN2(dst_offsets[0].y, dst_offsets[1].y),
2884       .right = MAX2(dst_offsets[0].x, dst_offsets[1].x),
2885       .bottom = MAX2(dst_offsets[0].y, dst_offsets[1].y),
2886    };
2887    ID3D12GraphicsCommandList1_RSSetScissorRects(cmdbuf->cmdlist, 1, &scissor);
2888 }
2889 
/* Issue the layout transitions surrounding a blit. With post=false, move the
 * source to a shader-readable layout and the destination to a render-target
 * (or depth/stencil-write) layout; with post=true, restore both to the
 * caller-provided layouts. On the enhanced-barriers path,
 * restore_src_layout/restore_dst_layout carry the original layouts from the
 * pre call to the matching post call.
 */
static void
dzn_cmd_buffer_blit_issue_barriers(struct dzn_cmd_buffer *cmdbuf,
                                   struct dzn_image *src, VkImageLayout src_layout,
                                   const VkImageSubresourceLayers *src_subres,
                                   struct dzn_image *dst, VkImageLayout dst_layout,
                                   const VkImageSubresourceLayers *dst_subres,
                                   VkImageAspectFlagBits aspect,
                                   D3D12_BARRIER_LAYOUT *restore_src_layout,
                                   D3D12_BARRIER_LAYOUT *restore_dst_layout,
                                   bool post)
{
   VkImageSubresourceRange src_range = {
      .aspectMask = aspect,
      .baseMipLevel = src_subres->mipLevel,
      .levelCount = 1,
      .baseArrayLayer = src_subres->baseArrayLayer,
      .layerCount = src_subres->layerCount,
   };
   VkImageSubresourceRange dst_range = {
      .aspectMask = aspect,
      .baseMipLevel = dst_subres->mipLevel,
      .levelCount = 1,
      .baseArrayLayer = dst_subres->baseArrayLayer,
      .layerCount = dst_subres->layerCount,
   };

   if (!post) {
      /* Pre-blit: src becomes readable by the blit shader, dst becomes
       * writable as a color or depth/stencil attachment.
       */
      if (cmdbuf->enhanced_barriers) {
         D3D12_BARRIER_LAYOUT dst_new_layout = (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) ?
            D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE : D3D12_BARRIER_LAYOUT_RENDER_TARGET;
         /* require_layout() returns the previous layout so the post pass can
          * put it back.
          */
         *restore_src_layout = dzn_cmd_buffer_require_layout(cmdbuf, src, src_layout,
                                                             D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ,
                                                             &src_range);
         *restore_dst_layout = dzn_cmd_buffer_require_layout(cmdbuf, dst,
                                                             dst_layout,
                                                             dst_new_layout,
                                                             &dst_range);
      } else {
         VkImageLayout dst_new_layout = (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) ?
                                          VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
         dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, src, &src_range,
                                                            src_layout,
                                                            VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
                                                            DZN_QUEUE_TRANSITION_FLUSH);
         dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, dst, &dst_range,
                                                            dst_layout,
                                                            dst_new_layout,
                                                            DZN_QUEUE_TRANSITION_FLUSH);
      }
   } else {
      /* Post-blit: undo the pre-blit transitions, synchronizing against the
       * stages/accesses the blit used.
       */
      if (cmdbuf->enhanced_barriers) {
         dzn_cmd_buffer_restore_layout(cmdbuf, src,
                                       D3D12_BARRIER_SYNC_PIXEL_SHADING, D3D12_BARRIER_ACCESS_SHADER_RESOURCE,
                                       D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ, *restore_src_layout,
                                       &src_range);
         if ((aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) {
            dzn_cmd_buffer_restore_layout(cmdbuf, dst,
                                          D3D12_BARRIER_SYNC_DEPTH_STENCIL, D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE,
                                          D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE, *restore_dst_layout,
                                          &dst_range);
         } else {
            dzn_cmd_buffer_restore_layout(cmdbuf, dst,
                                          D3D12_BARRIER_SYNC_RENDER_TARGET, D3D12_BARRIER_ACCESS_RENDER_TARGET,
                                          D3D12_BARRIER_LAYOUT_RENDER_TARGET, *restore_dst_layout,
                                          &dst_range);
         }
      } else {
         VkImageLayout dst_new_layout = (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) ?
                                          VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
         dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, src, &src_range,
                                                            VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
                                                            src_layout,
                                                            DZN_QUEUE_TRANSITION_FLUSH);
         dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, dst, &dst_range,
                                                            dst_new_layout,
                                                            dst_layout,
                                                            DZN_QUEUE_TRANSITION_FLUSH);
      }
   }
}
2970 
2971 static void
dzn_cmd_buffer_blit_region(struct dzn_cmd_buffer * cmdbuf,const VkBlitImageInfo2 * info,struct dzn_descriptor_heap * heap,uint32_t * heap_slot,struct dzn_descriptor_heap * sampler_heap,uint32_t sampler_heap_slot,uint32_t r)2972 dzn_cmd_buffer_blit_region(struct dzn_cmd_buffer *cmdbuf,
2973                            const VkBlitImageInfo2 *info,
2974                            struct dzn_descriptor_heap *heap,
2975                            uint32_t *heap_slot,
2976                            struct dzn_descriptor_heap *sampler_heap,
2977                            uint32_t sampler_heap_slot,
2978                            uint32_t r)
2979 {
2980    VK_FROM_HANDLE(dzn_image, src, info->srcImage);
2981    VK_FROM_HANDLE(dzn_image, dst, info->dstImage);
2982 
2983    const VkImageBlit2 *region = &info->pRegions[r];
2984    bool src_is_3d = src->vk.image_type == VK_IMAGE_TYPE_3D;
2985    bool dst_is_3d = dst->vk.image_type == VK_IMAGE_TYPE_3D;
2986    const struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
2987    const struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk);
2988    bool support_stencil_blit = pdev->options.PSSpecifiedStencilRefSupported;
2989    uint32_t stencil_bit = support_stencil_blit ? 0xf : 0;
2990    uint32_t stencil_bit_root_param_slot = 2;
2991    assert(device->support_static_samplers == (sampler_heap == NULL));
2992 
2993    dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) {
2994       D3D12_BARRIER_LAYOUT restore_src_layout = D3D12_BARRIER_LAYOUT_COMMON;
2995       D3D12_BARRIER_LAYOUT restore_dst_layout = D3D12_BARRIER_LAYOUT_COMMON;
2996       dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, info->filter, dzn_blit_resolve_none, stencil_bit);
2997       dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
2998                                          src, info->srcImageLayout, &region->srcSubresource,
2999                                          dst, info->dstImageLayout, &region->dstSubresource,
3000                                          aspect, &restore_src_layout, &restore_dst_layout, false);
3001       dzn_cmd_buffer_blit_prepare_src_view(cmdbuf, info->srcImage,
3002                                            aspect, &region->srcSubresource,
3003                                            heap, (*heap_slot)++);
3004       dzn_cmd_buffer_blit_set_2d_region(cmdbuf,
3005                                         src, &region->srcSubresource, region->srcOffsets,
3006                                         dst, &region->dstSubresource, region->dstOffsets,
3007                                         src->vk.samples == 1);
3008 
3009       uint32_t dst_depth =
3010          region->dstOffsets[1].z > region->dstOffsets[0].z ?
3011          region->dstOffsets[1].z - region->dstOffsets[0].z :
3012          region->dstOffsets[0].z - region->dstOffsets[1].z;
3013       uint32_t src_depth =
3014          region->srcOffsets[1].z > region->srcOffsets[0].z ?
3015          region->srcOffsets[1].z - region->srcOffsets[0].z :
3016          region->srcOffsets[0].z - region->srcOffsets[1].z;
3017 
3018       uint32_t layer_count = dzn_get_layer_count(src, &region->srcSubresource);
3019       uint32_t dst_level = region->dstSubresource.mipLevel;
3020 
3021       float src_slice_step = src_is_3d ? (float)src_depth / dst_depth : 1;
3022       if (region->srcOffsets[0].z > region->srcOffsets[1].z)
3023          src_slice_step = -src_slice_step;
3024       float src_z_coord =
3025          src_is_3d ? (float)region->srcOffsets[0].z + (src_slice_step * 0.5f) : 0;
3026       uint32_t slice_count = dst_is_3d ? dst_depth : layer_count;
3027       uint32_t dst_z_coord =
3028          dst_is_3d ? region->dstOffsets[0].z : region->dstSubresource.baseArrayLayer;
3029       if (region->dstOffsets[0].z > region->dstOffsets[1].z)
3030          dst_z_coord--;
3031 
3032       uint32_t dst_slice_step = region->dstOffsets[0].z < region->dstOffsets[1].z ?
3033                                 1 : -1;
3034 
3035       /* Normalize the src coordinates/step */
3036       if (src_is_3d) {
3037          src_z_coord /= src->vk.extent.depth;
3038          src_slice_step /= src->vk.extent.depth;
3039       }
3040 
3041       for (uint32_t slice = 0; slice < slice_count; slice++) {
3042          dzn_cmd_buffer_blit_prepare_dst_view(cmdbuf, dst, aspect, dst_level, dst_z_coord, region->dstOffsets);
3043          ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 1, 1, &src_z_coord, 16);
3044          if (!device->support_static_samplers) {
3045             ID3D12GraphicsCommandList1_SetGraphicsRootDescriptorTable(cmdbuf->cmdlist, 2, dzn_descriptor_heap_get_gpu_handle(sampler_heap, sampler_heap_slot));
3046             stencil_bit_root_param_slot++;
3047          }
3048          if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT && !support_stencil_blit) {
3049             cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF;
3050             ID3D12GraphicsCommandList1_OMSetStencilRef(cmdbuf->cmdlist, 0xff);
3051             for (stencil_bit = 0; stencil_bit < 8; ++stencil_bit) {
3052                dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, info->filter, dzn_blit_resolve_none, stencil_bit);
3053                ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstant(cmdbuf->cmdlist, stencil_bit_root_param_slot, (1 << stencil_bit), 0);
3054                ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0);
3055             }
3056          } else {
3057             ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0);
3058          }
3059          src_z_coord += src_slice_step;
3060          dst_z_coord += dst_slice_step;
3061       }
3062 
3063       dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
3064                                          src, info->srcImageLayout, &region->srcSubresource,
3065                                          dst, info->dstImageLayout, &region->dstSubresource,
3066                                          aspect, &restore_src_layout, &restore_dst_layout, true);
3067    }
3068 }
3069 
3070 static enum dzn_blit_resolve_mode
get_blit_resolve_mode(VkResolveModeFlagBits mode)3071 get_blit_resolve_mode(VkResolveModeFlagBits mode)
3072 {
3073    switch (mode) {
3074    case VK_RESOLVE_MODE_AVERAGE_BIT: return dzn_blit_resolve_average;
3075    case VK_RESOLVE_MODE_MIN_BIT: return dzn_blit_resolve_min;
3076    case VK_RESOLVE_MODE_MAX_BIT: return dzn_blit_resolve_max;
3077    case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT: return dzn_blit_resolve_sample_zero;
3078    default: unreachable("Unexpected resolve mode");
3079    }
3080 }
3081 
3082 static void
dzn_cmd_buffer_resolve_region(struct dzn_cmd_buffer * cmdbuf,const VkResolveImageInfo2 * info,VkResolveModeFlags mode,struct dzn_descriptor_heap * heap,uint32_t * heap_slot,struct dzn_descriptor_heap * sampler_heap,uint32_t sampler_heap_slot,uint32_t r)3083 dzn_cmd_buffer_resolve_region(struct dzn_cmd_buffer *cmdbuf,
3084                               const VkResolveImageInfo2 *info,
3085                               VkResolveModeFlags mode,
3086                               struct dzn_descriptor_heap *heap,
3087                               uint32_t *heap_slot,
3088                               struct dzn_descriptor_heap *sampler_heap,
3089                               uint32_t sampler_heap_slot,
3090                               uint32_t r)
3091 {
3092    VK_FROM_HANDLE(dzn_image, src, info->srcImage);
3093    VK_FROM_HANDLE(dzn_image, dst, info->dstImage);
3094 
3095    const VkImageResolve2 *region = &info->pRegions[r];
3096 
3097    const struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
3098    const struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk);
3099    bool support_stencil_blit = pdev->options.PSSpecifiedStencilRefSupported;
3100    uint32_t stencil_bit = support_stencil_blit ? 0xf : 0;
3101    uint32_t stencil_bit_root_param_slot = 2;
3102    assert(device->support_static_samplers == (sampler_heap == NULL));
3103    enum dzn_blit_resolve_mode resolve_mode = get_blit_resolve_mode(mode);
3104 
3105    dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) {
3106       D3D12_BARRIER_LAYOUT restore_src_layout = D3D12_BARRIER_LAYOUT_COMMON;
3107       D3D12_BARRIER_LAYOUT restore_dst_layout = D3D12_BARRIER_LAYOUT_COMMON;
3108       dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, VK_FILTER_NEAREST, resolve_mode, stencil_bit);
3109       dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
3110                                          src, info->srcImageLayout, &region->srcSubresource,
3111                                          dst, info->dstImageLayout, &region->dstSubresource,
3112                                          aspect, &restore_src_layout, &restore_dst_layout, false);
3113       dzn_cmd_buffer_blit_prepare_src_view(cmdbuf, info->srcImage, aspect,
3114                                            &region->srcSubresource,
3115                                            heap, (*heap_slot)++);
3116 
3117       VkOffset3D src_offset[2] = {
3118          {
3119             .x = region->srcOffset.x,
3120             .y = region->srcOffset.y,
3121          },
3122          {
3123             .x = (int32_t)(region->srcOffset.x + region->extent.width),
3124             .y = (int32_t)(region->srcOffset.y + region->extent.height),
3125          },
3126       };
3127       VkOffset3D dst_offset[2] = {
3128          {
3129             .x = region->dstOffset.x,
3130             .y = region->dstOffset.y,
3131          },
3132          {
3133             .x = (int32_t)(region->dstOffset.x + region->extent.width),
3134             .y = (int32_t)(region->dstOffset.y + region->extent.height),
3135          },
3136       };
3137 
3138       dzn_cmd_buffer_blit_set_2d_region(cmdbuf,
3139                                         src, &region->srcSubresource, src_offset,
3140                                         dst, &region->dstSubresource, dst_offset,
3141                                         false);
3142 
3143       uint32_t layer_count = dzn_get_layer_count(src, &region->srcSubresource);
3144       for (uint32_t layer = 0; layer < layer_count; layer++) {
3145          float src_z_coord = layer;
3146 
3147          dzn_cmd_buffer_blit_prepare_dst_view(cmdbuf,
3148                                               dst, aspect, region->dstSubresource.mipLevel,
3149                                               region->dstSubresource.baseArrayLayer + layer,
3150                                               dst_offset);
3151          ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 1, 1, &src_z_coord, 16);
3152          if (!device->support_static_samplers) {
3153             ID3D12GraphicsCommandList1_SetGraphicsRootDescriptorTable(cmdbuf->cmdlist, 2, dzn_descriptor_heap_get_gpu_handle(sampler_heap, sampler_heap_slot));
3154             stencil_bit_root_param_slot++;
3155          }
3156          if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT && !support_stencil_blit) {
3157             cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF;
3158             ID3D12GraphicsCommandList1_OMSetStencilRef(cmdbuf->cmdlist8, 0xff);
3159             for (stencil_bit = 0; stencil_bit < 8; ++stencil_bit) {
3160                dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, VK_FILTER_NEAREST, resolve_mode, stencil_bit);
3161                ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstant(cmdbuf->cmdlist, stencil_bit_root_param_slot, (1 << stencil_bit), 0);
3162                ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0);
3163             }
3164          } else {
3165             ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0);
3166          }
3167       }
3168 
3169       dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
3170                                          src, info->srcImageLayout, &region->srcSubresource,
3171                                          dst, info->dstImageLayout, &region->dstSubresource,
3172                                          aspect, &restore_src_layout, &restore_dst_layout, true);
3173    }
3174 }
3175 
static void
dzn_cmd_buffer_update_pipeline(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint)
{
   /* Flush a pending pipeline bind for the given bind point: root signature,
    * device descriptor heaps (bindless only), IA topology (graphics), the
    * PSO itself, and finally the view-instance mask. */
   const struct dzn_pipeline *pipeline = cmdbuf->state.bindpoint[bindpoint].pipeline;

   if (!pipeline)
      return;

   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   /* Remember the currently bound PSO so we can skip a redundant
    * SetPipelineState below. */
   ID3D12PipelineState *old_pipeline_state =
      cmdbuf->state.pipeline ? cmdbuf->state.pipeline->state : NULL;

   /* Non-zero only when the PIPELINE dirty bit was set on a graphics bind
    * point; applied last (see NVIDIA workaround at the bottom). */
   uint32_t view_instance_mask = 0;
   if (cmdbuf->state.bindpoint[bindpoint].dirty & DZN_CMD_BINDPOINT_DIRTY_PIPELINE) {
      if (cmdbuf->state.bindpoint[bindpoint].root_sig != pipeline->root.sig) {
         cmdbuf->state.bindpoint[bindpoint].root_sig = pipeline->root.sig;
         /* Changing root signature always requires re-binding descriptor heaps */
         cmdbuf->state.bindpoint[bindpoint].dirty |= DZN_CMD_BINDPOINT_DIRTY_HEAPS;

         if (device->bindless) {
            /* Note: The D3D12 spec for descriptor heap indexing requires that the descriptor heaps
             * are bound *before* the root signature. */
            bool bind_heaps = false;
            dzn_foreach_pool_type(type) {
               if (cmdbuf->state.heaps[type] != &device->device_heaps[type].heap) {
                  bind_heaps = true;
                  cmdbuf->state.heaps[type] = &device->device_heaps[type].heap;
               }
            }
            if (bind_heaps) {
               ID3D12DescriptorHeap *heaps[NUM_POOL_TYPES];
               dzn_foreach_pool_type(type)
                  heaps[type] = cmdbuf->state.heaps[type]->heap;
               ID3D12GraphicsCommandList1_SetDescriptorHeaps(cmdbuf->cmdlist, NUM_POOL_TYPES, heaps);
            }
         }

         if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS)
            ID3D12GraphicsCommandList1_SetGraphicsRootSignature(cmdbuf->cmdlist, pipeline->root.sig);
         else
            ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, pipeline->root.sig);
      }
      if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
         struct dzn_graphics_pipeline *gfx =
            (struct dzn_graphics_pipeline *)pipeline;
         ID3D12GraphicsCommandList1_IASetPrimitiveTopology(cmdbuf->cmdlist, gfx->ia.topology);
         /* Resolve the pipeline variant (dynamic-state specialization). */
         dzn_graphics_pipeline_get_state(gfx, &cmdbuf->state.pipeline_variant);
         if (gfx->multiview.native_view_instancing)
            view_instance_mask = gfx->multiview.view_mask;
         else
            view_instance_mask = 1;

         /* Dynamic depth bias consumed by the point-mode GS goes through
          * sysvals, so force a sysval re-upload. */
         if (gfx->zsa.dynamic_depth_bias && gfx->use_gs_for_polygon_mode_point)
            cmdbuf->state.bindpoint[bindpoint].dirty |= DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
      }
   }

   ID3D12PipelineState *new_pipeline_state = pipeline->state;

   if (old_pipeline_state != new_pipeline_state) {
      ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, pipeline->state);
      cmdbuf->state.pipeline = pipeline;
   }

   /* Deferring this until after the pipeline has been set due to an NVIDIA driver bug
    * when view instancing mask is set with no pipeline bound. */
   if (view_instance_mask)
      ID3D12GraphicsCommandList1_SetViewInstanceMask(cmdbuf->cmdlist, view_instance_mask);
}
3245 
static void
dzn_cmd_buffer_update_heaps(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint)
{
   /* Flush dirty descriptor state for the given bind point. Non-bindless:
    * allocate slots from the command buffer's heap pools, copy every bound
    * set's descriptors (and write dynamic-buffer descriptors with their
    * current dynamic offsets) into them, then bind heaps and root tables.
    * Bindless: bind the device-wide heaps and set root SRVs pointing at each
    * set's descriptor buffer, plus an upload buffer for dynamic buffers. */
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   struct dzn_descriptor_state *desc_state =
      &cmdbuf->state.bindpoint[bindpoint].desc_state;
   struct dzn_descriptor_heap *new_heaps[NUM_POOL_TYPES] = {
      desc_state->heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV],
      desc_state->heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]
   };
   uint32_t new_heap_offsets[NUM_POOL_TYPES] = { 0 };
   bool update_root_desc_table[NUM_POOL_TYPES] = { 0 };
   const struct dzn_pipeline *pipeline =
      cmdbuf->state.bindpoint[bindpoint].pipeline;

   /* The set of dirty bits that are cleared by running this function. Notably,
    * for bindless, descriptor sets that are bound but unused by the currently
    * set pipeline are not processed, meaning their dirty bits should persist
    * until such a point as a pipeline does use them. For not-bindless,
    * all sets are processed. */
   uint32_t dirty_bits_bindless =
      (pipeline->dynamic_buffer_count ? DZN_CMD_BINDPOINT_DIRTY_DYNAMIC_BUFFERS : 0) |
      (((DZN_CMD_BINDPOINT_DIRTY_DESC_SET0 << pipeline->set_count) - 1) & DZN_CMD_BINDPOINT_DIRTY_DESC_SETS);
   uint32_t dirty_bits = (device->bindless ? dirty_bits_bindless : DZN_CMD_BINDPOINT_DIRTY_DESC_SETS | DZN_CMD_BINDPOINT_DIRTY_DYNAMIC_BUFFERS);
   if (!(cmdbuf->state.bindpoint[bindpoint].dirty & dirty_bits))
      return;

   dzn_foreach_pool_type (type) {
      if (device->bindless) {
         /* Bindless always samples from the device-global heaps. */
         new_heaps[type] = &device->device_heaps[type].heap;
      } else {
         uint32_t desc_count = pipeline->desc_count[type];
         if (!desc_count)
            continue;

         struct dzn_descriptor_heap_pool *pool =
            type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV ?
            &cmdbuf->cbv_srv_uav_pool : &cmdbuf->sampler_pool;
         struct dzn_descriptor_heap *dst_heap = NULL;
         uint32_t dst_heap_offset = 0;

         dzn_descriptor_heap_pool_alloc_slots(pool, device, desc_count,
                                              &dst_heap, &dst_heap_offset);
         new_heap_offsets[type] = dst_heap_offset;
         update_root_desc_table[type] = true;

         /* Copy each bound set's descriptors into the freshly allocated
          * contiguous range, at the offsets the pipeline layout expects. */
         for (uint32_t s = 0; s < MAX_SETS; s++) {
            const struct dzn_descriptor_set *set = desc_state->sets[s].set;
            if (!set) continue;

            uint32_t set_heap_offset = pipeline->sets[s].heap_offsets[type];
            uint32_t set_desc_count = MIN2(pipeline->sets[s].range_desc_count[type], set->heap_sizes[type]);
            if (set_desc_count) {
               dzn_descriptor_heap_copy(device, dst_heap, dst_heap_offset + set_heap_offset,
                                        &set->pool->heaps[type], set->heap_offsets[type],
                                        set_desc_count, type);
            }

            if (type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) {
               /* Dynamic buffers are re-written every flush with the current
                * dynamic offset applied. */
               uint32_t dynamic_buffer_count = pipeline->sets[s].dynamic_buffer_count;
               for (uint32_t o = 0; o < dynamic_buffer_count; o++) {
                  struct dzn_buffer_desc bdesc = set->dynamic_buffers[o];
                  if (!bdesc.buffer)
                     continue;
                  bdesc.offset += desc_state->sets[s].dynamic_offsets[o];

                  bool primary_is_writable = bdesc.type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC;
                  uint32_t desc_heap_offset = pipeline->sets[s].dynamic_buffer_heap_offsets[o].primary;
                  dzn_descriptor_heap_write_buffer_desc(device, dst_heap,
                                                        dst_heap_offset + set_heap_offset + desc_heap_offset,
                                                        primary_is_writable, &bdesc);

                  /* `alt` slot holds the read-only variant of a writable
                   * dynamic buffer, when the pipeline needs both views. */
                  if (pipeline->sets[s].dynamic_buffer_heap_offsets[o].alt != ~0) {
                     assert(primary_is_writable);
                     desc_heap_offset = pipeline->sets[s].dynamic_buffer_heap_offsets[o].alt;
                     dzn_descriptor_heap_write_buffer_desc(device, dst_heap,
                                                           dst_heap_offset + set_heap_offset + desc_heap_offset,
                                                           false, &bdesc);
                  }
               }
            }
         }

         new_heaps[type] = dst_heap;
      }
   }

   /* Re-bind heaps on the command list only if they actually changed. */
   if (new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] ||
       new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]) {
      ID3D12DescriptorHeap *desc_heaps[2];
      uint32_t num_desc_heaps = 0;
      if (new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV])
         desc_heaps[num_desc_heaps++] = new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]->heap;
      if (new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER])
         desc_heaps[num_desc_heaps++] = new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]->heap;
      ID3D12GraphicsCommandList1_SetDescriptorHeaps(cmdbuf->cmdlist, num_desc_heaps, desc_heaps);

      for (unsigned h = 0; h < ARRAY_SIZE(cmdbuf->state.heaps); h++)
         cmdbuf->state.heaps[h] = new_heaps[h];
   }

   if (!device->bindless) {
      /* Point the root descriptor tables at the newly filled ranges. */
      for (uint32_t r = 0; r < pipeline->root.sets_param_count; r++) {
         D3D12_DESCRIPTOR_HEAP_TYPE type = pipeline->root.type[r];

         if (!update_root_desc_table[type])
            continue;

         D3D12_GPU_DESCRIPTOR_HANDLE handle =
            dzn_descriptor_heap_get_gpu_handle(new_heaps[type], new_heap_offsets[type]);

         if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS)
            ID3D12GraphicsCommandList1_SetGraphicsRootDescriptorTable(cmdbuf->cmdlist, r, handle);
         else
            ID3D12GraphicsCommandList1_SetComputeRootDescriptorTable(cmdbuf->cmdlist, r, handle);
      }
   }

   if (device->bindless) {
      /* Each set used by the pipeline gets a root SRV pointing at its
       * bindless descriptor buffer. */
      for (uint32_t s = 0; s < pipeline->set_count; ++s) {
         const struct dzn_descriptor_set *set = desc_state->sets[s].set;
         if (!set || !set->pool->bindless.buf)
            continue;

         uint32_t dirty_bit = DZN_CMD_BINDPOINT_DIRTY_DESC_SET0 << s;
         if (cmdbuf->state.bindpoint[bindpoint].dirty & dirty_bit) {
            uint64_t gpuva = set->pool->bindless.gpuva + (set->heap_offsets[0] * sizeof(struct dxil_spirv_bindless_entry));
            if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS)
               ID3D12GraphicsCommandList1_SetGraphicsRootShaderResourceView(cmdbuf->cmdlist, s, gpuva);
            else
               ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist, s, gpuva);
         }
      }
      if (pipeline->dynamic_buffer_count &&
          (cmdbuf->state.bindpoint[bindpoint].dirty & DZN_CMD_BINDPOINT_DIRTY_DYNAMIC_BUFFERS)) {
         /* Dynamic buffers get their own upload-heap array of bindless
          * entries, rebuilt with the current dynamic offsets applied. */
         ID3D12Resource *dynamic_buffer_buf = NULL;
         uint64_t dynamic_buffer_buf_offset;
         VkResult result =
            dzn_cmd_buffer_alloc_internal_buf(cmdbuf, sizeof(struct dxil_spirv_bindless_entry) * pipeline->dynamic_buffer_count,
                                              DZN_INTERNAL_BUF_UPLOAD,
                                              D3D12_RESOURCE_STATE_GENERIC_READ,
                                              D3D12_RAW_UAV_SRV_BYTE_ALIGNMENT,
                                              &dynamic_buffer_buf,
                                              &dynamic_buffer_buf_offset);
         if (result != VK_SUCCESS)
            return;

         uint64_t gpuva = ID3D12Resource_GetGPUVirtualAddress(dynamic_buffer_buf) + dynamic_buffer_buf_offset;
         struct dxil_spirv_bindless_entry *map;
         ID3D12Resource_Map(dynamic_buffer_buf, 0, NULL, (void **)&map);
         map += (dynamic_buffer_buf_offset / sizeof(*map));

         for (uint32_t s = 0; s < MAX_SETS; ++s) {
            const struct dzn_descriptor_set *set = desc_state->sets[s].set;
            if (!set)
               continue;

            uint32_t dynamic_buffer_count = pipeline->sets[s].dynamic_buffer_count;
            for (uint32_t o = 0; o < dynamic_buffer_count; o++) {
               const struct dzn_buffer_desc *bdesc = &set->dynamic_buffers[o];
               /* volatile: write-combined upload memory, keep writes ordered. */
               volatile struct dxil_spirv_bindless_entry *map_entry = &map[pipeline->sets[s].dynamic_buffer_heap_offsets[o].primary];
               struct dzn_buffer_desc bdesc_updated = *bdesc;
               bdesc_updated.offset += cmdbuf->state.bindpoint[bindpoint].desc_state.sets[s].dynamic_offsets[o];
               dzn_buffer_get_bindless_buffer_descriptor(device, &bdesc_updated, map_entry);
            }
         }

         ID3D12Resource_Unmap(dynamic_buffer_buf, 0, NULL);
         if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS)
            ID3D12GraphicsCommandList1_SetGraphicsRootShaderResourceView(cmdbuf->cmdlist,
                                                                         pipeline->root.dynamic_buffer_bindless_param_idx,
                                                                         gpuva);
         else
            ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist,
                                                                        pipeline->root.dynamic_buffer_bindless_param_idx,
                                                                        gpuva);
      }
   }

   cmdbuf->state.bindpoint[bindpoint].dirty &= ~dirty_bits;
}
3427 
3428 static void
dzn_cmd_buffer_update_sysvals(struct dzn_cmd_buffer * cmdbuf,uint32_t bindpoint)3429 dzn_cmd_buffer_update_sysvals(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint)
3430 {
3431    if (!(cmdbuf->state.bindpoint[bindpoint].dirty & DZN_CMD_BINDPOINT_DIRTY_SYSVALS))
3432       return;
3433 
3434    const struct dzn_pipeline *pipeline = cmdbuf->state.bindpoint[bindpoint].pipeline;
3435    uint32_t sysval_cbv_param_idx = pipeline->root.sysval_cbv_param_idx;
3436 
3437    if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
3438       ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, sysval_cbv_param_idx,
3439                                                      sizeof(cmdbuf->state.sysvals.gfx) / 4,
3440                                                      &cmdbuf->state.sysvals.gfx, 0);
3441    } else {
3442       ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, sysval_cbv_param_idx,
3443                                                     sizeof(cmdbuf->state.sysvals.compute) / 4,
3444                                                     &cmdbuf->state.sysvals.compute, 0);
3445    }
3446 
3447    cmdbuf->state.bindpoint[bindpoint].dirty &= ~DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
3448 }
3449 
3450 static void
dzn_cmd_buffer_update_viewports(struct dzn_cmd_buffer * cmdbuf)3451 dzn_cmd_buffer_update_viewports(struct dzn_cmd_buffer *cmdbuf)
3452 {
3453    const struct dzn_graphics_pipeline *pipeline =
3454       (const struct dzn_graphics_pipeline *)cmdbuf->state.pipeline;
3455 
3456    if (!(cmdbuf->state.dirty & DZN_CMD_DIRTY_VIEWPORTS) ||
3457        !pipeline->vp.count)
3458       return;
3459 
3460    ID3D12GraphicsCommandList1_RSSetViewports(cmdbuf->cmdlist, pipeline->vp.count, cmdbuf->state.viewports);
3461 }
3462 
3463 static void
dzn_cmd_buffer_update_scissors(struct dzn_cmd_buffer * cmdbuf)3464 dzn_cmd_buffer_update_scissors(struct dzn_cmd_buffer *cmdbuf)
3465 {
3466    const struct dzn_graphics_pipeline *pipeline =
3467       (const struct dzn_graphics_pipeline *)cmdbuf->state.pipeline;
3468 
3469    if (!(cmdbuf->state.dirty & DZN_CMD_DIRTY_SCISSORS))
3470       return;
3471 
3472    if (!pipeline->scissor.count) {
3473       /* Apply a scissor delimiting the render area. */
3474       ID3D12GraphicsCommandList1_RSSetScissorRects(cmdbuf->cmdlist, 1, &cmdbuf->state.render.area);
3475       return;
3476    }
3477 
3478    D3D12_RECT scissors[MAX_SCISSOR];
3479 
3480    memcpy(scissors, cmdbuf->state.scissors, sizeof(D3D12_RECT) * pipeline->scissor.count);
3481    for (uint32_t i = 0; i < pipeline->scissor.count; i++) {
3482       scissors[i].left = MAX2(scissors[i].left, cmdbuf->state.render.area.left);
3483       scissors[i].top = MAX2(scissors[i].top, cmdbuf->state.render.area.top);
3484       scissors[i].right = MIN2(scissors[i].right, cmdbuf->state.render.area.right);
3485       scissors[i].bottom = MIN2(scissors[i].bottom, cmdbuf->state.render.area.bottom);
3486    }
3487 
3488    ID3D12GraphicsCommandList1_RSSetScissorRects(cmdbuf->cmdlist, pipeline->scissor.count, scissors);
3489 }
3490 
3491 static void
dzn_cmd_buffer_update_vbviews(struct dzn_cmd_buffer * cmdbuf)3492 dzn_cmd_buffer_update_vbviews(struct dzn_cmd_buffer *cmdbuf)
3493 {
3494    unsigned start, end;
3495 
3496    BITSET_FOREACH_RANGE(start, end, cmdbuf->state.vb.dirty, MAX_VBS)
3497       ID3D12GraphicsCommandList1_IASetVertexBuffers(cmdbuf->cmdlist, start, end - start, &cmdbuf->state.vb.views[start]);
3498 
3499    BITSET_CLEAR_RANGE(cmdbuf->state.vb.dirty, 0, MAX_VBS);
3500 }
3501 
3502 static void
dzn_cmd_buffer_update_ibview(struct dzn_cmd_buffer * cmdbuf)3503 dzn_cmd_buffer_update_ibview(struct dzn_cmd_buffer *cmdbuf)
3504 {
3505    if (!(cmdbuf->state.dirty & DZN_CMD_DIRTY_IB))
3506       return;
3507 
3508    ID3D12GraphicsCommandList1_IASetIndexBuffer(cmdbuf->cmdlist, &cmdbuf->state.ib.view);
3509 }
3510 
3511 static void
dzn_cmd_buffer_update_push_constants(struct dzn_cmd_buffer * cmdbuf,uint32_t bindpoint)3512 dzn_cmd_buffer_update_push_constants(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint)
3513 {
3514    struct dzn_cmd_buffer_push_constant_state *state =
3515       bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS ?
3516       &cmdbuf->state.push_constant.gfx : &cmdbuf->state.push_constant.compute;
3517 
3518    uint32_t offset = state->offset / 4;
3519    uint32_t end = ALIGN(state->end, 4) / 4;
3520    uint32_t count = end - offset;
3521 
3522    if (!count)
3523       return;
3524 
3525    uint32_t slot = cmdbuf->state.pipeline->root.push_constant_cbv_param_idx;
3526    uint32_t *vals = state->values + offset;
3527 
3528    if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS)
3529       ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, slot, count, vals, offset);
3530    else
3531       ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, slot, count, vals, offset);
3532 
3533    state->offset = 0;
3534    state->end = 0;
3535 }
3536 
3537 static void
dzn_cmd_buffer_update_zsa(struct dzn_cmd_buffer * cmdbuf)3538 dzn_cmd_buffer_update_zsa(struct dzn_cmd_buffer *cmdbuf)
3539 {
3540    struct dzn_physical_device *pdev =
3541       container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk);
3542    if (cmdbuf->state.dirty & DZN_CMD_DIRTY_STENCIL_REF) {
3543       const struct dzn_graphics_pipeline *gfx = (const struct dzn_graphics_pipeline *)
3544          cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
3545       if (cmdbuf->cmdlist8 &&
3546           pdev->options14.IndependentFrontAndBackStencilRefMaskSupported) {
3547          ID3D12GraphicsCommandList8_OMSetFrontAndBackStencilRef(cmdbuf->cmdlist8,
3548                                                                 cmdbuf->state.zsa.stencil_test.front.ref,
3549                                                                 cmdbuf->state.zsa.stencil_test.back.ref);
3550       } else {
3551          uint32_t ref =
3552             gfx->zsa.stencil_test.front.uses_ref ?
3553             cmdbuf->state.zsa.stencil_test.front.ref :
3554             cmdbuf->state.zsa.stencil_test.back.ref;
3555          ID3D12GraphicsCommandList1_OMSetStencilRef(cmdbuf->cmdlist, ref);
3556       }
3557    }
3558 }
3559 
3560 static void
dzn_cmd_buffer_update_blend_constants(struct dzn_cmd_buffer * cmdbuf)3561 dzn_cmd_buffer_update_blend_constants(struct dzn_cmd_buffer *cmdbuf)
3562 {
3563    if (cmdbuf->state.dirty & DZN_CMD_DIRTY_BLEND_CONSTANTS)
3564       ID3D12GraphicsCommandList1_OMSetBlendFactor(cmdbuf->cmdlist,
3565                                                   cmdbuf->state.blend.constants);
3566 }
3567 
3568 static void
dzn_cmd_buffer_update_depth_bounds(struct dzn_cmd_buffer * cmdbuf)3569 dzn_cmd_buffer_update_depth_bounds(struct dzn_cmd_buffer *cmdbuf)
3570 {
3571    if (cmdbuf->state.dirty & DZN_CMD_DIRTY_DEPTH_BOUNDS) {
3572       ID3D12GraphicsCommandList1_OMSetDepthBounds(cmdbuf->cmdlist,
3573                                                   cmdbuf->state.zsa.depth_bounds.min,
3574                                                   cmdbuf->state.zsa.depth_bounds.max);
3575    }
3576 }
3577 
3578 static void
dzn_cmd_buffer_update_depth_bias(struct dzn_cmd_buffer * cmdbuf)3579 dzn_cmd_buffer_update_depth_bias(struct dzn_cmd_buffer *cmdbuf)
3580 {
3581    if (cmdbuf->state.dirty & DZN_CMD_DIRTY_DEPTH_BIAS) {
3582       assert(cmdbuf->cmdlist9);
3583       ID3D12GraphicsCommandList9_RSSetDepthBias(cmdbuf->cmdlist9,
3584                                                 cmdbuf->state.pipeline_variant.depth_bias.constant_factor,
3585                                                 cmdbuf->state.pipeline_variant.depth_bias.clamp,
3586                                                 cmdbuf->state.pipeline_variant.depth_bias.slope_factor);
3587    }
3588 }
3589 
3590 static VkResult
dzn_cmd_buffer_triangle_fan_create_index(struct dzn_cmd_buffer * cmdbuf,uint32_t * vertex_count)3591 dzn_cmd_buffer_triangle_fan_create_index(struct dzn_cmd_buffer *cmdbuf, uint32_t *vertex_count)
3592 {
3593    uint8_t index_size = *vertex_count <= 0xffff ? 2 : 4;
3594    uint32_t triangle_count = MAX2(*vertex_count, 2) - 2;
3595 
3596    *vertex_count = triangle_count * 3;
3597    if (!*vertex_count)
3598       return VK_SUCCESS;
3599 
3600    ID3D12Resource *index_buf;
3601    uint64_t index_offset;
3602    VkResult result =
3603       dzn_cmd_buffer_alloc_internal_buf(cmdbuf, *vertex_count * index_size,
3604                                         DZN_INTERNAL_BUF_UPLOAD,
3605                                         D3D12_RESOURCE_STATE_GENERIC_READ,
3606                                         index_size,
3607                                         &index_buf,
3608                                         &index_offset);
3609    if (result != VK_SUCCESS)
3610       return result;
3611 
3612    void *cpu_ptr;
3613    ID3D12Resource_Map(index_buf, 0, NULL, &cpu_ptr);
3614    cpu_ptr = (uint8_t *)cpu_ptr + index_offset;
3615 
3616    /* TODO: VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT */
3617    if (index_size == 2) {
3618       uint16_t *indices = (uint16_t *)cpu_ptr;
3619       for (uint32_t t = 0; t < triangle_count; t++) {
3620          indices[t * 3] = t + 1;
3621          indices[(t * 3) + 1] = t + 2;
3622          indices[(t * 3) + 2] = 0;
3623       }
3624       cmdbuf->state.ib.view.Format = DXGI_FORMAT_R16_UINT;
3625    } else {
3626       uint32_t *indices = (uint32_t *)cpu_ptr;
3627       for (uint32_t t = 0; t < triangle_count; t++) {
3628          indices[t * 3] = t + 1;
3629          indices[(t * 3) + 1] = t + 2;
3630          indices[(t * 3) + 2] = 0;
3631       }
3632       cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT;
3633    }
3634 
3635    cmdbuf->state.ib.view.SizeInBytes = *vertex_count * index_size;
3636    cmdbuf->state.ib.view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(index_buf) + index_offset;
3637    cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
3638    return VK_SUCCESS;
3639 }
3640 
/* Lower an indexed triangle-fan draw into an indexed triangle-list draw.
 *
 * A compute shader reads the currently bound index buffer and writes a new
 * 32-bit index buffer containing three indices per fan triangle.  On success
 * cmdbuf->state.ib is re-pointed at the new buffer and *first_index is reset
 * to zero (the rewrite shader bakes the original first_index in).
 *
 * @param cmdbuf       command buffer being recorded
 * @param index_count  in: fan index count; out: triangle-list index count
 * @param first_index  in: first index of the fan; out: 0
 * @return VK_SUCCESS (also for the degenerate zero-triangle case), or the
 *         error returned by the internal buffer allocation
 */
static VkResult
dzn_cmd_buffer_triangle_fan_rewrite_index(struct dzn_cmd_buffer *cmdbuf,
                                          uint32_t *index_count,
                                          uint32_t *first_index)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   /* A fan of N indices yields N - 2 triangles, clamped at zero. */
   uint32_t triangle_count = MAX2(*index_count, 2) - 2;

   *index_count = triangle_count * 3;
   if (!*index_count)
      return VK_SUCCESS;

   /* New index is always 32bit to make the compute shader rewriting the
    * index simpler */
   ID3D12Resource *new_index_buf;
   VkResult result =
      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, *index_count * 4,
                                        DZN_INTERNAL_BUF_DEFAULT,
                                        D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
                                        4,
                                        &new_index_buf,
                                        NULL);
   if (result != VK_SUCCESS)
      return result;

   D3D12_GPU_VIRTUAL_ADDRESS old_index_buf_gpu =
      cmdbuf->state.ib.view.BufferLocation;

   /* The rewrite shader selected below (index_type derived with
    * prim_restart == false) doesn't handle primitive restart; callers must
    * not take this path with a strip-cut value enabled. */
   ASSERTED const struct dzn_graphics_pipeline *gfx_pipeline = (const struct dzn_graphics_pipeline *)
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
   ASSERTED bool prim_restart =
      dzn_graphics_pipeline_get_desc_template(gfx_pipeline, ib_strip_cut) != NULL;

   assert(!prim_restart);

   enum dzn_index_type index_type =
      dzn_index_type_from_dxgi_format(cmdbuf->state.ib.view.Format, false);
   const struct dzn_meta_triangle_fan_rewrite_index *rewrite_index =
      &device->triangle_fan[index_type];

   struct dzn_triangle_fan_rewrite_index_params params = {
      .first_index = *first_index,
   };

   /* Bind the internal rewrite pipeline.  Root params: 0 = destination index
    * UAV, 1 = push constants, 2 = source index SRV.  One thread group per
    * output triangle. */
   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].root_sig = NULL;
   ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, rewrite_index->root_sig);
   ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, rewrite_index->pipeline_state);
   ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist, 0, ID3D12Resource_GetGPUVirtualAddress(new_index_buf));
   ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, 1, sizeof(params) / 4,
                                                 &params, 0);
   ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist, 2, old_index_buf_gpu);
   ID3D12GraphicsCommandList1_Dispatch(cmdbuf->cmdlist, triangle_count, 1, 1);

   /* Make the compute writes visible to the index-input stage. */
   if (cmdbuf->enhanced_barriers) {
      dzn_cmd_buffer_buffer_barrier(cmdbuf, new_index_buf,
                                    D3D12_BARRIER_SYNC_COMPUTE_SHADING, D3D12_BARRIER_SYNC_INDEX_INPUT,
                                    D3D12_BARRIER_ACCESS_UNORDERED_ACCESS, D3D12_BARRIER_ACCESS_INDEX_BUFFER);
   } else {
      dzn_cmd_buffer_queue_transition_barriers(cmdbuf, new_index_buf, 0, 1,
                                               D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
                                               D3D12_RESOURCE_STATE_INDEX_BUFFER,
                                               DZN_QUEUE_TRANSITION_FLUSH);
   }

   /* We don't mess up with the driver state when executing our internal
    * compute shader, but we still change the D3D12 state, so let's mark
    * things dirty if needed.
    */
   cmdbuf->state.pipeline = NULL;
   if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline) {
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |=
         DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
   }

   /* Re-point the IB state at the rewritten 32-bit index buffer. */
   cmdbuf->state.ib.view.SizeInBytes = *index_count * 4;
   cmdbuf->state.ib.view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(new_index_buf);
   cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT;
   cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
   *first_index = 0;
   return VK_SUCCESS;
}
3723 
/* Flush all pending graphics state to the D3D12 command list right before a
 * draw: pipeline, descriptor heaps, sysvals, fixed-function dynamic state and
 * push constants.
 *
 * @param cmdbuf  command buffer being recorded
 * @param indexed true for indexed draws; the index-buffer view is only
 *                (re)bound in that case
 */
static void
dzn_cmd_buffer_prepare_draw(struct dzn_cmd_buffer *cmdbuf, bool indexed)
{
   if (indexed)
      dzn_cmd_buffer_update_ibview(cmdbuf);

   dzn_cmd_buffer_update_pipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS);
   dzn_cmd_buffer_update_heaps(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS);
   dzn_cmd_buffer_update_sysvals(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS);
   dzn_cmd_buffer_update_viewports(cmdbuf);
   dzn_cmd_buffer_update_scissors(cmdbuf);
   dzn_cmd_buffer_update_vbviews(cmdbuf);
   dzn_cmd_buffer_update_push_constants(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS);
   dzn_cmd_buffer_update_zsa(cmdbuf);
   dzn_cmd_buffer_update_blend_constants(cmdbuf);
   dzn_cmd_buffer_update_depth_bounds(cmdbuf);
   dzn_cmd_buffer_update_depth_bias(cmdbuf);

   /* Reset the dirty states: clear every graphics bindpoint dirty flag
    * except DZN_CMD_BINDPOINT_DIRTY_HEAPS, which is deliberately kept. */
   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty &= DZN_CMD_BINDPOINT_DIRTY_HEAPS;
   cmdbuf->state.dirty = 0;
}
3746 
3747 static uint32_t
dzn_cmd_buffer_triangle_fan_get_max_index_buf_size(struct dzn_cmd_buffer * cmdbuf,bool indexed)3748 dzn_cmd_buffer_triangle_fan_get_max_index_buf_size(struct dzn_cmd_buffer *cmdbuf, bool indexed)
3749 {
3750    struct dzn_graphics_pipeline *pipeline = (struct dzn_graphics_pipeline *)
3751       cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
3752 
3753    if (!pipeline->ia.triangle_fan)
3754       return 0;
3755 
3756    uint32_t max_triangles;
3757 
3758    if (indexed) {
3759       uint32_t index_size = cmdbuf->state.ib.view.Format == DXGI_FORMAT_R32_UINT ? 4 : 2;
3760       uint32_t max_indices = cmdbuf->state.ib.view.SizeInBytes / index_size;
3761 
3762       max_triangles = MAX2(max_indices, 2) - 2;
3763    } else {
3764       uint32_t max_vertex = 0;
3765       for (uint32_t i = 0; i < pipeline->vb.count; i++) {
3766          max_vertex =
3767             MAX2(max_vertex,
3768                  cmdbuf->state.vb.views[i].SizeInBytes / cmdbuf->state.vb.views[i].StrideInBytes);
3769       }
3770 
3771       max_triangles = MAX2(max_vertex, 2) - 2;
3772    }
3773 
3774    return max_triangles * 3;
3775 }
3776 
/* Patch an indirect draw stream so it can be issued with ExecuteIndirect when
 * plain forwarding isn't possible: triangle-fan lowering, draw-parameter
 * (first_vertex/base_instance) injection, draw-id injection, and/or indirect
 * draw count.
 *
 * A compute shader rewrites the application's draw-argument buffer into an
 * internally allocated "exec" buffer (and, for triangle fans, builds per-draw
 * index buffers with a second, indirectly dispatched rewrite pass).  All
 * inout parameters are updated to point at the patched buffers/strides so the
 * caller can execute from them directly.  On internal allocation failure the
 * function returns early without patching (the allocator records the error on
 * the command buffer).
 */
static void
dzn_cmd_buffer_patch_indirect_draw(struct dzn_cmd_buffer *cmdbuf,
                                   struct dzn_indirect_draw_type draw_type,
                                   ID3D12Resource **inout_draw_buf,
                                   size_t *inout_draw_buf_offset,
                                   ID3D12Resource **inout_count_buf,
                                   size_t *inout_count_buf_offset,
                                   uint32_t max_draw_count,
                                   uint32_t *inout_draw_buf_stride,
                                   bool *inout_indexed)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   /* Worst-case per-draw size of the lowered triangle-fan index data
    * (0 when the pipeline doesn't use triangle fans). */
   uint32_t triangle_fan_index_buf_stride =
      dzn_cmd_buffer_triangle_fan_get_max_index_buf_size(cmdbuf, *inout_indexed) *
      sizeof(uint32_t);

   uint32_t min_draw_buf_stride = *inout_indexed ? sizeof(D3D12_DRAW_INDEXED_ARGUMENTS) : sizeof(D3D12_DRAW_ARGUMENTS);

   /* Per-draw stride of the patched exec buffer: optional IBV + optional
    * draw-params pair + optional draw-id, followed by the native D3D12 draw
    * arguments. */
   uint32_t exec_buf_stride =
      (draw_type.triangle_fan ? sizeof(D3D12_INDEX_BUFFER_VIEW) : 0) +
      (draw_type.draw_params ? sizeof(uint32_t) * 2 : 0) +
      (draw_type.draw_id ? sizeof(uint32_t) : 0) +
      min_draw_buf_stride;
   uint32_t triangle_fan_exec_buf_stride =
      sizeof(struct dzn_indirect_triangle_fan_rewrite_index_exec_params);
   uint32_t exec_buf_size = max_draw_count * exec_buf_stride;
   uint32_t exec_buf_draw_offset = 0;

   // We reserve the first slot for the draw_count value when indirect count is
   // involved.
   if (*inout_count_buf != NULL) {
      exec_buf_size += exec_buf_stride;
      exec_buf_draw_offset = exec_buf_stride;
   }

   ID3D12Resource *exec_buf;
   VkResult result =
      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, exec_buf_size,
                                        DZN_INTERNAL_BUF_DEFAULT,
                                        D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
                                        0,
                                        &exec_buf, NULL);
   if (result != VK_SUCCESS)
      return;

   D3D12_GPU_VIRTUAL_ADDRESS draw_buf_gpu =
      ID3D12Resource_GetGPUVirtualAddress(*inout_draw_buf) + *inout_draw_buf_offset;
   ID3D12Resource *triangle_fan_index_buf = NULL;
   ID3D12Resource *triangle_fan_exec_buf = NULL;

   /* Triangle-fan lowering needs a buffer for the rewritten indices and a
    * second indirect-argument buffer driving the index-rewrite dispatches. */
   if (triangle_fan_index_buf_stride) {
      result =
         dzn_cmd_buffer_alloc_internal_buf(cmdbuf,
                                           max_draw_count * triangle_fan_index_buf_stride,
                                           DZN_INTERNAL_BUF_DEFAULT,
                                           D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
                                           0,
                                           &triangle_fan_index_buf, NULL);
      if (result != VK_SUCCESS)
         return;

      result =
         dzn_cmd_buffer_alloc_internal_buf(cmdbuf,
                                           max_draw_count * triangle_fan_exec_buf_stride,
                                           DZN_INTERNAL_BUF_DEFAULT,
                                           D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
                                           0,
                                           &triangle_fan_exec_buf, NULL);
      if (result != VK_SUCCESS)
         return;
   }

   struct dzn_indirect_draw_triangle_fan_prim_restart_rewrite_params params = {
      .draw_buf_stride = *inout_draw_buf_stride,
      .triangle_fan_index_buf_stride = triangle_fan_index_buf_stride,
      .triangle_fan_index_buf_start =
         triangle_fan_index_buf ?
         ID3D12Resource_GetGPUVirtualAddress(triangle_fan_index_buf) : 0,
      .exec_buf_start =
         draw_type.triangle_fan_primitive_restart ?
         ID3D12Resource_GetGPUVirtualAddress(exec_buf) + exec_buf_draw_offset : 0,
   };
   /* Only push the prefix of the params struct that this draw type's shader
    * actually consumes (the three structs share a common layout prefix). */
   uint32_t params_size;
   if (draw_type.triangle_fan_primitive_restart)
      params_size = sizeof(struct dzn_indirect_draw_triangle_fan_prim_restart_rewrite_params);
   else if (draw_type.triangle_fan)
      params_size = sizeof(struct dzn_indirect_draw_triangle_fan_rewrite_params);
   else
      params_size = sizeof(struct dzn_indirect_draw_rewrite_params);

   struct dzn_meta_indirect_draw *indirect_draw = &device->indirect_draws[draw_type.value];
   uint32_t root_param_idx = 0;

   /* First pass: rewrite the app's draw-argument buffer into the exec buffer
    * (one thread group per potential draw).  Root params, in order: push
    * constants, source draw-buf SRV, exec-buf UAV, optional count-buf SRV,
    * optional triangle-fan exec-buf UAV. */
   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].root_sig = NULL;
   ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, indirect_draw->root_sig);
   ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, indirect_draw->pipeline_state);
   ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, root_param_idx++,
                                                           params_size / 4, (const void *)&params, 0);
   ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist, root_param_idx++,
                                                               draw_buf_gpu);
   ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist, root_param_idx++,
                                                                ID3D12Resource_GetGPUVirtualAddress(exec_buf));
   if (*inout_count_buf) {
      ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist,
                                                                  root_param_idx++,
                                                                  ID3D12Resource_GetGPUVirtualAddress(*inout_count_buf) +
                                                                  *inout_count_buf_offset);
   }

   if (triangle_fan_exec_buf) {
      ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist,
                                                                   root_param_idx++,
                                                                   ID3D12Resource_GetGPUVirtualAddress(triangle_fan_exec_buf));
   }

   ID3D12GraphicsCommandList1_Dispatch(cmdbuf->cmdlist, max_draw_count, 1, 1);

   /* With enhanced barriers the buffer barriers are batched and issued in a
    * single Barrier() call at the end. */
   D3D12_BUFFER_BARRIER buf_barriers[2];
   D3D12_BARRIER_GROUP enhanced_barriers = {
      .NumBarriers = 0,
      .Type = D3D12_BARRIER_TYPE_BUFFER,
      .pBufferBarriers = buf_barriers
   };

   /* Second pass (triangle fans only): indirectly dispatch the index-rewrite
    * shader, one dispatch per draw, driven by triangle_fan_exec_buf. */
   if (triangle_fan_exec_buf) {
      enum dzn_index_type index_type =
         *inout_indexed ?
         dzn_index_type_from_dxgi_format(cmdbuf->state.ib.view.Format, draw_type.triangle_fan_primitive_restart) :
         DZN_NO_INDEX;
      struct dzn_meta_triangle_fan_rewrite_index *rewrite_index =
         &device->triangle_fan[index_type];

      struct dzn_triangle_fan_rewrite_index_params rewrite_index_params = { 0 };

      assert(rewrite_index->root_sig);
      assert(rewrite_index->pipeline_state);
      assert(rewrite_index->cmd_sig);

      /* The first pass wrote the indirect-dispatch arguments; make them
       * visible to the ExecuteIndirect stage. */
      if (cmdbuf->enhanced_barriers) {
         dzn_cmd_buffer_buffer_barrier(cmdbuf, triangle_fan_exec_buf,
                                       D3D12_BARRIER_SYNC_COMPUTE_SHADING, D3D12_BARRIER_SYNC_EXECUTE_INDIRECT,
                                       D3D12_BARRIER_ACCESS_UNORDERED_ACCESS, D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT);
      }
      else {
         dzn_cmd_buffer_queue_transition_barriers(cmdbuf, triangle_fan_exec_buf, 0, 1,
                                                  D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
                                                  D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT,
                                                  DZN_QUEUE_TRANSITION_FLUSH);
      }

      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].root_sig = NULL;
      ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, rewrite_index->root_sig);
      ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, rewrite_index->pipeline_state);
      root_param_idx = 0;
      ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist, root_param_idx++,
                                                                   ID3D12Resource_GetGPUVirtualAddress(triangle_fan_index_buf));
      ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, root_param_idx++,
                                                              sizeof(rewrite_index_params) / 4,
                                                              (const void *)&rewrite_index_params, 0);

      if (*inout_indexed) {
         ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist,
                                                                     root_param_idx++,
                                                                     cmdbuf->state.ib.view.BufferLocation);
      }

      /* When an indirect count is used, slot 0 of exec_buf holds the actual
       * draw count, which caps the number of rewrite dispatches. */
      ID3D12GraphicsCommandList1_ExecuteIndirect(cmdbuf->cmdlist, rewrite_index->cmd_sig,
                                                 max_draw_count, triangle_fan_exec_buf, 0,
                                                 *inout_count_buf ? exec_buf : NULL, 0);

      /* Rewritten indices must be visible to the index-input stage. */
      if (cmdbuf->enhanced_barriers) {
         buf_barriers[enhanced_barriers.NumBarriers++] = (D3D12_BUFFER_BARRIER){
            .SyncBefore = D3D12_BARRIER_SYNC_COMPUTE_SHADING,
            .SyncAfter = D3D12_BARRIER_SYNC_INDEX_INPUT,
            .AccessBefore = D3D12_BARRIER_ACCESS_UNORDERED_ACCESS,
            .AccessAfter = D3D12_BARRIER_ACCESS_INDEX_BUFFER,
            .pResource = triangle_fan_index_buf,
            .Offset = 0, .Size = UINT64_MAX
         };
      }
      else {
         dzn_cmd_buffer_queue_transition_barriers(cmdbuf, triangle_fan_index_buf, 0, 1,
                                                  D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
                                                  D3D12_RESOURCE_STATE_INDEX_BUFFER,
                                                  DZN_QUEUE_TRANSITION_FLUSH);
      }

      /* After our triangle-fan lowering the draw is indexed */
      *inout_indexed = true;
      cmdbuf->state.ib.view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(triangle_fan_index_buf);
      cmdbuf->state.ib.view.SizeInBytes = triangle_fan_index_buf_stride;
      cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT;
      cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
   }

   /* The exec buffer becomes the indirect-argument source for the real draw. */
   if (cmdbuf->enhanced_barriers) {
      buf_barriers[enhanced_barriers.NumBarriers++] = (D3D12_BUFFER_BARRIER){
         .SyncBefore = D3D12_BARRIER_SYNC_COMPUTE_SHADING,
         .SyncAfter = D3D12_BARRIER_SYNC_EXECUTE_INDIRECT,
         .AccessBefore = D3D12_BARRIER_ACCESS_UNORDERED_ACCESS,
         .AccessAfter = D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT,
         .pResource = exec_buf,
         .Offset = 0, .Size = UINT64_MAX
      };
      ID3D12GraphicsCommandList8_Barrier(cmdbuf->cmdlist8, 1, &enhanced_barriers);
   }
   else {
      dzn_cmd_buffer_queue_transition_barriers(cmdbuf, exec_buf, 0, 1,
                                               D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
                                               D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT,
                                               DZN_QUEUE_TRANSITION_FLUSH);
   }

   /* We don't mess up with the driver state when executing our internal
    * compute shader, but we still change the D3D12 state, so let's mark
    * things dirty if needed.
    */
   cmdbuf->state.pipeline = NULL;
   if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline) {
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |=
         DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
   }

   /* Hand the patched buffers back to the caller.  When an indirect count is
    * used, the actual count lives in slot 0 of the exec buffer. */
   *inout_draw_buf = exec_buf;
   *inout_draw_buf_offset = exec_buf_draw_offset;
   if (*inout_count_buf) {
      *inout_count_buf = exec_buf;
      *inout_count_buf_offset = 0;
   }
   *inout_draw_buf_stride = exec_buf_stride;
}
4010 
/* Record an indirect (optionally counted) draw.
 *
 * If the draw needs features ExecuteIndirect can't provide directly
 * (draw-parameter injection, draw-id injection, triangle-fan lowering), the
 * argument stream is first patched by dzn_cmd_buffer_patch_indirect_draw().
 * The draw is then replayed once per view for emulated multiview.
 *
 * @param cmdbuf           command buffer being recorded
 * @param draw_buf         buffer holding the draw arguments
 * @param draw_buf_offset  byte offset of the first argument struct
 * @param count_buf        optional buffer holding the draw count (NULL if none)
 * @param count_buf_offset byte offset of the count value
 * @param max_draw_count   maximum number of draws
 * @param draw_buf_stride  app-provided stride, or 0 for tightly packed
 * @param indexed          true for vkCmdDrawIndexedIndirect-style draws
 */
static void
dzn_cmd_buffer_indirect_draw(struct dzn_cmd_buffer *cmdbuf,
                             ID3D12Resource *draw_buf,
                             size_t draw_buf_offset,
                             ID3D12Resource *count_buf,
                             size_t count_buf_offset,
                             uint32_t max_draw_count,
                             uint32_t draw_buf_stride,
                             bool indexed)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk);
   struct dzn_graphics_pipeline *pipeline = (struct dzn_graphics_pipeline *)
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
   uint32_t min_draw_buf_stride =
      indexed ?
      sizeof(D3D12_DRAW_INDEXED_ARGUMENTS) :
      sizeof(D3D12_DRAW_ARGUMENTS);
   bool prim_restart =
      dzn_graphics_pipeline_get_desc_template(pipeline, ib_strip_cut) != NULL;

   /* A zero stride means tightly packed arguments. */
   draw_buf_stride = draw_buf_stride ? draw_buf_stride : min_draw_buf_stride;
   assert(draw_buf_stride >= min_draw_buf_stride);
   assert((draw_buf_stride & 3) == 0);

   /* Save the IB view: triangle-fan lowering overwrites it below. */
   D3D12_INDEX_BUFFER_VIEW ib_view = cmdbuf->state.ib.view;

   /* Figure out which argument-patching features this draw needs. */
   struct dzn_indirect_draw_type draw_type;
   draw_type.value = 0;
   draw_type.indexed = indexed;
   draw_type.indirect_count = count_buf != NULL;
   draw_type.draw_params = pipeline->needs_draw_sysvals && !pdev->options21.ExtendedCommandInfoSupported;
   draw_type.draw_id = max_draw_count > 1 && pdev->options21.ExecuteIndirectTier < D3D12_EXECUTE_INDIRECT_TIER_1_1;
   draw_type.triangle_fan = pipeline->ia.triangle_fan;
   draw_type.triangle_fan_primitive_restart = draw_type.triangle_fan && prim_restart;

   /* Patching may rewrite draw_buf/count_buf/stride and turn a non-indexed
    * draw into an indexed one (triangle-fan lowering). */
   if (draw_type.draw_params || draw_type.draw_id || draw_type.triangle_fan) {
      dzn_cmd_buffer_patch_indirect_draw(cmdbuf, draw_type,
                                         &draw_buf, &draw_buf_offset,
                                         &count_buf, &count_buf_offset,
                                         max_draw_count, &draw_buf_stride, &indexed);
   }


   struct dzn_indirect_draw_cmd_sig_key cmd_sig_key;
   memset(&cmd_sig_key, 0, sizeof(cmd_sig_key));
   cmd_sig_key.indexed = indexed;
   cmd_sig_key.triangle_fan = draw_type.triangle_fan;
   cmd_sig_key.draw_params = draw_type.draw_params;
   cmd_sig_key.draw_id = max_draw_count > 1;
   cmd_sig_key.custom_stride = draw_buf_stride;
   ID3D12CommandSignature *cmdsig =
      dzn_graphics_pipeline_get_indirect_cmd_sig(pipeline, cmd_sig_key);

   if (!cmdsig) {
      vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_DEVICE_MEMORY);
      return;
   }

   /* first_vertex/base_instance come from the argument buffer for indirect
    * draws; the sysvals are cleared here. */
   cmdbuf->state.sysvals.gfx.first_vertex = 0;
   cmdbuf->state.sysvals.gfx.base_instance = 0;
   cmdbuf->state.sysvals.gfx.is_indexed_draw = indexed;

   /* Emulated multiview: replay the draw once per active view, updating the
    * view_index sysval each time.  With native view instancing a single pass
    * suffices. */
   uint32_t view_mask = pipeline->multiview.native_view_instancing ?
      1 : pipeline->multiview.view_mask;
   u_foreach_bit(view, view_mask) {
      cmdbuf->state.sysvals.gfx.view_index = view;
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
         DZN_CMD_BINDPOINT_DIRTY_SYSVALS;

      dzn_cmd_buffer_prepare_draw(cmdbuf, indexed);

      ID3D12GraphicsCommandList1_ExecuteIndirect(cmdbuf->cmdlist, cmdsig,
                                                 max_draw_count,
                                                 draw_buf, draw_buf_offset,
                                                 count_buf, count_buf_offset);
   }

   /* Restore the old IB view if we modified it during the triangle fan lowering */
   if (draw_type.triangle_fan) {
      cmdbuf->state.ib.view = ib_view;
      cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
   }
}
4095 
/* Flush all pending compute state to the D3D12 command list right before a
 * dispatch: pipeline, descriptor heaps, sysvals and push constants.
 */
static void
dzn_cmd_buffer_prepare_dispatch(struct dzn_cmd_buffer *cmdbuf)
{
   dzn_cmd_buffer_update_pipeline(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE);
   dzn_cmd_buffer_update_heaps(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE);
   dzn_cmd_buffer_update_sysvals(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE);
   dzn_cmd_buffer_update_push_constants(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE);

   /* Reset the dirty states: clear every compute bindpoint dirty flag
    * except DZN_CMD_BINDPOINT_DIRTY_HEAPS, which is deliberately kept. */
   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty &= DZN_CMD_BINDPOINT_DIRTY_HEAPS;
}
4107 
4108 VKAPI_ATTR void VKAPI_CALL
dzn_CmdCopyBuffer2(VkCommandBuffer commandBuffer,const VkCopyBufferInfo2 * info)4109 dzn_CmdCopyBuffer2(VkCommandBuffer commandBuffer,
4110                    const VkCopyBufferInfo2 *info)
4111 {
4112    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4113    VK_FROM_HANDLE(dzn_buffer, src_buffer, info->srcBuffer);
4114    VK_FROM_HANDLE(dzn_buffer, dst_buffer, info->dstBuffer);
4115 
4116    for (int i = 0; i < info->regionCount; i++) {
4117       const VkBufferCopy2 *region = info->pRegions + i;
4118 
4119       ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, dst_buffer->res, region->dstOffset,
4120                                         src_buffer->res, region->srcOffset,
4121                                         region->size);
4122    }
4123 }
4124 
4125 VKAPI_ATTR void VKAPI_CALL
dzn_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer,const VkCopyBufferToImageInfo2 * info)4126 dzn_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer,
4127                           const VkCopyBufferToImageInfo2 *info)
4128 {
4129    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4130 
4131    for (int i = 0; i < info->regionCount; i++) {
4132       const VkBufferImageCopy2 *region = info->pRegions + i;
4133 
4134       dzn_foreach_aspect(aspect, region->imageSubresource.aspectMask) {
4135          for (uint32_t l = 0; l < region->imageSubresource.layerCount; l++)
4136             dzn_cmd_buffer_copy_buf2img_region(cmdbuf, info, i, aspect, l);
4137       }
4138    }
4139 }
4140 
4141 VKAPI_ATTR void VKAPI_CALL
dzn_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer,const VkCopyImageToBufferInfo2 * info)4142 dzn_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer,
4143                           const VkCopyImageToBufferInfo2 *info)
4144 {
4145    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4146 
4147    for (int i = 0; i < info->regionCount; i++) {
4148       const VkBufferImageCopy2 *region = info->pRegions + i;
4149 
4150       dzn_foreach_aspect(aspect, region->imageSubresource.aspectMask) {
4151          for (uint32_t l = 0; l < region->imageSubresource.layerCount; l++)
4152             dzn_cmd_buffer_copy_img2buf_region(cmdbuf, info, i, aspect, l);
4153       }
4154    }
4155 }
4156 
4157 VKAPI_ATTR void VKAPI_CALL
dzn_CmdCopyImage2(VkCommandBuffer commandBuffer,const VkCopyImageInfo2 * info)4158 dzn_CmdCopyImage2(VkCommandBuffer commandBuffer,
4159                   const VkCopyImageInfo2 *info)
4160 {
4161    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4162    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
4163    struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk);
4164    VK_FROM_HANDLE(dzn_image, src, info->srcImage);
4165    VK_FROM_HANDLE(dzn_image, dst, info->dstImage);
4166 
4167    assert(src->vk.samples == dst->vk.samples);
4168 
4169    bool requires_temp_res = false;
4170 
4171    for (uint32_t i = 0; i < info->regionCount && !requires_temp_res; i++) {
4172       const VkImageCopy2 *region = &info->pRegions[i];
4173 
4174       dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) {
4175          assert(aspect & region->dstSubresource.aspectMask);
4176 
4177          if (!dzn_image_formats_are_compatible(device, src->vk.format, dst->vk.format,
4178                                                VK_IMAGE_USAGE_TRANSFER_SRC_BIT, aspect) &&
4179              src->vk.tiling != VK_IMAGE_TILING_LINEAR &&
4180              dst->vk.tiling != VK_IMAGE_TILING_LINEAR) {
4181             requires_temp_res = true;
4182             break;
4183          }
4184       }
4185    }
4186 
4187    bool use_blit = false;
4188    if (src->vk.samples > 1) {
4189       use_blit = requires_temp_res;
4190 
4191       for (int i = 0; i < info->regionCount; i++) {
4192          const VkImageCopy2 *region = info->pRegions + i;
4193          if (region->srcOffset.x != 0 || region->srcOffset.y != 0 ||
4194              region->extent.width != u_minify(src->vk.extent.width, region->srcSubresource.mipLevel) ||
4195              region->extent.height != u_minify(src->vk.extent.height, region->srcSubresource.mipLevel) ||
4196              region->dstOffset.x != 0 || region->dstOffset.y != 0 ||
4197              region->extent.width != u_minify(dst->vk.extent.width, region->dstSubresource.mipLevel) ||
4198              region->extent.height != u_minify(dst->vk.extent.height, region->dstSubresource.mipLevel))
4199             use_blit = true;
4200       }
4201    }
4202 
4203    if (use_blit) {
4204       /* This copy -> blit lowering doesn't work if the vkCmdCopyImage[2]() is
4205        * is issued on a transfer queue, but we don't have any better option
4206        * right now...
4207        */
4208       STACK_ARRAY(VkImageBlit2, blit_regions, info->regionCount);
4209 
4210       VkBlitImageInfo2 blit_info = {
4211          .sType = VK_STRUCTURE_TYPE_BLIT_IMAGE_INFO_2,
4212          .srcImage = info->srcImage,
4213          .srcImageLayout = info->srcImageLayout,
4214          .dstImage = info->dstImage,
4215          .dstImageLayout = info->dstImageLayout,
4216          .regionCount = info->regionCount,
4217          .pRegions = blit_regions,
4218          .filter = VK_FILTER_NEAREST,
4219       };
4220 
4221       for (uint32_t r = 0; r < info->regionCount; r++) {
4222          blit_regions[r] = (VkImageBlit2) {
4223             .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2,
4224             .srcSubresource = info->pRegions[r].srcSubresource,
4225             .srcOffsets = {
4226                 info->pRegions[r].srcOffset,
4227                 info->pRegions[r].srcOffset,
4228             },
4229             .dstSubresource = info->pRegions[r].dstSubresource,
4230             .dstOffsets = {
4231                 info->pRegions[r].dstOffset,
4232                 info->pRegions[r].dstOffset,
4233             },
4234          };
4235 
4236          blit_regions[r].srcOffsets[1].x += info->pRegions[r].extent.width;
4237          blit_regions[r].srcOffsets[1].y += info->pRegions[r].extent.height;
4238          blit_regions[r].srcOffsets[1].z += info->pRegions[r].extent.depth;
4239          blit_regions[r].dstOffsets[1].x += info->pRegions[r].extent.width;
4240          blit_regions[r].dstOffsets[1].y += info->pRegions[r].extent.height;
4241          blit_regions[r].dstOffsets[1].z += info->pRegions[r].extent.depth;
4242       }
4243 
4244       dzn_CmdBlitImage2(commandBuffer, &blit_info);
4245 
4246       STACK_ARRAY_FINISH(blit_regions);
4247       return;
4248    }
4249 
4250    D3D12_TEXTURE_COPY_LOCATION tmp_loc = { 0 };
4251    D3D12_RESOURCE_DESC tmp_desc = {
4252       .Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D,
4253       .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
4254       .DepthOrArraySize = 1,
4255       .MipLevels = 1,
4256       .Format = src->desc.Format,
4257       .SampleDesc = { .Count = 1, .Quality = 0 },
4258       .Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN,
4259       .Flags = D3D12_RESOURCE_FLAG_NONE,
4260    };
4261 
4262    if (requires_temp_res) {
4263       ID3D12Device4 *dev = device->dev;
4264       VkImageAspectFlags aspect = 0;
4265       uint64_t max_size = 0;
4266 
4267       if (vk_format_has_depth(src->vk.format))
4268          aspect = VK_IMAGE_ASPECT_DEPTH_BIT;
4269       else if (vk_format_has_stencil(src->vk.format))
4270          aspect = VK_IMAGE_ASPECT_DEPTH_BIT;
4271       else
4272          aspect = VK_IMAGE_ASPECT_COLOR_BIT;
4273 
4274       for (uint32_t i = 0; i < info->regionCount; i++) {
4275          const VkImageCopy2 *region = &info->pRegions[i];
4276          uint64_t region_size = 0;
4277 
4278          tmp_desc.Format =
4279             dzn_image_get_dxgi_format(pdev, src->vk.format,
4280                                       VK_IMAGE_USAGE_TRANSFER_DST_BIT,
4281                                       aspect);
4282          tmp_desc.Width = region->extent.width;
4283          tmp_desc.Height = region->extent.height;
4284 
4285          ID3D12Device1_GetCopyableFootprints(dev, &src->desc,
4286                                              0, 1, 0,
4287                                              NULL, NULL, NULL,
4288                                              &region_size);
4289          max_size = MAX2(max_size, region_size * region->extent.depth);
4290       }
4291 
4292       VkResult result =
4293          dzn_cmd_buffer_alloc_internal_buf(cmdbuf, max_size,
4294                                            DZN_INTERNAL_BUF_DEFAULT,
4295                                            D3D12_RESOURCE_STATE_COPY_DEST,
4296                                            0,
4297                                            &tmp_loc.pResource, NULL);
4298       if (result != VK_SUCCESS)
4299          return;
4300 
4301       tmp_loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
4302    }
4303 
4304    for (int i = 0; i < info->regionCount; i++) {
4305       const VkImageCopy2 *region = &info->pRegions[i];
4306 
4307       dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) {
4308          for (uint32_t l = 0; l < MAX2(region->srcSubresource.layerCount, region->dstSubresource.layerCount); l++)
4309             dzn_cmd_buffer_copy_img_chunk(cmdbuf, info, &tmp_desc, &tmp_loc, i, aspect, l);
4310       }
4311    }
4312 }
4313 
4314 static VkResult
dzn_alloc_and_bind_blit_heap_slots(struct dzn_cmd_buffer * cmdbuf,uint32_t num_view_slots,D3D12_FILTER sampler_filter,struct dzn_descriptor_heap ** view_heap,uint32_t * view_heap_slot,struct dzn_descriptor_heap ** sampler_heap,uint32_t * sampler_heap_slot)4315 dzn_alloc_and_bind_blit_heap_slots(struct dzn_cmd_buffer *cmdbuf,
4316                                    uint32_t num_view_slots, D3D12_FILTER sampler_filter,
4317                                    struct dzn_descriptor_heap **view_heap, uint32_t *view_heap_slot,
4318                                    struct dzn_descriptor_heap **sampler_heap, uint32_t *sampler_heap_slot)
4319 {
4320    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
4321 
4322    VkResult result =
4323       dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->cbv_srv_uav_pool, device,
4324                                            num_view_slots, view_heap, view_heap_slot);
4325 
4326    if (result != VK_SUCCESS) {
4327       vk_command_buffer_set_error(&cmdbuf->vk, result);
4328       return result;
4329    }
4330 
4331    if (!device->support_static_samplers) {
4332       result =
4333          dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->sampler_pool, device,
4334                                               1, sampler_heap, sampler_heap_slot);
4335 
4336       if (result != VK_SUCCESS) {
4337          vk_command_buffer_set_error(&cmdbuf->vk, result);
4338          return result;
4339       }
4340 
4341       D3D12_SAMPLER_DESC sampler_desc = {
4342          .Filter = sampler_filter,
4343          .AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP,
4344          .AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP,
4345          .AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP,
4346          .MipLODBias = 0,
4347          .MaxAnisotropy = 0,
4348          .MinLOD = 0,
4349          .MaxLOD = D3D12_FLOAT32_MAX,
4350       };
4351       ID3D12Device4_CreateSampler(device->dev, &sampler_desc,
4352          dzn_descriptor_heap_get_cpu_handle(*sampler_heap, *sampler_heap_slot));
4353    }
4354 
4355    if (*view_heap != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] ||
4356        (*sampler_heap && *sampler_heap != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER])) {
4357       ID3D12DescriptorHeap * const heaps[] = { (*view_heap)->heap, *sampler_heap ? (*sampler_heap)->heap : NULL };
4358       cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = *view_heap;
4359       cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] = *sampler_heap;
4360       ID3D12GraphicsCommandList1_SetDescriptorHeaps(cmdbuf->cmdlist, *sampler_heap ? 2 : 1, heaps);
4361    }
4362 
4363    return VK_SUCCESS;
4364 }
4365 
4366 VKAPI_ATTR void VKAPI_CALL
dzn_CmdBlitImage2(VkCommandBuffer commandBuffer,const VkBlitImageInfo2 * info)4367 dzn_CmdBlitImage2(VkCommandBuffer commandBuffer,
4368                   const VkBlitImageInfo2 *info)
4369 {
4370    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4371 
4372    if (info->regionCount == 0)
4373       return;
4374 
4375    uint32_t desc_count = 0;
4376    for (uint32_t r = 0; r < info->regionCount; r++)
4377       desc_count += util_bitcount(info->pRegions[r].srcSubresource.aspectMask);
4378 
4379    struct dzn_descriptor_heap *heap;
4380    uint32_t heap_slot;
4381    struct dzn_descriptor_heap *sampler_heap = NULL;
4382    uint32_t sampler_heap_slot = 0;
4383    VkResult result = dzn_alloc_and_bind_blit_heap_slots(cmdbuf, desc_count,
4384                                                         info->filter == VK_FILTER_LINEAR ?
4385                                                          D3D12_FILTER_MIN_MAG_MIP_LINEAR :
4386                                                          D3D12_FILTER_MIN_MAG_MIP_POINT,
4387                                                         &heap, &heap_slot, &sampler_heap, &sampler_heap_slot);
4388 
4389    if (result != VK_SUCCESS)
4390       return;
4391 
4392    ID3D12GraphicsCommandList1_IASetPrimitiveTopology(cmdbuf->cmdlist, D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
4393 
4394    for (uint32_t r = 0; r < info->regionCount; r++)
4395       dzn_cmd_buffer_blit_region(cmdbuf, info, heap, &heap_slot, sampler_heap, sampler_heap_slot, r);
4396 
4397    cmdbuf->state.pipeline = NULL;
4398    cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS | DZN_CMD_DIRTY_SCISSORS;
4399    if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline) {
4400       cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
4401          DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
4402    }
4403 }
4404 
4405 VKAPI_ATTR void VKAPI_CALL
dzn_CmdResolveImage2(VkCommandBuffer commandBuffer,const VkResolveImageInfo2 * info)4406 dzn_CmdResolveImage2(VkCommandBuffer commandBuffer,
4407                      const VkResolveImageInfo2 *info)
4408 {
4409    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4410 
4411    if (info->regionCount == 0)
4412       return;
4413 
4414    uint32_t desc_count = 0;
4415    for (uint32_t r = 0; r < info->regionCount; r++)
4416       desc_count += util_bitcount(info->pRegions[r].srcSubresource.aspectMask);
4417 
4418    struct dzn_descriptor_heap *heap;
4419    uint32_t heap_slot;
4420    struct dzn_descriptor_heap *sampler_heap = NULL;
4421    uint32_t sampler_heap_slot = 0;
4422    VkResult result = dzn_alloc_and_bind_blit_heap_slots(cmdbuf, desc_count,
4423                                                         D3D12_FILTER_MIN_MAG_MIP_POINT,
4424                                                         &heap, &heap_slot, &sampler_heap, &sampler_heap_slot);
4425    if (result != VK_SUCCESS)
4426       return;
4427 
4428    ID3D12GraphicsCommandList1_IASetPrimitiveTopology(cmdbuf->cmdlist, D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
4429 
4430    for (uint32_t r = 0; r < info->regionCount; r++)
4431       dzn_cmd_buffer_resolve_region(cmdbuf, info, VK_RESOLVE_MODE_AVERAGE_BIT, heap, &heap_slot, sampler_heap, sampler_heap_slot, r);
4432 
4433    cmdbuf->state.pipeline = NULL;
4434    cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS | DZN_CMD_DIRTY_SCISSORS;
4435    if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline) {
4436       cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
4437          DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
4438    }
4439 }
4440 
4441 VKAPI_ATTR void VKAPI_CALL
dzn_CmdClearColorImage(VkCommandBuffer commandBuffer,VkImage image,VkImageLayout imageLayout,const VkClearColorValue * pColor,uint32_t rangeCount,const VkImageSubresourceRange * pRanges)4442 dzn_CmdClearColorImage(VkCommandBuffer commandBuffer,
4443                        VkImage image,
4444                        VkImageLayout imageLayout,
4445                        const VkClearColorValue *pColor,
4446                        uint32_t rangeCount,
4447                        const VkImageSubresourceRange *pRanges)
4448 {
4449    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4450    VK_FROM_HANDLE(dzn_image, img, image);
4451 
4452    dzn_cmd_buffer_clear_color(cmdbuf, img, imageLayout, pColor, rangeCount, pRanges);
4453 }
4454 
4455 VKAPI_ATTR void VKAPI_CALL
dzn_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,VkImage image,VkImageLayout imageLayout,const VkClearDepthStencilValue * pDepthStencil,uint32_t rangeCount,const VkImageSubresourceRange * pRanges)4456 dzn_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
4457                               VkImage image,
4458                               VkImageLayout imageLayout,
4459                               const VkClearDepthStencilValue *pDepthStencil,
4460                               uint32_t rangeCount,
4461                               const VkImageSubresourceRange *pRanges)
4462 {
4463    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4464    VK_FROM_HANDLE(dzn_image, img, image);
4465 
4466    dzn_cmd_buffer_clear_zs(cmdbuf, img, imageLayout, pDepthStencil, rangeCount, pRanges);
4467 }
4468 
4469 VKAPI_ATTR void VKAPI_CALL
dzn_CmdDispatchBase(VkCommandBuffer commandBuffer,uint32_t baseGroupX,uint32_t baseGroupY,uint32_t baseGroupZ,uint32_t groupCountX,uint32_t groupCountY,uint32_t groupCountZ)4470 dzn_CmdDispatchBase(VkCommandBuffer commandBuffer,
4471                     uint32_t baseGroupX,
4472                     uint32_t baseGroupY,
4473                     uint32_t baseGroupZ,
4474                     uint32_t groupCountX,
4475                     uint32_t groupCountY,
4476                     uint32_t groupCountZ)
4477 {
4478    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4479 
4480    cmdbuf->state.sysvals.compute.group_count_x = groupCountX;
4481    cmdbuf->state.sysvals.compute.group_count_y = groupCountY;
4482    cmdbuf->state.sysvals.compute.group_count_z = groupCountZ;
4483    cmdbuf->state.sysvals.compute.base_group_x = baseGroupX;
4484    cmdbuf->state.sysvals.compute.base_group_y = baseGroupY;
4485    cmdbuf->state.sysvals.compute.base_group_z = baseGroupZ;
4486    cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |=
4487       DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
4488 
4489    dzn_cmd_buffer_prepare_dispatch(cmdbuf);
4490    ID3D12GraphicsCommandList1_Dispatch(cmdbuf->cmdlist, groupCountX, groupCountY, groupCountZ);
4491 }
4492 
4493 VKAPI_ATTR void VKAPI_CALL
dzn_CmdFillBuffer(VkCommandBuffer commandBuffer,VkBuffer dstBuffer,VkDeviceSize dstOffset,VkDeviceSize size,uint32_t data)4494 dzn_CmdFillBuffer(VkCommandBuffer commandBuffer,
4495                   VkBuffer dstBuffer,
4496                   VkDeviceSize dstOffset,
4497                   VkDeviceSize size,
4498                   uint32_t data)
4499 {
4500    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4501    VK_FROM_HANDLE(dzn_buffer, buf, dstBuffer);
4502 
4503    if (size == VK_WHOLE_SIZE)
4504       size = buf->size - dstOffset;
4505 
4506    size &= ~3ULL;
4507 
4508    ID3D12Resource *src_res;
4509    uint64_t src_offset;
4510    VkResult result =
4511       dzn_cmd_buffer_alloc_internal_buf(cmdbuf, size,
4512                                         DZN_INTERNAL_BUF_UPLOAD,
4513                                         D3D12_RESOURCE_STATE_GENERIC_READ,
4514                                         4,
4515                                         &src_res,
4516                                         &src_offset);
4517    if (result != VK_SUCCESS)
4518       return;
4519 
4520    uint32_t *cpu_ptr;
4521    ID3D12Resource_Map(src_res, 0, NULL, (void **)&cpu_ptr);
4522    cpu_ptr += src_offset / sizeof(uint32_t);
4523    for (uint32_t i = 0; i < size / 4; i++)
4524       cpu_ptr[i] = data;
4525 
4526    ID3D12Resource_Unmap(src_res, 0, NULL);
4527 
4528    ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset, src_res, src_offset, size);
4529 }
4530 
4531 VKAPI_ATTR void VKAPI_CALL
dzn_CmdUpdateBuffer(VkCommandBuffer commandBuffer,VkBuffer dstBuffer,VkDeviceSize dstOffset,VkDeviceSize size,const void * data)4532 dzn_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
4533                     VkBuffer dstBuffer,
4534                     VkDeviceSize dstOffset,
4535                     VkDeviceSize size,
4536                     const void *data)
4537 {
4538    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4539    VK_FROM_HANDLE(dzn_buffer, buf, dstBuffer);
4540 
4541    if (size == VK_WHOLE_SIZE)
4542       size = buf->size - dstOffset;
4543 
4544    /*
4545     * The spec says:
4546     *   4, or VK_WHOLE_SIZE to fill the range from offset to the end of the
4547     *   buffer. If VK_WHOLE_SIZE is used and the remaining size of the buffer
4548     *   is not a multiple of 4, then the nearest smaller multiple is used."
4549     */
4550    size &= ~3ULL;
4551 
4552    ID3D12Resource *src_res;
4553    uint64_t src_offset;
4554    VkResult result =
4555       dzn_cmd_buffer_alloc_internal_buf(cmdbuf, size,
4556                                         DZN_INTERNAL_BUF_UPLOAD,
4557                                         D3D12_RESOURCE_STATE_GENERIC_READ,
4558                                         4,
4559                                         &src_res, &src_offset);
4560    if (result != VK_SUCCESS)
4561       return;
4562 
4563    void *cpu_ptr;
4564    ID3D12Resource_Map(src_res, 0, NULL, &cpu_ptr);
4565    memcpy((uint8_t *)cpu_ptr + src_offset, data, size),
4566    ID3D12Resource_Unmap(src_res, 0, NULL);
4567 
4568    ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset, src_res, src_offset, size);
4569 }
4570 
/* Implementation of vkCmdClearAttachments().
 *
 * For each requested attachment, resolves the image view and layout from
 * the render state cached by dzn_CmdBeginRendering(), then clears every
 * requested rect.  When multiview is active (non-zero view mask), the
 * clear is replayed once per enabled view, offsetting the rect's base
 * layer by the view index. */
VKAPI_ATTR void VKAPI_CALL
dzn_CmdClearAttachments(VkCommandBuffer commandBuffer,
                        uint32_t attachmentCount,
                        const VkClearAttachment *pAttachments,
                        uint32_t rectCount,
                        const VkClearRect *pRects)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   for (unsigned i = 0; i < attachmentCount; i++) {
      VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED;
      struct dzn_image_view *view = NULL;

      if (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
         /* Color clears index into the bound color attachments. */
         assert(pAttachments[i].colorAttachment < cmdbuf->state.render.attachments.color_count);
         view = cmdbuf->state.render.attachments.colors[pAttachments[i].colorAttachment].iview;
         layout = cmdbuf->state.render.attachments.colors[pAttachments[i].colorAttachment].layout;
      } else {
         /* Depth and/or stencil clear: pick the bound view for whichever
          * aspect(s) are requested.  If both aspects are requested, the
          * depth and stencil views are expected to reference the same
          * image view (asserted below); the stencil view/layout wins. */
         if (cmdbuf->state.render.attachments.depth.iview &&
             (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)) {
            view = cmdbuf->state.render.attachments.depth.iview;
            layout = cmdbuf->state.render.attachments.depth.layout;
         }

         if (cmdbuf->state.render.attachments.stencil.iview &&
             (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)) {
            assert(!view || view == cmdbuf->state.render.attachments.depth.iview);
            view = cmdbuf->state.render.attachments.stencil.iview;
            layout = cmdbuf->state.render.attachments.stencil.layout;
         }
      }

      /* Requested attachment isn't bound: nothing to clear. */
      if (!view)
         continue;

      for (uint32_t j = 0; j < rectCount; j++) {
         D3D12_RECT rect;
         dzn_translate_rect(&rect, &pRects[j].rect);

         uint32_t view_mask = cmdbuf->state.multiview.view_mask;
         if (view_mask != 0) {
            /* Multiview: replay the clear once per active view, shifted
             * to that view's layer. */
            u_foreach_bit(layer, view_mask) {
               dzn_cmd_buffer_clear_attachment(cmdbuf, view, layout,
                                               &pAttachments[i].clearValue,
                                               pAttachments[i].aspectMask,
                                               pRects[j].baseArrayLayer + layer,
                                               pRects[j].layerCount,
                                               1, &rect);
            }
         } else {
            dzn_cmd_buffer_clear_attachment(cmdbuf, view, layout,
                                            &pAttachments[i].clearValue,
                                            pAttachments[i].aspectMask,
                                            pRects[j].baseArrayLayer,
                                            pRects[j].layerCount,
                                            1, &rect);
         }
      }
   }
}
4631 
4632 static D3D12_RESOLVE_MODE
dzn_get_resolve_mode(VkResolveModeFlags mode)4633 dzn_get_resolve_mode(VkResolveModeFlags mode)
4634 {
4635    switch (mode) {
4636    case VK_RESOLVE_MODE_AVERAGE_BIT: return D3D12_RESOLVE_MODE_AVERAGE;
4637    case VK_RESOLVE_MODE_MAX_BIT: return D3D12_RESOLVE_MODE_MAX;
4638    case VK_RESOLVE_MODE_MIN_BIT: return D3D12_RESOLVE_MODE_MIN;
4639    /* TODO */
4640    case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT: return D3D12_RESOLVE_MODE_MIN;
4641    default: return D3D12_RESOLVE_MODE_AVERAGE;
4642    }
4643 }
4644 
/* Resolve a rendering attachment using the draw-based resolve path instead
 * of the native D3D12 resolve API.
 *
 * Used when the native path can't be (see the callers' conditions, e.g.
 * non-AVERAGE resolve modes or mismatched view formats).  One resolve
 * region is built and re-submitted per mip level, with the extent minified
 * from the source image's base extent for that level.
 *
 * Like dzn_CmdResolveImage2(), this clobbers the bound graphics state,
 * which is flagged dirty at the end. */
static void
dzn_cmd_buffer_resolve_rendering_attachment_via_blit(struct dzn_cmd_buffer *cmdbuf,
                                                     const struct dzn_rendering_attachment *att,
                                                     VkImageAspectFlagBits aspect,
                                                     const VkImageSubresourceRange *src_range,
                                                     const VkImageSubresourceRange *dst_range)
{
   /* One view descriptor per aspect, per level, per layer. */
   uint32_t desc_count = util_bitcount(aspect) * src_range->levelCount * src_range->layerCount;

   struct dzn_descriptor_heap *heap;
   uint32_t heap_slot;
   struct dzn_descriptor_heap *sampler_heap = NULL;
   uint32_t sampler_heap_slot = 0;
   VkResult result = dzn_alloc_and_bind_blit_heap_slots(cmdbuf, desc_count,
                                                        D3D12_FILTER_MIN_MAG_MIP_POINT,
                                                        &heap, &heap_slot, &sampler_heap, &sampler_heap_slot);
   if (result != VK_SUCCESS)
      return;

   ID3D12GraphicsCommandList1_IASetPrimitiveTopology(cmdbuf->cmdlist, D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);

   /* Single region, mutated per level in the loop below; resolve_info
    * points at it, so each dzn_cmd_buffer_resolve_region() call sees the
    * current level's subresources and extent. */
   VkImageResolve2 region = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2,
      .srcSubresource = {
         .aspectMask = aspect,
         .baseArrayLayer = src_range->baseArrayLayer,
         .layerCount = src_range->layerCount,
      },
      .dstSubresource = {
         .aspectMask = aspect,
         .baseArrayLayer = dst_range->baseArrayLayer,
         .layerCount = dst_range->layerCount,
      },
   };
   VkResolveImageInfo2 resolve_info = {
      .sType = VK_STRUCTURE_TYPE_RESOLVE_IMAGE_INFO_2,
      .srcImage = vk_image_to_handle(att->iview->vk.image),
      .dstImage = vk_image_to_handle(att->resolve.iview->vk.image),
      .srcImageLayout = att->layout,
      .dstImageLayout = att->resolve.layout,
      .regionCount = 1,
      .pRegions = &region
   };
   for (uint32_t level = 0; level < src_range->levelCount; ++level) {
      region.srcSubresource.mipLevel = level + src_range->baseMipLevel;
      region.dstSubresource.mipLevel = level + dst_range->baseMipLevel;
      /* Extent of the source mip being resolved. */
      region.extent = (VkExtent3D){
         u_minify(att->iview->vk.image->extent.width, region.srcSubresource.mipLevel),
         u_minify(att->iview->vk.image->extent.height, region.srcSubresource.mipLevel),
         u_minify(att->iview->vk.image->extent.depth, region.srcSubresource.mipLevel),
      };
      dzn_cmd_buffer_resolve_region(cmdbuf, &resolve_info, att->resolve.mode, heap, &heap_slot, sampler_heap, sampler_heap_slot, 0);
   }

   /* The resolve draws replaced the bound pipeline/viewport/scissor state;
    * force them to be re-emitted by the next draw. */
   cmdbuf->state.pipeline = NULL;
   cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS | DZN_CMD_DIRTY_SCISSORS;
   if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline) {
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
         DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
   }
}
4706 
/* Resolve a multisampled rendering attachment into its resolve target.
 *
 * Prefers the native D3D12 resolve API, falling back to the draw-based
 * path (dzn_cmd_buffer_resolve_rendering_attachment_via_blit) when forced
 * by the caller, when the resolve mode isn't AVERAGE, or when the source
 * and destination view formats differ (the D3D resolve API can't convert,
 * e.g. D32S8X24 -> D32).
 *
 * For the native path, source and destination are transitioned to the
 * required resolve states/layouts (legacy barriers or enhanced barriers
 * depending on cmdbuf->enhanced_barriers), each (level, layer) subresource
 * pair is resolved, and the original states/layouts are restored. */
static void
dzn_cmd_buffer_resolve_rendering_attachment(struct dzn_cmd_buffer *cmdbuf,
                                            const struct dzn_rendering_attachment *att,
                                            VkImageAspectFlagBits aspect,
                                            bool force_blit_resolve)
{
   struct dzn_image_view *src = att->iview;
   struct dzn_image_view *dst = att->resolve.iview;

   /* Nothing to do if there's no source/destination or resolving is off. */
   if (!src || !dst || att->resolve.mode == VK_RESOLVE_MODE_NONE)
      return;

   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   struct dzn_physical_device *pdev =
      container_of(device->vk.physical, struct dzn_physical_device, vk);

   struct dzn_image *src_img = container_of(src->vk.image, struct dzn_image, vk);
   struct dzn_image *dst_img = container_of(dst->vk.image, struct dzn_image, vk);

   /* Clamp level/layer counts to what both views cover. */
   VkImageSubresourceRange src_range = {
      .aspectMask = (VkImageAspectFlags)aspect,
      .baseMipLevel = src->vk.base_mip_level,
      .levelCount = MIN2(src->vk.level_count, dst->vk.level_count),
      .baseArrayLayer = src->vk.base_array_layer,
      .layerCount = MIN2(src->vk.layer_count, dst->vk.layer_count),
   };
   /* 3D resources have no array layers (depth slices aren't subresources
    * in D3D12), so collapse the range to a single layer. */
   if (src_img->desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D) {
      src_range.baseArrayLayer = 0;
      src_range.layerCount = 1;
   }

   VkImageSubresourceRange dst_range = {
      .aspectMask = (VkImageAspectFlags)aspect,
      .baseMipLevel = dst->vk.base_mip_level,
      .levelCount = MIN2(src->vk.level_count, dst->vk.level_count),
      .baseArrayLayer = dst->vk.base_array_layer,
      .layerCount = MIN2(src->vk.layer_count, dst->vk.layer_count),
   };
   if (dst_img->desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D) {
      dst_range.baseArrayLayer = 0;
      dst_range.layerCount = 1;
   }

   if (force_blit_resolve ||
       /* Resolve modes other than average are poorly tested / buggy */
       att->resolve.mode != VK_RESOLVE_MODE_AVERAGE_BIT ||
       /* D3D resolve API can't go from (e.g.) D32S8X24 to D32 */
       src->vk.view_format != dst->vk.view_format) {
      dzn_cmd_buffer_resolve_rendering_attachment_via_blit(cmdbuf, att, aspect, &src_range, &dst_range);
      return;
   }

   VkImageLayout src_layout = att->layout;
   VkImageLayout dst_layout = att->resolve.layout;

   /* Transition to RESOLVE_SOURCE/RESOLVE_DEST using whichever barrier
    * model the command buffer was created with. */
   D3D12_RESOURCE_STATES src_state = dzn_image_layout_to_state(src_img, src_layout, aspect, cmdbuf->type);
   D3D12_RESOURCE_STATES dst_state = dzn_image_layout_to_state(dst_img, dst_layout, aspect, cmdbuf->type);
   D3D12_BARRIER_LAYOUT src_restore_layout = D3D12_BARRIER_LAYOUT_COMMON,
      src_needed_layout = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ;
   D3D12_BARRIER_LAYOUT dst_restore_layout = D3D12_BARRIER_LAYOUT_COMMON,
      dst_needed_layout = D3D12_BARRIER_LAYOUT_RESOLVE_DEST;
   if (cmdbuf->enhanced_barriers) {
      src_restore_layout = dzn_cmd_buffer_require_layout(cmdbuf, src_img,
                                                         src_layout, src_needed_layout,
                                                         &src_range);
      dst_restore_layout = dzn_cmd_buffer_require_layout(cmdbuf, dst_img,
                                                         dst_layout, dst_needed_layout,
                                                         &dst_range);
   } else {
      dzn_cmd_buffer_queue_image_range_state_transition(cmdbuf, src_img, &src_range,
                                                        src_state,
                                                        D3D12_RESOURCE_STATE_RESOLVE_SOURCE,
                                                        DZN_QUEUE_TRANSITION_FLUSH);
      dzn_cmd_buffer_queue_image_range_state_transition(cmdbuf, dst_img, &dst_range,
                                                        dst_state,
                                                        D3D12_RESOURCE_STATE_RESOLVE_DEST,
                                                        DZN_QUEUE_TRANSITION_FLUSH);
   }

   for (uint32_t level = 0; level < src_range.levelCount; level++) {
      for (uint32_t layer = 0; layer < src_range.layerCount; layer++) {
         uint32_t src_subres =
            dzn_image_range_get_subresource_index(src_img, &src_range, aspect, level, layer);
         uint32_t dst_subres =
            dzn_image_range_get_subresource_index(dst_img, &dst_range, aspect, level, layer);

         DXGI_FORMAT format =
            dzn_image_get_dxgi_format(pdev, dst->vk.format,
                                      dst->vk.usage & ~VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
                                      aspect);

         /* Prefer ResolveSubresourceRegion() (cmdlist8 + programmable
          * sample positions support) so the resolve mode is honored;
          * otherwise fall back to the plain ResolveSubresource(), which
          * averages. */
         if (cmdbuf->cmdlist8 &&
             pdev->options2.ProgrammableSamplePositionsTier > D3D12_PROGRAMMABLE_SAMPLE_POSITIONS_TIER_NOT_SUPPORTED) {
            ID3D12GraphicsCommandList8_ResolveSubresourceRegion(cmdbuf->cmdlist8,
                                                                dst_img->res, dst_subres,
                                                                0, 0,
                                                                src_img->res, src_subres,
                                                                NULL,
                                                                format,
                                                                dzn_get_resolve_mode(att->resolve.mode));
         } else {
            ID3D12GraphicsCommandList1_ResolveSubresource(cmdbuf->cmdlist,
                                                          dst_img->res, dst_subres,
                                                          src_img->res, src_subres,
                                                          format);
         }
      }
   }

   /* Restore the pre-resolve states/layouts. */
   if (cmdbuf->enhanced_barriers) {
      dzn_cmd_buffer_restore_layout(cmdbuf, src_img,
                                    D3D12_BARRIER_SYNC_RESOLVE, D3D12_BARRIER_ACCESS_RESOLVE_SOURCE,
                                    src_needed_layout, src_restore_layout,
                                    &src_range);
      dzn_cmd_buffer_restore_layout(cmdbuf, dst_img,
                                    D3D12_BARRIER_SYNC_RESOLVE, D3D12_BARRIER_ACCESS_RESOLVE_DEST,
                                    dst_needed_layout, dst_restore_layout,
                                    &dst_range);
   } else {
      dzn_cmd_buffer_queue_image_range_state_transition(cmdbuf, src_img, &src_range,
                                                        D3D12_RESOURCE_STATE_RESOLVE_SOURCE,
                                                        src_state,
                                                        DZN_QUEUE_TRANSITION_FLUSH);
      dzn_cmd_buffer_queue_image_range_state_transition(cmdbuf, dst_img, &dst_range,
                                                        D3D12_RESOURCE_STATE_RESOLVE_DEST,
                                                        dst_state,
                                                        DZN_QUEUE_TRANSITION_FLUSH);
   }
}
4836 
/* Apply the initial layout transition requested through
 * VkRenderingAttachmentInitialLayoutInfoMESA for a rendering attachment.
 *
 * No-op when the pNext chain doesn't carry the struct or the attachment
 * has no image view.  Transitions the view's full subresource range from
 * the provided initial layout to the attachment's rendering layout, using
 * enhanced barriers or legacy state transitions depending on the command
 * buffer. */
static void
dzn_rendering_attachment_initial_transition(struct dzn_cmd_buffer *cmdbuf,
                                            const VkRenderingAttachmentInfo *att,
                                            VkImageAspectFlagBits aspect)
{
   const VkRenderingAttachmentInitialLayoutInfoMESA *initial_layout =
      vk_find_struct_const(att->pNext, RENDERING_ATTACHMENT_INITIAL_LAYOUT_INFO_MESA);
   VK_FROM_HANDLE(dzn_image_view, iview, att->imageView);

   if (!initial_layout || !iview)
      return;

   struct dzn_image *image = container_of(iview->vk.image, struct dzn_image, vk);
   VkImageSubresourceRange range = {
      .aspectMask = aspect,
      .baseMipLevel = iview->vk.base_mip_level,
      .levelCount = iview->vk.level_count,
      .baseArrayLayer = iview->vk.base_array_layer,
      .layerCount = iview->vk.layer_count,
   };
   /* 3D resources have no array layers (depth slices aren't subresources
    * in D3D12), so collapse the range to a single layer. */
   if (image->desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D) {
      range.baseArrayLayer = 0;
      range.layerCount = 1;
   }

   if (cmdbuf->enhanced_barriers) {
      D3D12_BARRIER_SYNC sync_before = D3D12_BARRIER_SYNC_ALL;
      D3D12_BARRIER_ACCESS access_before = D3D12_BARRIER_ACCESS_COMMON;
      /* Coming from UNDEFINED: previous contents are discarded, so no
       * prior sync/access needs to be waited on. */
      if (initial_layout->initialLayout == VK_IMAGE_LAYOUT_UNDEFINED) {
         sync_before = D3D12_BARRIER_SYNC_NONE;
         access_before = D3D12_BARRIER_ACCESS_NO_ACCESS;
      }

      D3D12_BARRIER_LAYOUT layout_before = dzn_vk_layout_to_d3d_layout(initial_layout->initialLayout, cmdbuf->type, aspect);
      D3D12_BARRIER_LAYOUT layout_after = dzn_vk_layout_to_d3d_layout(att->imageLayout, cmdbuf->type, aspect);
      /* Simultaneous-access resources don't track layouts; use UNDEFINED
       * on both sides so only the sync/access part of the barrier acts. */
      if (image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS) {
         layout_before = D3D12_BARRIER_LAYOUT_UNDEFINED;
         layout_after = D3D12_BARRIER_LAYOUT_UNDEFINED;
      }

      dzn_cmd_buffer_image_barrier(cmdbuf, image,
                                   sync_before, D3D12_BARRIER_SYNC_DRAW,
                                   access_before, D3D12_BARRIER_ACCESS_COMMON,
                                   layout_before,
                                   layout_after,
                                   &range);
   } else {
      dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range,
                                                         initial_layout->initialLayout,
                                                         att->imageLayout,
                                                         DZN_QUEUE_TRANSITION_FLUSH);
   }
}
4890 
/* Implementation of vkCmdBeginRendering() (dynamic rendering).
 *
 * Caches the attachment state on the command buffer, binds render-target
 * and depth/stencil views on the D3D12 command list, runs the initial
 * layout transitions (via dzn_rendering_attachment_initial_transition())
 * and performs LOAD_OP_CLEAR clears unless the render pass is resuming a
 * suspended one.
 */
VKAPI_ATTR void VKAPI_CALL
dzn_CmdBeginRendering(VkCommandBuffer commandBuffer,
                      const VkRenderingInfo *pRenderingInfo)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   D3D12_RECT new_render_area = {
      .left = pRenderingInfo->renderArea.offset.x,
      .top = pRenderingInfo->renderArea.offset.y,
      .right = (LONG)(pRenderingInfo->renderArea.offset.x + pRenderingInfo->renderArea.extent.width),
      .bottom = (LONG)(pRenderingInfo->renderArea.offset.y + pRenderingInfo->renderArea.extent.height),
   };

   // The render area has an impact on the scissor state.
   if (memcmp(&cmdbuf->state.render.area, &new_render_area, sizeof(new_render_area))) {
      cmdbuf->state.dirty |= DZN_CMD_DIRTY_SCISSORS;
      cmdbuf->state.render.area = new_render_area;
   }

   cmdbuf->state.render.flags = pRenderingInfo->flags;
   cmdbuf->state.render.layer_count = pRenderingInfo->layerCount;
   cmdbuf->state.render.view_mask = pRenderingInfo->viewMask;

   D3D12_CPU_DESCRIPTOR_HANDLE rt_handles[MAX_RTS] = { 0 };
   D3D12_CPU_DESCRIPTOR_HANDLE zs_handle = { 0 };

   /* Record per-attachment state (views, layouts, resolve info, store ops)
    * so dzn_CmdEndRendering() can execute the resolves later. */
   cmdbuf->state.render.attachments.color_count = pRenderingInfo->colorAttachmentCount;
   for (uint32_t i = 0; i < pRenderingInfo->colorAttachmentCount; i++) {
      const VkRenderingAttachmentInfo *att = &pRenderingInfo->pColorAttachments[i];
      VK_FROM_HANDLE(dzn_image_view, iview, att->imageView);

      cmdbuf->state.render.attachments.colors[i].iview = iview;
      cmdbuf->state.render.attachments.colors[i].layout = att->imageLayout;
      cmdbuf->state.render.attachments.colors[i].resolve.mode = att->resolveMode;
      cmdbuf->state.render.attachments.colors[i].resolve.iview =
         dzn_image_view_from_handle(att->resolveImageView);
      cmdbuf->state.render.attachments.colors[i].resolve.layout =
         att->resolveImageLayout;
      cmdbuf->state.render.attachments.colors[i].store_op = att->storeOp;

      /* Unused color slot: bind a null RTV so the slot indices still line
       * up with the shader outputs. */
      if (!iview) {
         rt_handles[i] = dzn_cmd_buffer_get_null_rtv(cmdbuf);
         continue;
      }

      struct dzn_image *img = container_of(iview->vk.image, struct dzn_image, vk);
      rt_handles[i] = dzn_cmd_buffer_get_rtv(cmdbuf, img, &iview->rtv_desc);
      dzn_rendering_attachment_initial_transition(cmdbuf, att,
                                                  VK_IMAGE_ASPECT_COLOR_BIT);
   }

   if (pRenderingInfo->pDepthAttachment) {
      const VkRenderingAttachmentInfo *att = pRenderingInfo->pDepthAttachment;

      cmdbuf->state.render.attachments.depth.iview =
         dzn_image_view_from_handle(att->imageView);
      cmdbuf->state.render.attachments.depth.layout = att->imageLayout;
      cmdbuf->state.render.attachments.depth.resolve.mode = att->resolveMode;
      cmdbuf->state.render.attachments.depth.resolve.iview =
         dzn_image_view_from_handle(att->resolveImageView);
      cmdbuf->state.render.attachments.depth.resolve.layout =
         att->resolveImageLayout;
      cmdbuf->state.render.attachments.depth.store_op = att->storeOp;
      dzn_rendering_attachment_initial_transition(cmdbuf, att,
                                                  VK_IMAGE_ASPECT_DEPTH_BIT);
   }

   if (pRenderingInfo->pStencilAttachment) {
      const VkRenderingAttachmentInfo *att = pRenderingInfo->pStencilAttachment;

      cmdbuf->state.render.attachments.stencil.iview =
         dzn_image_view_from_handle(att->imageView);
      cmdbuf->state.render.attachments.stencil.layout = att->imageLayout;
      cmdbuf->state.render.attachments.stencil.resolve.mode = att->resolveMode;
      cmdbuf->state.render.attachments.stencil.resolve.iview =
         dzn_image_view_from_handle(att->resolveImageView);
      cmdbuf->state.render.attachments.stencil.resolve.layout =
         att->resolveImageLayout;
      cmdbuf->state.render.attachments.stencil.store_op = att->storeOp;
      dzn_rendering_attachment_initial_transition(cmdbuf, att,
                                                  VK_IMAGE_ASPECT_STENCIL_BIT);
   }

   /* Depth and stencil must target the same image view (a single DSV in
    * D3D12); pick whichever one is present. */
   if (pRenderingInfo->pDepthAttachment || pRenderingInfo->pStencilAttachment) {
      struct dzn_image_view *z_iview =
         pRenderingInfo->pDepthAttachment ?
         dzn_image_view_from_handle(pRenderingInfo->pDepthAttachment->imageView) :
         NULL;
      struct dzn_image_view *s_iview =
         pRenderingInfo->pStencilAttachment ?
         dzn_image_view_from_handle(pRenderingInfo->pStencilAttachment->imageView) :
         NULL;
      struct dzn_image_view *iview = z_iview ? z_iview : s_iview;
      assert(!z_iview || !s_iview || z_iview == s_iview);

      if (iview) {
         struct dzn_image *img = container_of(iview->vk.image, struct dzn_image, vk);

         zs_handle = dzn_cmd_buffer_get_dsv(cmdbuf, img, &iview->dsv_desc);
      }
   }

   ID3D12GraphicsCommandList1_OMSetRenderTargets(cmdbuf->cmdlist,
                                                 pRenderingInfo->colorAttachmentCount,
                                                 pRenderingInfo->colorAttachmentCount ? rt_handles : NULL,
                                                 false, zs_handle.ptr ? &zs_handle : NULL);

   /* LOAD_OP_CLEAR for color attachments. Skipped when resuming a suspended
    * pass, since the suspended pass already did the clears. */
   for (uint32_t a = 0; a < pRenderingInfo->colorAttachmentCount; a++) {
      const VkRenderingAttachmentInfo *att = &pRenderingInfo->pColorAttachments[a];
      VK_FROM_HANDLE(dzn_image_view, iview, att->imageView);

      if (iview != NULL && att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR &&
          !(pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT)) {
         if (pRenderingInfo->viewMask != 0) {
            /* Multiview: clear one layer per active view bit. */
            u_foreach_bit(layer, pRenderingInfo->viewMask) {
               dzn_cmd_buffer_clear_attachment(cmdbuf, iview, att->imageLayout,
                                               &att->clearValue,
                                               VK_IMAGE_ASPECT_COLOR_BIT, layer,
                                               1, 1, &cmdbuf->state.render.area);
            }
         } else {
            dzn_cmd_buffer_clear_attachment(cmdbuf, iview, att->imageLayout,
                                            &att->clearValue,
                                            VK_IMAGE_ASPECT_COLOR_BIT, 0,
                                            pRenderingInfo->layerCount, 1,
                                            &cmdbuf->state.render.area);
         }
      }
   }

   /* LOAD_OP_CLEAR for depth/stencil; both aspects are cleared in a single
    * dzn_cmd_buffer_clear_attachment() call when they share a view. */
   if ((pRenderingInfo->pDepthAttachment || pRenderingInfo->pStencilAttachment) &&
       !(pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT)) {
      const VkRenderingAttachmentInfo *z_att = pRenderingInfo->pDepthAttachment;
      const VkRenderingAttachmentInfo *s_att = pRenderingInfo->pStencilAttachment;
      struct dzn_image_view *z_iview = z_att ? dzn_image_view_from_handle(z_att->imageView) : NULL;
      struct dzn_image_view *s_iview = s_att ? dzn_image_view_from_handle(s_att->imageView) : NULL;
      struct dzn_image_view *iview = z_iview ? z_iview : s_iview;
      VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED;

      assert(!z_iview || !s_iview || z_iview == s_iview);

      VkImageAspectFlags aspects = 0;
      VkClearValue clear_val;

      if (z_iview && z_att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
         aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
         clear_val.depthStencil.depth = z_att->clearValue.depthStencil.depth;
         layout = z_att->imageLayout;
      }

      if (s_iview && s_att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
         aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
         clear_val.depthStencil.stencil = s_att->clearValue.depthStencil.stencil;
         layout = s_att->imageLayout;
      }

      if (aspects != 0) {
         if (pRenderingInfo->viewMask != 0) {
            u_foreach_bit(layer, pRenderingInfo->viewMask) {
               dzn_cmd_buffer_clear_attachment(cmdbuf, iview, layout,
                                               &clear_val, aspects, layer,
                                               1, 1, &cmdbuf->state.render.area);
            }
         } else {
            dzn_cmd_buffer_clear_attachment(cmdbuf, iview, layout,
                                            &clear_val, aspects, 0,
                                            VK_REMAINING_ARRAY_LAYERS, 1,
                                            &cmdbuf->state.render.area);
         }
      }
   }

   /* Non-multiview passes behave like a single-view pass with mask 0x1. */
   cmdbuf->state.multiview.num_views = MAX2(util_bitcount(pRenderingInfo->viewMask), 1);
   cmdbuf->state.multiview.view_mask = MAX2(pRenderingInfo->viewMask, 1);
}
5066 
5067 VKAPI_ATTR void VKAPI_CALL
dzn_CmdEndRendering(VkCommandBuffer commandBuffer)5068 dzn_CmdEndRendering(VkCommandBuffer commandBuffer)
5069 {
5070    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5071 
5072    if (!(cmdbuf->state.render.flags & VK_RENDERING_SUSPENDING_BIT)) {
5073       for (uint32_t i = 0; i < cmdbuf->state.render.attachments.color_count; i++) {
5074          dzn_cmd_buffer_resolve_rendering_attachment(cmdbuf,
5075                                                      &cmdbuf->state.render.attachments.colors[i],
5076                                                      VK_IMAGE_ASPECT_COLOR_BIT, false);
5077       }
5078 
5079       bool separate_stencil_resolve =
5080          cmdbuf->state.render.attachments.depth.resolve.mode !=
5081          cmdbuf->state.render.attachments.stencil.resolve.mode;
5082       dzn_cmd_buffer_resolve_rendering_attachment(cmdbuf,
5083                                                   &cmdbuf->state.render.attachments.depth,
5084                                                   VK_IMAGE_ASPECT_DEPTH_BIT,
5085                                                   separate_stencil_resolve);
5086       dzn_cmd_buffer_resolve_rendering_attachment(cmdbuf,
5087                                                   &cmdbuf->state.render.attachments.stencil,
5088                                                   VK_IMAGE_ASPECT_STENCIL_BIT,
5089                                                   separate_stencil_resolve);
5090    }
5091 
5092    memset(&cmdbuf->state.render, 0, sizeof(cmdbuf->state.render));
5093 }
5094 
5095 VKAPI_ATTR void VKAPI_CALL
dzn_CmdBindPipeline(VkCommandBuffer commandBuffer,VkPipelineBindPoint pipelineBindPoint,VkPipeline pipe)5096 dzn_CmdBindPipeline(VkCommandBuffer commandBuffer,
5097                     VkPipelineBindPoint pipelineBindPoint,
5098                     VkPipeline pipe)
5099 {
5100    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5101    VK_FROM_HANDLE(dzn_pipeline, pipeline, pipe);
5102 
5103    cmdbuf->state.bindpoint[pipelineBindPoint].pipeline = pipeline;
5104    cmdbuf->state.bindpoint[pipelineBindPoint].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
5105    if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
5106       const struct dzn_graphics_pipeline *gfx = (const struct dzn_graphics_pipeline *)pipeline;
5107 
5108       if (!gfx->vp.dynamic) {
5109          memcpy(cmdbuf->state.viewports, gfx->vp.desc,
5110                 gfx->vp.count * sizeof(cmdbuf->state.viewports[0]));
5111          cmdbuf->state.sysvals.gfx.viewport_width = cmdbuf->state.viewports[0].Width;
5112          cmdbuf->state.sysvals.gfx.viewport_height = cmdbuf->state.viewports[0].Height;
5113          cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS;
5114          cmdbuf->state.bindpoint[pipelineBindPoint].dirty |= DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
5115       }
5116 
5117       if (!gfx->scissor.dynamic) {
5118          memcpy(cmdbuf->state.scissors, gfx->scissor.desc,
5119                 gfx->scissor.count * sizeof(cmdbuf->state.scissors[0]));
5120          cmdbuf->state.dirty |= DZN_CMD_DIRTY_SCISSORS;
5121       }
5122 
5123       if (gfx->zsa.stencil_test.enable && !gfx->zsa.stencil_test.dynamic_ref) {
5124          cmdbuf->state.zsa.stencil_test.front.ref = gfx->zsa.stencil_test.front.ref;
5125          cmdbuf->state.zsa.stencil_test.back.ref = gfx->zsa.stencil_test.back.ref;
5126          cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF;
5127       }
5128 
5129       if (gfx->zsa.depth_bounds.enable && !gfx->zsa.depth_bounds.dynamic) {
5130          cmdbuf->state.zsa.depth_bounds.min = gfx->zsa.depth_bounds.min;
5131          cmdbuf->state.zsa.depth_bounds.max = gfx->zsa.depth_bounds.max;
5132          cmdbuf->state.dirty |= DZN_CMD_DIRTY_DEPTH_BOUNDS;
5133       }
5134 
5135       if (!gfx->blend.dynamic_constants) {
5136          memcpy(cmdbuf->state.blend.constants, gfx->blend.constants,
5137                 sizeof(cmdbuf->state.blend.constants));
5138          cmdbuf->state.dirty |= DZN_CMD_DIRTY_BLEND_CONSTANTS;
5139       }
5140 
5141       for (uint32_t vb = 0; vb < gfx->vb.count; vb++)
5142          cmdbuf->state.vb.views[vb].StrideInBytes = gfx->vb.strides[vb];
5143 
5144       if (gfx->vb.count > 0)
5145          BITSET_SET_RANGE(cmdbuf->state.vb.dirty, 0, gfx->vb.count - 1);
5146    }
5147 }
5148 
5149 VKAPI_ATTR void VKAPI_CALL
dzn_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,VkPipelineBindPoint pipelineBindPoint,VkPipelineLayout layout,uint32_t firstSet,uint32_t descriptorSetCount,const VkDescriptorSet * pDescriptorSets,uint32_t dynamicOffsetCount,const uint32_t * pDynamicOffsets)5150 dzn_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
5151                           VkPipelineBindPoint pipelineBindPoint,
5152                           VkPipelineLayout layout,
5153                           uint32_t firstSet,
5154                           uint32_t descriptorSetCount,
5155                           const VkDescriptorSet *pDescriptorSets,
5156                           uint32_t dynamicOffsetCount,
5157                           const uint32_t *pDynamicOffsets)
5158 {
5159    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5160    VK_FROM_HANDLE(dzn_pipeline_layout, playout, layout);
5161 
5162    struct dzn_descriptor_state *desc_state =
5163       &cmdbuf->state.bindpoint[pipelineBindPoint].desc_state;
5164    uint32_t dirty = 0;
5165 
5166    for (uint32_t i = 0; i < descriptorSetCount; i++) {
5167       uint32_t idx = firstSet + i;
5168       VK_FROM_HANDLE(dzn_descriptor_set, set, pDescriptorSets[i]);
5169 
5170       if (desc_state->sets[idx].set != set) {
5171          desc_state->sets[idx].set = set;
5172          dirty |= DZN_CMD_BINDPOINT_DIRTY_DESC_SET0 << idx;
5173       }
5174 
5175       uint32_t dynamic_buffer_count = playout->sets[idx].dynamic_buffer_count;
5176       if (dynamic_buffer_count) {
5177          assert(dynamicOffsetCount >= dynamic_buffer_count);
5178 
5179          for (uint32_t j = 0; j < dynamic_buffer_count; j++)
5180             desc_state->sets[idx].dynamic_offsets[j] = pDynamicOffsets[j];
5181 
5182          dynamicOffsetCount -= dynamic_buffer_count;
5183          pDynamicOffsets += dynamic_buffer_count;
5184          dirty |= DZN_CMD_BINDPOINT_DIRTY_DYNAMIC_BUFFERS;
5185       }
5186    }
5187 
5188    cmdbuf->state.bindpoint[pipelineBindPoint].dirty |= dirty;
5189 }
5190 
5191 VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetViewport(VkCommandBuffer commandBuffer,uint32_t firstViewport,uint32_t viewportCount,const VkViewport * pViewports)5192 dzn_CmdSetViewport(VkCommandBuffer commandBuffer,
5193                    uint32_t firstViewport,
5194                    uint32_t viewportCount,
5195                    const VkViewport *pViewports)
5196 {
5197    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5198 
5199    STATIC_ASSERT(MAX_VP <= DXIL_SPIRV_MAX_VIEWPORT);
5200 
5201    for (uint32_t i = 0; i < viewportCount; i++) {
5202       uint32_t vp = i + firstViewport;
5203 
5204       dzn_translate_viewport(&cmdbuf->state.viewports[vp], &pViewports[i]);
5205 
5206       if (pViewports[i].minDepth > pViewports[i].maxDepth)
5207          cmdbuf->state.sysvals.gfx.yz_flip_mask |= BITFIELD_BIT(vp + DXIL_SPIRV_Z_FLIP_SHIFT);
5208       else
5209          cmdbuf->state.sysvals.gfx.yz_flip_mask &= ~BITFIELD_BIT(vp + DXIL_SPIRV_Z_FLIP_SHIFT);
5210 
5211       if (pViewports[i].height > 0)
5212          cmdbuf->state.sysvals.gfx.yz_flip_mask |= BITFIELD_BIT(vp);
5213       else
5214          cmdbuf->state.sysvals.gfx.yz_flip_mask &= ~BITFIELD_BIT(vp);
5215    }
5216 
5217    cmdbuf->state.sysvals.gfx.viewport_width = cmdbuf->state.viewports[0].Width;
5218    cmdbuf->state.sysvals.gfx.viewport_height = cmdbuf->state.viewports[0].Height;
5219 
5220    if (viewportCount) {
5221       cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS;
5222       cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
5223          DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
5224    }
5225 }
5226 
5227 VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetScissor(VkCommandBuffer commandBuffer,uint32_t firstScissor,uint32_t scissorCount,const VkRect2D * pScissors)5228 dzn_CmdSetScissor(VkCommandBuffer commandBuffer,
5229                   uint32_t firstScissor,
5230                   uint32_t scissorCount,
5231                   const VkRect2D *pScissors)
5232 {
5233    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5234 
5235    for (uint32_t i = 0; i < scissorCount; i++)
5236       dzn_translate_rect(&cmdbuf->state.scissors[i + firstScissor], &pScissors[i]);
5237 
5238    if (scissorCount)
5239       cmdbuf->state.dirty |= DZN_CMD_DIRTY_SCISSORS;
5240 }
5241 
5242 VKAPI_ATTR void VKAPI_CALL
dzn_CmdPushConstants(VkCommandBuffer commandBuffer,VkPipelineLayout layout,VkShaderStageFlags stageFlags,uint32_t offset,uint32_t size,const void * pValues)5243 dzn_CmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout,
5244                      VkShaderStageFlags stageFlags, uint32_t offset, uint32_t size,
5245                      const void *pValues)
5246 {
5247    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5248    struct dzn_cmd_buffer_push_constant_state *states[2];
5249    uint32_t num_states = 0;
5250 
5251    if (stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS)
5252       states[num_states++] = &cmdbuf->state.push_constant.gfx;
5253 
5254    if (stageFlags & VK_SHADER_STAGE_COMPUTE_BIT)
5255       states[num_states++] = &cmdbuf->state.push_constant.compute;
5256 
5257    for (uint32_t i = 0; i < num_states; i++) {
5258       memcpy(((char *)states[i]->values) + offset, pValues, size);
5259       states[i]->offset =
5260          states[i]->end > 0 ? MIN2(states[i]->offset, offset) : offset;
5261       states[i]->end = MAX2(states[i]->end, offset + size);
5262    }
5263 }
5264 
/* Implementation of vkCmdDraw().
 *
 * Two complications vs. a plain DrawInstanced():
 * - triangle fans are not supported by D3D12, so they are lowered to an
 *   indexed triangle-list draw using a generated index buffer;
 * - multiview is emulated (unless the pipeline uses native view
 *   instancing) by replaying the draw once per bit in the view mask,
 *   with the current view index passed through sysvals.
 */
VKAPI_ATTR void VKAPI_CALL
dzn_CmdDraw(VkCommandBuffer commandBuffer,
            uint32_t vertexCount,
            uint32_t instanceCount,
            uint32_t firstVertex,
            uint32_t firstInstance)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   const struct dzn_graphics_pipeline *pipeline = (const struct dzn_graphics_pipeline *)
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;

   /* Expose draw parameters to shaders through the sysvals CBV. */
   cmdbuf->state.sysvals.gfx.first_vertex = firstVertex;
   cmdbuf->state.sysvals.gfx.base_instance = firstInstance;

   /* With native view instancing the hardware iterates views itself, so a
    * single pass with mask 0x1 is enough. */
   uint32_t view_mask = pipeline->multiview.native_view_instancing ?
      1 : pipeline->multiview.view_mask;

   if (pipeline->ia.triangle_fan) {
      /* Save the current IB view: triangle-fan lowering binds its own
       * generated index buffer. */
      D3D12_INDEX_BUFFER_VIEW ib_view = cmdbuf->state.ib.view;

      VkResult result =
         dzn_cmd_buffer_triangle_fan_create_index(cmdbuf, &vertexCount);
      if (result != VK_SUCCESS || !vertexCount)
         return;

      cmdbuf->state.sysvals.gfx.is_indexed_draw = true;
      u_foreach_bit(view, view_mask) {
         cmdbuf->state.sysvals.gfx.view_index = view;
         cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
            DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
         dzn_cmd_buffer_prepare_draw(cmdbuf, true);
         /* firstVertex becomes the base-vertex of the lowered indexed draw. */
         ID3D12GraphicsCommandList1_DrawIndexedInstanced(cmdbuf->cmdlist, vertexCount, instanceCount, 0,
                                                firstVertex, firstInstance);
      }

      /* Restore the IB view if we modified it when lowering triangle fans. */
      if (ib_view.SizeInBytes > 0) {
         cmdbuf->state.ib.view = ib_view;
         cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
      }
   } else {
      cmdbuf->state.sysvals.gfx.is_indexed_draw = false;
      u_foreach_bit(view, view_mask) {
         cmdbuf->state.sysvals.gfx.view_index = view;
         cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
            DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
         dzn_cmd_buffer_prepare_draw(cmdbuf, false);
         ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, vertexCount, instanceCount,
                                          firstVertex, firstInstance);
      }
   }
}
5318 
/* Implementation of vkCmdDrawIndexed().
 *
 * Like dzn_CmdDraw(), triangle fans are lowered to indexed triangle
 * lists and emulated multiview replays the draw per view. The
 * triangle-fan + primitive-restart combination additionally goes through
 * the indirect-draw path (see the comment below).
 */
VKAPI_ATTR void VKAPI_CALL
dzn_CmdDrawIndexed(VkCommandBuffer commandBuffer,
                   uint32_t indexCount,
                   uint32_t instanceCount,
                   uint32_t firstIndex,
                   int32_t vertexOffset,
                   uint32_t firstInstance)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   const struct dzn_graphics_pipeline *pipeline = (const struct dzn_graphics_pipeline *)
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;

   if (pipeline->ia.triangle_fan &&
       dzn_graphics_pipeline_get_desc_template(pipeline, ib_strip_cut)) {
      /* The indexed+primitive-restart+triangle-fan combination is a mess,
       * since we have to walk the index buffer, skip entries with the
       * special 0xffff/0xffffffff values, and push triangle list indices
       * for the remaining values. All of this has an impact on the index
       * count passed to the draw call, which forces us to use the indirect
       * path.
       */
      D3D12_DRAW_INDEXED_ARGUMENTS params = {
         .IndexCountPerInstance = indexCount,
         .InstanceCount = instanceCount,
         .StartIndexLocation = firstIndex,
         .BaseVertexLocation = vertexOffset,
         .StartInstanceLocation = firstInstance,
      };

      /* Stage the draw arguments in an upload buffer and feed them to the
       * indirect-draw helper. */
      ID3D12Resource *draw_buf;
      uint64_t offset;
      VkResult result =
         dzn_cmd_buffer_alloc_internal_buf(cmdbuf, sizeof(params),
                                           DZN_INTERNAL_BUF_UPLOAD,
                                           D3D12_RESOURCE_STATE_GENERIC_READ,
                                           4,
                                           &draw_buf, &offset);
      if (result != VK_SUCCESS)
         return;

      void *cpu_ptr;
      ID3D12Resource_Map(draw_buf, 0, NULL, &cpu_ptr);
      memcpy((uint8_t *)cpu_ptr + offset, &params, sizeof(params));

      ID3D12Resource_Unmap(draw_buf, 0, NULL);

      dzn_cmd_buffer_indirect_draw(cmdbuf, draw_buf, offset, NULL, 0, 1, sizeof(params), true);
      return;
   }

   /* Expose draw parameters to shaders through the sysvals CBV. */
   cmdbuf->state.sysvals.gfx.first_vertex = vertexOffset;
   cmdbuf->state.sysvals.gfx.base_instance = firstInstance;
   cmdbuf->state.sysvals.gfx.is_indexed_draw = true;

   /* Save the current IB view: triangle-fan lowering rebinds a rewritten
    * index buffer. */
   D3D12_INDEX_BUFFER_VIEW ib_view = cmdbuf->state.ib.view;

   if (pipeline->ia.triangle_fan) {
      VkResult result =
         dzn_cmd_buffer_triangle_fan_rewrite_index(cmdbuf, &indexCount, &firstIndex);
      if (result != VK_SUCCESS || !indexCount)
         return;
   }

   /* With native view instancing the hardware iterates views itself, so a
    * single pass with mask 0x1 is enough. */
   uint32_t view_mask = pipeline->multiview.native_view_instancing ?
      1 : pipeline->multiview.view_mask;
   u_foreach_bit(view, view_mask) {
      cmdbuf->state.sysvals.gfx.view_index = view;
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
         DZN_CMD_BINDPOINT_DIRTY_SYSVALS;

      dzn_cmd_buffer_prepare_draw(cmdbuf, true);
      ID3D12GraphicsCommandList1_DrawIndexedInstanced(cmdbuf->cmdlist, indexCount, instanceCount, firstIndex,
                                            vertexOffset, firstInstance);
   }

   /* Restore the IB view if we modified it when lowering triangle fans. */
   if (pipeline->ia.triangle_fan && ib_view.SizeInBytes) {
      cmdbuf->state.ib.view = ib_view;
      cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
   }
}
5401 
5402 VKAPI_ATTR void VKAPI_CALL
dzn_CmdDrawIndirect(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,uint32_t drawCount,uint32_t stride)5403 dzn_CmdDrawIndirect(VkCommandBuffer commandBuffer,
5404                     VkBuffer buffer,
5405                     VkDeviceSize offset,
5406                     uint32_t drawCount,
5407                     uint32_t stride)
5408 {
5409    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5410    VK_FROM_HANDLE(dzn_buffer, buf, buffer);
5411 
5412    dzn_cmd_buffer_indirect_draw(cmdbuf, buf->res, offset, NULL, 0, drawCount, stride, false);
5413 }
5414 
5415 VKAPI_ATTR void VKAPI_CALL
dzn_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,uint32_t drawCount,uint32_t stride)5416 dzn_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,
5417                            VkBuffer buffer,
5418                            VkDeviceSize offset,
5419                            uint32_t drawCount,
5420                            uint32_t stride)
5421 {
5422    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5423    VK_FROM_HANDLE(dzn_buffer, buf, buffer);
5424 
5425    dzn_cmd_buffer_indirect_draw(cmdbuf, buf->res, offset, NULL, 0, drawCount, stride, true);
5426 }
5427 
5428 VKAPI_ATTR void VKAPI_CALL
dzn_CmdDrawIndirectCount(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,VkBuffer countBuffer,VkDeviceSize countBufferOffset,uint32_t maxDrawCount,uint32_t stride)5429 dzn_CmdDrawIndirectCount(VkCommandBuffer commandBuffer,
5430                          VkBuffer buffer,
5431                          VkDeviceSize offset,
5432                          VkBuffer countBuffer,
5433                          VkDeviceSize countBufferOffset,
5434                          uint32_t maxDrawCount,
5435                          uint32_t stride)
5436 {
5437    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5438    VK_FROM_HANDLE(dzn_buffer, buf, buffer);
5439    VK_FROM_HANDLE(dzn_buffer, count_buf, countBuffer);
5440 
5441    dzn_cmd_buffer_indirect_draw(cmdbuf, buf->res, offset,
5442                                 count_buf->res, countBufferOffset,
5443                                 maxDrawCount, stride, false);
5444 }
5445 
5446 VKAPI_ATTR void VKAPI_CALL
dzn_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,VkBuffer countBuffer,VkDeviceSize countBufferOffset,uint32_t maxDrawCount,uint32_t stride)5447 dzn_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer,
5448                                 VkBuffer buffer,
5449                                 VkDeviceSize offset,
5450                                 VkBuffer countBuffer,
5451                                 VkDeviceSize countBufferOffset,
5452                                 uint32_t maxDrawCount,
5453                                 uint32_t stride)
5454 {
5455    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5456    VK_FROM_HANDLE(dzn_buffer, buf, buffer);
5457    VK_FROM_HANDLE(dzn_buffer, count_buf, countBuffer);
5458 
5459    dzn_cmd_buffer_indirect_draw(cmdbuf, buf->res, offset,
5460                                 count_buf->res, countBufferOffset,
5461                                 maxDrawCount, stride, true);
5462 }
5463 
5464 VKAPI_ATTR void VKAPI_CALL
dzn_CmdBindVertexBuffers(VkCommandBuffer commandBuffer,uint32_t firstBinding,uint32_t bindingCount,const VkBuffer * pBuffers,const VkDeviceSize * pOffsets)5465 dzn_CmdBindVertexBuffers(VkCommandBuffer commandBuffer,
5466                          uint32_t firstBinding,
5467                          uint32_t bindingCount,
5468                          const VkBuffer *pBuffers,
5469                          const VkDeviceSize *pOffsets)
5470 {
5471    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5472 
5473    if (!bindingCount)
5474       return;
5475 
5476    D3D12_VERTEX_BUFFER_VIEW *vbviews = cmdbuf->state.vb.views;
5477 
5478    for (uint32_t i = 0; i < bindingCount; i++) {
5479       VK_FROM_HANDLE(dzn_buffer, buf, pBuffers[i]);
5480 
5481       vbviews[firstBinding + i].BufferLocation = buf->gpuva + pOffsets[i];
5482       vbviews[firstBinding + i].SizeInBytes = buf->size - pOffsets[i];
5483    }
5484 
5485    BITSET_SET_RANGE(cmdbuf->state.vb.dirty, firstBinding,
5486                     firstBinding + bindingCount - 1);
5487 }
5488 
5489 VKAPI_ATTR void VKAPI_CALL
dzn_CmdBindIndexBuffer(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,VkIndexType indexType)5490 dzn_CmdBindIndexBuffer(VkCommandBuffer commandBuffer,
5491                        VkBuffer buffer,
5492                        VkDeviceSize offset,
5493                        VkIndexType indexType)
5494 {
5495    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5496    VK_FROM_HANDLE(dzn_buffer, buf, buffer);
5497 
5498    cmdbuf->state.ib.view.BufferLocation = buf->gpuva + offset;
5499    cmdbuf->state.ib.view.SizeInBytes = buf->size - offset;
5500    switch (indexType) {
5501    case VK_INDEX_TYPE_UINT16:
5502       cmdbuf->state.ib.view.Format = DXGI_FORMAT_R16_UINT;
5503       cmdbuf->state.pipeline_variant.ib_strip_cut = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF;
5504       break;
5505    case VK_INDEX_TYPE_UINT32:
5506       cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT;
5507       cmdbuf->state.pipeline_variant.ib_strip_cut = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF;
5508       break;
5509    default: unreachable("Invalid index type");
5510    }
5511 
5512    cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
5513 
5514    const struct dzn_graphics_pipeline *pipeline =
5515       (const struct dzn_graphics_pipeline *)cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
5516 
5517    if (pipeline &&
5518        dzn_graphics_pipeline_get_desc_template(pipeline, ib_strip_cut))
5519       cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
5520 }
5521 
5522 VKAPI_ATTR void VKAPI_CALL
dzn_CmdResetEvent2(VkCommandBuffer commandBuffer,VkEvent event,VkPipelineStageFlags2 stageMask)5523 dzn_CmdResetEvent2(VkCommandBuffer commandBuffer,
5524                    VkEvent event,
5525                    VkPipelineStageFlags2 stageMask)
5526 {
5527    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5528    VK_FROM_HANDLE(dzn_event, evt, event);
5529 
5530    if (!_mesa_hash_table_insert(cmdbuf->events.ht, evt, (void *)(uintptr_t)DZN_EVENT_STATE_RESET))
5531       vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
5532 }
5533 
5534 VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetEvent2(VkCommandBuffer commandBuffer,VkEvent event,const VkDependencyInfo * pDependencyInfo)5535 dzn_CmdSetEvent2(VkCommandBuffer commandBuffer,
5536                  VkEvent event,
5537                  const VkDependencyInfo *pDependencyInfo)
5538 {
5539    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5540    VK_FROM_HANDLE(dzn_event, evt, event);
5541 
5542    if (!_mesa_hash_table_insert(cmdbuf->events.ht, evt, (void *)(uintptr_t)DZN_EVENT_STATE_SET))
5543       vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
5544 }
5545 
5546 VKAPI_ATTR void VKAPI_CALL
dzn_CmdWaitEvents2(VkCommandBuffer commandBuffer,uint32_t eventCount,const VkEvent * pEvents,const VkDependencyInfo * pDependencyInfo)5547 dzn_CmdWaitEvents2(VkCommandBuffer commandBuffer,
5548                    uint32_t eventCount,
5549                    const VkEvent *pEvents,
5550                    const VkDependencyInfo *pDependencyInfo)
5551 {
5552    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5553 
5554    /* Intra-command list wait is handle by this pipeline flush, which is
5555     * overkill, but that's the best we can do with the standard D3D12 barrier
5556     * API.
5557     *
5558     * Inter-command list is taken care of by the serialization done at the
5559     * ExecuteCommandList() level:
5560     * "Calling ExecuteCommandLists twice in succession (from the same thread,
5561     *  or different threads) guarantees that the first workload (A) finishes
5562     *  before the second workload (B)"
5563     *
5564     * HOST -> DEVICE signaling is ignored and we assume events are always
5565     * signaled when we reach the vkCmdWaitEvents() point.:
5566     * "Command buffers in the submission can include vkCmdWaitEvents commands
5567     *  that wait on events that will not be signaled by earlier commands in the
5568     *  queue. Such events must be signaled by the application using vkSetEvent,
5569     *  and the vkCmdWaitEvents commands that wait upon them must not be inside
5570     *  a render pass instance.
5571     *  The event must be set before the vkCmdWaitEvents command is executed."
5572     */
5573    bool flush_pipeline = false;
5574 
5575    for (uint32_t i = 0; i < eventCount; i++) {
5576       VK_FROM_HANDLE(dzn_event, event, pEvents[i]);
5577 
5578       struct hash_entry *he =
5579          _mesa_hash_table_search(cmdbuf->events.ht, event);
5580       if (he) {
5581          enum dzn_event_state state = (uintptr_t)he->data;
5582          assert(state != DZN_EVENT_STATE_RESET);
5583          flush_pipeline = state == DZN_EVENT_STATE_SET;
5584       }
5585    }
5586 
5587    if (flush_pipeline) {
5588       if (cmdbuf->enhanced_barriers) {
5589          dzn_cmd_buffer_global_barrier(cmdbuf,
5590                                        D3D12_BARRIER_SYNC_ALL, D3D12_BARRIER_SYNC_ALL,
5591                                        D3D12_BARRIER_ACCESS_COMMON, D3D12_BARRIER_ACCESS_COMMON);
5592       } else {
5593          D3D12_RESOURCE_BARRIER barrier = {
5594             .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV,
5595             .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
5596             .UAV = {.pResource = NULL },
5597          };
5598 
5599          ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier);
5600       }
5601    }
5602    cmdbuf->vk.base.device->dispatch_table.CmdPipelineBarrier2(
5603       vk_command_buffer_to_handle(&cmdbuf->vk),
5604       pDependencyInfo);
5605 }
5606 
VKAPI_ATTR void VKAPI_CALL
dzn_CmdBeginQuery(VkCommandBuffer commandBuffer,
                  VkQueryPool queryPool,
                  uint32_t query,
                  VkQueryControlFlags flags)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);

   /* Per-pool bookkeeping owned by this command buffer. On allocation
    * failure we bail; presumably the getter already recorded the error on
    * the command buffer — TODO confirm.
    */
   struct dzn_cmd_buffer_query_pool_state *state =
      dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
   if (!state)
      return;

   /* With multiview, one Vulkan query covers num_views consecutive slots in
    * the pool; record the resolved D3D12 query type on each of them.
    */
   for (uint32_t i = 0; i < cmdbuf->state.multiview.num_views; ++i)
      qpool->queries[query + i].type = dzn_query_pool_get_query_type(qpool, flags);

   ID3D12GraphicsCommandList1_BeginQuery(cmdbuf->cmdlist, qpool->heap, qpool->queries[query].type, query);

   /* The query is being (re)started: it has no collected result yet, and it
    * must not be reported as forced-zero either.
    */
   dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, &state->collect, query, cmdbuf->state.multiview.num_views);
   dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, &state->zero, query, cmdbuf->state.multiview.num_views);
}
5629 
VKAPI_ATTR void VKAPI_CALL
dzn_CmdEndQuery(VkCommandBuffer commandBuffer,
                VkQueryPool queryPool,
                uint32_t query)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);

   /* Per-pool bookkeeping owned by this command buffer; bail on failure. */
   struct dzn_cmd_buffer_query_pool_state *state =
      dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
   if (!state)
      return;

   ID3D12GraphicsCommandList1_EndQuery(cmdbuf->cmdlist, qpool->heap, qpool->queries[query].type, query);

   /* Flag the slot so its value gets collected into the collect buffer. */
   dzn_cmd_buffer_dynbitset_set(cmdbuf, &state->collect, query);
   /* With multiview, only slot `query` is ended here; the remaining view
    * slots are marked to be reported as zero — presumably per-view results
    * are not tracked individually (TODO confirm against collect logic).
    */
   if (cmdbuf->state.multiview.num_views > 1)
      dzn_cmd_buffer_dynbitset_set_range(cmdbuf, &state->zero, query + 1, cmdbuf->state.multiview.num_views - 1);
}
5649 
VKAPI_ATTR void VKAPI_CALL
dzn_CmdWriteTimestamp2(VkCommandBuffer commandBuffer,
                       VkPipelineStageFlags2 stage,
                       VkQueryPool queryPool,
                       uint32_t query)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);

   /* Per-pool bookkeeping owned by this command buffer; bail on failure. */
   struct dzn_cmd_buffer_query_pool_state *state =
      dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
   if (!state)
      return;

   /* Execution barrier so the timestamp gets written after the pipeline flush. */
   D3D12_RESOURCE_BARRIER barrier = {
      .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV,
      .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
      .UAV = { .pResource = NULL },
   };

   ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier);

   /* Tag all multiview slots as timestamps, but only write the first one;
    * the extra view slots are flagged as zero below.
    */
   for (uint32_t i = 0; i < cmdbuf->state.multiview.num_views; ++i)
      qpool->queries[query + i].type = D3D12_QUERY_TYPE_TIMESTAMP;
   ID3D12GraphicsCommandList1_EndQuery(cmdbuf->cmdlist, qpool->heap, qpool->queries[query].type, query);

   /* Mark the slot for collection into the collect buffer. */
   dzn_cmd_buffer_dynbitset_set(cmdbuf, &state->collect, query);
   if (cmdbuf->state.multiview.num_views > 1)
      dzn_cmd_buffer_dynbitset_set_range(cmdbuf, &state->zero, query + 1, cmdbuf->state.multiview.num_views - 1);
}
5681 
5682 
5683 VKAPI_ATTR void VKAPI_CALL
dzn_CmdResetQueryPool(VkCommandBuffer commandBuffer,VkQueryPool queryPool,uint32_t firstQuery,uint32_t queryCount)5684 dzn_CmdResetQueryPool(VkCommandBuffer commandBuffer,
5685                       VkQueryPool queryPool,
5686                       uint32_t firstQuery,
5687                       uint32_t queryCount)
5688 {
5689    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5690    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
5691    VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
5692 
5693    struct dzn_cmd_buffer_query_pool_state *state =
5694       dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
5695 
5696    if (!state)
5697       return;
5698 
5699    uint32_t q_step = DZN_QUERY_REFS_SECTION_SIZE / sizeof(uint64_t);
5700 
5701    for (uint32_t q = 0; q < queryCount; q += q_step) {
5702       uint32_t q_count = MIN2(queryCount - q, q_step);
5703 
5704       ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, qpool->collect_buffer,
5705                                         dzn_query_pool_get_availability_offset(qpool, firstQuery + q),
5706                                         device->queries.refs,
5707                                         DZN_QUERY_REFS_ALL_ZEROS_OFFSET,
5708                                         q_count * sizeof(uint64_t));
5709    }
5710 
5711    q_step = DZN_QUERY_REFS_SECTION_SIZE / qpool->query_size;
5712 
5713    for (uint32_t q = 0; q < queryCount; q += q_step) {
5714       ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, qpool->collect_buffer,
5715                                         dzn_query_pool_get_result_offset(qpool, firstQuery + q),
5716                                         device->queries.refs,
5717                                         DZN_QUERY_REFS_ALL_ZEROS_OFFSET,
5718                                         qpool->query_size);
5719    }
5720 
5721    dzn_cmd_buffer_dynbitset_set_range(cmdbuf, &state->reset, firstQuery, queryCount);
5722    dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, &state->collect, firstQuery, queryCount);
5723    dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, &state->zero, firstQuery, queryCount);
5724 }
5725 
VKAPI_ATTR void VKAPI_CALL
dzn_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer,
                            VkQueryPool queryPool,
                            uint32_t firstQuery,
                            uint32_t queryCount,
                            VkBuffer dstBuffer,
                            VkDeviceSize dstOffset,
                            VkDeviceSize stride,
                            VkQueryResultFlags flags)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
   VK_FROM_HANDLE(dzn_buffer, buf, dstBuffer);

   /* Per-pool bookkeeping owned by this command buffer; bail on failure. */
   struct dzn_cmd_buffer_query_pool_state *qpstate =
      dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
   if (!qpstate)
      return;

   /* Resolve the requested range into the pool's collect buffer first. */
   VkResult result =
      dzn_cmd_buffer_collect_queries(cmdbuf, qpool, qpstate, firstQuery, queryCount);
   if (result != VK_SUCCESS)
      return;

   /* Fast path: if the destination layout matches the collect-buffer layout
    * exactly (64-bit results, matching stride, no availability words), a
    * single bulk copy suffices.
    */
   bool raw_copy = (flags & VK_QUERY_RESULT_64_BIT) &&
                   stride == qpool->query_size &&
                   !(flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT);
#define ALL_STATS \
        (VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT | \
         VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT | \
         VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT | \
         VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT | \
         VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT | \
         VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT | \
         VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT | \
         VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT | \
         VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT | \
         VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT | \
         VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT)
   /* Pipeline-statistics queries store the full D3D12 stats struct; only a
    * pool requesting every counter can use the raw copy, otherwise counters
    * must be cherry-picked below.
    */
   if (qpool->heap_type == D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS &&
       qpool->pipeline_statistics != ALL_STATS)
      raw_copy = false;
#undef ALL_STATS

   /* Make the collect buffer readable as a copy source (and honor WAIT by
    * flushing on the enhanced-barriers path).
    */
   if (cmdbuf->enhanced_barriers) {
      if (flags & VK_QUERY_RESULT_WAIT_BIT) {
         dzn_cmd_buffer_buffer_barrier(cmdbuf, qpool->collect_buffer,
                                       D3D12_BARRIER_SYNC_COPY, D3D12_BARRIER_SYNC_COPY,
                                       D3D12_BARRIER_ACCESS_COPY_DEST, D3D12_BARRIER_ACCESS_COPY_SOURCE);
      }
   } else {
      dzn_cmd_buffer_queue_transition_barriers(cmdbuf, qpool->collect_buffer, 0, 1,
                                               D3D12_RESOURCE_STATE_COPY_DEST,
                                               D3D12_RESOURCE_STATE_COPY_SOURCE,
                                               DZN_QUEUE_TRANSITION_FLUSH);
   }

   if (raw_copy) {
      ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset,
                                        qpool->collect_buffer,
                                        dzn_query_pool_get_result_offset(qpool, firstQuery),
                                        dzn_query_pool_get_result_size(qpool, queryCount));
   } else {
      /* Slow path: per-query (and per-counter) copies, 32- or 64-bit wide. */
      uint32_t step = flags & VK_QUERY_RESULT_64_BIT ? sizeof(uint64_t) : sizeof(uint32_t);

      for (uint32_t q = 0; q < queryCount; q++) {
         uint32_t res_offset = dzn_query_pool_get_result_offset(qpool, firstQuery + q);
         uint32_t dst_counter_offset = 0;

         if (qpool->heap_type == D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS) {
            /* Copy only the counters the pool requested, packed in order. */
            for (uint32_t c = 0; c < sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS) / sizeof(uint64_t); c++) {
               if (!(BITFIELD_BIT(c) & qpool->pipeline_statistics))
                  continue;

               ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset + dst_counter_offset,
                                                 qpool->collect_buffer,
                                                 res_offset + (c * sizeof(uint64_t)),
                                                 step);
               dst_counter_offset += step;
            }
         } else {
            ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset,
                                              qpool->collect_buffer,
                                              res_offset, step);
            dst_counter_offset += step;
         }

         /* The availability word follows the result(s) for each query. */
         if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
            ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset + dst_counter_offset,
                                              qpool->collect_buffer,
                                              dzn_query_pool_get_availability_offset(qpool, firstQuery + q),
                                              step);
         }

         dstOffset += stride;
      }
   }

   /* Return the collect buffer to its steady COPY_DEST state (legacy path). */
   if (!cmdbuf->enhanced_barriers) {
      dzn_cmd_buffer_queue_transition_barriers(cmdbuf, qpool->collect_buffer, 0, 1,
                                               D3D12_RESOURCE_STATE_COPY_SOURCE,
                                               D3D12_RESOURCE_STATE_COPY_DEST,
                                               0);
   }
}
5831 
VKAPI_ATTR void VKAPI_CALL
dzn_CmdDispatchIndirect(VkCommandBuffer commandBuffer,
                        VkBuffer buffer,
                        VkDeviceSize offset)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(dzn_buffer, buf, buffer);

   /* Group counts are unknown on the CPU for an indirect dispatch; clear the
    * sysvals and mark them dirty so they get re-emitted.
    */
   cmdbuf->state.sysvals.compute.group_count_x = 0;
   cmdbuf->state.sysvals.compute.group_count_y = 0;
   cmdbuf->state.sysvals.compute.group_count_z = 0;
   cmdbuf->state.sysvals.compute.base_group_x = 0;
   cmdbuf->state.sysvals.compute.base_group_y = 0;
   cmdbuf->state.sysvals.compute.base_group_z = 0;
   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |=
      DZN_CMD_BINDPOINT_DIRTY_SYSVALS;

   dzn_cmd_buffer_prepare_dispatch(cmdbuf);

   struct dzn_compute_pipeline *pipeline = (struct dzn_compute_pipeline *)
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline;
   ID3D12CommandSignature *cmdsig =
      dzn_compute_pipeline_get_indirect_cmd_sig(pipeline);

   if (!cmdsig) {
      vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
      return;
   }

   /* Internal exec buffer sized for two copies of the dispatch arguments
    * (see the two CopyBufferRegion calls below).
    */
   ID3D12Resource *exec_buf;
   VkResult result =
      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, sizeof(D3D12_DISPATCH_ARGUMENTS) * 2,
                                        DZN_INTERNAL_BUF_DEFAULT,
                                        D3D12_RESOURCE_STATE_COPY_DEST,
                                        0,
                                        &exec_buf, NULL);
   if (result != VK_SUCCESS)
      return;

   /* Make the app's indirect buffer readable as a copy source. */
   if (cmdbuf->enhanced_barriers) {
      dzn_cmd_buffer_buffer_barrier(cmdbuf, buf->res,
                                    D3D12_BARRIER_SYNC_EXECUTE_INDIRECT, D3D12_BARRIER_SYNC_COPY,
                                    D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT, D3D12_BARRIER_ACCESS_COPY_SOURCE);
   } else {
      dzn_cmd_buffer_queue_transition_barriers(cmdbuf, buf->res, 0, 1,
                                               D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT,
                                               D3D12_RESOURCE_STATE_COPY_SOURCE,
                                               DZN_QUEUE_TRANSITION_FLUSH);
   }

   /* Duplicate the dispatch args into the exec buffer. NOTE(review): the
    * first copy presumably feeds the group-count sysvals through the command
    * signature and the second feeds the actual dispatch — verify against
    * dzn_compute_pipeline_get_indirect_cmd_sig().
    */
   ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, exec_buf, 0,
                                     buf->res,
                                     offset,
                                     sizeof(D3D12_DISPATCH_ARGUMENTS));
   ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, exec_buf, sizeof(D3D12_DISPATCH_ARGUMENTS),
                                     buf->res,
                                     offset,
                                     sizeof(D3D12_DISPATCH_ARGUMENTS));

   /* Flip the exec buffer to indirect-argument state before execution. */
   if (cmdbuf->enhanced_barriers) {
      dzn_cmd_buffer_buffer_barrier(cmdbuf, exec_buf,
                                    D3D12_BARRIER_SYNC_COPY, D3D12_BARRIER_SYNC_EXECUTE_INDIRECT,
                                    D3D12_BARRIER_ACCESS_COPY_DEST, D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT);
   } else {
      dzn_cmd_buffer_queue_transition_barriers(cmdbuf, exec_buf, 0, 1,
                                               D3D12_RESOURCE_STATE_COPY_DEST,
                                               D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT,
                                               DZN_QUEUE_TRANSITION_FLUSH);
   }

   ID3D12GraphicsCommandList1_ExecuteIndirect(cmdbuf->cmdlist, cmdsig, 1, exec_buf, 0, NULL, 0);
}
5904 
VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetLineWidth(VkCommandBuffer commandBuffer,
                    float lineWidth)
{
   /* No-op: presumably the driver does not expose wideLines, so 1.0 is the
    * only width the spec allows the app to pass — TODO confirm against the
    * feature bits reported by the physical device.
    */
   assert(lineWidth == 1.0f);
}
5911 
5912 VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetDepthBias(VkCommandBuffer commandBuffer,float depthBiasConstantFactor,float depthBiasClamp,float depthBiasSlopeFactor)5913 dzn_CmdSetDepthBias(VkCommandBuffer commandBuffer,
5914                     float depthBiasConstantFactor,
5915                     float depthBiasClamp,
5916                     float depthBiasSlopeFactor)
5917 {
5918    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5919    struct dzn_physical_device *pdev = container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk);
5920 
5921    cmdbuf->state.pipeline_variant.depth_bias.constant_factor = depthBiasConstantFactor;
5922    cmdbuf->state.pipeline_variant.depth_bias.clamp = depthBiasClamp;
5923    cmdbuf->state.pipeline_variant.depth_bias.slope_factor = depthBiasSlopeFactor;
5924    cmdbuf->state.sysvals.gfx.depth_bias = depthBiasConstantFactor;
5925    if (pdev->options16.DynamicDepthBiasSupported)
5926       cmdbuf->state.dirty |= DZN_CMD_DIRTY_DEPTH_BIAS;
5927    else
5928       cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
5929 }
5930 
5931 VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetBlendConstants(VkCommandBuffer commandBuffer,const float blendConstants[4])5932 dzn_CmdSetBlendConstants(VkCommandBuffer commandBuffer,
5933                          const float blendConstants[4])
5934 {
5935    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5936 
5937    memcpy(cmdbuf->state.blend.constants, blendConstants,
5938           sizeof(cmdbuf->state.blend.constants));
5939    cmdbuf->state.dirty |= DZN_CMD_DIRTY_BLEND_CONSTANTS;
5940 }
5941 
5942 VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetDepthBounds(VkCommandBuffer commandBuffer,float minDepthBounds,float maxDepthBounds)5943 dzn_CmdSetDepthBounds(VkCommandBuffer commandBuffer,
5944                       float minDepthBounds,
5945                       float maxDepthBounds)
5946 {
5947    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5948    struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
5949    struct dzn_physical_device *pdev =
5950       container_of(device->vk.physical, struct dzn_physical_device, vk);
5951 
5952    if (pdev->options2.DepthBoundsTestSupported) {
5953       cmdbuf->state.zsa.depth_bounds.min = minDepthBounds;
5954       cmdbuf->state.zsa.depth_bounds.max = maxDepthBounds;
5955       cmdbuf->state.dirty |= DZN_CMD_DIRTY_DEPTH_BOUNDS;
5956    }
5957 }
5958 
5959 VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer,VkStencilFaceFlags faceMask,uint32_t compareMask)5960 dzn_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer,
5961                              VkStencilFaceFlags faceMask,
5962                              uint32_t compareMask)
5963 {
5964    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5965 
5966    if (faceMask & VK_STENCIL_FACE_FRONT_BIT) {
5967       cmdbuf->state.zsa.stencil_test.front.compare_mask = compareMask;
5968       cmdbuf->state.pipeline_variant.stencil_test.front.compare_mask = compareMask;
5969    }
5970 
5971    if (faceMask & VK_STENCIL_FACE_BACK_BIT) {
5972       cmdbuf->state.zsa.stencil_test.back.compare_mask = compareMask;
5973       cmdbuf->state.pipeline_variant.stencil_test.back.compare_mask = compareMask;
5974    }
5975 
5976    cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_COMPARE_MASK;
5977    cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
5978 }
5979 
5980 VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer,VkStencilFaceFlags faceMask,uint32_t writeMask)5981 dzn_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer,
5982                            VkStencilFaceFlags faceMask,
5983                            uint32_t writeMask)
5984 {
5985    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5986 
5987    if (faceMask & VK_STENCIL_FACE_FRONT_BIT) {
5988       cmdbuf->state.zsa.stencil_test.front.write_mask = writeMask;
5989       cmdbuf->state.pipeline_variant.stencil_test.front.write_mask = writeMask;
5990    }
5991 
5992    if (faceMask & VK_STENCIL_FACE_BACK_BIT) {
5993       cmdbuf->state.zsa.stencil_test.back.write_mask = writeMask;
5994       cmdbuf->state.pipeline_variant.stencil_test.back.write_mask = writeMask;
5995    }
5996 
5997    cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_WRITE_MASK;
5998    cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
5999 }
6000 
6001 VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetStencilReference(VkCommandBuffer commandBuffer,VkStencilFaceFlags faceMask,uint32_t reference)6002 dzn_CmdSetStencilReference(VkCommandBuffer commandBuffer,
6003                            VkStencilFaceFlags faceMask,
6004                            uint32_t reference)
6005 {
6006    VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
6007 
6008    if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
6009       cmdbuf->state.zsa.stencil_test.front.ref = reference;
6010 
6011    if (faceMask & VK_STENCIL_FACE_BACK_BIT)
6012       cmdbuf->state.zsa.stencil_test.back.ref = reference;
6013 
6014    cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF;
6015 }
6016