1 /*
2 * Copyright © Microsoft Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "dzn_private.h"
25
26 #include "vk_alloc.h"
27 #include "vk_debug_report.h"
28 #include "vk_format.h"
29 #include "vk_util.h"
30
31 #include "dxil_spirv_nir.h"
32
/* Execute queued legacy (non-enhanced) transition barriers on the command
 * list. Barriers whose StateBefore equals StateAfter are NOPs, which D3D12
 * forbids, so contiguous runs of real barriers are flushed around them.
 * After execution, every entry's StateBefore is set to its StateAfter so a
 * later flush of the same array does not replay the transition.
 */
static void
dzn_cmd_buffer_exec_transition_barriers(struct dzn_cmd_buffer *cmdbuf,
                                        D3D12_RESOURCE_BARRIER *barriers,
                                        uint32_t barrier_count)
{
   assert(!cmdbuf->enhanced_barriers);
   uint32_t flush_count = 0;
   for (uint32_t b = 0; b < barrier_count; b++) {
      assert(barriers[b].Transition.pResource);

      /* some layouts map to the same states, and NOP-barriers are illegal */
      if (barriers[b].Transition.StateBefore == barriers[b].Transition.StateAfter) {
         /* Flush the run accumulated before this NOP entry. */
         if (flush_count) {
            ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, flush_count,
                                                       &barriers[b - flush_count]);
            flush_count = 0;
         }
      } else {
         flush_count++;
      }
   }

   /* Flush the trailing run, if any. */
   if (flush_count)
      ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, flush_count,
                                                 &barriers[barrier_count - flush_count]);

   /* Set Before = After so we don't execute the same barrier twice. */
   for (uint32_t b = 0; b < barrier_count; b++)
      barriers[b].Transition.StateBefore = barriers[b].Transition.StateAfter;
}
63
64 static void
dzn_cmd_buffer_flush_transition_barriers(struct dzn_cmd_buffer * cmdbuf,ID3D12Resource * res,uint32_t first_subres,uint32_t subres_count)65 dzn_cmd_buffer_flush_transition_barriers(struct dzn_cmd_buffer *cmdbuf,
66 ID3D12Resource *res,
67 uint32_t first_subres,
68 uint32_t subres_count)
69 {
70 assert(!cmdbuf->enhanced_barriers);
71 struct hash_entry *he =
72 _mesa_hash_table_search(cmdbuf->transition_barriers, res);
73 D3D12_RESOURCE_BARRIER *barriers = he ? he->data : NULL;
74
75 if (!barriers)
76 return;
77
78 dzn_cmd_buffer_exec_transition_barriers(cmdbuf, &barriers[first_subres], subres_count);
79 }
80
/* Flags controlling how queued transition barriers are handled. */
enum dzn_queue_transition_flags {
   /* Execute the queued barriers immediately after recording them. */
   DZN_QUEUE_TRANSITION_FLUSH = 1 << 0,
   /* The caller's "before" state is undefined; when a barrier is already
    * queued for a subresource, its existing StateAfter is used instead. */
   DZN_QUEUE_TRANSITION_BEFORE_IS_UNDEFINED = 1 << 1,
};
85
86 static VkResult
dzn_cmd_buffer_queue_transition_barriers(struct dzn_cmd_buffer * cmdbuf,ID3D12Resource * res,uint32_t first_subres,uint32_t subres_count,D3D12_RESOURCE_STATES before,D3D12_RESOURCE_STATES after,uint32_t flags)87 dzn_cmd_buffer_queue_transition_barriers(struct dzn_cmd_buffer *cmdbuf,
88 ID3D12Resource *res,
89 uint32_t first_subres,
90 uint32_t subres_count,
91 D3D12_RESOURCE_STATES before,
92 D3D12_RESOURCE_STATES after,
93 uint32_t flags)
94 {
95 assert(!cmdbuf->enhanced_barriers);
96 struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
97 struct hash_entry *he =
98 _mesa_hash_table_search(cmdbuf->transition_barriers, res);
99 struct D3D12_RESOURCE_BARRIER *barriers = he ? he->data : NULL;
100
101 if (!barriers) {
102 D3D12_RESOURCE_DESC desc = dzn_ID3D12Resource_GetDesc(res);
103 D3D12_FEATURE_DATA_FORMAT_INFO fmt_info = { desc.Format, 0 };
104 ID3D12Device_CheckFeatureSupport(device->dev, D3D12_FEATURE_FORMAT_INFO, &fmt_info, sizeof(fmt_info));
105 uint32_t barrier_count =
106 fmt_info.PlaneCount *
107 desc.MipLevels * desc.DepthOrArraySize;
108
109 barriers =
110 vk_zalloc(&cmdbuf->vk.pool->alloc, sizeof(*barriers) * barrier_count,
111 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
112 if (!barriers)
113 return vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
114
115 he = _mesa_hash_table_insert(cmdbuf->transition_barriers, res, barriers);
116 if (!he)
117 return vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
118 }
119
120 for (uint32_t subres = first_subres; subres < first_subres + subres_count; subres++) {
121 if (!barriers[subres].Transition.pResource) {
122 barriers[subres] = (D3D12_RESOURCE_BARRIER) {
123 .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
124 .Flags = 0,
125 .Transition = {
126 .pResource = res,
127 .Subresource = subres,
128 .StateBefore = before,
129 .StateAfter = after,
130 },
131 };
132 } else {
133 if (flags & DZN_QUEUE_TRANSITION_BEFORE_IS_UNDEFINED)
134 before = barriers[subres].Transition.StateAfter;
135
136 assert(barriers[subres].Transition.StateAfter == before ||
137 barriers[subres].Transition.StateAfter == after);
138 barriers[subres].Transition.StateAfter = after;
139 }
140 }
141
142 if (flags & DZN_QUEUE_TRANSITION_FLUSH)
143 dzn_cmd_buffer_exec_transition_barriers(cmdbuf, &barriers[first_subres], subres_count);
144
145 return VK_SUCCESS;
146 }
147
/* Queue transition barriers for all subresources covered by a
 * VkImageSubresourceRange, given explicit before/after D3D12 states.
 * Per aspect, per-layer subresource runs are coalesced into as few
 * dzn_cmd_buffer_queue_transition_barriers() calls as possible: consecutive
 * layers whose subresource indices are contiguous extend the current run;
 * a gap flushes the run and starts a new one.
 */
static VkResult
dzn_cmd_buffer_queue_image_range_state_transition(struct dzn_cmd_buffer *cmdbuf,
                                                  const struct dzn_image *image,
                                                  const VkImageSubresourceRange *range,
                                                  D3D12_RESOURCE_STATES before,
                                                  D3D12_RESOURCE_STATES after,
                                                  uint32_t flags)
{
   assert(!cmdbuf->enhanced_barriers);
   uint32_t first_barrier = 0, barrier_count = 0;
   VkResult ret = VK_SUCCESS;

   dzn_foreach_aspect(aspect, range->aspectMask) {
      uint32_t layer_count = dzn_get_layer_count(image, range);
      uint32_t level_count = dzn_get_level_count(image, range);
      for (uint32_t layer = 0; layer < layer_count; layer++) {
         /* Subresource index of mip 0 for this layer; the layer's mips are
          * the level_count indices starting there. */
         uint32_t subres = dzn_image_range_get_subresource_index(image, range, aspect, 0, layer);
         if (!barrier_count) {
            /* Start a new run. */
            first_barrier = subres;
            barrier_count = level_count;
            continue;
         } else if (first_barrier + barrier_count == subres) {
            /* Contiguous with the current run: extend it. */
            barrier_count += level_count;
            continue;
         }

         /* Discontinuity: queue the accumulated run, then restart below. */
         ret = dzn_cmd_buffer_queue_transition_barriers(cmdbuf, image->res,
                                                        first_barrier, barrier_count,
                                                        before, after, flags);
         if (ret != VK_SUCCESS)
            return ret;

         barrier_count = 0;
      }

      /* Queue the trailing run for this aspect, if any. */
      if (barrier_count) {
         ret = dzn_cmd_buffer_queue_transition_barriers(cmdbuf, image->res,
                                                        first_barrier, barrier_count,
                                                        before, after, flags);
         if (ret != VK_SUCCESS)
            return ret;
      }
   }

   return VK_SUCCESS;
}
194
/* Queue transition barriers for a VkImageSubresourceRange, deriving the
 * D3D12 before/after states from Vulkan image layouts (per aspect, since
 * depth and stencil may map to different states). UNDEFINED/PREINITIALIZED
 * old layouts map to COMMON and additionally set
 * DZN_QUEUE_TRANSITION_BEFORE_IS_UNDEFINED so already-queued barriers keep
 * their pending state. Subresource runs are coalesced the same way as in
 * dzn_cmd_buffer_queue_image_range_state_transition().
 */
static VkResult
dzn_cmd_buffer_queue_image_range_layout_transition(struct dzn_cmd_buffer *cmdbuf,
                                                   const struct dzn_image *image,
                                                   const VkImageSubresourceRange *range,
                                                   VkImageLayout old_layout,
                                                   VkImageLayout new_layout,
                                                   uint32_t flags)
{
   assert(!cmdbuf->enhanced_barriers);
   uint32_t first_barrier = 0, barrier_count = 0;
   VkResult ret = VK_SUCCESS;

   if (old_layout == VK_IMAGE_LAYOUT_UNDEFINED)
      flags |= DZN_QUEUE_TRANSITION_BEFORE_IS_UNDEFINED;

   dzn_foreach_aspect(aspect, range->aspectMask) {
      D3D12_RESOURCE_STATES after =
         dzn_image_layout_to_state(image, new_layout, aspect, cmdbuf->type);
      D3D12_RESOURCE_STATES before =
         (old_layout == VK_IMAGE_LAYOUT_UNDEFINED ||
          old_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) ?
         D3D12_RESOURCE_STATE_COMMON :
         dzn_image_layout_to_state(image, old_layout, aspect, cmdbuf->type);

      uint32_t layer_count = dzn_get_layer_count(image, range);
      uint32_t level_count = dzn_get_level_count(image, range);
      for (uint32_t layer = 0; layer < layer_count; layer++) {
         /* Subresource index of mip 0 for this layer. */
         uint32_t subres = dzn_image_range_get_subresource_index(image, range, aspect, 0, layer);
         if (!barrier_count) {
            /* Start a new run. */
            first_barrier = subres;
            barrier_count = level_count;
            continue;
         } else if (first_barrier + barrier_count == subres) {
            /* Contiguous with the current run: extend it. */
            barrier_count += level_count;
            continue;
         }

         /* Discontinuity: queue the accumulated run. */
         ret = dzn_cmd_buffer_queue_transition_barriers(cmdbuf, image->res,
                                                        first_barrier, barrier_count,
                                                        before, after, flags);
         if (ret != VK_SUCCESS)
            return ret;

         barrier_count = 0;
      }

      /* Queue the trailing run for this aspect, if any. */
      if (barrier_count) {
         ret = dzn_cmd_buffer_queue_transition_barriers(cmdbuf, image->res,
                                                        first_barrier, barrier_count,
                                                        before, after, flags);
         if (ret != VK_SUCCESS)
            return ret;
      }
   }

   return VK_SUCCESS;
}
252
253 static void
dzn_cmd_buffer_global_barrier(struct dzn_cmd_buffer * cmdbuf,D3D12_BARRIER_SYNC sync_before,D3D12_BARRIER_SYNC sync_after,D3D12_BARRIER_ACCESS access_before,D3D12_BARRIER_ACCESS access_after)254 dzn_cmd_buffer_global_barrier(struct dzn_cmd_buffer *cmdbuf,
255 D3D12_BARRIER_SYNC sync_before,
256 D3D12_BARRIER_SYNC sync_after,
257 D3D12_BARRIER_ACCESS access_before,
258 D3D12_BARRIER_ACCESS access_after)
259 {
260 assert(cmdbuf->enhanced_barriers);
261 D3D12_GLOBAL_BARRIER global = {
262 .SyncBefore = sync_before,
263 .SyncAfter = sync_after,
264 .AccessBefore = access_before,
265 .AccessAfter = access_after,
266 };
267 D3D12_BARRIER_GROUP group = {
268 .Type = D3D12_BARRIER_TYPE_GLOBAL,
269 .NumBarriers = 1,
270 .pGlobalBarriers = &global,
271 };
272 ID3D12GraphicsCommandList8_Barrier(cmdbuf->cmdlist8, 1, &group);
273 }
274
275 static void
dzn_cmd_buffer_buffer_barrier(struct dzn_cmd_buffer * cmdbuf,ID3D12Resource * buf,D3D12_BARRIER_SYNC sync_before,D3D12_BARRIER_SYNC sync_after,D3D12_BARRIER_ACCESS access_before,D3D12_BARRIER_ACCESS access_after)276 dzn_cmd_buffer_buffer_barrier(struct dzn_cmd_buffer *cmdbuf,
277 ID3D12Resource *buf,
278 D3D12_BARRIER_SYNC sync_before,
279 D3D12_BARRIER_SYNC sync_after,
280 D3D12_BARRIER_ACCESS access_before,
281 D3D12_BARRIER_ACCESS access_after)
282 {
283 assert(cmdbuf->enhanced_barriers);
284 D3D12_BUFFER_BARRIER buffer = {
285 .SyncBefore = sync_before,
286 .SyncAfter = sync_after,
287 .AccessBefore = access_before,
288 .AccessAfter = access_after,
289 .pResource = buf,
290 .Offset = 0,
291 .Size = UINT64_MAX,
292 };
293 D3D12_BARRIER_GROUP group = {
294 .Type = D3D12_BARRIER_TYPE_BUFFER,
295 .NumBarriers = 1,
296 .pBufferBarriers = &buffer,
297 };
298 ID3D12GraphicsCommandList8_Barrier(cmdbuf->cmdlist8, 1, &group);
299 }
300
301 static void
dzn_cmd_buffer_image_barrier(struct dzn_cmd_buffer * cmdbuf,const struct dzn_image * image,D3D12_BARRIER_SYNC sync_before,D3D12_BARRIER_SYNC sync_after,D3D12_BARRIER_ACCESS access_before,D3D12_BARRIER_ACCESS access_after,D3D12_BARRIER_LAYOUT layout_before,D3D12_BARRIER_LAYOUT layout_after,const VkImageSubresourceRange * range)302 dzn_cmd_buffer_image_barrier(struct dzn_cmd_buffer *cmdbuf,
303 const struct dzn_image *image,
304 D3D12_BARRIER_SYNC sync_before,
305 D3D12_BARRIER_SYNC sync_after,
306 D3D12_BARRIER_ACCESS access_before,
307 D3D12_BARRIER_ACCESS access_after,
308 D3D12_BARRIER_LAYOUT layout_before,
309 D3D12_BARRIER_LAYOUT layout_after,
310 const VkImageSubresourceRange *range)
311 {
312 assert(cmdbuf->enhanced_barriers);
313 uint32_t first_plane = (range->aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) ? 1 : 0;
314 uint32_t plane_count = first_plane == 0 && (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) ? 2 : 1;
315 D3D12_TEXTURE_BARRIER texture = {
316 .SyncBefore = sync_before,
317 .SyncAfter = sync_after,
318 .AccessBefore = access_before,
319 .AccessAfter = access_after,
320 .LayoutBefore = layout_before,
321 .LayoutAfter = layout_after,
322 .Subresources.FirstArraySlice = range->baseArrayLayer,
323 .Subresources.NumArraySlices = dzn_get_layer_count(image, range),
324 .Subresources.IndexOrFirstMipLevel = range->baseMipLevel,
325 .Subresources.NumMipLevels = dzn_get_level_count(image, range),
326 .Subresources.FirstPlane = first_plane,
327 .Subresources.NumPlanes = plane_count,
328 .pResource = image->res,
329 };
330 D3D12_BARRIER_GROUP group = {
331 .Type = D3D12_BARRIER_TYPE_TEXTURE,
332 .NumBarriers = 1,
333 .pTextureBarriers = &texture,
334 };
335 ID3D12GraphicsCommandList8_Barrier(cmdbuf->cmdlist8, 1, &group);
336 }
337
/* Ensure the image subresources are in `needed_layout`, emitting a layout
 * transition barrier if the current Vulkan layout maps to a different D3D12
 * layout. Returns the layout the subresources were in before the call, for
 * use with dzn_cmd_buffer_restore_layout(). Enhanced-barriers only.
 */
static D3D12_BARRIER_LAYOUT
dzn_cmd_buffer_require_layout(struct dzn_cmd_buffer *cmdbuf,
                              const struct dzn_image *image,
                              VkImageLayout current_layout,
                              D3D12_BARRIER_LAYOUT needed_layout,
                              const VkImageSubresourceRange *range)
{
   assert(cmdbuf->enhanced_barriers);
   /* We shouldn't need these fixups on a subresource range which includes depth and stencil,
      where one is read-only and the other is writable */
   if (range->aspectMask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
      assert(current_layout != VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL &&
             current_layout != VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL);
   }

   /* Nothing needs to be done for these, the appropriate sync/access was already handled */
   if (image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS)
      return needed_layout;

   D3D12_BARRIER_LAYOUT current_d3d_layout = dzn_vk_layout_to_d3d_layout(current_layout, cmdbuf->type, range->aspectMask);
   if (current_d3d_layout != needed_layout) {
      /* Broad SYNC_ALL/ACCESS_COMMON scopes: this is a layout fixup, not a
       * hazard the caller described. */
      dzn_cmd_buffer_image_barrier(cmdbuf, image,
                                   D3D12_BARRIER_SYNC_ALL, D3D12_BARRIER_SYNC_ALL,
                                   D3D12_BARRIER_ACCESS_COMMON, D3D12_BARRIER_ACCESS_COMMON,
                                   current_d3d_layout, needed_layout, range);
   }
   return current_d3d_layout;
}
366
367 static void
dzn_cmd_buffer_restore_layout(struct dzn_cmd_buffer * cmdbuf,const struct dzn_image * image,D3D12_BARRIER_SYNC sync,D3D12_BARRIER_ACCESS access,D3D12_BARRIER_LAYOUT needed_layout,D3D12_BARRIER_LAYOUT restore_layout,const VkImageSubresourceRange * range)368 dzn_cmd_buffer_restore_layout(struct dzn_cmd_buffer *cmdbuf,
369 const struct dzn_image *image,
370 D3D12_BARRIER_SYNC sync,
371 D3D12_BARRIER_ACCESS access,
372 D3D12_BARRIER_LAYOUT needed_layout,
373 D3D12_BARRIER_LAYOUT restore_layout,
374 const VkImageSubresourceRange *range)
375 {
376 if (needed_layout != restore_layout) {
377 dzn_cmd_buffer_image_barrier(cmdbuf, image,
378 sync, D3D12_BARRIER_SYNC_COPY,
379 access, D3D12_BARRIER_ACCESS_COMMON,
380 needed_layout, restore_layout, range);
381 }
382 }
383
/* Destroy a command buffer: release the D3D12 command list(s) and allocator,
 * free internal buffers, finish descriptor-heap pools and dynarrays, tear
 * down the per-object hash tables, then free the command buffer itself.
 * Every member is null-checked because this is also the error-unwind path
 * for a partially-initialized command buffer from dzn_cmd_buffer_create().
 */
static void
dzn_cmd_buffer_destroy(struct vk_command_buffer *cbuf)
{
   if (!cbuf)
      return;

   struct dzn_cmd_buffer *cmdbuf = container_of(cbuf, struct dzn_cmd_buffer, vk);

   if (cmdbuf->cmdlist)
      ID3D12GraphicsCommandList1_Release(cmdbuf->cmdlist);

   if (cmdbuf->cmdlist8)
      ID3D12GraphicsCommandList8_Release(cmdbuf->cmdlist8);

   if (cmdbuf->cmdlist9)
      ID3D12GraphicsCommandList9_Release(cmdbuf->cmdlist9);

   if (cmdbuf->cmdalloc)
      ID3D12CommandAllocator_Release(cmdbuf->cmdalloc);

   /* Release internal buffers from every size bucket. */
   for (uint32_t bucket = 0; bucket < DZN_INTERNAL_BUF_BUCKET_COUNT; ++bucket) {
      list_for_each_entry_safe(struct dzn_internal_resource, res, &cmdbuf->internal_bufs[bucket], link) {
         list_del(&res->link);
         ID3D12Resource_Release(res->res);
         vk_free(&cbuf->pool->alloc, res);
      }
   }

   dzn_descriptor_heap_pool_finish(&cmdbuf->cbv_srv_uav_pool);
   dzn_descriptor_heap_pool_finish(&cmdbuf->sampler_pool);
   dzn_descriptor_heap_pool_finish(&cmdbuf->rtvs.pool);
   dzn_descriptor_heap_pool_finish(&cmdbuf->dsvs.pool);
   util_dynarray_fini(&cmdbuf->events.signal);
   util_dynarray_fini(&cmdbuf->queries.reset);
   util_dynarray_fini(&cmdbuf->queries.signal);

   /* RTV/DSV hash tables own their values (heap-allocated view entries). */
   if (cmdbuf->rtvs.ht) {
      hash_table_foreach(cmdbuf->rtvs.ht, he)
         vk_free(&cbuf->pool->alloc, he->data);
      _mesa_hash_table_destroy(cmdbuf->rtvs.ht, NULL);
   }

   if (cmdbuf->dsvs.ht) {
      hash_table_foreach(cmdbuf->dsvs.ht, he)
         vk_free(&cbuf->pool->alloc, he->data);
      _mesa_hash_table_destroy(cmdbuf->dsvs.ht, NULL);
   }

   /* Event table values are enum states stored inline, nothing to free. */
   if (cmdbuf->events.ht)
      _mesa_hash_table_destroy(cmdbuf->events.ht, NULL);

   /* Query-pool states own four dynarrays each; finish them before freeing. */
   if (cmdbuf->queries.ht) {
      hash_table_foreach(cmdbuf->queries.ht, he) {
         struct dzn_cmd_buffer_query_pool_state *qpstate = he->data;
         util_dynarray_fini(&qpstate->reset);
         util_dynarray_fini(&qpstate->collect);
         util_dynarray_fini(&qpstate->signal);
         util_dynarray_fini(&qpstate->zero);
         vk_free(&cbuf->pool->alloc, he->data);
      }
      _mesa_hash_table_destroy(cmdbuf->queries.ht, NULL);
   }

   /* Per-resource transition-barrier arrays are owned by the table. */
   if (cmdbuf->transition_barriers) {
      hash_table_foreach(cmdbuf->transition_barriers, he)
         vk_free(&cbuf->pool->alloc, he->data);
      _mesa_hash_table_destroy(cmdbuf->transition_barriers, NULL);
   }

   vk_command_buffer_finish(&cmdbuf->vk);
   vk_free(&cbuf->pool->alloc, cmdbuf);
}
456
/* Reset a command buffer for reuse: clear recorded state, release internal
 * buffers, empty (but keep) the hash tables and dynarrays, reset the
 * descriptor-heap pools, and reset the D3D12 command list/allocator for
 * primary command buffers.
 */
static void
dzn_cmd_buffer_reset(struct vk_command_buffer *cbuf, VkCommandBufferResetFlags flags)
{
   struct dzn_cmd_buffer *cmdbuf = container_of(cbuf, struct dzn_cmd_buffer, vk);

   /* Reset the state */
   memset(&cmdbuf->state, 0, sizeof(cmdbuf->state));
   cmdbuf->state.multiview.num_views = 1;
   cmdbuf->state.multiview.view_mask = 1;

   /* TODO: Return resources to the pool */
   for (uint32_t bucket = 0; bucket < DZN_INTERNAL_BUF_BUCKET_COUNT; ++bucket) {
      list_for_each_entry_safe(struct dzn_internal_resource, res, &cmdbuf->internal_bufs[bucket], link) {
         list_del(&res->link);
         ID3D12Resource_Release(res->res);
         vk_free(&cmdbuf->vk.pool->alloc, res);
      }
   }
   cmdbuf->cur_upload_buf = NULL;

   util_dynarray_clear(&cmdbuf->events.signal);
   util_dynarray_clear(&cmdbuf->queries.reset);
   util_dynarray_clear(&cmdbuf->queries.signal);
   /* Free heap-allocated hash-table values before clearing the tables. */
   hash_table_foreach(cmdbuf->rtvs.ht, he)
      vk_free(&cmdbuf->vk.pool->alloc, he->data);
   _mesa_hash_table_clear(cmdbuf->rtvs.ht, NULL);
   cmdbuf->null_rtv.ptr = 0;
   dzn_descriptor_heap_pool_reset(&cmdbuf->rtvs.pool);
   hash_table_foreach(cmdbuf->dsvs.ht, he)
      vk_free(&cmdbuf->vk.pool->alloc, he->data);
   _mesa_hash_table_clear(cmdbuf->dsvs.ht, NULL);
   /* Query-pool states own four dynarrays each; finish them before freeing. */
   hash_table_foreach(cmdbuf->queries.ht, he) {
      struct dzn_cmd_buffer_query_pool_state *qpstate = he->data;
      util_dynarray_fini(&qpstate->reset);
      util_dynarray_fini(&qpstate->collect);
      util_dynarray_fini(&qpstate->signal);
      util_dynarray_fini(&qpstate->zero);
      vk_free(&cmdbuf->vk.pool->alloc, he->data);
   }
   _mesa_hash_table_clear(cmdbuf->queries.ht, NULL);
   _mesa_hash_table_clear(cmdbuf->events.ht, NULL);
   hash_table_foreach(cmdbuf->transition_barriers, he)
      vk_free(&cmdbuf->vk.pool->alloc, he->data);
   _mesa_hash_table_clear(cmdbuf->transition_barriers, NULL);
   dzn_descriptor_heap_pool_reset(&cmdbuf->dsvs.pool);
   dzn_descriptor_heap_pool_reset(&cmdbuf->cbv_srv_uav_pool);
   dzn_descriptor_heap_pool_reset(&cmdbuf->sampler_pool);

   /* A list left open from a previous recording must be closed before the
    * allocator can be reset. */
   if (cmdbuf->vk.state == MESA_VK_COMMAND_BUFFER_STATE_RECORDING &&
       cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY)
      ID3D12GraphicsCommandList1_Close(cmdbuf->cmdlist);

   vk_command_buffer_reset(&cmdbuf->vk);

   if (cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY)
      ID3D12CommandAllocator_Reset(cmdbuf->cmdalloc);
}
514
515 static uint32_t
dzn_cmd_buffer_rtv_key_hash_function(const void * key)516 dzn_cmd_buffer_rtv_key_hash_function(const void *key)
517 {
518 return _mesa_hash_data(key, sizeof(struct dzn_cmd_buffer_rtv_key));
519 }
520
521 static bool
dzn_cmd_buffer_rtv_key_equals_function(const void * a,const void * b)522 dzn_cmd_buffer_rtv_key_equals_function(const void *a, const void *b)
523 {
524 return memcmp(a, b, sizeof(struct dzn_cmd_buffer_rtv_key)) == 0;
525 }
526
527 static uint32_t
dzn_cmd_buffer_dsv_key_hash_function(const void * key)528 dzn_cmd_buffer_dsv_key_hash_function(const void *key)
529 {
530 return _mesa_hash_data(key, sizeof(struct dzn_cmd_buffer_dsv_key));
531 }
532
533 static bool
dzn_cmd_buffer_dsv_key_equals_function(const void * a,const void * b)534 dzn_cmd_buffer_dsv_key_equals_function(const void *a, const void *b)
535 {
536 return memcmp(a, b, sizeof(struct dzn_cmd_buffer_dsv_key)) == 0;
537 }
538
/* vk_command_buffer vtable hooking the dzn destroy/reset implementations
 * into the common Vulkan runtime. */
static const struct vk_command_buffer_ops cmd_buffer_ops = {
   .destroy = dzn_cmd_buffer_destroy,
   .reset = dzn_cmd_buffer_reset,
};
543
/* Barrier sync bits usable on each D3D12 command-list type, indexed by
 * D3D12_COMMAND_LIST_TYPE. Copied into cmdbuf->valid_sync at creation.
 * Direct queues accept everything except the video-only bits. */
static const D3D12_BARRIER_SYNC cmd_buffer_valid_sync[] = {
   [D3D12_COMMAND_LIST_TYPE_DIRECT] = ~(D3D12_BARRIER_SYNC_VIDEO_DECODE |
                                        D3D12_BARRIER_SYNC_VIDEO_PROCESS |
                                        D3D12_BARRIER_SYNC_VIDEO_ENCODE),
   [D3D12_COMMAND_LIST_TYPE_COMPUTE] = (D3D12_BARRIER_SYNC_ALL |
                                        D3D12_BARRIER_SYNC_COMPUTE_SHADING |
                                        D3D12_BARRIER_SYNC_RAYTRACING |
                                        D3D12_BARRIER_SYNC_COPY |
                                        D3D12_BARRIER_SYNC_EXECUTE_INDIRECT |
                                        D3D12_BARRIER_SYNC_PREDICATION |
                                        D3D12_BARRIER_SYNC_ALL_SHADING |
                                        D3D12_BARRIER_SYNC_NON_PIXEL_SHADING |
                                        D3D12_BARRIER_SYNC_EMIT_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO |
                                        D3D12_BARRIER_SYNC_CLEAR_UNORDERED_ACCESS_VIEW |
                                        D3D12_BARRIER_SYNC_BUILD_RAYTRACING_ACCELERATION_STRUCTURE |
                                        D3D12_BARRIER_SYNC_COPY_RAYTRACING_ACCELERATION_STRUCTURE),
   [D3D12_COMMAND_LIST_TYPE_COPY] = D3D12_BARRIER_SYNC_ALL | D3D12_BARRIER_SYNC_COPY
};
/* Barrier access bits usable on each D3D12 command-list type, indexed by
 * D3D12_COMMAND_LIST_TYPE. Copied into cmdbuf->valid_access at creation.
 * Direct queues accept everything except the video-only bits. */
static const D3D12_BARRIER_ACCESS cmd_buffer_valid_access[] = {
   [D3D12_COMMAND_LIST_TYPE_DIRECT] = ~(D3D12_BARRIER_ACCESS_VIDEO_DECODE_READ |
                                        D3D12_BARRIER_ACCESS_VIDEO_DECODE_WRITE |
                                        D3D12_BARRIER_ACCESS_VIDEO_PROCESS_READ |
                                        D3D12_BARRIER_ACCESS_VIDEO_PROCESS_WRITE |
                                        D3D12_BARRIER_ACCESS_VIDEO_ENCODE_READ |
                                        D3D12_BARRIER_ACCESS_VIDEO_ENCODE_WRITE),
   [D3D12_COMMAND_LIST_TYPE_COMPUTE] = (D3D12_BARRIER_ACCESS_CONSTANT_BUFFER |
                                        D3D12_BARRIER_ACCESS_UNORDERED_ACCESS |
                                        D3D12_BARRIER_ACCESS_SHADER_RESOURCE |
                                        D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT |
                                        D3D12_BARRIER_ACCESS_PREDICATION |
                                        D3D12_BARRIER_ACCESS_COPY_DEST |
                                        D3D12_BARRIER_ACCESS_COPY_SOURCE |
                                        D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ |
                                        D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_WRITE),
   [D3D12_COMMAND_LIST_TYPE_COPY] = D3D12_BARRIER_ACCESS_COPY_SOURCE | D3D12_BARRIER_ACCESS_COPY_DEST,
};
580
/* Allocate and initialize one command buffer from the pool described by
 * `info`: zero the state, set up internal buffer lists, dynarrays,
 * descriptor-heap pools and hash tables, then (for primary command buffers
 * only) create the D3D12 command allocator and command list. On failure,
 * dzn_cmd_buffer_destroy() unwinds whatever was initialized.
 */
static VkResult
dzn_cmd_buffer_create(const VkCommandBufferAllocateInfo *info,
                      VkCommandBuffer *out)
{
   VK_FROM_HANDLE(vk_command_pool, pool, info->commandPool);
   struct dzn_device *device = container_of(pool->base.device, struct dzn_device, vk);
   struct dzn_physical_device *pdev =
      container_of(device->vk.physical, struct dzn_physical_device, vk);

   assert(pool->queue_family_index < pdev->queue_family_count);

   /* The D3D12 list type is dictated by the queue family the pool targets. */
   D3D12_COMMAND_LIST_TYPE type =
      pdev->queue_families[pool->queue_family_index].desc.Type;

   struct dzn_cmd_buffer *cmdbuf =
      vk_zalloc(&pool->alloc, sizeof(*cmdbuf), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!cmdbuf)
      return vk_error(pool->base.device, VK_ERROR_OUT_OF_HOST_MEMORY);

   VkResult result =
      vk_command_buffer_init(pool, &cmdbuf->vk, &cmd_buffer_ops, info->level);
   if (result != VK_SUCCESS) {
      /* vk base not initialized: free directly instead of going through
       * dzn_cmd_buffer_destroy(). */
      vk_free(&pool->alloc, cmdbuf);
      return result;
   }

   memset(&cmdbuf->state, 0, sizeof(cmdbuf->state));
   cmdbuf->state.multiview.num_views = 1;
   cmdbuf->state.multiview.view_mask = 1;
   for (uint32_t bucket = 0; bucket < DZN_INTERNAL_BUF_BUCKET_COUNT; ++bucket)
      list_inithead(&cmdbuf->internal_bufs[bucket]);
   util_dynarray_init(&cmdbuf->events.signal, NULL);
   util_dynarray_init(&cmdbuf->queries.reset, NULL);
   util_dynarray_init(&cmdbuf->queries.signal, NULL);
   dzn_descriptor_heap_pool_init(&cmdbuf->rtvs.pool, device,
                                 D3D12_DESCRIPTOR_HEAP_TYPE_RTV,
                                 false, &pool->alloc);
   dzn_descriptor_heap_pool_init(&cmdbuf->dsvs.pool, device,
                                 D3D12_DESCRIPTOR_HEAP_TYPE_DSV,
                                 false, &pool->alloc);
   dzn_descriptor_heap_pool_init(&cmdbuf->cbv_srv_uav_pool, device,
                                 D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
                                 true, &pool->alloc);
   dzn_descriptor_heap_pool_init(&cmdbuf->sampler_pool, device,
                                 D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
                                 true, &pool->alloc);

   cmdbuf->events.ht =
      _mesa_pointer_hash_table_create(NULL);
   cmdbuf->queries.ht =
      _mesa_pointer_hash_table_create(NULL);
   cmdbuf->transition_barriers =
      _mesa_pointer_hash_table_create(NULL);
   cmdbuf->rtvs.ht =
      _mesa_hash_table_create(NULL,
                              dzn_cmd_buffer_rtv_key_hash_function,
                              dzn_cmd_buffer_rtv_key_equals_function);
   cmdbuf->dsvs.ht =
      _mesa_hash_table_create(NULL,
                              dzn_cmd_buffer_dsv_key_hash_function,
                              dzn_cmd_buffer_dsv_key_equals_function);
   if (!cmdbuf->events.ht || !cmdbuf->queries.ht ||
       !cmdbuf->transition_barriers ||
       !cmdbuf->rtvs.ht || !cmdbuf->dsvs.ht) {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto out;
   }

   /* Only primary command buffers own a D3D12 allocator + list. */
   if (cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
      if (FAILED(ID3D12Device1_CreateCommandAllocator(device->dev, type,
                                                      &IID_ID3D12CommandAllocator,
                                                      (void **)&cmdbuf->cmdalloc))) {
         result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
         goto out;
      }

      if (FAILED(ID3D12Device4_CreateCommandList1(device->dev, 0, type,
                                                  D3D12_COMMAND_LIST_FLAG_NONE,
                                                  &IID_ID3D12GraphicsCommandList1,
                                                  (void **)&cmdbuf->cmdlist))) {
         result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
         goto out;
      }

      /* Newer list interfaces are optional; failures leave the pointers NULL. */
      (void)ID3D12GraphicsCommandList_QueryInterface(cmdbuf->cmdlist, &IID_ID3D12GraphicsCommandList8, (void **)&cmdbuf->cmdlist8);
      (void)ID3D12GraphicsCommandList_QueryInterface(cmdbuf->cmdlist, &IID_ID3D12GraphicsCommandList9, (void **)&cmdbuf->cmdlist9);
   }

   cmdbuf->type = type;
   cmdbuf->valid_sync = cmd_buffer_valid_sync[type];
   cmdbuf->valid_access = cmd_buffer_valid_access[type];
   cmdbuf->enhanced_barriers = pdev->options12.EnhancedBarriersSupported;

out:
   if (result != VK_SUCCESS)
      dzn_cmd_buffer_destroy(&cmdbuf->vk);
   else
      *out = dzn_cmd_buffer_to_handle(cmdbuf);

   return result;
}
682
683 VKAPI_ATTR VkResult VKAPI_CALL
dzn_AllocateCommandBuffers(VkDevice device,const VkCommandBufferAllocateInfo * pAllocateInfo,VkCommandBuffer * pCommandBuffers)684 dzn_AllocateCommandBuffers(VkDevice device,
685 const VkCommandBufferAllocateInfo *pAllocateInfo,
686 VkCommandBuffer *pCommandBuffers)
687 {
688 VK_FROM_HANDLE(dzn_device, dev, device);
689 VkResult result = VK_SUCCESS;
690 uint32_t i;
691
692 for (i = 0; i < pAllocateInfo->commandBufferCount; i++) {
693 result = dzn_cmd_buffer_create(pAllocateInfo,
694 &pCommandBuffers[i]);
695 if (result != VK_SUCCESS)
696 break;
697 }
698
699 if (result != VK_SUCCESS) {
700 dev->vk.dispatch_table.FreeCommandBuffers(device, pAllocateInfo->commandPool,
701 i, pCommandBuffers);
702 for (i = 0; i < pAllocateInfo->commandBufferCount; i++)
703 pCommandBuffers[i] = VK_NULL_HANDLE;
704 }
705
706 return result;
707 }
708
709 VKAPI_ATTR VkResult VKAPI_CALL
dzn_BeginCommandBuffer(VkCommandBuffer commandBuffer,const VkCommandBufferBeginInfo * info)710 dzn_BeginCommandBuffer(VkCommandBuffer commandBuffer,
711 const VkCommandBufferBeginInfo *info)
712 {
713 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
714 vk_command_buffer_begin(&cmdbuf->vk, info);
715 if (cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY)
716 ID3D12GraphicsCommandList1_Reset(cmdbuf->cmdlist, cmdbuf->cmdalloc, NULL);
717 return vk_command_buffer_get_record_result(&cmdbuf->vk);
718 }
719
/* Move the per-event set/reset states accumulated in events.ht into the
 * events.signal dynarray (consumed at submit time), then clear the table.
 * Skipped (table still cleared) when the command buffer already recorded
 * an error.
 */
static void
dzn_cmd_buffer_gather_events(struct dzn_cmd_buffer *cmdbuf)
{
   if (vk_command_buffer_has_error(&cmdbuf->vk))
      goto out;

   hash_table_foreach(cmdbuf->events.ht, he) {
      /* The state enum is stored inline in the hash-entry data pointer. */
      enum dzn_event_state state = (uintptr_t)he->data;

      struct dzn_cmd_event_signal signal = { (struct dzn_event *)he->key, state == DZN_EVENT_STATE_SET };
      struct dzn_cmd_event_signal *entry =
         util_dynarray_grow(&cmdbuf->events.signal, struct dzn_cmd_event_signal, 1);

      if (!entry) {
         vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
         break;
      }

      *entry = signal;
   }

out:
   _mesa_hash_table_clear(cmdbuf->events.ht, NULL);
}
744
/* Grow a dynarray-backed bitset so that `bit` is within the allocated words,
 * zero-initializing the new storage. No-op when `bit` already fits.
 * Returns VK_ERROR_OUT_OF_HOST_MEMORY (recorded on the command buffer) when
 * growing fails.
 */
static VkResult
dzn_cmd_buffer_dynbitset_reserve(struct dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, uint32_t bit)
{

   if (bit < util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS)
      return VK_SUCCESS;

   unsigned old_sz = array->size;
   /* Grow by enough extra words to cover `bit` (slightly over-allocates,
    * which is harmless since the new words are zeroed below). */
   void *ptr = util_dynarray_grow(array, BITSET_WORD, (bit + BITSET_WORDBITS) / BITSET_WORDBITS);
   if (!ptr)
      return vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);

   memset(ptr, 0, array->size - old_sz);
   return VK_SUCCESS;
}
760
761 static bool
dzn_cmd_buffer_dynbitset_test(struct util_dynarray * array,uint32_t bit)762 dzn_cmd_buffer_dynbitset_test(struct util_dynarray *array, uint32_t bit)
763 {
764 uint32_t nbits = util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS;
765
766 if (bit < nbits)
767 return BITSET_TEST(util_dynarray_element(array, BITSET_WORD, 0), bit);
768
769 return false;
770 }
771
772 static VkResult
dzn_cmd_buffer_dynbitset_set(struct dzn_cmd_buffer * cmdbuf,struct util_dynarray * array,uint32_t bit)773 dzn_cmd_buffer_dynbitset_set(struct dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, uint32_t bit)
774 {
775 VkResult result = dzn_cmd_buffer_dynbitset_reserve(cmdbuf, array, bit);
776 if (result != VK_SUCCESS)
777 return result;
778
779 BITSET_SET(util_dynarray_element(array, BITSET_WORD, 0), bit);
780 return VK_SUCCESS;
781 }
782
783 static void
dzn_cmd_buffer_dynbitset_clear(struct dzn_cmd_buffer * cmdbuf,struct util_dynarray * array,uint32_t bit)784 dzn_cmd_buffer_dynbitset_clear(struct dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, uint32_t bit)
785 {
786 if (bit >= util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS)
787 return;
788
789 BITSET_CLEAR(util_dynarray_element(array, BITSET_WORD, 0), bit);
790 }
791
792 static VkResult
dzn_cmd_buffer_dynbitset_set_range(struct dzn_cmd_buffer * cmdbuf,struct util_dynarray * array,uint32_t bit,uint32_t count)793 dzn_cmd_buffer_dynbitset_set_range(struct dzn_cmd_buffer *cmdbuf,
794 struct util_dynarray *array,
795 uint32_t bit, uint32_t count)
796 {
797 VkResult result = dzn_cmd_buffer_dynbitset_reserve(cmdbuf, array, bit + count - 1);
798 if (result != VK_SUCCESS)
799 return result;
800
801 BITSET_SET_RANGE(util_dynarray_element(array, BITSET_WORD, 0), bit, bit + count - 1);
802 return VK_SUCCESS;
803 }
804
805 static void
dzn_cmd_buffer_dynbitset_clear_range(struct dzn_cmd_buffer * cmdbuf,struct util_dynarray * array,uint32_t bit,uint32_t count)806 dzn_cmd_buffer_dynbitset_clear_range(struct dzn_cmd_buffer *cmdbuf,
807 struct util_dynarray *array,
808 uint32_t bit, uint32_t count)
809 {
810 uint32_t nbits = util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS;
811
812 if (!nbits)
813 return;
814
815 uint32_t end = MIN2(bit + count, nbits) - 1;
816
817 while (bit <= end) {
818 uint32_t subcount = MIN2(end + 1 - bit, 32 - (bit % 32));
819 BITSET_CLEAR_RANGE(util_dynarray_element(array, BITSET_WORD, 0), bit, bit + subcount - 1);
820 bit += subcount;
821 }
822 }
823
824 static struct dzn_cmd_buffer_query_pool_state *
dzn_cmd_buffer_create_query_pool_state(struct dzn_cmd_buffer * cmdbuf)825 dzn_cmd_buffer_create_query_pool_state(struct dzn_cmd_buffer *cmdbuf)
826 {
827 struct dzn_cmd_buffer_query_pool_state *state =
828 vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*state),
829 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
830 if (!state) {
831 vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
832 return NULL;
833 }
834
835 util_dynarray_init(&state->reset, NULL);
836 util_dynarray_init(&state->collect, NULL);
837 util_dynarray_init(&state->signal, NULL);
838 util_dynarray_init(&state->zero, NULL);
839 return state;
840 }
841
842 static void
dzn_cmd_buffer_destroy_query_pool_state(struct dzn_cmd_buffer * cmdbuf,struct dzn_cmd_buffer_query_pool_state * state)843 dzn_cmd_buffer_destroy_query_pool_state(struct dzn_cmd_buffer *cmdbuf,
844 struct dzn_cmd_buffer_query_pool_state *state)
845 {
846 util_dynarray_fini(&state->reset);
847 util_dynarray_fini(&state->collect);
848 util_dynarray_fini(&state->signal);
849 util_dynarray_fini(&state->zero);
850 vk_free(&cmdbuf->vk.pool->alloc, state);
851 }
852
853 static struct dzn_cmd_buffer_query_pool_state *
dzn_cmd_buffer_get_query_pool_state(struct dzn_cmd_buffer * cmdbuf,struct dzn_query_pool * qpool)854 dzn_cmd_buffer_get_query_pool_state(struct dzn_cmd_buffer *cmdbuf,
855 struct dzn_query_pool *qpool)
856 {
857 struct dzn_cmd_buffer_query_pool_state *state = NULL;
858 struct hash_entry *he =
859 _mesa_hash_table_search(cmdbuf->queries.ht, qpool);
860
861 if (!he) {
862 state = dzn_cmd_buffer_create_query_pool_state(cmdbuf);
863 if (!state)
864 return NULL;
865
866 he = _mesa_hash_table_insert(cmdbuf->queries.ht, qpool, state);
867 if (!he) {
868 dzn_cmd_buffer_destroy_query_pool_state(cmdbuf, state);
869 vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
870 return NULL;
871 }
872 } else {
873 state = he->data;
874 }
875
876 return state;
877 }
878
/* Flush the deferred query bookkeeping for @qpool into recorded GPU work:
 * resolve pending query results into qpool->resolve_buffer, zero out the
 * multi-view padding sections, copy the results into qpool->collect_buffer,
 * and write availability markers.  Queries handled here move from the
 * state->collect / state->zero pending sets into state->signal.
 */
static VkResult
dzn_cmd_buffer_collect_queries(struct dzn_cmd_buffer *cmdbuf,
                               const struct dzn_query_pool *qpool,
                               struct dzn_cmd_buffer_query_pool_state *state,
                               uint32_t first_query,
                               uint32_t query_count)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   uint32_t nbits_collect = util_dynarray_num_elements(&state->collect, BITSET_WORD) * BITSET_WORDBITS;
   uint32_t nbits_zero = util_dynarray_num_elements(&state->zero, BITSET_WORD) * BITSET_WORDBITS;
   uint32_t start, end;

   /* Nothing pending for this pool. */
   if (!nbits_collect && !nbits_zero)
      return VK_SUCCESS;

   /* Clamp the requested range to the bits actually allocated in the
    * pending bitsets. */
   query_count = MIN2(query_count, MAX2(nbits_collect, nbits_zero) - first_query);
   nbits_collect = MIN2(first_query + query_count, nbits_collect);
   nbits_zero = MIN2(first_query + query_count, nbits_zero);

   /* Make sure the signal bitset can hold every query we're about to mark
    * before recording any GPU work. */
   VkResult result =
      dzn_cmd_buffer_dynbitset_reserve(cmdbuf, &state->signal, first_query + query_count - 1);
   if (result != VK_SUCCESS)
      return result;

   if (cmdbuf->enhanced_barriers) {
      /* A global barrier is used because both resolve_buffer and collect_buffer might have been
       * copied from recently, and it's not worth the effort to track whether that's true. */
      dzn_cmd_buffer_global_barrier(cmdbuf,
                                    D3D12_BARRIER_SYNC_COPY, D3D12_BARRIER_SYNC_COPY,
                                    D3D12_BARRIER_ACCESS_COPY_SOURCE, D3D12_BARRIER_ACCESS_COPY_DEST);
   } else {
      dzn_cmd_buffer_flush_transition_barriers(cmdbuf, qpool->resolve_buffer, 0, 1);
   }

   /* Resolve the valid query regions into the resolve buffer */
   BITSET_WORD *collect =
      util_dynarray_element(&state->collect, BITSET_WORD, 0);

   /* One ResolveQueryData call per contiguous run of pending bits. */
   for (start = first_query, end = first_query,
        __bitset_next_range(&start, &end, collect, nbits_collect);
        start < nbits_collect;
        __bitset_next_range(&start, &end, collect, nbits_collect)) {
      ID3D12GraphicsCommandList1_ResolveQueryData(cmdbuf->cmdlist,
                                                  qpool->heap,
                                                  qpool->queries[start].type,
                                                  start, end - start,
                                                  qpool->resolve_buffer,
                                                  qpool->query_size * start);
   }

   /* Zero out sections of the resolve buffer that contain queries for multi-view rendering
    * for views other than the first one. */
   BITSET_WORD *zero =
      util_dynarray_element(&state->zero, BITSET_WORD, 0);
   const uint32_t step = DZN_QUERY_REFS_SECTION_SIZE / sizeof(uint64_t);

   for (start = first_query, end = first_query,
        __bitset_next_range(&start, &end, zero, nbits_zero);
        start < nbits_zero;
        __bitset_next_range(&start, &end, zero, nbits_zero)) {
      uint32_t count = end - start;

      /* Copy from the device's reference buffer in section-sized chunks.
       * NOTE(review): presumably the all-zeros region of device->queries.refs
       * is only DZN_QUERY_REFS_SECTION_SIZE bytes — confirm. */
      for (unsigned i = 0; i < count; i += step) {
         uint32_t sub_count = MIN2(step, count - i);

         ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist,
                                                     qpool->resolve_buffer,
                                                     dzn_query_pool_get_result_offset(qpool, start + i),
                                                     device->queries.refs,
                                                     DZN_QUERY_REFS_ALL_ZEROS_OFFSET,
                                                     qpool->query_size * sub_count);
      }
   }

   uint32_t offset = dzn_query_pool_get_result_offset(qpool, first_query);
   uint32_t size = dzn_query_pool_get_result_size(qpool, query_count);

   /* Turn the resolve buffer into a copy source before mirroring the
    * results into the collect buffer. */
   if (cmdbuf->enhanced_barriers) {
      dzn_cmd_buffer_buffer_barrier(cmdbuf,
                                    qpool->resolve_buffer,
                                    D3D12_BARRIER_SYNC_COPY, D3D12_BARRIER_SYNC_COPY,
                                    D3D12_BARRIER_ACCESS_COPY_DEST, D3D12_BARRIER_ACCESS_COPY_SOURCE);
   } else {
      dzn_cmd_buffer_queue_transition_barriers(cmdbuf, qpool->resolve_buffer,
                                               0, 1,
                                               D3D12_RESOURCE_STATE_COPY_DEST,
                                               D3D12_RESOURCE_STATE_COPY_SOURCE,
                                               DZN_QUEUE_TRANSITION_FLUSH);
   }

   ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist,
                                               qpool->collect_buffer, offset,
                                               qpool->resolve_buffer, offset,
                                               size);

   /* For every query handled above (both collect and zero passes), write
    * all-ones availability markers into the collect buffer, flag the query
    * as pending signal, and drop it from the pending set. */
   struct query_pass_data {
      struct util_dynarray *dynarray;
      BITSET_WORD *bitset;
      uint32_t count;
   } passes[] = {
      { &state->collect, collect, nbits_collect },
      { &state->zero, zero, nbits_zero }
   };
   for (uint32_t pass = 0; pass < ARRAY_SIZE(passes); ++pass) {
      BITSET_WORD *bitset = passes[pass].bitset;
      uint32_t nbits = passes[pass].count;
      for (start = first_query, end = first_query,
           __bitset_next_range(&start, &end, bitset, nbits);
           start < nbits;
           __bitset_next_range(&start, &end, bitset, nbits)) {
         uint32_t step = DZN_QUERY_REFS_SECTION_SIZE / sizeof(uint64_t);
         uint32_t count = end - start;

         for (unsigned i = 0; i < count; i += step) {
            uint32_t sub_count = MIN2(step, count - i);

            ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist,
                                                        qpool->collect_buffer,
                                                        dzn_query_pool_get_availability_offset(qpool, start + i),
                                                        device->queries.refs,
                                                        DZN_QUERY_REFS_ALL_ONES_OFFSET,
                                                        sizeof(uint64_t) * sub_count);
         }

         dzn_cmd_buffer_dynbitset_set_range(cmdbuf, &state->signal, start, count);
         dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, passes[pass].dynarray, start, count);
      }
   }

   /* Legacy barriers: return the resolve buffer to COPY_DEST, the state the
    * resolve step above expects to find it in next time. */
   if (!cmdbuf->enhanced_barriers) {
      dzn_cmd_buffer_queue_transition_barriers(cmdbuf, qpool->resolve_buffer,
                                               0, 1,
                                               D3D12_RESOURCE_STATE_COPY_SOURCE,
                                               D3D12_RESOURCE_STATE_COPY_DEST,
                                               0);
   }
   return VK_SUCCESS;
}
1017
1018 static VkResult
dzn_cmd_buffer_collect_query_ops(struct dzn_cmd_buffer * cmdbuf,struct dzn_query_pool * qpool,struct util_dynarray * bitset_array,struct util_dynarray * ops_array)1019 dzn_cmd_buffer_collect_query_ops(struct dzn_cmd_buffer *cmdbuf,
1020 struct dzn_query_pool *qpool,
1021 struct util_dynarray *bitset_array,
1022 struct util_dynarray *ops_array)
1023 {
1024 BITSET_WORD *bitset = util_dynarray_element(bitset_array, BITSET_WORD, 0);
1025 uint32_t nbits = util_dynarray_num_elements(bitset_array, BITSET_WORD) * BITSET_WORDBITS;
1026 uint32_t start, end;
1027
1028 BITSET_FOREACH_RANGE(start, end, bitset, nbits) {
1029 struct dzn_cmd_buffer_query_range range = { qpool, start, end - start };
1030 struct dzn_cmd_buffer_query_range *entry =
1031 util_dynarray_grow(ops_array, struct dzn_cmd_buffer_query_range, 1);
1032
1033 if (!entry)
1034 return vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
1035
1036 *entry = range;
1037 }
1038
1039 return VK_SUCCESS;
1040 }
1041
1042 static VkResult
dzn_cmd_buffer_gather_queries(struct dzn_cmd_buffer * cmdbuf)1043 dzn_cmd_buffer_gather_queries(struct dzn_cmd_buffer *cmdbuf)
1044 {
1045 hash_table_foreach(cmdbuf->queries.ht, he) {
1046 struct dzn_query_pool *qpool = (struct dzn_query_pool *)he->key;
1047 struct dzn_cmd_buffer_query_pool_state *state = he->data;
1048 VkResult result =
1049 dzn_cmd_buffer_collect_queries(cmdbuf, qpool, state, 0, qpool->query_count);
1050 if (result != VK_SUCCESS)
1051 return result;
1052
1053 result = dzn_cmd_buffer_collect_query_ops(cmdbuf, qpool, &state->reset, &cmdbuf->queries.reset);
1054 if (result != VK_SUCCESS)
1055 return result;
1056
1057 result = dzn_cmd_buffer_collect_query_ops(cmdbuf, qpool, &state->signal, &cmdbuf->queries.signal);
1058 if (result != VK_SUCCESS)
1059 return result;
1060 }
1061
1062 return VK_SUCCESS;
1063 }
1064
1065 VKAPI_ATTR VkResult VKAPI_CALL
dzn_EndCommandBuffer(VkCommandBuffer commandBuffer)1066 dzn_EndCommandBuffer(VkCommandBuffer commandBuffer)
1067 {
1068 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
1069
1070 if (cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
1071 dzn_cmd_buffer_gather_events(cmdbuf);
1072 dzn_cmd_buffer_gather_queries(cmdbuf);
1073 HRESULT hres = ID3D12GraphicsCommandList1_Close(cmdbuf->cmdlist);
1074 if (FAILED(hres))
1075 vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
1076 }
1077
1078 return vk_command_buffer_end(&cmdbuf->vk);
1079 }
1080
1081 VKAPI_ATTR void VKAPI_CALL
dzn_CmdPipelineBarrier2(VkCommandBuffer commandBuffer,const VkDependencyInfo * info)1082 dzn_CmdPipelineBarrier2(VkCommandBuffer commandBuffer,
1083 const VkDependencyInfo *info)
1084 {
1085 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
1086
1087 bool execution_barrier =
1088 !info->memoryBarrierCount &&
1089 !info->bufferMemoryBarrierCount &&
1090 !info->imageMemoryBarrierCount;
1091
1092 if (execution_barrier) {
1093 /* Execution barrier can be emulated with a NULL UAV barrier (AKA
1094 * pipeline flush). That's the best we can do with the standard D3D12
1095 * barrier API.
1096 */
1097 D3D12_RESOURCE_BARRIER barrier = {
1098 .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV,
1099 .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
1100 .UAV = { .pResource = NULL },
1101 };
1102
1103 ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier);
1104 }
1105
1106 /* Global memory barriers can be emulated with NULL UAV/Aliasing barriers.
1107 * Scopes are not taken into account, but that's inherent to the current
1108 * D3D12 barrier API.
1109 */
1110 if (info->memoryBarrierCount) {
1111 D3D12_RESOURCE_BARRIER barriers[2] = { 0 };
1112
1113 barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
1114 barriers[0].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
1115 barriers[0].UAV.pResource = NULL;
1116 barriers[1].Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING;
1117 barriers[1].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
1118 barriers[1].Aliasing.pResourceBefore = NULL;
1119 barriers[1].Aliasing.pResourceAfter = NULL;
1120 ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 2, barriers);
1121 }
1122
1123 for (uint32_t i = 0; i < info->bufferMemoryBarrierCount; i++) {
1124 VK_FROM_HANDLE(dzn_buffer, buf, info->pBufferMemoryBarriers[i].buffer);
1125 D3D12_RESOURCE_BARRIER barrier = { 0 };
1126
1127 /* UAV are used only for storage buffers, skip all other buffers. */
1128 if (!(buf->usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT))
1129 continue;
1130
1131 barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
1132 barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
1133 barrier.UAV.pResource = buf->res;
1134 ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier);
1135 }
1136
1137 for (uint32_t i = 0; i < info->imageMemoryBarrierCount; i++) {
1138 const VkImageMemoryBarrier2 *ibarrier = &info->pImageMemoryBarriers[i];
1139 const VkImageSubresourceRange *range = &ibarrier->subresourceRange;
1140 VK_FROM_HANDLE(dzn_image, image, ibarrier->image);
1141
1142 VkImageLayout old_layout = ibarrier->oldLayout;
1143 VkImageLayout new_layout = ibarrier->newLayout;
1144 if ((image->vk.usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) &&
1145 old_layout == VK_IMAGE_LAYOUT_GENERAL &&
1146 (ibarrier->srcAccessMask & VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT))
1147 old_layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
1148 if ((image->vk.usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) &&
1149 new_layout == VK_IMAGE_LAYOUT_GENERAL &&
1150 (ibarrier->dstAccessMask & VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT))
1151 new_layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
1152 dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
1153 old_layout,
1154 new_layout,
1155 DZN_QUEUE_TRANSITION_FLUSH);
1156 }
1157 }
1158
1159 /* A straightforward translation of the Vulkan sync flags to D3D sync flags */
1160 static D3D12_BARRIER_SYNC
translate_sync(VkPipelineStageFlags2 flags,bool before)1161 translate_sync(VkPipelineStageFlags2 flags, bool before)
1162 {
1163 if (!before && (flags & VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT))
1164 return D3D12_BARRIER_SYNC_ALL;
1165 else if (before && (flags & VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT))
1166 return D3D12_BARRIER_SYNC_ALL;
1167
1168 if (flags & (VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT |
1169 /* Theoretically transfer should be less, but it encompasses blit
1170 * (which can be draws) and clears, so bloat it up to everything. */
1171 VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT |
1172 VK_PIPELINE_STAGE_2_BLIT_BIT))
1173 return D3D12_BARRIER_SYNC_ALL;
1174
1175 D3D12_BARRIER_SYNC ret = D3D12_BARRIER_SYNC_NONE;
1176 if (flags & (VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT |
1177 VK_PIPELINE_STAGE_2_INDEX_INPUT_BIT |
1178 VK_PIPELINE_STAGE_2_VERTEX_ATTRIBUTE_INPUT_BIT))
1179 ret |= D3D12_BARRIER_SYNC_INDEX_INPUT;
1180 if (flags & VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT)
1181 ret |= D3D12_BARRIER_SYNC_VERTEX_SHADING;
1182 if (flags & (VK_PIPELINE_STAGE_2_TESSELLATION_CONTROL_SHADER_BIT |
1183 VK_PIPELINE_STAGE_2_TESSELLATION_EVALUATION_SHADER_BIT |
1184 VK_PIPELINE_STAGE_2_GEOMETRY_SHADER_BIT |
1185 VK_PIPELINE_STAGE_2_PRE_RASTERIZATION_SHADERS_BIT |
1186 VK_PIPELINE_STAGE_2_TRANSFORM_FEEDBACK_BIT_EXT |
1187 VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_EXT |
1188 VK_PIPELINE_STAGE_2_MESH_SHADER_BIT_EXT))
1189 ret |= D3D12_BARRIER_SYNC_NON_PIXEL_SHADING;
1190 if (flags & (VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT |
1191 VK_PIPELINE_STAGE_2_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR))
1192 ret |= D3D12_BARRIER_SYNC_PIXEL_SHADING;
1193 if (flags & (VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT |
1194 VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT))
1195 ret |= D3D12_BARRIER_SYNC_DEPTH_STENCIL;
1196 if (flags & VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT)
1197 ret |= D3D12_BARRIER_SYNC_RENDER_TARGET;
1198 if (flags & VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)
1199 ret |= D3D12_BARRIER_SYNC_COMPUTE_SHADING;
1200 if (flags & VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT)
1201 ret |= D3D12_BARRIER_SYNC_DRAW;
1202 if (flags & VK_PIPELINE_STAGE_2_COPY_BIT)
1203 ret |= D3D12_BARRIER_SYNC_COPY;
1204 if (flags & VK_PIPELINE_STAGE_2_RESOLVE_BIT)
1205 ret |= D3D12_BARRIER_SYNC_RESOLVE;
1206 if (flags & VK_PIPELINE_STAGE_2_CLEAR_BIT)
1207 ret |= D3D12_BARRIER_SYNC_RENDER_TARGET |
1208 D3D12_BARRIER_SYNC_DEPTH_STENCIL |
1209 D3D12_BARRIER_SYNC_CLEAR_UNORDERED_ACCESS_VIEW;
1210 if (flags & VK_PIPELINE_STAGE_2_CONDITIONAL_RENDERING_BIT_EXT)
1211 ret |= D3D12_BARRIER_SYNC_PREDICATION;
1212 if (flags & (VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT |
1213 VK_PIPELINE_STAGE_2_COMMAND_PREPROCESS_BIT_NV))
1214 ret |= D3D12_BARRIER_SYNC_EXECUTE_INDIRECT;
1215 if (flags & VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR)
1216 ret |= D3D12_BARRIER_SYNC_BUILD_RAYTRACING_ACCELERATION_STRUCTURE;
1217 if (flags & VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR)
1218 ret |= D3D12_BARRIER_SYNC_RAYTRACING;
1219 if (flags & VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_COPY_BIT_KHR)
1220 ret |= D3D12_BARRIER_SYNC_COPY_RAYTRACING_ACCELERATION_STRUCTURE;
1221
1222 return ret;
1223 }
1224
1225 /* A straightforward translation of Vulkan access to D3D access */
1226 static D3D12_BARRIER_ACCESS
translate_access(VkAccessFlags2 flags)1227 translate_access(VkAccessFlags2 flags)
1228 {
1229 D3D12_BARRIER_ACCESS ret = D3D12_BARRIER_ACCESS_COMMON;
1230 if (flags & VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT)
1231 ret |= D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT;
1232 if (flags & VK_ACCESS_2_INDEX_READ_BIT)
1233 ret |= D3D12_BARRIER_ACCESS_INDEX_BUFFER;
1234 if (flags & VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT)
1235 ret |= D3D12_BARRIER_ACCESS_VERTEX_BUFFER;
1236 if (flags & VK_ACCESS_2_UNIFORM_READ_BIT)
1237 ret |= D3D12_BARRIER_ACCESS_CONSTANT_BUFFER;
1238 if (flags & (VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT |
1239 VK_ACCESS_2_SHADER_SAMPLED_READ_BIT))
1240 ret |= D3D12_BARRIER_ACCESS_SHADER_RESOURCE;
1241 if (flags & VK_ACCESS_2_SHADER_READ_BIT)
1242 ret |= D3D12_BARRIER_ACCESS_CONSTANT_BUFFER |
1243 D3D12_BARRIER_ACCESS_SHADER_RESOURCE |
1244 D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
1245 if (flags & (VK_ACCESS_2_SHADER_WRITE_BIT |
1246 VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
1247 VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT))
1248 ret |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
1249 if (flags & VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT)
1250 ret |= D3D12_BARRIER_ACCESS_RENDER_TARGET |
1251 D3D12_BARRIER_ACCESS_RESOLVE_SOURCE;
1252 if (flags & VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT)
1253 ret |= D3D12_BARRIER_ACCESS_RENDER_TARGET |
1254 D3D12_BARRIER_ACCESS_RESOLVE_DEST;
1255 if (flags & VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT)
1256 ret |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ;
1257 if (flags & VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT)
1258 ret |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE;
1259 if (flags & VK_ACCESS_2_TRANSFER_READ_BIT)
1260 ret |= D3D12_BARRIER_ACCESS_COPY_SOURCE |
1261 D3D12_BARRIER_ACCESS_RESOLVE_SOURCE;
1262 if (flags & VK_ACCESS_2_TRANSFER_WRITE_BIT)
1263 ret |= D3D12_BARRIER_ACCESS_RENDER_TARGET |
1264 D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE |
1265 D3D12_BARRIER_ACCESS_UNORDERED_ACCESS |
1266 D3D12_BARRIER_ACCESS_COPY_DEST |
1267 D3D12_BARRIER_ACCESS_RESOLVE_DEST;
1268 if (flags & VK_ACCESS_2_MEMORY_READ_BIT)
1269 ret |= D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT |
1270 D3D12_BARRIER_ACCESS_INDEX_BUFFER |
1271 D3D12_BARRIER_ACCESS_VERTEX_BUFFER |
1272 D3D12_BARRIER_ACCESS_CONSTANT_BUFFER |
1273 D3D12_BARRIER_ACCESS_SHADER_RESOURCE |
1274 D3D12_BARRIER_ACCESS_UNORDERED_ACCESS |
1275 D3D12_BARRIER_ACCESS_RENDER_TARGET |
1276 D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ |
1277 D3D12_BARRIER_ACCESS_COPY_SOURCE |
1278 D3D12_BARRIER_ACCESS_RESOLVE_SOURCE;
1279 if (flags & VK_ACCESS_2_MEMORY_WRITE_BIT)
1280 ret |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS |
1281 D3D12_BARRIER_ACCESS_RENDER_TARGET |
1282 D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE |
1283 D3D12_BARRIER_ACCESS_COPY_DEST |
1284 D3D12_BARRIER_ACCESS_RESOLVE_DEST;
1285 if (flags & (VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT |
1286 VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT |
1287 VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT))
1288 ret |= D3D12_BARRIER_ACCESS_STREAM_OUTPUT;
1289 if (flags & VK_ACCESS_2_CONDITIONAL_RENDERING_READ_BIT_EXT)
1290 ret |= D3D12_BARRIER_ACCESS_PREDICATION;
1291 if (flags & VK_ACCESS_2_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT_KHR)
1292 ret |= D3D12_BARRIER_ACCESS_SHADING_RATE_SOURCE;
1293 if (flags & VK_ACCESS_2_ACCELERATION_STRUCTURE_READ_BIT_KHR)
1294 ret |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ;
1295 if (flags & VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR)
1296 ret |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_WRITE;
1297 return ret;
1298 }
1299
1300 /* For texture barriers, D3D will validate that the access flags used are actually
1301 * things that were valid for the specified layout. Use the mask returned from here
1302 * to scope down the set of app-provided access flags to make validation happy. */
1303 static D3D12_BARRIER_ACCESS
valid_access_for_layout(D3D12_BARRIER_LAYOUT layout)1304 valid_access_for_layout(D3D12_BARRIER_LAYOUT layout)
1305 {
1306 switch (layout) {
1307 case D3D12_BARRIER_LAYOUT_UNDEFINED:
1308 return D3D12_BARRIER_ACCESS_NO_ACCESS;
1309 case D3D12_BARRIER_LAYOUT_COMMON:
1310 return D3D12_BARRIER_ACCESS_SHADER_RESOURCE |
1311 D3D12_BARRIER_ACCESS_COPY_SOURCE |
1312 D3D12_BARRIER_ACCESS_COPY_DEST;
1313 case D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON:
1314 case D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON:
1315 return D3D12_BARRIER_ACCESS_SHADER_RESOURCE |
1316 D3D12_BARRIER_ACCESS_COPY_SOURCE |
1317 D3D12_BARRIER_ACCESS_COPY_DEST |
1318 D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
1319 case D3D12_BARRIER_LAYOUT_GENERIC_READ:
1320 case D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ:
1321 return D3D12_BARRIER_ACCESS_SHADER_RESOURCE |
1322 D3D12_BARRIER_ACCESS_COPY_SOURCE |
1323 D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ |
1324 D3D12_BARRIER_ACCESS_RESOLVE_SOURCE |
1325 D3D12_BARRIER_ACCESS_SHADING_RATE_SOURCE;
1326 case D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_GENERIC_READ:
1327 return D3D12_BARRIER_ACCESS_SHADER_RESOURCE|
1328 D3D12_BARRIER_ACCESS_COPY_SOURCE;
1329 case D3D12_BARRIER_LAYOUT_RENDER_TARGET:
1330 return D3D12_BARRIER_ACCESS_RENDER_TARGET;
1331 case D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS:
1332 case D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_UNORDERED_ACCESS:
1333 case D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_UNORDERED_ACCESS:
1334 return D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
1335 case D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE:
1336 return D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE;
1337 case D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_READ:
1338 return D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ;
1339 case D3D12_BARRIER_LAYOUT_SHADER_RESOURCE:
1340 case D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_SHADER_RESOURCE:
1341 case D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_SHADER_RESOURCE:
1342 return D3D12_BARRIER_ACCESS_SHADER_RESOURCE;
1343 case D3D12_BARRIER_LAYOUT_COPY_SOURCE:
1344 case D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_SOURCE:
1345 case D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_SOURCE:
1346 return D3D12_BARRIER_ACCESS_COPY_SOURCE;
1347 case D3D12_BARRIER_LAYOUT_COPY_DEST:
1348 case D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_DEST:
1349 case D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_DEST:
1350 return D3D12_BARRIER_ACCESS_COPY_DEST;
1351 case D3D12_BARRIER_LAYOUT_RESOLVE_SOURCE:
1352 return D3D12_BARRIER_ACCESS_RESOLVE_SOURCE;
1353 case D3D12_BARRIER_LAYOUT_RESOLVE_DEST:
1354 return D3D12_BARRIER_ACCESS_RESOLVE_DEST;
1355 case D3D12_BARRIER_LAYOUT_SHADING_RATE_SOURCE:
1356 return D3D12_BARRIER_ACCESS_SHADING_RATE_SOURCE;
1357 default:
1358 return D3D12_BARRIER_ACCESS_COMMON;
1359 }
1360 }
1361
1362 /* Similar to layout -> access, there's access -> sync validation too. D3D
1363 * doesn't like over-synchronizing if you weren't accessing a resource through
1364 * a relevant access bit. */
1365 static D3D12_BARRIER_SYNC
adjust_sync_for_access(D3D12_BARRIER_SYNC in,D3D12_BARRIER_ACCESS access)1366 adjust_sync_for_access(D3D12_BARRIER_SYNC in, D3D12_BARRIER_ACCESS access)
1367 {
1368 /* NO_ACCESS must not add sync */
1369 if (access == D3D12_BARRIER_ACCESS_NO_ACCESS)
1370 return D3D12_BARRIER_SYNC_NONE;
1371 /* SYNC_ALL can be used with any access bits */
1372 if (in == D3D12_BARRIER_SYNC_ALL)
1373 return in;
1374 /* ACCESS_COMMON needs at least one sync bit */
1375 if (access == D3D12_BARRIER_ACCESS_COMMON)
1376 return in == D3D12_BARRIER_SYNC_NONE ? D3D12_BARRIER_SYNC_ALL : in;
1377
1378 D3D12_BARRIER_SYNC out = D3D12_BARRIER_SYNC_NONE;
1379 if (access & D3D12_BARRIER_ACCESS_VERTEX_BUFFER)
1380 out |= in & (D3D12_BARRIER_SYNC_VERTEX_SHADING |
1381 D3D12_BARRIER_SYNC_DRAW |
1382 D3D12_BARRIER_SYNC_ALL_SHADING |
1383 D3D12_BARRIER_SYNC_NON_PIXEL_SHADING);
1384 if (access & D3D12_BARRIER_ACCESS_CONSTANT_BUFFER)
1385 out |= in & (D3D12_BARRIER_SYNC_VERTEX_SHADING |
1386 D3D12_BARRIER_SYNC_PIXEL_SHADING |
1387 D3D12_BARRIER_SYNC_COMPUTE_SHADING |
1388 D3D12_BARRIER_SYNC_DRAW |
1389 D3D12_BARRIER_SYNC_ALL_SHADING |
1390 D3D12_BARRIER_SYNC_NON_PIXEL_SHADING);
1391 if (access & D3D12_BARRIER_ACCESS_INDEX_BUFFER)
1392 out |= in & D3D12_BARRIER_SYNC_INDEX_INPUT;
1393 if (access & D3D12_BARRIER_ACCESS_RENDER_TARGET)
1394 out |= in & D3D12_BARRIER_SYNC_RENDER_TARGET;
1395 if (access & D3D12_BARRIER_ACCESS_UNORDERED_ACCESS)
1396 out |= in & (D3D12_BARRIER_SYNC_VERTEX_SHADING |
1397 D3D12_BARRIER_SYNC_PIXEL_SHADING |
1398 D3D12_BARRIER_SYNC_COMPUTE_SHADING |
1399 D3D12_BARRIER_SYNC_DRAW |
1400 D3D12_BARRIER_SYNC_ALL_SHADING |
1401 D3D12_BARRIER_SYNC_NON_PIXEL_SHADING);
1402 if (access & D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE)
1403 out |= in & (D3D12_BARRIER_SYNC_DRAW |
1404 D3D12_BARRIER_SYNC_DEPTH_STENCIL);
1405 if (access & D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ)
1406 out |= in & (D3D12_BARRIER_SYNC_DRAW |
1407 D3D12_BARRIER_SYNC_DEPTH_STENCIL);
1408 if (access & D3D12_BARRIER_ACCESS_SHADER_RESOURCE)
1409 out |= in & (D3D12_BARRIER_SYNC_VERTEX_SHADING |
1410 D3D12_BARRIER_SYNC_PIXEL_SHADING |
1411 D3D12_BARRIER_SYNC_COMPUTE_SHADING |
1412 D3D12_BARRIER_SYNC_DRAW |
1413 D3D12_BARRIER_SYNC_ALL_SHADING |
1414 D3D12_BARRIER_SYNC_NON_PIXEL_SHADING);
1415 if (access & D3D12_BARRIER_ACCESS_STREAM_OUTPUT)
1416 out |= in & (D3D12_BARRIER_SYNC_VERTEX_SHADING |
1417 D3D12_BARRIER_SYNC_DRAW |
1418 D3D12_BARRIER_SYNC_ALL_SHADING |
1419 D3D12_BARRIER_SYNC_NON_PIXEL_SHADING);
1420 if (access & D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT)
1421 out |= in & (D3D12_BARRIER_SYNC_DRAW |
1422 D3D12_BARRIER_SYNC_EXECUTE_INDIRECT);
1423 if (access & D3D12_BARRIER_ACCESS_PREDICATION)
1424 out |= in & (D3D12_BARRIER_SYNC_DRAW |
1425 D3D12_BARRIER_SYNC_EXECUTE_INDIRECT);
1426 if (access & (D3D12_BARRIER_ACCESS_COPY_DEST | D3D12_BARRIER_ACCESS_COPY_SOURCE))
1427 out |= in & D3D12_BARRIER_SYNC_COPY;
1428 if (access & (D3D12_BARRIER_ACCESS_RESOLVE_DEST | D3D12_BARRIER_ACCESS_RESOLVE_SOURCE))
1429 out |= in & D3D12_BARRIER_SYNC_RESOLVE;
1430 if (access & D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ)
1431 out |= in & (D3D12_BARRIER_SYNC_COMPUTE_SHADING |
1432 D3D12_BARRIER_SYNC_RAYTRACING |
1433 D3D12_BARRIER_SYNC_ALL_SHADING |
1434 D3D12_BARRIER_SYNC_BUILD_RAYTRACING_ACCELERATION_STRUCTURE |
1435 D3D12_BARRIER_SYNC_COPY_RAYTRACING_ACCELERATION_STRUCTURE |
1436 D3D12_BARRIER_SYNC_EMIT_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO |
1437 D3D12_BARRIER_SYNC_NON_PIXEL_SHADING);
1438 if (access & D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_WRITE)
1439 out |= in & (D3D12_BARRIER_SYNC_COMPUTE_SHADING |
1440 D3D12_BARRIER_SYNC_RAYTRACING |
1441 D3D12_BARRIER_SYNC_ALL_SHADING |
1442 D3D12_BARRIER_SYNC_BUILD_RAYTRACING_ACCELERATION_STRUCTURE |
1443 D3D12_BARRIER_SYNC_COPY_RAYTRACING_ACCELERATION_STRUCTURE |
1444 D3D12_BARRIER_SYNC_NON_PIXEL_SHADING);
1445 if (access & D3D12_BARRIER_ACCESS_SHADING_RATE_SOURCE)
1446 out |= in & (D3D12_BARRIER_SYNC_PIXEL_SHADING |
1447 D3D12_BARRIER_SYNC_ALL_SHADING);
1448 /* SYNC_NONE means it won't be accessed, so if we can't express the app's original intent
1449 * here, then be conservative and over-sync. */
1450 return out ? out : D3D12_BARRIER_SYNC_ALL;
1451 }
1452
1453 VKAPI_ATTR void VKAPI_CALL
dzn_CmdPipelineBarrier2_enhanced(VkCommandBuffer commandBuffer,const VkDependencyInfo * info)1454 dzn_CmdPipelineBarrier2_enhanced(VkCommandBuffer commandBuffer,
1455 const VkDependencyInfo *info)
1456 {
1457 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
1458
1459 uint32_t num_barrier_groups = 0;
1460 D3D12_BARRIER_GROUP groups[3];
1461
1462 /* Some input image barriers will expand into 2 outputs, and some will turn into buffer barriers.
1463 * Do a first pass and count how much we need to allocate. */
1464 uint32_t num_image_barriers = 0;
1465 uint32_t num_buffer_barriers = info->bufferMemoryBarrierCount;
1466 for (uint32_t i = 0; i < info->imageMemoryBarrierCount; ++i) {
1467 VK_FROM_HANDLE(dzn_image, image, info->pImageMemoryBarriers[i].image);
1468 bool need_separate_aspect_barriers =
1469 info->pImageMemoryBarriers[i].oldLayout == VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL ||
1470 info->pImageMemoryBarriers[i].oldLayout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL ||
1471 info->pImageMemoryBarriers[i].newLayout == VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL ||
1472 info->pImageMemoryBarriers[i].newLayout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL;
1473 if (image->vk.tiling == VK_IMAGE_TILING_LINEAR)
1474 ++num_buffer_barriers;
1475 else
1476 num_image_barriers += need_separate_aspect_barriers ? 2 : 1;
1477 }
1478
1479 VK_MULTIALLOC(ma);
1480 VK_MULTIALLOC_DECL(&ma, D3D12_GLOBAL_BARRIER, global_barriers, info->memoryBarrierCount);
1481 VK_MULTIALLOC_DECL(&ma, D3D12_BUFFER_BARRIER, buffer_barriers, num_buffer_barriers);
1482 VK_MULTIALLOC_DECL(&ma, D3D12_TEXTURE_BARRIER, texture_barriers, num_image_barriers);
1483
1484 if (ma.size == 0)
1485 return;
1486
1487 if (!vk_multialloc_alloc(&ma, &cmdbuf->vk.pool->alloc,
1488 VK_SYSTEM_ALLOCATION_SCOPE_COMMAND)) {
1489 vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
1490 return;
1491 }
1492
1493 if (info->memoryBarrierCount) {
1494 groups[num_barrier_groups].NumBarriers = info->memoryBarrierCount;
1495 groups[num_barrier_groups].Type = D3D12_BARRIER_TYPE_GLOBAL;
1496 groups[num_barrier_groups].pGlobalBarriers = global_barriers;
1497 ++num_barrier_groups;
1498 for (uint32_t i = 0; i < info->memoryBarrierCount; ++i) {
1499 global_barriers[i].SyncBefore = translate_sync(info->pMemoryBarriers[i].srcStageMask, true) & cmdbuf->valid_sync;
1500 global_barriers[i].SyncAfter = translate_sync(info->pMemoryBarriers[i].dstStageMask, false) & cmdbuf->valid_sync;
1501 global_barriers[i].AccessBefore = global_barriers[i].SyncBefore == D3D12_BARRIER_SYNC_NONE ?
1502 D3D12_BARRIER_ACCESS_NO_ACCESS :
1503 translate_access(info->pMemoryBarriers[i].srcAccessMask) & cmdbuf->valid_access;
1504 global_barriers[i].AccessAfter = global_barriers[i].SyncAfter == D3D12_BARRIER_SYNC_NONE ?
1505 D3D12_BARRIER_ACCESS_NO_ACCESS :
1506 translate_access(info->pMemoryBarriers[i].dstAccessMask) & cmdbuf->valid_access;
1507
1508 if ((global_barriers[i].AccessBefore & D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE) &&
1509 (global_barriers[i].AccessAfter == D3D12_BARRIER_ACCESS_COMMON ||
1510 global_barriers[i].AccessAfter & ~(D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE | D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ))) {
1511 /* D3D validates against a global barrier attempting to transition from depth write to something other than depth write,
1512 * but this is a D3D bug; it's absolutely valid to use a global barrier to transition *multiple* types of accesses.
1513 * The validation does say that you'd need an image barrier to actually get that kind of transition, which is still correct,
1514 * so just remove this bit under the assumption that a dedicated image barrier will be submitted to do any necessary work later. */
1515 global_barriers[i].AccessBefore &= ~D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE;
1516 }
1517 if (global_barriers[i].AccessBefore == D3D12_BARRIER_ACCESS_COMMON)
1518 global_barriers[i].AccessAfter = D3D12_BARRIER_ACCESS_COMMON;
1519 global_barriers[i].SyncBefore = adjust_sync_for_access(global_barriers[i].SyncBefore, global_barriers[i].AccessBefore);
1520 global_barriers[i].SyncAfter = adjust_sync_for_access(global_barriers[i].SyncAfter, global_barriers[i].AccessAfter);
1521 }
1522 }
1523
1524 if (num_buffer_barriers) {
1525 groups[num_barrier_groups].NumBarriers = num_buffer_barriers;
1526 groups[num_barrier_groups].Type = D3D12_BARRIER_TYPE_BUFFER;
1527 groups[num_barrier_groups].pBufferBarriers = buffer_barriers;
1528 ++num_barrier_groups;
1529 for (uint32_t i = 0; i < info->bufferMemoryBarrierCount; ++i) {
1530 VK_FROM_HANDLE(dzn_buffer, buf, info->pBufferMemoryBarriers[i].buffer);
1531 buffer_barriers[i].SyncBefore = translate_sync(info->pBufferMemoryBarriers[i].srcStageMask, true) & cmdbuf->valid_sync;
1532 buffer_barriers[i].SyncAfter = translate_sync(info->pBufferMemoryBarriers[i].dstStageMask, false) & cmdbuf->valid_sync;
1533 buffer_barriers[i].AccessBefore = buffer_barriers[i].SyncBefore == D3D12_BARRIER_SYNC_NONE ?
1534 D3D12_BARRIER_ACCESS_NO_ACCESS :
1535 translate_access(info->pBufferMemoryBarriers[i].srcAccessMask) & cmdbuf->valid_access & buf->valid_access;
1536 buffer_barriers[i].AccessAfter = buffer_barriers[i].SyncAfter == D3D12_BARRIER_SYNC_NONE ?
1537 D3D12_BARRIER_ACCESS_NO_ACCESS :
1538 translate_access(info->pBufferMemoryBarriers[i].dstAccessMask) & cmdbuf->valid_access & buf->valid_access;
1539 buffer_barriers[i].SyncBefore = adjust_sync_for_access(buffer_barriers[i].SyncBefore, buffer_barriers[i].AccessBefore);
1540 buffer_barriers[i].SyncAfter = adjust_sync_for_access(buffer_barriers[i].SyncAfter, buffer_barriers[i].AccessAfter);
1541 buffer_barriers[i].pResource = buf->res;
1542 buffer_barriers[i].Offset = 0;
1543 buffer_barriers[i].Size = UINT64_MAX;
1544 }
1545 }
1546
1547 if (num_image_barriers) {
1548 groups[num_barrier_groups].Type = D3D12_BARRIER_TYPE_TEXTURE;
1549 groups[num_barrier_groups].pTextureBarriers = texture_barriers;
1550 groups[num_barrier_groups].NumBarriers = num_image_barriers;
1551 ++num_barrier_groups;
1552 }
1553
1554 uint32_t tbar = 0;
1555 uint32_t bbar = info->bufferMemoryBarrierCount;
1556 for (uint32_t i = 0; i < info->imageMemoryBarrierCount; ++i) {
1557 VK_FROM_HANDLE(dzn_image, image, info->pImageMemoryBarriers[i].image);
1558
1559 if (image->vk.tiling == VK_IMAGE_TILING_LINEAR) {
1560 /* Barriers on linear images turn into buffer barriers */
1561 buffer_barriers[bbar].SyncBefore = translate_sync(info->pImageMemoryBarriers[i].srcStageMask, true) & cmdbuf->valid_sync;
1562 buffer_barriers[bbar].SyncAfter = translate_sync(info->pImageMemoryBarriers[i].dstStageMask, false) & cmdbuf->valid_sync;
1563 buffer_barriers[bbar].AccessBefore = buffer_barriers[bbar].SyncBefore == D3D12_BARRIER_SYNC_NONE ?
1564 D3D12_BARRIER_ACCESS_NO_ACCESS :
1565 translate_access(info->pImageMemoryBarriers[i].srcAccessMask) & cmdbuf->valid_access & image->valid_access;
1566 buffer_barriers[bbar].AccessAfter = buffer_barriers[bbar].SyncAfter == D3D12_BARRIER_SYNC_NONE ?
1567 D3D12_BARRIER_ACCESS_NO_ACCESS :
1568 translate_access(info->pImageMemoryBarriers[i].dstAccessMask) & cmdbuf->valid_access & image->valid_access;
1569 buffer_barriers[bbar].SyncBefore = adjust_sync_for_access(buffer_barriers[bbar].SyncBefore, buffer_barriers[bbar].AccessBefore);
1570 buffer_barriers[bbar].SyncAfter = adjust_sync_for_access(buffer_barriers[bbar].SyncAfter, buffer_barriers[bbar].AccessAfter);
1571 buffer_barriers[bbar].pResource = image->res;
1572 buffer_barriers[bbar].Offset = 0;
1573 buffer_barriers[bbar].Size = UINT64_MAX;
1574 ++bbar;
1575 continue;
1576 }
1577
1578 const VkImageSubresourceRange *range = &info->pImageMemoryBarriers[i].subresourceRange;
1579 const bool simultaneous_access = image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS;
1580
1581 bool need_separate_aspect_barriers =
1582 info->pImageMemoryBarriers[i].oldLayout == VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL ||
1583 info->pImageMemoryBarriers[i].oldLayout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL ||
1584 info->pImageMemoryBarriers[i].newLayout == VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL ||
1585 info->pImageMemoryBarriers[i].newLayout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL;
1586 uint32_t num_aspects = need_separate_aspect_barriers ? 2 : 1;
1587 VkImageAspectFlags aspect_0_mask = need_separate_aspect_barriers ?
1588 (VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT) : VK_IMAGE_ASPECT_FLAG_BITS_MAX_ENUM;
1589 VkImageAspectFlags aspects[] = {
1590 range->aspectMask & aspect_0_mask,
1591 range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT,
1592 };
1593
1594 for (uint32_t aspect_idx = 0; aspect_idx < num_aspects; ++aspect_idx) {
1595 VkImageAspectFlags aspect = aspects[aspect_idx];
1596 texture_barriers[tbar].SyncBefore = translate_sync(info->pImageMemoryBarriers[i].srcStageMask, true) & cmdbuf->valid_sync;
1597 texture_barriers[tbar].SyncAfter = translate_sync(info->pImageMemoryBarriers[i].dstStageMask, false) & cmdbuf->valid_sync;
1598 const bool queue_ownership_transfer = info->pImageMemoryBarriers[i].srcQueueFamilyIndex != info->pImageMemoryBarriers[i].dstQueueFamilyIndex;
1599 D3D12_BARRIER_ACCESS layout_before_valid_access = ~0;
1600 D3D12_BARRIER_ACCESS layout_after_valid_access = ~0;
1601 if (simultaneous_access) {
1602 /* Simultaneous access textures never perform layout transitions, and can do any type of access from COMMON layout */
1603 texture_barriers[tbar].LayoutAfter = texture_barriers[tbar].LayoutBefore = D3D12_BARRIER_LAYOUT_UNDEFINED;
1604 } else if (queue_ownership_transfer) {
1605 /* For an ownership transfer, force the foreign layout to COMMON and the matching sync/access to NONE */
1606 assert(info->pImageMemoryBarriers[i].srcQueueFamilyIndex != VK_QUEUE_FAMILY_IGNORED);
1607 assert(info->pImageMemoryBarriers[i].dstQueueFamilyIndex != VK_QUEUE_FAMILY_IGNORED);
1608 const bool is_release = info->pImageMemoryBarriers[i].srcQueueFamilyIndex == cmdbuf->vk.pool->queue_family_index;
1609 const bool is_acquire = info->pImageMemoryBarriers[i].dstQueueFamilyIndex == cmdbuf->vk.pool->queue_family_index;
1610 assert(is_release ^ is_acquire);
1611 texture_barriers[tbar].LayoutBefore = is_acquire ?
1612 D3D12_BARRIER_LAYOUT_COMMON : dzn_vk_layout_to_d3d_layout(info->pImageMemoryBarriers[i].oldLayout, cmdbuf->type, aspect);
1613 texture_barriers[tbar].LayoutAfter = is_release ?
1614 D3D12_BARRIER_LAYOUT_COMMON : dzn_vk_layout_to_d3d_layout(info->pImageMemoryBarriers[i].newLayout, cmdbuf->type, aspect);
1615 if (is_acquire) {
1616 texture_barriers[tbar].SyncBefore = D3D12_BARRIER_SYNC_NONE;
1617 texture_barriers[tbar].AccessBefore = D3D12_BARRIER_ACCESS_NO_ACCESS;
1618 layout_after_valid_access = valid_access_for_layout(texture_barriers[tbar].LayoutAfter);
1619 } else {
1620 texture_barriers[tbar].SyncAfter = D3D12_BARRIER_SYNC_NONE;
1621 texture_barriers[tbar].AccessAfter = D3D12_BARRIER_ACCESS_NO_ACCESS;
1622 layout_before_valid_access = valid_access_for_layout(texture_barriers[tbar].LayoutBefore);
1623 }
1624 } else {
1625 texture_barriers[tbar].LayoutBefore = dzn_vk_layout_to_d3d_layout(info->pImageMemoryBarriers[i].oldLayout, cmdbuf->type, aspect);
1626 texture_barriers[tbar].LayoutAfter = dzn_vk_layout_to_d3d_layout(info->pImageMemoryBarriers[i].newLayout, cmdbuf->type, aspect);
1627 layout_before_valid_access = valid_access_for_layout(texture_barriers[tbar].LayoutBefore);
1628 layout_after_valid_access = valid_access_for_layout(texture_barriers[tbar].LayoutAfter);
1629 }
1630
1631 texture_barriers[tbar].AccessBefore = texture_barriers[tbar].SyncBefore == D3D12_BARRIER_SYNC_NONE ||
1632 texture_barriers[tbar].LayoutBefore == D3D12_BARRIER_LAYOUT_UNDEFINED ?
1633 D3D12_BARRIER_ACCESS_NO_ACCESS :
1634 translate_access(info->pImageMemoryBarriers[i].srcAccessMask) &
1635 cmdbuf->valid_access & image->valid_access & layout_before_valid_access;
1636 texture_barriers[tbar].AccessAfter = texture_barriers[tbar].SyncAfter == D3D12_BARRIER_SYNC_NONE ?
1637 D3D12_BARRIER_ACCESS_NO_ACCESS :
1638 translate_access(info->pImageMemoryBarriers[i].dstAccessMask) &
1639 cmdbuf->valid_access & image->valid_access & layout_after_valid_access;
1640
1641 texture_barriers[tbar].SyncBefore = adjust_sync_for_access(texture_barriers[tbar].SyncBefore, texture_barriers[tbar].AccessBefore);
1642 texture_barriers[tbar].SyncAfter = adjust_sync_for_access(texture_barriers[tbar].SyncAfter, texture_barriers[tbar].AccessAfter);
1643 texture_barriers[tbar].Subresources.FirstArraySlice = range->baseArrayLayer;
1644 texture_barriers[tbar].Subresources.NumArraySlices = dzn_get_layer_count(image, range);
1645 texture_barriers[tbar].Subresources.IndexOrFirstMipLevel = range->baseMipLevel;
1646 texture_barriers[tbar].Subresources.NumMipLevels = dzn_get_level_count(image, range);
1647 texture_barriers[tbar].Subresources.FirstPlane = aspect_idx;
1648 texture_barriers[tbar].Subresources.NumPlanes = util_bitcount(aspect);
1649 texture_barriers[tbar].pResource = image->res;
1650 texture_barriers[tbar].Flags = D3D12_TEXTURE_BARRIER_FLAG_NONE;
1651 if (texture_barriers[tbar].LayoutBefore == D3D12_BARRIER_LAYOUT_UNDEFINED)
1652 texture_barriers[tbar].Flags |= D3D12_TEXTURE_BARRIER_FLAG_DISCARD;
1653 ++tbar;
1654 }
1655 }
1656 assert(bbar == num_buffer_barriers);
1657 assert(tbar == num_image_barriers);
1658
1659 ID3D12GraphicsCommandList8_Barrier(cmdbuf->cmdlist8, num_barrier_groups, groups);
1660
1661 vk_free(&cmdbuf->vk.pool->alloc, global_barriers);
1662 }
1663
1664 static D3D12_CPU_DESCRIPTOR_HANDLE
dzn_cmd_buffer_get_dsv(struct dzn_cmd_buffer * cmdbuf,const struct dzn_image * image,const D3D12_DEPTH_STENCIL_VIEW_DESC * desc)1665 dzn_cmd_buffer_get_dsv(struct dzn_cmd_buffer *cmdbuf,
1666 const struct dzn_image *image,
1667 const D3D12_DEPTH_STENCIL_VIEW_DESC *desc)
1668 {
1669 struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
1670 struct dzn_cmd_buffer_dsv_key key = { image, *desc };
1671 struct hash_entry *he = _mesa_hash_table_search(cmdbuf->dsvs.ht, &key);
1672 struct dzn_cmd_buffer_dsv_entry *dsve;
1673
1674 if (!he) {
1675 struct dzn_descriptor_heap *heap;
1676 uint32_t slot;
1677
1678 // TODO: error handling
1679 dsve = vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*dsve), 8,
1680 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1681 dsve->key = key;
1682 dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->dsvs.pool, device, 1, &heap, &slot);
1683 dsve->handle = dzn_descriptor_heap_get_cpu_handle(heap, slot);
1684 ID3D12Device1_CreateDepthStencilView(device->dev, image->res, desc, dsve->handle);
1685 _mesa_hash_table_insert(cmdbuf->dsvs.ht, &dsve->key, dsve);
1686 } else {
1687 dsve = he->data;
1688 }
1689
1690 return dsve->handle;
1691 }
1692
1693 static D3D12_CPU_DESCRIPTOR_HANDLE
dzn_cmd_buffer_get_rtv(struct dzn_cmd_buffer * cmdbuf,const struct dzn_image * image,const D3D12_RENDER_TARGET_VIEW_DESC * desc)1694 dzn_cmd_buffer_get_rtv(struct dzn_cmd_buffer *cmdbuf,
1695 const struct dzn_image *image,
1696 const D3D12_RENDER_TARGET_VIEW_DESC *desc)
1697 {
1698 struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
1699 struct dzn_cmd_buffer_rtv_key key = { image, *desc };
1700 struct hash_entry *he = _mesa_hash_table_search(cmdbuf->rtvs.ht, &key);
1701 struct dzn_cmd_buffer_rtv_entry *rtve;
1702
1703 if (!he) {
1704 struct dzn_descriptor_heap *heap;
1705 uint32_t slot;
1706
1707 // TODO: error handling
1708 rtve = vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*rtve), 8,
1709 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1710 rtve->key = key;
1711 dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->rtvs.pool, device, 1, &heap, &slot);
1712 rtve->handle = dzn_descriptor_heap_get_cpu_handle(heap, slot);
1713 ID3D12Device1_CreateRenderTargetView(device->dev, image->res, desc, rtve->handle);
1714 he = _mesa_hash_table_insert(cmdbuf->rtvs.ht, &rtve->key, rtve);
1715 } else {
1716 rtve = he->data;
1717 }
1718
1719 return rtve->handle;
1720 }
1721
1722 static D3D12_CPU_DESCRIPTOR_HANDLE
dzn_cmd_buffer_get_null_rtv(struct dzn_cmd_buffer * cmdbuf)1723 dzn_cmd_buffer_get_null_rtv(struct dzn_cmd_buffer *cmdbuf)
1724 {
1725 struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
1726
1727 if (!cmdbuf->null_rtv.ptr) {
1728 struct dzn_descriptor_heap *heap;
1729 uint32_t slot;
1730 dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->rtvs.pool, device, 1, &heap, &slot);
1731 cmdbuf->null_rtv = dzn_descriptor_heap_get_cpu_handle(heap, slot);
1732
1733 D3D12_RENDER_TARGET_VIEW_DESC desc = { 0 };
1734 desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
1735 desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
1736 desc.Texture2D.MipSlice = 0;
1737 desc.Texture2D.PlaneSlice = 0;
1738
1739 ID3D12Device1_CreateRenderTargetView(device->dev, NULL, &desc, cmdbuf->null_rtv);
1740 }
1741
1742 return cmdbuf->null_rtv;
1743 }
1744
1745 static D3D12_HEAP_TYPE
heap_type_for_bucket(enum dzn_internal_buf_bucket bucket)1746 heap_type_for_bucket(enum dzn_internal_buf_bucket bucket)
1747 {
1748 switch (bucket) {
1749 case DZN_INTERNAL_BUF_UPLOAD: return D3D12_HEAP_TYPE_UPLOAD;
1750 case DZN_INTERNAL_BUF_DEFAULT: return D3D12_HEAP_TYPE_DEFAULT;
1751 default: unreachable("Invalid value");
1752 }
1753 }
1754
1755 static VkResult
dzn_cmd_buffer_alloc_internal_buf(struct dzn_cmd_buffer * cmdbuf,uint32_t size,enum dzn_internal_buf_bucket bucket,D3D12_RESOURCE_STATES init_state,uint64_t align,ID3D12Resource ** out,uint64_t * offset)1756 dzn_cmd_buffer_alloc_internal_buf(struct dzn_cmd_buffer *cmdbuf,
1757 uint32_t size,
1758 enum dzn_internal_buf_bucket bucket,
1759 D3D12_RESOURCE_STATES init_state,
1760 uint64_t align,
1761 ID3D12Resource **out,
1762 uint64_t *offset)
1763 {
1764 struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
1765 ID3D12Resource *res;
1766 *out = NULL;
1767 D3D12_HEAP_TYPE heap_type = heap_type_for_bucket(bucket);
1768
1769 if (bucket == DZN_INTERNAL_BUF_UPLOAD && cmdbuf->cur_upload_buf) {
1770 uint64_t new_offset = ALIGN_POT(cmdbuf->cur_upload_buf_offset, align);
1771 if (cmdbuf->cur_upload_buf->size >= size + new_offset) {
1772 cmdbuf->cur_upload_buf_offset = new_offset + size;
1773 *out = cmdbuf->cur_upload_buf->res;
1774 *offset = new_offset;
1775 return VK_SUCCESS;
1776 }
1777 cmdbuf->cur_upload_buf = NULL;
1778 cmdbuf->cur_upload_buf_offset = 0;
1779 }
1780
1781 uint32_t alloc_size = size;
1782 if (bucket == DZN_INTERNAL_BUF_UPLOAD)
1783 /* Walk through a 4MB upload buffer */
1784 alloc_size = ALIGN_POT(size, 4 * 1024 * 1024);
1785 else
1786 /* Align size on 64k (the default alignment) */
1787 alloc_size = ALIGN_POT(size, 64 * 1024);
1788
1789 D3D12_HEAP_PROPERTIES hprops = dzn_ID3D12Device4_GetCustomHeapProperties(device->dev, 0, heap_type);
1790 D3D12_RESOURCE_DESC rdesc = {
1791 .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
1792 .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
1793 .Width = alloc_size,
1794 .Height = 1,
1795 .DepthOrArraySize = 1,
1796 .MipLevels = 1,
1797 .Format = DXGI_FORMAT_UNKNOWN,
1798 .SampleDesc = { .Count = 1, .Quality = 0 },
1799 .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
1800 .Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS,
1801 };
1802
1803 HRESULT hres =
1804 ID3D12Device1_CreateCommittedResource(device->dev, &hprops,
1805 D3D12_HEAP_FLAG_NONE, &rdesc,
1806 init_state, NULL,
1807 &IID_ID3D12Resource,
1808 (void **)&res);
1809 if (FAILED(hres)) {
1810 return vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_DEVICE_MEMORY);
1811 }
1812
1813 struct dzn_internal_resource *entry =
1814 vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*entry), 8,
1815 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1816 if (!entry) {
1817 ID3D12Resource_Release(res);
1818 return vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_DEVICE_MEMORY);
1819 }
1820
1821 entry->res = res;
1822 entry->size = alloc_size;
1823 list_addtail(&entry->link, &cmdbuf->internal_bufs[bucket]);
1824 *out = entry->res;
1825 if (offset)
1826 *offset = 0;
1827 if (bucket == DZN_INTERNAL_BUF_UPLOAD) {
1828 cmdbuf->cur_upload_buf = entry;
1829 cmdbuf->cur_upload_buf_offset = size;
1830 }
1831 return VK_SUCCESS;
1832 }
1833
/* Clear rectangles of a color image by CPU-filling an upload buffer with the
 * clear pattern and issuing CopyTextureRegion() for each rect/layer/aspect.
 * Used when a direct RTV clear isn't possible (e.g. the clear value doesn't
 * survive the float conversion, or the image has no RTV flag).
 *
 * Only a single mip level may be covered by `range` (asserted below).
 * `layout` is the image's current layout; legacy barriers transition to
 * TRANSFER_DST around the copies and back afterwards.
 */
static void
dzn_cmd_buffer_clear_rects_with_copy(struct dzn_cmd_buffer *cmdbuf,
                                     const struct dzn_image *image,
                                     VkImageLayout layout,
                                     const VkClearColorValue *color,
                                     const VkImageSubresourceRange *range,
                                     uint32_t rect_count, D3D12_RECT *rects)
{
   enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format);
   uint32_t blksize = util_format_get_blocksize(pfmt);
   uint8_t buf[D3D12_TEXTURE_DATA_PITCH_ALIGNMENT * 3] = { 0 };
   uint32_t raw[4] = { 0 };

   assert(blksize <= sizeof(raw));
   assert(!(sizeof(buf) % blksize));

   /* Encode the clear color into one raw texel. */
   util_format_write_4(pfmt, color, 0, raw, 0, 0, 0, 1, 1);

   /* fill_step is the smallest multiple of the D3D12 row-pitch alignment
    * that is also a multiple of the block size, so whole texels tile it. */
   uint32_t fill_step = D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;
   while (fill_step % blksize)
      fill_step += D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;

   /* Size the staging buffer for the full mip level; rects copy out of it. */
   uint32_t max_w = u_minify(image->vk.extent.width, range->baseMipLevel);
   uint32_t max_h = u_minify(image->vk.extent.height, range->baseMipLevel);
   uint32_t row_pitch = ALIGN_NPOT(max_w * blksize, fill_step);
   uint32_t res_size = max_h * row_pitch;

   assert(fill_step <= sizeof(buf));

   /* Replicate the raw texel across one fill_step-sized chunk. */
   for (uint32_t i = 0; i < fill_step; i += blksize)
      memcpy(&buf[i], raw, blksize);

   ID3D12Resource *src_res;
   uint64_t src_offset;

   VkResult result =
      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, res_size,
                                        DZN_INTERNAL_BUF_UPLOAD,
                                        D3D12_RESOURCE_STATE_GENERIC_READ,
                                        D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT,
                                        &src_res,
                                        &src_offset);
   if (result != VK_SUCCESS)
      return;

   assert(!(res_size % fill_step));

   /* Fill the staging buffer with the replicated clear pattern.
    * NOTE(review): the Map() return value is not checked — a mapping
    * failure would dereference a garbage pointer; confirm upstream policy. */
   uint8_t *cpu_ptr;
   ID3D12Resource_Map(src_res, 0, NULL, (void **)&cpu_ptr);
   cpu_ptr += src_offset;
   for (uint32_t i = 0; i < res_size; i += fill_step)
      memcpy(&cpu_ptr[i], buf, fill_step);

   ID3D12Resource_Unmap(src_res, 0, NULL);

   D3D12_TEXTURE_COPY_LOCATION src_loc = {
      .pResource = src_res,
      .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
      .PlacedFootprint = {
         .Offset = src_offset,
         .Footprint = {
            .Width = max_w,
            .Height = max_h,
            .Depth = 1,
            .RowPitch = (UINT)ALIGN_NPOT(max_w * blksize, fill_step),
         },
      },
   };

   /* Legacy barriers: move the destination into TRANSFER_DST for the copy. */
   if (!cmdbuf->enhanced_barriers) {
      dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
                                                         layout,
                                                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                                         DZN_QUEUE_TRANSITION_FLUSH);
   }

   assert(dzn_get_level_count(image, range) == 1);
   uint32_t layer_count = dzn_get_layer_count(image, range);

   dzn_foreach_aspect(aspect, range->aspectMask) {
      VkImageSubresourceLayers subres = {
         .aspectMask = (VkImageAspectFlags)aspect,
         .mipLevel = range->baseMipLevel,
         .baseArrayLayer = range->baseArrayLayer,
         .layerCount = layer_count,
      };

      for (uint32_t layer = 0; layer < layer_count; layer++) {
         D3D12_TEXTURE_COPY_LOCATION dst_loc =
            dzn_image_get_copy_loc(image, &subres, aspect, layer);

         /* Match the source footprint format to whatever the destination
          * copy location uses (per-plane format for buffer-style copies). */
         src_loc.PlacedFootprint.Footprint.Format =
            dst_loc.Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT ?
            dst_loc.PlacedFootprint.Footprint.Format :
            image->desc.Format;

         for (uint32_t r = 0; r < rect_count; r++) {
            /* Copy a rect-sized box from the top-left of the staging image
             * to the rect's position in the destination. */
            D3D12_BOX src_box = {
               .left = 0,
               .top = 0,
               .front = 0,
               .right = (UINT)(rects[r].right - rects[r].left),
               .bottom = (UINT)(rects[r].bottom - rects[r].top),
               .back = 1,
            };

            ID3D12GraphicsCommandList1_CopyTextureRegion(cmdbuf->cmdlist,
                                                         &dst_loc,
                                                         rects[r].left,
                                                         rects[r].top, 0,
                                                         &src_loc,
                                                         &src_box);
         }
      }
   }

   /* Restore the caller's layout. */
   if (!cmdbuf->enhanced_barriers) {
      dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
                                                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                                         layout,
                                                         DZN_QUEUE_TRANSITION_FLUSH);
   }
}
1957
1958 static VkClearColorValue
adjust_clear_color(struct dzn_physical_device * pdev,VkFormat format,const VkClearColorValue * col)1959 adjust_clear_color(struct dzn_physical_device *pdev,
1960 VkFormat format, const VkClearColorValue *col)
1961 {
1962 VkClearColorValue out = *col;
1963
1964 // D3D12 doesn't support bgra4, so we map it to rgba4 and swizzle things
1965 // manually where it matters, like here, in the clear path.
1966 if (format == VK_FORMAT_B4G4R4A4_UNORM_PACK16) {
1967 if (pdev->support_a4b4g4r4) {
1968 DZN_SWAP(float, out.float32[0], out.float32[2]);
1969 } else {
1970 DZN_SWAP(float, out.float32[0], out.float32[1]);
1971 DZN_SWAP(float, out.float32[2], out.float32[3]);
1972 }
1973 }
1974
1975 return out;
1976 }
1977
/* Clear whole subresource ranges of an image by CPU-filling an upload buffer
 * with the clear pattern and copying it over every level/layer/aspect in
 * each range. Fallback for clears that can't go through an RTV (see
 * dzn_cmd_buffer_clear_color()).
 */
static void
dzn_cmd_buffer_clear_ranges_with_copy(struct dzn_cmd_buffer *cmdbuf,
                                      const struct dzn_image *image,
                                      VkImageLayout layout,
                                      const VkClearColorValue *color,
                                      uint32_t range_count,
                                      const VkImageSubresourceRange *ranges)
{
   enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format);
   uint32_t blksize = util_format_get_blocksize(pfmt);
   uint8_t buf[D3D12_TEXTURE_DATA_PITCH_ALIGNMENT * 3] = { 0 };
   uint32_t raw[4] = { 0 };

   assert(blksize <= sizeof(raw));
   assert(!(sizeof(buf) % blksize));

   /* Encode the clear color into one raw texel. */
   util_format_write_4(pfmt, color, 0, raw, 0, 0, 0, 1, 1);

   /* Smallest multiple of the row-pitch alignment that whole texels tile. */
   uint32_t fill_step = D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;
   while (fill_step % blksize)
      fill_step += D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;

   /* Size the staging buffer for the largest base mip among the ranges;
    * smaller levels copy a sub-footprint of the same buffer. */
   uint32_t res_size = 0;
   for (uint32_t r = 0; r < range_count; r++) {
      uint32_t w = u_minify(image->vk.extent.width, ranges[r].baseMipLevel);
      uint32_t h = u_minify(image->vk.extent.height, ranges[r].baseMipLevel);
      uint32_t d = u_minify(image->vk.extent.depth, ranges[r].baseMipLevel);
      uint32_t row_pitch = ALIGN_NPOT(w * blksize, fill_step);

      res_size = MAX2(res_size, h * d * row_pitch);
   }

   assert(fill_step <= sizeof(buf));

   /* Replicate the raw texel across one fill_step-sized chunk. */
   for (uint32_t i = 0; i < fill_step; i += blksize)
      memcpy(&buf[i], raw, blksize);

   ID3D12Resource *src_res;
   uint64_t src_offset;

   VkResult result =
      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, res_size,
                                        DZN_INTERNAL_BUF_UPLOAD,
                                        D3D12_RESOURCE_STATE_GENERIC_READ,
                                        D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT,
                                        &src_res,
                                        &src_offset);
   if (result != VK_SUCCESS)
      return;

   assert(!(res_size % fill_step));

   /* Fill the staging buffer with the replicated clear pattern.
    * NOTE(review): Map() return value unchecked — same pattern as
    * dzn_cmd_buffer_clear_rects_with_copy(); confirm upstream policy. */
   uint8_t *cpu_ptr;
   ID3D12Resource_Map(src_res, 0, NULL, (void **)&cpu_ptr);
   cpu_ptr += src_offset;
   for (uint32_t i = 0; i < res_size; i += fill_step)
      memcpy(&cpu_ptr[i], buf, fill_step);

   ID3D12Resource_Unmap(src_res, 0, NULL);

   D3D12_TEXTURE_COPY_LOCATION src_loc = {
      .pResource = src_res,
      .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
      .PlacedFootprint = {
         .Offset = src_offset,
      },
   };

   for (uint32_t r = 0; r < range_count; r++) {
      uint32_t level_count = dzn_get_level_count(image, &ranges[r]);
      uint32_t layer_count = dzn_get_layer_count(image, &ranges[r]);

      /* Legacy barriers: move the range into TRANSFER_DST for the copies. */
      if (!cmdbuf->enhanced_barriers) {
         dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &ranges[r],
                                                            layout,
                                                            VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                                            DZN_QUEUE_TRANSITION_FLUSH);
      }

      dzn_foreach_aspect(aspect, ranges[r].aspectMask) {
         for (uint32_t lvl = 0; lvl < level_count; lvl++) {
            uint32_t w = u_minify(image->vk.extent.width, ranges[r].baseMipLevel + lvl);
            uint32_t h = u_minify(image->vk.extent.height, ranges[r].baseMipLevel + lvl);
            uint32_t d = u_minify(image->vk.extent.depth, ranges[r].baseMipLevel + lvl);
            VkImageSubresourceLayers subres = {
               .aspectMask = (VkImageAspectFlags)aspect,
               .mipLevel = ranges[r].baseMipLevel + lvl,
               .baseArrayLayer = ranges[r].baseArrayLayer,
               .layerCount = layer_count,
            };

            for (uint32_t layer = 0; layer < layer_count; layer++) {
               D3D12_TEXTURE_COPY_LOCATION dst_loc =
                  dzn_image_get_copy_loc(image, &subres, aspect, layer);

               /* Match the source footprint to the destination's per-plane
                * format when the destination is a placed footprint. */
               src_loc.PlacedFootprint.Footprint.Format =
                  dst_loc.Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT ?
                  dst_loc.PlacedFootprint.Footprint.Format :
                  image->desc.Format;
               src_loc.PlacedFootprint.Footprint.Width = w;
               src_loc.PlacedFootprint.Footprint.Height = h;
               src_loc.PlacedFootprint.Footprint.Depth = d;
               src_loc.PlacedFootprint.Footprint.RowPitch =
                  ALIGN_NPOT(w * blksize, fill_step);
               D3D12_BOX src_box = {
                  .left = 0,
                  .top = 0,
                  .front = 0,
                  .right = w,
                  .bottom = h,
                  .back = d,
               };

               ID3D12GraphicsCommandList1_CopyTextureRegion(cmdbuf->cmdlist, &dst_loc, 0, 0, 0,
                                                            &src_loc, &src_box);

            }
         }
      }

      /* Restore the caller's layout. */
      if (!cmdbuf->enhanced_barriers) {
         dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &ranges[r],
                                                            VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                                            layout,
                                                            DZN_QUEUE_TRANSITION_FLUSH);
      }
   }
}
2106
/* Implement vkCmdClearAttachments() for one attachment view: clear the given
 * rects of `view` (depth/stencil via ClearDepthStencilView, color via
 * ClearRenderTargetView, or a copy-based fallback when the integer clear
 * value doesn't survive the float conversion D3D12 clears require).
 * `layout` is the layout the attachment is in during the render pass.
 */
static void
dzn_cmd_buffer_clear_attachment(struct dzn_cmd_buffer *cmdbuf,
                                struct dzn_image_view *view,
                                VkImageLayout layout,
                                const VkClearValue *value,
                                VkImageAspectFlags aspects,
                                uint32_t base_layer,
                                uint32_t layer_count,
                                uint32_t rect_count,
                                D3D12_RECT *rects)
{
   struct dzn_image *image =
      container_of(view->vk.image, struct dzn_image, vk);
   struct dzn_physical_device *pdev =
      container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk);

   /* Narrow the view to the requested layer span; base_layer is relative to
    * the view, so fold in the view's own base layer. */
   VkImageSubresourceRange range = {
      .aspectMask = aspects,
      .baseMipLevel = view->vk.base_mip_level,
      .levelCount = 1,
      .baseArrayLayer = view->vk.base_array_layer + base_layer,
      .layerCount = layer_count == VK_REMAINING_ARRAY_LAYERS ?
                    view->vk.layer_count - base_layer : layer_count,
   };

   layer_count = vk_image_subresource_layer_count(&image->vk, &range);
   D3D12_BARRIER_LAYOUT restore_layout = D3D12_BARRIER_LAYOUT_COMMON;

   if (vk_format_is_depth_or_stencil(view->vk.format)) {
      D3D12_CLEAR_FLAGS flags = (D3D12_CLEAR_FLAGS)0;

      if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
         flags |= D3D12_CLEAR_FLAG_DEPTH;
      if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT)
         flags |= D3D12_CLEAR_FLAG_STENCIL;

      /* Nothing to do if neither depth nor stencil was requested. */
      if (flags != 0) {
         /* Bring the range into a DSV-writable layout for the clear. */
         if (cmdbuf->enhanced_barriers) {
            restore_layout = dzn_cmd_buffer_require_layout(cmdbuf, image,
                                                           layout, D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE,
                                                           &range);
         } else {
            dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range,
                                                               layout,
                                                               VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
                                                               DZN_QUEUE_TRANSITION_FLUSH);
         }

         D3D12_DEPTH_STENCIL_VIEW_DESC desc = dzn_image_get_dsv_desc(image, &range, 0);
         D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_dsv(cmdbuf, image, &desc);
         ID3D12GraphicsCommandList1_ClearDepthStencilView(cmdbuf->cmdlist, handle, flags,
                                                          value->depthStencil.depth,
                                                          value->depthStencil.stencil,
                                                          rect_count, rects);

         /* Put the range back into the caller's layout. */
         if (cmdbuf->enhanced_barriers) {
            dzn_cmd_buffer_restore_layout(cmdbuf, image,
                                          D3D12_BARRIER_SYNC_DEPTH_STENCIL, D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE,
                                          D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE, restore_layout,
                                          &range);
         } else {
            dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range,
                                                               VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
                                                               layout,
                                                               DZN_QUEUE_TRANSITION_FLUSH);
         }
      }
   } else if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
      VkClearColorValue color = adjust_clear_color(pdev, view->vk.format, &value->color);
      bool clear_with_cpy = false;
      float vals[4];

      /* D3D12 RTV clears take floats; if an integer clear value can't be
       * represented exactly as float, fall back to the copy-based clear. */
      if (vk_format_is_sint(view->vk.format)) {
         for (uint32_t i = 0; i < 4; i++) {
            vals[i] = color.int32[i];
            if (color.int32[i] != (int32_t)vals[i]) {
               clear_with_cpy = true;
               break;
            }
         }
      } else if (vk_format_is_uint(view->vk.format)) {
         for (uint32_t i = 0; i < 4; i++) {
            vals[i] = color.uint32[i];
            if (color.uint32[i] != (uint32_t)vals[i]) {
               clear_with_cpy = true;
               break;
            }
         }
      } else {
         for (uint32_t i = 0; i < 4; i++)
            vals[i] = color.float32[i];
      }

      if (clear_with_cpy) {
         /* NOTE(review): this passes the COLOR_ATTACHMENT_OPTIMAL constant
          * rather than the `layout` parameter, i.e. it assumes the
          * attachment is in that exact layout (a GENERAL-layout attachment
          * would get mismatched transitions) — confirm against upstream. */
         dzn_cmd_buffer_clear_rects_with_copy(cmdbuf, image,
                                              VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                                              &value->color,
                                              &range, rect_count, rects);
      } else {
         /* Bring the range into an RTV-writable layout for the clear. */
         if (cmdbuf->enhanced_barriers) {
            restore_layout = dzn_cmd_buffer_require_layout(cmdbuf, image,
                                                           layout, D3D12_BARRIER_LAYOUT_RENDER_TARGET,
                                                           &range);
         } else {
            dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range,
                                                               layout,
                                                               VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                                                               DZN_QUEUE_TRANSITION_FLUSH);
         }

         D3D12_RENDER_TARGET_VIEW_DESC desc = dzn_image_get_rtv_desc(image, &range, 0);
         D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_rtv(cmdbuf, image, &desc);
         ID3D12GraphicsCommandList1_ClearRenderTargetView(cmdbuf->cmdlist, handle, vals, rect_count, rects);

         /* Put the range back into the caller's layout. */
         if (cmdbuf->enhanced_barriers) {
            dzn_cmd_buffer_restore_layout(cmdbuf, image,
                                          D3D12_BARRIER_SYNC_RENDER_TARGET, D3D12_BARRIER_ACCESS_RENDER_TARGET,
                                          D3D12_BARRIER_LAYOUT_RENDER_TARGET, restore_layout,
                                          &range);
         } else {
            dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range,
                                                               VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                                                               layout,
                                                               DZN_QUEUE_TRANSITION_FLUSH);
         }
      }
   }
}
2235
/* Implement vkCmdClearColorImage() for the given ranges. Prefers a direct
 * ClearRenderTargetView() per mip level; falls back to the copy-based clear
 * when the image has no RTV capability, the command list can't run graphics
 * work, or an integer clear value can't be represented exactly as float.
 */
static void
dzn_cmd_buffer_clear_color(struct dzn_cmd_buffer *cmdbuf,
                           const struct dzn_image *image,
                           VkImageLayout layout,
                           const VkClearColorValue *col,
                           uint32_t range_count,
                           const VkImageSubresourceRange *ranges)
{
   struct dzn_physical_device *pdev =
      container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk);
   /* RTV clears need the RENDER_TARGET flag and a direct command list. */
   if (!(image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) ||
       cmdbuf->type != D3D12_COMMAND_LIST_TYPE_DIRECT) {
      dzn_cmd_buffer_clear_ranges_with_copy(cmdbuf, image, layout, col, range_count, ranges);
      return;
   }

   VkClearColorValue color = adjust_clear_color(pdev, image->vk.format, col);
   float clear_vals[4];

   enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format);
   D3D12_BARRIER_LAYOUT restore_layout = D3D12_BARRIER_LAYOUT_COMMON;

   /* ClearRenderTargetView() takes floats; bail to the copy path if an
    * integer component doesn't round-trip through float exactly. */
   if (util_format_is_pure_sint(pfmt)) {
      for (uint32_t c = 0; c < ARRAY_SIZE(clear_vals); c++) {
         clear_vals[c] = color.int32[c];
         if (color.int32[c] != (int32_t)clear_vals[c]) {
            dzn_cmd_buffer_clear_ranges_with_copy(cmdbuf, image, layout, col, range_count, ranges);
            return;
         }
      }
   } else if (util_format_is_pure_uint(pfmt)) {
      for (uint32_t c = 0; c < ARRAY_SIZE(clear_vals); c++) {
         clear_vals[c] = color.uint32[c];
         if (color.uint32[c] != (uint32_t)clear_vals[c]) {
            dzn_cmd_buffer_clear_ranges_with_copy(cmdbuf, image, layout, col, range_count, ranges);
            return;
         }
      }
   } else {
      memcpy(clear_vals, color.float32, sizeof(clear_vals));
   }

   for (uint32_t r = 0; r < range_count; r++) {
      const VkImageSubresourceRange *range = &ranges[r];
      uint32_t level_count = dzn_get_level_count(image, range);

      /* Bring the range into an RTV-writable layout for the clears. */
      if (cmdbuf->enhanced_barriers) {
         restore_layout = dzn_cmd_buffer_require_layout(cmdbuf, image,
                                                        layout, D3D12_BARRIER_LAYOUT_RENDER_TARGET,
                                                        range);
      } else {
         dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
                                                            layout,
                                                            VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                                                            DZN_QUEUE_TRANSITION_FLUSH);
      }

      for (uint32_t lvl = 0; lvl < level_count; lvl++) {
         VkImageSubresourceRange view_range = *range;

         /* 3D images: the RTV's "layers" are depth slices of this level. */
         if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
            view_range.baseArrayLayer = 0;
            view_range.layerCount = u_minify(image->vk.extent.depth, range->baseMipLevel + lvl);
         }

         D3D12_RENDER_TARGET_VIEW_DESC desc = dzn_image_get_rtv_desc(image, &view_range, lvl);
         D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_rtv(cmdbuf, image, &desc);
         ID3D12GraphicsCommandList1_ClearRenderTargetView(cmdbuf->cmdlist, handle, clear_vals, 0, NULL);
      }

      /* Put the range back into the caller's layout. */
      if (cmdbuf->enhanced_barriers) {
         dzn_cmd_buffer_restore_layout(cmdbuf, image,
                                       D3D12_BARRIER_SYNC_RENDER_TARGET, D3D12_BARRIER_ACCESS_RENDER_TARGET,
                                       D3D12_BARRIER_LAYOUT_RENDER_TARGET, restore_layout,
                                       range);
      } else {
         dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
                                                            VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                                                            layout,
                                                            DZN_QUEUE_TRANSITION_FLUSH);
      }
   }
}
2319
2320 static void
dzn_cmd_buffer_clear_zs(struct dzn_cmd_buffer * cmdbuf,const struct dzn_image * image,VkImageLayout layout,const VkClearDepthStencilValue * zs,uint32_t range_count,const VkImageSubresourceRange * ranges)2321 dzn_cmd_buffer_clear_zs(struct dzn_cmd_buffer *cmdbuf,
2322 const struct dzn_image *image,
2323 VkImageLayout layout,
2324 const VkClearDepthStencilValue *zs,
2325 uint32_t range_count,
2326 const VkImageSubresourceRange *ranges)
2327 {
2328 assert(image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL);
2329
2330 for (uint32_t r = 0; r < range_count; r++) {
2331 const VkImageSubresourceRange *range = &ranges[r];
2332 uint32_t level_count = dzn_get_level_count(image, range);
2333
2334 D3D12_CLEAR_FLAGS flags = (D3D12_CLEAR_FLAGS)0;
2335 D3D12_BARRIER_LAYOUT restore_layout = D3D12_BARRIER_LAYOUT_COMMON;
2336
2337 if (range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
2338 flags |= D3D12_CLEAR_FLAG_DEPTH;
2339 if (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)
2340 flags |= D3D12_CLEAR_FLAG_STENCIL;
2341
2342 if (cmdbuf->enhanced_barriers) {
2343 restore_layout = dzn_cmd_buffer_require_layout(cmdbuf, image,
2344 layout, D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE,
2345 range);
2346 } else {
2347 dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
2348 layout,
2349 VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
2350 DZN_QUEUE_TRANSITION_FLUSH);
2351 }
2352
2353 for (uint32_t lvl = 0; lvl < level_count; lvl++) {
2354 D3D12_DEPTH_STENCIL_VIEW_DESC desc = dzn_image_get_dsv_desc(image, range, lvl);
2355 D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_dsv(cmdbuf, image, &desc);
2356 ID3D12GraphicsCommandList1_ClearDepthStencilView(cmdbuf->cmdlist,
2357 handle, flags,
2358 zs->depth,
2359 zs->stencil,
2360 0, NULL);
2361 }
2362
2363 if (cmdbuf->enhanced_barriers) {
2364 dzn_cmd_buffer_restore_layout(cmdbuf, image,
2365 D3D12_BARRIER_SYNC_DEPTH_STENCIL, D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE,
2366 D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE, restore_layout,
2367 range);
2368 } else {
2369 dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
2370 VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
2371 layout,
2372 DZN_QUEUE_TRANSITION_FLUSH);
2373 }
2374 }
2375 }
2376
2377 static void
dzn_cmd_buffer_copy_buf2img_region(struct dzn_cmd_buffer * cmdbuf,const VkCopyBufferToImageInfo2 * info,uint32_t r,VkImageAspectFlagBits aspect,uint32_t l)2378 dzn_cmd_buffer_copy_buf2img_region(struct dzn_cmd_buffer *cmdbuf,
2379 const VkCopyBufferToImageInfo2 *info,
2380 uint32_t r,
2381 VkImageAspectFlagBits aspect,
2382 uint32_t l)
2383 {
2384 VK_FROM_HANDLE(dzn_buffer, src_buffer, info->srcBuffer);
2385 VK_FROM_HANDLE(dzn_image, dst_image, info->dstImage);
2386 struct dzn_physical_device *pdev =
2387 container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk);
2388
2389 ID3D12GraphicsCommandList1 *cmdlist = cmdbuf->cmdlist;
2390
2391 VkBufferImageCopy2 region = info->pRegions[r];
2392 enum pipe_format pfmt = vk_format_to_pipe_format(dst_image->vk.format);
2393 uint32_t blkh = util_format_get_blockheight(pfmt);
2394 uint32_t blkd = util_format_get_blockdepth(pfmt);
2395
2396 /* D3D12 wants block aligned offsets/extent, but vulkan allows the extent
2397 * to not be block aligned if it's reaching the image boundary, offsets still
2398 * have to be aligned. Align the image extent to make D3D12 happy.
2399 */
2400 dzn_image_align_extent(dst_image, ®ion.imageExtent);
2401
2402 D3D12_TEXTURE_COPY_LOCATION dst_img_loc =
2403 dzn_image_get_copy_loc(dst_image, ®ion.imageSubresource, aspect, l);
2404 D3D12_TEXTURE_COPY_LOCATION src_buf_loc =
2405 dzn_buffer_get_copy_loc(src_buffer, dst_image->vk.format, ®ion, aspect, l);
2406
2407 if (dzn_buffer_supports_region_copy(pdev, &src_buf_loc)) {
2408 /* RowPitch and Offset are properly aligned, we can copy
2409 * the whole thing in one call.
2410 */
2411 D3D12_BOX src_box = {
2412 .left = 0,
2413 .top = 0,
2414 .front = 0,
2415 .right = region.imageExtent.width,
2416 .bottom = region.imageExtent.height,
2417 .back = region.imageExtent.depth,
2418 };
2419
2420 ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_img_loc,
2421 region.imageOffset.x,
2422 region.imageOffset.y,
2423 region.imageOffset.z,
2424 &src_buf_loc, &src_box);
2425 return;
2426 }
2427
2428 /* Copy line-by-line if things are not properly aligned. */
2429 D3D12_BOX src_box = {
2430 .top = 0,
2431 .front = 0,
2432 .bottom = blkh,
2433 .back = blkd,
2434 };
2435
2436 for (uint32_t z = 0; z < region.imageExtent.depth; z += blkd) {
2437 for (uint32_t y = 0; y < region.imageExtent.height; y += blkh) {
2438 uint32_t src_x;
2439
2440 D3D12_TEXTURE_COPY_LOCATION src_buf_line_loc =
2441 dzn_buffer_get_line_copy_loc(src_buffer, dst_image->vk.format,
2442 ®ion, &src_buf_loc,
2443 y, z, &src_x);
2444
2445 src_box.left = src_x;
2446 src_box.right = src_x + region.imageExtent.width;
2447 ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist,
2448 &dst_img_loc,
2449 region.imageOffset.x,
2450 region.imageOffset.y + y,
2451 region.imageOffset.z + z,
2452 &src_buf_line_loc,
2453 &src_box);
2454 }
2455 }
2456 }
2457
2458 static void
dzn_cmd_buffer_copy_img2buf_region(struct dzn_cmd_buffer * cmdbuf,const VkCopyImageToBufferInfo2 * info,uint32_t r,VkImageAspectFlagBits aspect,uint32_t l)2459 dzn_cmd_buffer_copy_img2buf_region(struct dzn_cmd_buffer *cmdbuf,
2460 const VkCopyImageToBufferInfo2 *info,
2461 uint32_t r,
2462 VkImageAspectFlagBits aspect,
2463 uint32_t l)
2464 {
2465 VK_FROM_HANDLE(dzn_image, src_image, info->srcImage);
2466 VK_FROM_HANDLE(dzn_buffer, dst_buffer, info->dstBuffer);
2467 struct dzn_physical_device *pdev =
2468 container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk);
2469
2470 ID3D12GraphicsCommandList1 *cmdlist = cmdbuf->cmdlist;
2471
2472 VkBufferImageCopy2 region = info->pRegions[r];
2473 enum pipe_format pfmt = vk_format_to_pipe_format(src_image->vk.format);
2474 uint32_t blkh = util_format_get_blockheight(pfmt);
2475 uint32_t blkd = util_format_get_blockdepth(pfmt);
2476
2477 /* D3D12 wants block aligned offsets/extent, but vulkan allows the extent
2478 * to not be block aligned if it's reaching the image boundary, offsets still
2479 * have to be aligned. Align the image extent to make D3D12 happy.
2480 */
2481 dzn_image_align_extent(src_image, ®ion.imageExtent);
2482
2483 D3D12_TEXTURE_COPY_LOCATION src_img_loc =
2484 dzn_image_get_copy_loc(src_image, ®ion.imageSubresource, aspect, l);
2485 D3D12_TEXTURE_COPY_LOCATION dst_buf_loc =
2486 dzn_buffer_get_copy_loc(dst_buffer, src_image->vk.format, ®ion, aspect, l);
2487
2488 if (dzn_buffer_supports_region_copy(pdev, &dst_buf_loc)) {
2489 /* RowPitch and Offset are properly aligned on 256 bytes, we can copy
2490 * the whole thing in one call.
2491 */
2492 D3D12_BOX src_box = {
2493 .left = (UINT)region.imageOffset.x,
2494 .top = (UINT)region.imageOffset.y,
2495 .front = (UINT)region.imageOffset.z,
2496 .right = (UINT)(region.imageOffset.x + region.imageExtent.width),
2497 .bottom = (UINT)(region.imageOffset.y + region.imageExtent.height),
2498 .back = (UINT)(region.imageOffset.z + region.imageExtent.depth),
2499 };
2500
2501 ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_buf_loc,
2502 0, 0, 0, &src_img_loc,
2503 &src_box);
2504 return;
2505 }
2506
2507 D3D12_BOX src_box = {
2508 .left = (UINT)region.imageOffset.x,
2509 .right = (UINT)(region.imageOffset.x + region.imageExtent.width),
2510 };
2511
2512 /* Copy line-by-line if things are not properly aligned. */
2513 for (uint32_t z = 0; z < region.imageExtent.depth; z += blkd) {
2514 src_box.front = region.imageOffset.z + z;
2515 src_box.back = src_box.front + blkd;
2516
2517 for (uint32_t y = 0; y < region.imageExtent.height; y += blkh) {
2518 uint32_t dst_x;
2519
2520 D3D12_TEXTURE_COPY_LOCATION dst_buf_line_loc =
2521 dzn_buffer_get_line_copy_loc(dst_buffer, src_image->vk.format,
2522 ®ion, &dst_buf_loc,
2523 y, z, &dst_x);
2524
2525 src_box.top = region.imageOffset.y + y;
2526 src_box.bottom = src_box.top + blkh;
2527
2528 ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist,
2529 &dst_buf_line_loc,
2530 dst_x, 0, 0,
2531 &src_img_loc,
2532 &src_box);
2533 }
2534 }
2535 }
2536
2537 static void
dzn_cmd_buffer_copy_img_chunk(struct dzn_cmd_buffer * cmdbuf,const VkCopyImageInfo2 * info,D3D12_RESOURCE_DESC * tmp_desc,D3D12_TEXTURE_COPY_LOCATION * tmp_loc,uint32_t r,VkImageAspectFlagBits aspect,uint32_t l)2538 dzn_cmd_buffer_copy_img_chunk(struct dzn_cmd_buffer *cmdbuf,
2539 const VkCopyImageInfo2 *info,
2540 D3D12_RESOURCE_DESC *tmp_desc,
2541 D3D12_TEXTURE_COPY_LOCATION *tmp_loc,
2542 uint32_t r,
2543 VkImageAspectFlagBits aspect,
2544 uint32_t l)
2545 {
2546 struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
2547 struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk);
2548 VK_FROM_HANDLE(dzn_image, src, info->srcImage);
2549 VK_FROM_HANDLE(dzn_image, dst, info->dstImage);
2550
2551 ID3D12Device4 *dev = device->dev;
2552 ID3D12GraphicsCommandList1 *cmdlist = cmdbuf->cmdlist;
2553
2554 VkImageCopy2 region = info->pRegions[r];
2555 dzn_image_align_extent(src, ®ion.extent);
2556
2557 const VkImageSubresourceLayers *src_subres = ®ion.srcSubresource;
2558 const VkImageSubresourceLayers *dst_subres = ®ion.dstSubresource;
2559 VkFormat src_format =
2560 dzn_image_get_plane_format(src->vk.format, aspect);
2561 VkFormat dst_format =
2562 dzn_image_get_plane_format(dst->vk.format, aspect);
2563
2564 enum pipe_format src_pfmt = vk_format_to_pipe_format(src_format);
2565 uint32_t src_blkw = util_format_get_blockwidth(src_pfmt);
2566 uint32_t src_blkh = util_format_get_blockheight(src_pfmt);
2567 uint32_t src_blkd = util_format_get_blockdepth(src_pfmt);
2568 enum pipe_format dst_pfmt = vk_format_to_pipe_format(dst_format);
2569 uint32_t dst_blkw = util_format_get_blockwidth(dst_pfmt);
2570 uint32_t dst_blkh = util_format_get_blockheight(dst_pfmt);
2571 uint32_t dst_blkd = util_format_get_blockdepth(dst_pfmt);
2572 uint32_t dst_z = region.dstOffset.z, src_z = region.srcOffset.z;
2573 uint32_t depth = region.extent.depth;
2574 uint32_t dst_l = l, src_l = l;
2575
2576 assert(src_subres->aspectMask == dst_subres->aspectMask);
2577
2578 if (src->vk.image_type == VK_IMAGE_TYPE_3D &&
2579 dst->vk.image_type == VK_IMAGE_TYPE_2D) {
2580 assert(src_subres->layerCount == 1);
2581 src_l = 0;
2582 src_z += l;
2583 depth = 1;
2584 } else if (src->vk.image_type == VK_IMAGE_TYPE_2D &&
2585 dst->vk.image_type == VK_IMAGE_TYPE_3D) {
2586 assert(dst_subres->layerCount == 1);
2587 dst_l = 0;
2588 dst_z += l;
2589 depth = 1;
2590 } else {
2591 assert(src_subres->layerCount == dst_subres->layerCount);
2592 }
2593
2594 D3D12_TEXTURE_COPY_LOCATION dst_loc = dzn_image_get_copy_loc(dst, dst_subres, aspect, dst_l);
2595 D3D12_TEXTURE_COPY_LOCATION src_loc = dzn_image_get_copy_loc(src, src_subres, aspect, src_l);
2596
2597 D3D12_BOX src_box = {
2598 .left = (UINT)MAX2(region.srcOffset.x, 0),
2599 .top = (UINT)MAX2(region.srcOffset.y, 0),
2600 .front = (UINT)MAX2(src_z, 0),
2601 .right = (UINT)region.srcOffset.x + region.extent.width,
2602 .bottom = (UINT)region.srcOffset.y + region.extent.height,
2603 .back = (UINT)src_z + depth,
2604 };
2605
2606 if (!tmp_loc->pResource) {
2607 ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_loc,
2608 region.dstOffset.x,
2609 region.dstOffset.y,
2610 dst_z, &src_loc,
2611 &src_box);
2612 return;
2613 }
2614
2615 tmp_desc->Format =
2616 dzn_image_get_placed_footprint_format(pdev, src->vk.format, aspect);
2617 tmp_desc->Width = region.extent.width;
2618 tmp_desc->Height = region.extent.height;
2619
2620 ID3D12Device1_GetCopyableFootprints(dev, tmp_desc,
2621 0, 1, 0,
2622 &tmp_loc->PlacedFootprint,
2623 NULL, NULL, NULL);
2624
2625 tmp_loc->PlacedFootprint.Footprint.Depth = depth;
2626
2627 if (r > 0 || l > 0) {
2628 if (cmdbuf->enhanced_barriers) {
2629 dzn_cmd_buffer_buffer_barrier(cmdbuf, tmp_loc->pResource,
2630 D3D12_BARRIER_SYNC_COPY, D3D12_BARRIER_SYNC_COPY,
2631 D3D12_BARRIER_ACCESS_COPY_SOURCE, D3D12_BARRIER_ACCESS_COPY_DEST);
2632 } else {
2633 dzn_cmd_buffer_queue_transition_barriers(cmdbuf, tmp_loc->pResource, 0, 1,
2634 D3D12_RESOURCE_STATE_COPY_SOURCE,
2635 D3D12_RESOURCE_STATE_COPY_DEST,
2636 DZN_QUEUE_TRANSITION_FLUSH);
2637 }
2638 }
2639
2640 ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, tmp_loc, 0, 0, 0, &src_loc, &src_box);
2641
2642 if (cmdbuf->enhanced_barriers) {
2643 dzn_cmd_buffer_buffer_barrier(cmdbuf, tmp_loc->pResource,
2644 D3D12_BARRIER_SYNC_COPY, D3D12_BARRIER_SYNC_COPY,
2645 D3D12_BARRIER_ACCESS_COPY_DEST, D3D12_BARRIER_ACCESS_COPY_SOURCE);
2646 } else {
2647 dzn_cmd_buffer_queue_transition_barriers(cmdbuf, tmp_loc->pResource, 0, 1,
2648 D3D12_RESOURCE_STATE_COPY_DEST,
2649 D3D12_RESOURCE_STATE_COPY_SOURCE,
2650 DZN_QUEUE_TRANSITION_FLUSH);
2651 }
2652
2653 tmp_desc->Format =
2654 dzn_image_get_placed_footprint_format(pdev, dst->vk.format, aspect);
2655 if (src_blkw != dst_blkw)
2656 tmp_desc->Width = DIV_ROUND_UP(region.extent.width, src_blkw) * dst_blkw;
2657 if (src_blkh != dst_blkh)
2658 tmp_desc->Height = DIV_ROUND_UP(region.extent.height, src_blkh) * dst_blkh;
2659
2660 ID3D12Device1_GetCopyableFootprints(device->dev, tmp_desc,
2661 0, 1, 0,
2662 &tmp_loc->PlacedFootprint,
2663 NULL, NULL, NULL);
2664
2665 if (src_blkd != dst_blkd) {
2666 tmp_loc->PlacedFootprint.Footprint.Depth =
2667 DIV_ROUND_UP(depth, src_blkd) * dst_blkd;
2668 } else {
2669 tmp_loc->PlacedFootprint.Footprint.Depth = region.extent.depth;
2670 }
2671
2672 D3D12_BOX tmp_box = {
2673 .left = 0,
2674 .top = 0,
2675 .front = 0,
2676 .right = tmp_loc->PlacedFootprint.Footprint.Width,
2677 .bottom = tmp_loc->PlacedFootprint.Footprint.Height,
2678 .back = tmp_loc->PlacedFootprint.Footprint.Depth,
2679 };
2680
2681 ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_loc,
2682 region.dstOffset.x,
2683 region.dstOffset.y,
2684 dst_z,
2685 tmp_loc, &tmp_box);
2686 }
2687
2688 static void
dzn_cmd_buffer_blit_prepare_src_view(struct dzn_cmd_buffer * cmdbuf,VkImage image,VkImageAspectFlagBits aspect,const VkImageSubresourceLayers * subres,struct dzn_descriptor_heap * heap,uint32_t heap_slot)2689 dzn_cmd_buffer_blit_prepare_src_view(struct dzn_cmd_buffer *cmdbuf,
2690 VkImage image,
2691 VkImageAspectFlagBits aspect,
2692 const VkImageSubresourceLayers *subres,
2693 struct dzn_descriptor_heap *heap,
2694 uint32_t heap_slot)
2695 {
2696 struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
2697 VK_FROM_HANDLE(dzn_image, img, image);
2698 VkImageViewCreateInfo iview_info = {
2699 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
2700 .image = image,
2701 .format = img->vk.format,
2702 .subresourceRange = {
2703 .aspectMask = (VkImageAspectFlags)aspect,
2704 .baseMipLevel = subres->mipLevel,
2705 .levelCount = 1,
2706 .baseArrayLayer = subres->baseArrayLayer,
2707 .layerCount = subres->layerCount,
2708 },
2709 };
2710
2711 switch (img->vk.image_type) {
2712 case VK_IMAGE_TYPE_1D:
2713 iview_info.viewType = img->vk.array_layers > 1 ?
2714 VK_IMAGE_VIEW_TYPE_1D_ARRAY : VK_IMAGE_VIEW_TYPE_1D;
2715 break;
2716 case VK_IMAGE_TYPE_2D:
2717 iview_info.viewType = img->vk.array_layers > 1 ?
2718 VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D;
2719 break;
2720 case VK_IMAGE_TYPE_3D:
2721 iview_info.viewType = VK_IMAGE_VIEW_TYPE_3D;
2722 break;
2723 default:
2724 unreachable("Invalid type");
2725 }
2726
2727 struct dzn_image_view iview;
2728 dzn_image_view_init(device, &iview, &iview_info);
2729 dzn_descriptor_heap_write_image_view_desc(device, heap, heap_slot, false, false, &iview);
2730 dzn_image_view_finish(&iview);
2731
2732 D3D12_GPU_DESCRIPTOR_HANDLE handle =
2733 dzn_descriptor_heap_get_gpu_handle(heap, heap_slot);
2734 ID3D12GraphicsCommandList1_SetGraphicsRootDescriptorTable(cmdbuf->cmdlist, 0, handle);
2735 }
2736
/* Bind one mip level / layer of the blit destination as the render target
 * (color aspects) or depth/stencil target (depth/stencil aspects) for the
 * upcoming blit draw. */
static void
dzn_cmd_buffer_blit_prepare_dst_view(struct dzn_cmd_buffer *cmdbuf,
                                     struct dzn_image *img,
                                     VkImageAspectFlagBits aspect,
                                     uint32_t level, uint32_t layer,
                                     const VkOffset3D *dst_offsets)
{
   bool ds = aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);
   VkImageSubresourceRange range = {
      .aspectMask = (VkImageAspectFlags)aspect,
      .baseMipLevel = level,
      .levelCount = 1,
      .baseArrayLayer = layer,
      .layerCount = 1,
   };

   if (ds) {
      D3D12_DEPTH_STENCIL_VIEW_DESC desc = dzn_image_get_dsv_desc(img, &range, 0);
      D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_dsv(cmdbuf, img, &desc);
      ID3D12GraphicsCommandList1_OMSetRenderTargets(cmdbuf->cmdlist, 0, NULL, true, &handle);

      if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT) {
         const struct dzn_physical_device *pdev = container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk);
         /* When the device can't set the stencil reference from the pixel
          * shader, the stencil blit is emulated by draws that set bits one
          * at a time (see dzn_cmd_buffer_blit_region), so the destination
          * rect must start from all-zero stencil. */
         if (!pdev->options.PSSpecifiedStencilRefSupported) {
            D3D12_RECT clear_rect = {
               .left = dst_offsets[0].x,
               .right = dst_offsets[1].x,
               .top = dst_offsets[0].y,
               .bottom = dst_offsets[1].y,
            };
            ID3D12GraphicsCommandList1_ClearDepthStencilView(cmdbuf->cmdlist, handle, D3D12_CLEAR_FLAG_STENCIL, 0.f, 0, 1, &clear_rect);
         }
      }
   } else {
      D3D12_RENDER_TARGET_VIEW_DESC desc = dzn_image_get_rtv_desc(img, &range, 0);
      D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_rtv(cmdbuf, img, &desc);
      ID3D12GraphicsCommandList1_OMSetRenderTargets(cmdbuf->cmdlist, 1, &handle, false, NULL);
   }
}
2776
/* Look up (or build) the meta-blit pipeline matching this src/dst/aspect
 * combination and bind its root signature and PSO. */
static void
dzn_cmd_buffer_blit_set_pipeline(struct dzn_cmd_buffer *cmdbuf,
                                 const struct dzn_image *src,
                                 const struct dzn_image *dst,
                                 VkImageAspectFlagBits aspect,
                                 VkFilter filter,
                                 enum dzn_blit_resolve_mode resolve_mode,
                                 uint32_t stencil_bit)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk);
   /* stencil_bit != 0xf (a single-bit emulation pass) is only expected when
    * PS-specified stencil refs are unsupported. */
   assert(pdev->options.PSSpecifiedStencilRefSupported || aspect != VK_IMAGE_ASPECT_STENCIL_BIT || stencil_bit != 0xf);
   enum pipe_format pfmt = vk_format_to_pipe_format(dst->vk.format);
   VkImageUsageFlags usage =
      vk_format_is_depth_or_stencil(dst->vk.format) ?
      VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT :
      VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
   /* The cache key encodes everything that changes the generated shaders:
    * output format/type, fragment output location (color vs depth vs
    * stencil), sampler dimensionality, sample count, resolve mode, etc. */
   struct dzn_meta_blit_key ctx_key = {
      .out_format = dzn_image_get_dxgi_format(pdev, dst->vk.format, usage, aspect),
      .samples = (uint32_t)src->vk.samples,
      .loc = (uint32_t)(aspect == VK_IMAGE_ASPECT_DEPTH_BIT ?
                        FRAG_RESULT_DEPTH :
                        aspect == VK_IMAGE_ASPECT_STENCIL_BIT ?
                        FRAG_RESULT_STENCIL :
                        FRAG_RESULT_DATA0),
      .out_type = (uint32_t)(util_format_is_pure_uint(pfmt) ? GLSL_TYPE_UINT :
                             util_format_is_pure_sint(pfmt) ? GLSL_TYPE_INT :
                             aspect == VK_IMAGE_ASPECT_STENCIL_BIT ? GLSL_TYPE_UINT :
                             GLSL_TYPE_FLOAT),
      .sampler_dim = (uint32_t)(src->vk.image_type == VK_IMAGE_TYPE_1D ? GLSL_SAMPLER_DIM_1D :
                                src->vk.image_type == VK_IMAGE_TYPE_2D && src->vk.samples == 1 ? GLSL_SAMPLER_DIM_2D :
                                src->vk.image_type == VK_IMAGE_TYPE_2D && src->vk.samples > 1 ? GLSL_SAMPLER_DIM_MS :
                                GLSL_SAMPLER_DIM_3D),
      .src_is_array = src->vk.array_layers > 1,
      .resolve_mode = resolve_mode,
      /* Filter doesn't need to be part of the key if we're not embedding a static sampler */
      .linear_filter = filter == VK_FILTER_LINEAR && device->support_static_samplers,
      .stencil_bit = stencil_bit,
      .padding = 0,
   };

   const struct dzn_meta_blit *ctx =
      dzn_meta_blits_get_context(device, &ctx_key);
   assert(ctx);

   /* Binding the meta pipeline clobbers the application's graphics
    * pipeline state, so mark it dirty for the next regular draw. */
   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
   if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].root_sig != ctx->root_sig) {
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].root_sig = ctx->root_sig;
      ID3D12GraphicsCommandList1_SetGraphicsRootSignature(cmdbuf->cmdlist, ctx->root_sig);
   }
   ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, ctx->pipeline_state);
}
2829
2830 static void
dzn_cmd_buffer_blit_set_2d_region(struct dzn_cmd_buffer * cmdbuf,const struct dzn_image * src,const VkImageSubresourceLayers * src_subres,const VkOffset3D * src_offsets,const struct dzn_image * dst,const VkImageSubresourceLayers * dst_subres,const VkOffset3D * dst_offsets,bool normalize_src_coords)2831 dzn_cmd_buffer_blit_set_2d_region(struct dzn_cmd_buffer *cmdbuf,
2832 const struct dzn_image *src,
2833 const VkImageSubresourceLayers *src_subres,
2834 const VkOffset3D *src_offsets,
2835 const struct dzn_image *dst,
2836 const VkImageSubresourceLayers *dst_subres,
2837 const VkOffset3D *dst_offsets,
2838 bool normalize_src_coords)
2839 {
2840 uint32_t dst_w = u_minify(dst->vk.extent.width, dst_subres->mipLevel);
2841 uint32_t dst_h = u_minify(dst->vk.extent.height, dst_subres->mipLevel);
2842 uint32_t src_w = u_minify(src->vk.extent.width, src_subres->mipLevel);
2843 uint32_t src_h = u_minify(src->vk.extent.height, src_subres->mipLevel);
2844
2845 float dst_pos[4] = {
2846 (2 * (float)dst_offsets[0].x / (float)dst_w) - 1.0f, -((2 * (float)dst_offsets[0].y / (float)dst_h) - 1.0f),
2847 (2 * (float)dst_offsets[1].x / (float)dst_w) - 1.0f, -((2 * (float)dst_offsets[1].y / (float)dst_h) - 1.0f),
2848 };
2849
2850 float src_pos[4] = {
2851 (float)src_offsets[0].x, (float)src_offsets[0].y,
2852 (float)src_offsets[1].x, (float)src_offsets[1].y,
2853 };
2854
2855 if (normalize_src_coords) {
2856 src_pos[0] /= src_w;
2857 src_pos[1] /= src_h;
2858 src_pos[2] /= src_w;
2859 src_pos[3] /= src_h;
2860 }
2861
2862 float coords[] = {
2863 dst_pos[0], dst_pos[1], src_pos[0], src_pos[1],
2864 dst_pos[2], dst_pos[1], src_pos[2], src_pos[1],
2865 dst_pos[0], dst_pos[3], src_pos[0], src_pos[3],
2866 dst_pos[2], dst_pos[3], src_pos[2], src_pos[3],
2867 };
2868
2869 ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 1, ARRAY_SIZE(coords), coords, 0);
2870
2871 D3D12_VIEWPORT vp = {
2872 .TopLeftX = 0,
2873 .TopLeftY = 0,
2874 .Width = (float)dst_w,
2875 .Height = (float)dst_h,
2876 .MinDepth = 0,
2877 .MaxDepth = 1,
2878 };
2879 ID3D12GraphicsCommandList1_RSSetViewports(cmdbuf->cmdlist, 1, &vp);
2880
2881 D3D12_RECT scissor = {
2882 .left = MIN2(dst_offsets[0].x, dst_offsets[1].x),
2883 .top = MIN2(dst_offsets[0].y, dst_offsets[1].y),
2884 .right = MAX2(dst_offsets[0].x, dst_offsets[1].x),
2885 .bottom = MAX2(dst_offsets[0].y, dst_offsets[1].y),
2886 };
2887 ID3D12GraphicsCommandList1_RSSetScissorRects(cmdbuf->cmdlist, 1, &scissor);
2888 }
2889
/* Issue the layout transitions surrounding a blit: with post == false, move
 * src to a shader-readable layout and dst to a render-target (or
 * depth/stencil-write) layout; with post == true, undo those transitions.
 * restore_src_layout/restore_dst_layout carry the enhanced-barrier layouts
 * to restore between the pre and post calls. */
static void
dzn_cmd_buffer_blit_issue_barriers(struct dzn_cmd_buffer *cmdbuf,
                                   struct dzn_image *src, VkImageLayout src_layout,
                                   const VkImageSubresourceLayers *src_subres,
                                   struct dzn_image *dst, VkImageLayout dst_layout,
                                   const VkImageSubresourceLayers *dst_subres,
                                   VkImageAspectFlagBits aspect,
                                   D3D12_BARRIER_LAYOUT *restore_src_layout,
                                   D3D12_BARRIER_LAYOUT *restore_dst_layout,
                                   bool post)
{
   /* The blit touches exactly one mip level on each side. */
   VkImageSubresourceRange src_range = {
      .aspectMask = aspect,
      .baseMipLevel = src_subres->mipLevel,
      .levelCount = 1,
      .baseArrayLayer = src_subres->baseArrayLayer,
      .layerCount = src_subres->layerCount,
   };
   VkImageSubresourceRange dst_range = {
      .aspectMask = aspect,
      .baseMipLevel = dst_subres->mipLevel,
      .levelCount = 1,
      .baseArrayLayer = dst_subres->baseArrayLayer,
      .layerCount = dst_subres->layerCount,
   };

   if (!post) {
      if (cmdbuf->enhanced_barriers) {
         /* Depth/stencil destinations are written through a DSV, everything
          * else through an RTV. */
         D3D12_BARRIER_LAYOUT dst_new_layout = (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) ?
            D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE : D3D12_BARRIER_LAYOUT_RENDER_TARGET;
         *restore_src_layout = dzn_cmd_buffer_require_layout(cmdbuf, src, src_layout,
                                                             D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ,
                                                             &src_range);
         *restore_dst_layout = dzn_cmd_buffer_require_layout(cmdbuf, dst,
                                                             dst_layout,
                                                             dst_new_layout,
                                                             &dst_range);
      } else {
         VkImageLayout dst_new_layout = (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) ?
            VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
         dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, src, &src_range,
                                                            src_layout,
                                                            VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
                                                            DZN_QUEUE_TRANSITION_FLUSH);
         dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, dst, &dst_range,
                                                            dst_layout,
                                                            dst_new_layout,
                                                            DZN_QUEUE_TRANSITION_FLUSH);
      }
   } else {
      if (cmdbuf->enhanced_barriers) {
         /* Restore the layouts captured by the pre-blit call, using the
          * sync/access scopes matching how the blit used each image. */
         dzn_cmd_buffer_restore_layout(cmdbuf, src,
                                       D3D12_BARRIER_SYNC_PIXEL_SHADING, D3D12_BARRIER_ACCESS_SHADER_RESOURCE,
                                       D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ, *restore_src_layout,
                                       &src_range);
         if ((aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) {
            dzn_cmd_buffer_restore_layout(cmdbuf, dst,
                                          D3D12_BARRIER_SYNC_DEPTH_STENCIL, D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE,
                                          D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE, *restore_dst_layout,
                                          &dst_range);
         } else {
            dzn_cmd_buffer_restore_layout(cmdbuf, dst,
                                          D3D12_BARRIER_SYNC_RENDER_TARGET, D3D12_BARRIER_ACCESS_RENDER_TARGET,
                                          D3D12_BARRIER_LAYOUT_RENDER_TARGET, *restore_dst_layout,
                                          &dst_range);
         }
      } else {
         /* Legacy barriers: mirror the pre-blit transitions in reverse. */
         VkImageLayout dst_new_layout = (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) ?
            VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
         dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, src, &src_range,
                                                            VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
                                                            src_layout,
                                                            DZN_QUEUE_TRANSITION_FLUSH);
         dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, dst, &dst_range,
                                                            dst_new_layout,
                                                            dst_layout,
                                                            DZN_QUEUE_TRANSITION_FLUSH);
      }
   }
}
2970
2971 static void
dzn_cmd_buffer_blit_region(struct dzn_cmd_buffer * cmdbuf,const VkBlitImageInfo2 * info,struct dzn_descriptor_heap * heap,uint32_t * heap_slot,struct dzn_descriptor_heap * sampler_heap,uint32_t sampler_heap_slot,uint32_t r)2972 dzn_cmd_buffer_blit_region(struct dzn_cmd_buffer *cmdbuf,
2973 const VkBlitImageInfo2 *info,
2974 struct dzn_descriptor_heap *heap,
2975 uint32_t *heap_slot,
2976 struct dzn_descriptor_heap *sampler_heap,
2977 uint32_t sampler_heap_slot,
2978 uint32_t r)
2979 {
2980 VK_FROM_HANDLE(dzn_image, src, info->srcImage);
2981 VK_FROM_HANDLE(dzn_image, dst, info->dstImage);
2982
2983 const VkImageBlit2 *region = &info->pRegions[r];
2984 bool src_is_3d = src->vk.image_type == VK_IMAGE_TYPE_3D;
2985 bool dst_is_3d = dst->vk.image_type == VK_IMAGE_TYPE_3D;
2986 const struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
2987 const struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk);
2988 bool support_stencil_blit = pdev->options.PSSpecifiedStencilRefSupported;
2989 uint32_t stencil_bit = support_stencil_blit ? 0xf : 0;
2990 uint32_t stencil_bit_root_param_slot = 2;
2991 assert(device->support_static_samplers == (sampler_heap == NULL));
2992
2993 dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) {
2994 D3D12_BARRIER_LAYOUT restore_src_layout = D3D12_BARRIER_LAYOUT_COMMON;
2995 D3D12_BARRIER_LAYOUT restore_dst_layout = D3D12_BARRIER_LAYOUT_COMMON;
2996 dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, info->filter, dzn_blit_resolve_none, stencil_bit);
2997 dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
2998 src, info->srcImageLayout, ®ion->srcSubresource,
2999 dst, info->dstImageLayout, ®ion->dstSubresource,
3000 aspect, &restore_src_layout, &restore_dst_layout, false);
3001 dzn_cmd_buffer_blit_prepare_src_view(cmdbuf, info->srcImage,
3002 aspect, ®ion->srcSubresource,
3003 heap, (*heap_slot)++);
3004 dzn_cmd_buffer_blit_set_2d_region(cmdbuf,
3005 src, ®ion->srcSubresource, region->srcOffsets,
3006 dst, ®ion->dstSubresource, region->dstOffsets,
3007 src->vk.samples == 1);
3008
3009 uint32_t dst_depth =
3010 region->dstOffsets[1].z > region->dstOffsets[0].z ?
3011 region->dstOffsets[1].z - region->dstOffsets[0].z :
3012 region->dstOffsets[0].z - region->dstOffsets[1].z;
3013 uint32_t src_depth =
3014 region->srcOffsets[1].z > region->srcOffsets[0].z ?
3015 region->srcOffsets[1].z - region->srcOffsets[0].z :
3016 region->srcOffsets[0].z - region->srcOffsets[1].z;
3017
3018 uint32_t layer_count = dzn_get_layer_count(src, ®ion->srcSubresource);
3019 uint32_t dst_level = region->dstSubresource.mipLevel;
3020
3021 float src_slice_step = src_is_3d ? (float)src_depth / dst_depth : 1;
3022 if (region->srcOffsets[0].z > region->srcOffsets[1].z)
3023 src_slice_step = -src_slice_step;
3024 float src_z_coord =
3025 src_is_3d ? (float)region->srcOffsets[0].z + (src_slice_step * 0.5f) : 0;
3026 uint32_t slice_count = dst_is_3d ? dst_depth : layer_count;
3027 uint32_t dst_z_coord =
3028 dst_is_3d ? region->dstOffsets[0].z : region->dstSubresource.baseArrayLayer;
3029 if (region->dstOffsets[0].z > region->dstOffsets[1].z)
3030 dst_z_coord--;
3031
3032 uint32_t dst_slice_step = region->dstOffsets[0].z < region->dstOffsets[1].z ?
3033 1 : -1;
3034
3035 /* Normalize the src coordinates/step */
3036 if (src_is_3d) {
3037 src_z_coord /= src->vk.extent.depth;
3038 src_slice_step /= src->vk.extent.depth;
3039 }
3040
3041 for (uint32_t slice = 0; slice < slice_count; slice++) {
3042 dzn_cmd_buffer_blit_prepare_dst_view(cmdbuf, dst, aspect, dst_level, dst_z_coord, region->dstOffsets);
3043 ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 1, 1, &src_z_coord, 16);
3044 if (!device->support_static_samplers) {
3045 ID3D12GraphicsCommandList1_SetGraphicsRootDescriptorTable(cmdbuf->cmdlist, 2, dzn_descriptor_heap_get_gpu_handle(sampler_heap, sampler_heap_slot));
3046 stencil_bit_root_param_slot++;
3047 }
3048 if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT && !support_stencil_blit) {
3049 cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF;
3050 ID3D12GraphicsCommandList1_OMSetStencilRef(cmdbuf->cmdlist, 0xff);
3051 for (stencil_bit = 0; stencil_bit < 8; ++stencil_bit) {
3052 dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, info->filter, dzn_blit_resolve_none, stencil_bit);
3053 ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstant(cmdbuf->cmdlist, stencil_bit_root_param_slot, (1 << stencil_bit), 0);
3054 ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0);
3055 }
3056 } else {
3057 ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0);
3058 }
3059 src_z_coord += src_slice_step;
3060 dst_z_coord += dst_slice_step;
3061 }
3062
3063 dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
3064 src, info->srcImageLayout, ®ion->srcSubresource,
3065 dst, info->dstImageLayout, ®ion->dstSubresource,
3066 aspect, &restore_src_layout, &restore_dst_layout, true);
3067 }
3068 }
3069
3070 static enum dzn_blit_resolve_mode
get_blit_resolve_mode(VkResolveModeFlagBits mode)3071 get_blit_resolve_mode(VkResolveModeFlagBits mode)
3072 {
3073 switch (mode) {
3074 case VK_RESOLVE_MODE_AVERAGE_BIT: return dzn_blit_resolve_average;
3075 case VK_RESOLVE_MODE_MIN_BIT: return dzn_blit_resolve_min;
3076 case VK_RESOLVE_MODE_MAX_BIT: return dzn_blit_resolve_max;
3077 case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT: return dzn_blit_resolve_sample_zero;
3078 default: unreachable("Unexpected resolve mode");
3079 }
3080 }
3081
3082 static void
dzn_cmd_buffer_resolve_region(struct dzn_cmd_buffer * cmdbuf,const VkResolveImageInfo2 * info,VkResolveModeFlags mode,struct dzn_descriptor_heap * heap,uint32_t * heap_slot,struct dzn_descriptor_heap * sampler_heap,uint32_t sampler_heap_slot,uint32_t r)3083 dzn_cmd_buffer_resolve_region(struct dzn_cmd_buffer *cmdbuf,
3084 const VkResolveImageInfo2 *info,
3085 VkResolveModeFlags mode,
3086 struct dzn_descriptor_heap *heap,
3087 uint32_t *heap_slot,
3088 struct dzn_descriptor_heap *sampler_heap,
3089 uint32_t sampler_heap_slot,
3090 uint32_t r)
3091 {
3092 VK_FROM_HANDLE(dzn_image, src, info->srcImage);
3093 VK_FROM_HANDLE(dzn_image, dst, info->dstImage);
3094
3095 const VkImageResolve2 *region = &info->pRegions[r];
3096
3097 const struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
3098 const struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk);
3099 bool support_stencil_blit = pdev->options.PSSpecifiedStencilRefSupported;
3100 uint32_t stencil_bit = support_stencil_blit ? 0xf : 0;
3101 uint32_t stencil_bit_root_param_slot = 2;
3102 assert(device->support_static_samplers == (sampler_heap == NULL));
3103 enum dzn_blit_resolve_mode resolve_mode = get_blit_resolve_mode(mode);
3104
3105 dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) {
3106 D3D12_BARRIER_LAYOUT restore_src_layout = D3D12_BARRIER_LAYOUT_COMMON;
3107 D3D12_BARRIER_LAYOUT restore_dst_layout = D3D12_BARRIER_LAYOUT_COMMON;
3108 dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, VK_FILTER_NEAREST, resolve_mode, stencil_bit);
3109 dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
3110 src, info->srcImageLayout, ®ion->srcSubresource,
3111 dst, info->dstImageLayout, ®ion->dstSubresource,
3112 aspect, &restore_src_layout, &restore_dst_layout, false);
3113 dzn_cmd_buffer_blit_prepare_src_view(cmdbuf, info->srcImage, aspect,
3114 ®ion->srcSubresource,
3115 heap, (*heap_slot)++);
3116
3117 VkOffset3D src_offset[2] = {
3118 {
3119 .x = region->srcOffset.x,
3120 .y = region->srcOffset.y,
3121 },
3122 {
3123 .x = (int32_t)(region->srcOffset.x + region->extent.width),
3124 .y = (int32_t)(region->srcOffset.y + region->extent.height),
3125 },
3126 };
3127 VkOffset3D dst_offset[2] = {
3128 {
3129 .x = region->dstOffset.x,
3130 .y = region->dstOffset.y,
3131 },
3132 {
3133 .x = (int32_t)(region->dstOffset.x + region->extent.width),
3134 .y = (int32_t)(region->dstOffset.y + region->extent.height),
3135 },
3136 };
3137
3138 dzn_cmd_buffer_blit_set_2d_region(cmdbuf,
3139 src, ®ion->srcSubresource, src_offset,
3140 dst, ®ion->dstSubresource, dst_offset,
3141 false);
3142
3143 uint32_t layer_count = dzn_get_layer_count(src, ®ion->srcSubresource);
3144 for (uint32_t layer = 0; layer < layer_count; layer++) {
3145 float src_z_coord = layer;
3146
3147 dzn_cmd_buffer_blit_prepare_dst_view(cmdbuf,
3148 dst, aspect, region->dstSubresource.mipLevel,
3149 region->dstSubresource.baseArrayLayer + layer,
3150 dst_offset);
3151 ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 1, 1, &src_z_coord, 16);
3152 if (!device->support_static_samplers) {
3153 ID3D12GraphicsCommandList1_SetGraphicsRootDescriptorTable(cmdbuf->cmdlist, 2, dzn_descriptor_heap_get_gpu_handle(sampler_heap, sampler_heap_slot));
3154 stencil_bit_root_param_slot++;
3155 }
3156 if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT && !support_stencil_blit) {
3157 cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF;
3158 ID3D12GraphicsCommandList1_OMSetStencilRef(cmdbuf->cmdlist8, 0xff);
3159 for (stencil_bit = 0; stencil_bit < 8; ++stencil_bit) {
3160 dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, VK_FILTER_NEAREST, resolve_mode, stencil_bit);
3161 ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstant(cmdbuf->cmdlist, stencil_bit_root_param_slot, (1 << stencil_bit), 0);
3162 ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0);
3163 }
3164 } else {
3165 ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0);
3166 }
3167 }
3168
3169 dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
3170 src, info->srcImageLayout, ®ion->srcSubresource,
3171 dst, info->dstImageLayout, ®ion->dstSubresource,
3172 aspect, &restore_src_layout, &restore_dst_layout, true);
3173 }
3174 }
3175
/* Flush pending pipeline state for the given bind point: root signature,
 * descriptor heaps (bindless), primitive topology, PSO and view-instancing
 * mask. No-op when no pipeline has been bound yet.
 */
static void
dzn_cmd_buffer_update_pipeline(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint)
{
   const struct dzn_pipeline *pipeline = cmdbuf->state.bindpoint[bindpoint].pipeline;

   if (!pipeline)
      return;

   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   /* Remember the currently bound PSO so we can skip a redundant
    * SetPipelineState below. */
   ID3D12PipelineState *old_pipeline_state =
      cmdbuf->state.pipeline ? cmdbuf->state.pipeline->state : NULL;

   uint32_t view_instance_mask = 0;
   if (cmdbuf->state.bindpoint[bindpoint].dirty & DZN_CMD_BINDPOINT_DIRTY_PIPELINE) {
      if (cmdbuf->state.bindpoint[bindpoint].root_sig != pipeline->root.sig) {
         cmdbuf->state.bindpoint[bindpoint].root_sig = pipeline->root.sig;
         /* Changing root signature always requires re-binding descriptor heaps */
         cmdbuf->state.bindpoint[bindpoint].dirty |= DZN_CMD_BINDPOINT_DIRTY_HEAPS;

         if (device->bindless) {
            /* Note: The D3D12 spec for descriptor heap indexing requires that the descriptor heaps
             * are bound *before* the root signature. */
            bool bind_heaps = false;
            dzn_foreach_pool_type(type) {
               if (cmdbuf->state.heaps[type] != &device->device_heaps[type].heap) {
                  bind_heaps = true;
                  cmdbuf->state.heaps[type] = &device->device_heaps[type].heap;
               }
            }
            if (bind_heaps) {
               ID3D12DescriptorHeap *heaps[NUM_POOL_TYPES];
               dzn_foreach_pool_type(type)
                  heaps[type] = cmdbuf->state.heaps[type]->heap;
               ID3D12GraphicsCommandList1_SetDescriptorHeaps(cmdbuf->cmdlist, NUM_POOL_TYPES, heaps);
            }
         }

         if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS)
            ID3D12GraphicsCommandList1_SetGraphicsRootSignature(cmdbuf->cmdlist, pipeline->root.sig);
         else
            ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, pipeline->root.sig);
      }
      if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
         struct dzn_graphics_pipeline *gfx =
            (struct dzn_graphics_pipeline *)pipeline;
         ID3D12GraphicsCommandList1_IASetPrimitiveTopology(cmdbuf->cmdlist, gfx->ia.topology);
         /* Resolve the dynamic-state pipeline variant to use. */
         dzn_graphics_pipeline_get_state(gfx, &cmdbuf->state.pipeline_variant);
         /* Without native view instancing, multiview is lowered elsewhere and
          * only view 0 is enabled here. */
         if (gfx->multiview.native_view_instancing)
            view_instance_mask = gfx->multiview.view_mask;
         else
            view_instance_mask = 1;

         if (gfx->zsa.dynamic_depth_bias && gfx->use_gs_for_polygon_mode_point)
            cmdbuf->state.bindpoint[bindpoint].dirty |= DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
      }
   }

   ID3D12PipelineState *new_pipeline_state = pipeline->state;

   if (old_pipeline_state != new_pipeline_state) {
      ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, pipeline->state);
      cmdbuf->state.pipeline = pipeline;
   }

   /* Deferring this until after the pipeline has been set due to an NVIDIA driver bug
    * when view instancing mask is set with no pipeline bound. */
   if (view_instance_mask)
      ID3D12GraphicsCommandList1_SetViewInstanceMask(cmdbuf->cmdlist, view_instance_mask);
}
3245
/* Flush descriptor state for the given bind point.
 *
 * Non-bindless path: allocate slots in the command buffer's staging heaps,
 * copy every bound set's descriptors (plus dynamic-buffer descriptors with
 * their current offsets applied) into them, then point the root descriptor
 * tables at the result.
 *
 * Bindless path: bind the device-global heaps and set root SRVs to the
 * GPU VAs of each set's descriptor buffer; dynamic buffers are written to a
 * freshly allocated upload buffer each time they are dirty.
 */
static void
dzn_cmd_buffer_update_heaps(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   struct dzn_descriptor_state *desc_state =
      &cmdbuf->state.bindpoint[bindpoint].desc_state;
   struct dzn_descriptor_heap *new_heaps[NUM_POOL_TYPES] = {
      desc_state->heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV],
      desc_state->heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]
   };
   uint32_t new_heap_offsets[NUM_POOL_TYPES] = { 0 };
   bool update_root_desc_table[NUM_POOL_TYPES] = { 0 };
   const struct dzn_pipeline *pipeline =
      cmdbuf->state.bindpoint[bindpoint].pipeline;

   /* The set of dirty bits that are cleared by running this function. Notably,
    * for bindless, descriptor sets that are bound but unused by the currently
    * set pipeline are not processed, meaning their dirty bits should persist
    * until such a point as a pipeline does use them. For not-bindless,
    * all sets are processed. */
   uint32_t dirty_bits_bindless =
      (pipeline->dynamic_buffer_count ? DZN_CMD_BINDPOINT_DIRTY_DYNAMIC_BUFFERS : 0) |
      (((DZN_CMD_BINDPOINT_DIRTY_DESC_SET0 << pipeline->set_count) - 1) & DZN_CMD_BINDPOINT_DIRTY_DESC_SETS);
   uint32_t dirty_bits = (device->bindless ? dirty_bits_bindless : DZN_CMD_BINDPOINT_DIRTY_DESC_SETS | DZN_CMD_BINDPOINT_DIRTY_DYNAMIC_BUFFERS);
   if (!(cmdbuf->state.bindpoint[bindpoint].dirty & dirty_bits))
      return;

   dzn_foreach_pool_type (type) {
      if (device->bindless) {
         /* Bindless always uses the device-global heaps. */
         new_heaps[type] = &device->device_heaps[type].heap;
      } else {
         uint32_t desc_count = pipeline->desc_count[type];
         if (!desc_count)
            continue;

         struct dzn_descriptor_heap_pool *pool =
            type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV ?
            &cmdbuf->cbv_srv_uav_pool : &cmdbuf->sampler_pool;
         struct dzn_descriptor_heap *dst_heap = NULL;
         uint32_t dst_heap_offset = 0;

         /* Stage a fresh contiguous range for this pipeline's descriptors. */
         dzn_descriptor_heap_pool_alloc_slots(pool, device, desc_count,
                                              &dst_heap, &dst_heap_offset);
         new_heap_offsets[type] = dst_heap_offset;
         update_root_desc_table[type] = true;

         for (uint32_t s = 0; s < MAX_SETS; s++) {
            const struct dzn_descriptor_set *set = desc_state->sets[s].set;
            if (!set) continue;

            /* Copy the set's static descriptors at the offset the pipeline
             * layout assigned to this set. */
            uint32_t set_heap_offset = pipeline->sets[s].heap_offsets[type];
            uint32_t set_desc_count = MIN2(pipeline->sets[s].range_desc_count[type], set->heap_sizes[type]);
            if (set_desc_count) {
               dzn_descriptor_heap_copy(device, dst_heap, dst_heap_offset + set_heap_offset,
                                        &set->pool->heaps[type], set->heap_offsets[type],
                                        set_desc_count, type);
            }

            if (type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) {
               /* Dynamic buffers get their descriptors rewritten with the
                * currently bound dynamic offsets applied. */
               uint32_t dynamic_buffer_count = pipeline->sets[s].dynamic_buffer_count;
               for (uint32_t o = 0; o < dynamic_buffer_count; o++) {
                  struct dzn_buffer_desc bdesc = set->dynamic_buffers[o];
                  if (!bdesc.buffer)
                     continue;
                  bdesc.offset += desc_state->sets[s].dynamic_offsets[o];

                  bool primary_is_writable = bdesc.type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC;
                  uint32_t desc_heap_offset = pipeline->sets[s].dynamic_buffer_heap_offsets[o].primary;
                  dzn_descriptor_heap_write_buffer_desc(device, dst_heap,
                                                        dst_heap_offset + set_heap_offset + desc_heap_offset,
                                                        primary_is_writable, &bdesc);

                  /* Writable buffers may need a second, read-only alias
                   * descriptor (alt slot). */
                  if (pipeline->sets[s].dynamic_buffer_heap_offsets[o].alt != ~0) {
                     assert(primary_is_writable);
                     desc_heap_offset = pipeline->sets[s].dynamic_buffer_heap_offsets[o].alt;
                     dzn_descriptor_heap_write_buffer_desc(device, dst_heap,
                                                           dst_heap_offset + set_heap_offset + desc_heap_offset,
                                                           false, &bdesc);
                  }
               }
            }
         }

         new_heaps[type] = dst_heap;
      }
   }

   /* Re-bind the shader-visible heaps only if they actually changed. */
   if (new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] ||
       new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]) {
      ID3D12DescriptorHeap *desc_heaps[2];
      uint32_t num_desc_heaps = 0;
      if (new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV])
         desc_heaps[num_desc_heaps++] = new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]->heap;
      if (new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER])
         desc_heaps[num_desc_heaps++] = new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]->heap;
      ID3D12GraphicsCommandList1_SetDescriptorHeaps(cmdbuf->cmdlist, num_desc_heaps, desc_heaps);

      for (unsigned h = 0; h < ARRAY_SIZE(cmdbuf->state.heaps); h++)
         cmdbuf->state.heaps[h] = new_heaps[h];
   }

   if (!device->bindless) {
      /* Point each root descriptor table at the freshly staged ranges. */
      for (uint32_t r = 0; r < pipeline->root.sets_param_count; r++) {
         D3D12_DESCRIPTOR_HEAP_TYPE type = pipeline->root.type[r];

         if (!update_root_desc_table[type])
            continue;

         D3D12_GPU_DESCRIPTOR_HANDLE handle =
            dzn_descriptor_heap_get_gpu_handle(new_heaps[type], new_heap_offsets[type]);

         if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS)
            ID3D12GraphicsCommandList1_SetGraphicsRootDescriptorTable(cmdbuf->cmdlist, r, handle);
         else
            ID3D12GraphicsCommandList1_SetComputeRootDescriptorTable(cmdbuf->cmdlist, r, handle);
      }
   }

   if (device->bindless) {
      /* Each set maps to one root SRV pointing at its bindless entry array. */
      for (uint32_t s = 0; s < pipeline->set_count; ++s) {
         const struct dzn_descriptor_set *set = desc_state->sets[s].set;
         if (!set || !set->pool->bindless.buf)
            continue;

         uint32_t dirty_bit = DZN_CMD_BINDPOINT_DIRTY_DESC_SET0 << s;
         if (cmdbuf->state.bindpoint[bindpoint].dirty & dirty_bit) {
            uint64_t gpuva = set->pool->bindless.gpuva + (set->heap_offsets[0] * sizeof(struct dxil_spirv_bindless_entry));
            if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS)
               ID3D12GraphicsCommandList1_SetGraphicsRootShaderResourceView(cmdbuf->cmdlist, s, gpuva);
            else
               ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist, s, gpuva);
         }
      }
      if (pipeline->dynamic_buffer_count &&
          (cmdbuf->state.bindpoint[bindpoint].dirty & DZN_CMD_BINDPOINT_DIRTY_DYNAMIC_BUFFERS)) {
         /* Dynamic buffers live in a small upload buffer rebuilt on demand;
          * offsets are folded in CPU-side before writing the entries. */
         ID3D12Resource *dynamic_buffer_buf = NULL;
         uint64_t dynamic_buffer_buf_offset;
         VkResult result =
            dzn_cmd_buffer_alloc_internal_buf(cmdbuf, sizeof(struct dxil_spirv_bindless_entry) * pipeline->dynamic_buffer_count,
                                              DZN_INTERNAL_BUF_UPLOAD,
                                              D3D12_RESOURCE_STATE_GENERIC_READ,
                                              D3D12_RAW_UAV_SRV_BYTE_ALIGNMENT,
                                              &dynamic_buffer_buf,
                                              &dynamic_buffer_buf_offset);
         if (result != VK_SUCCESS)
            return;

         uint64_t gpuva = ID3D12Resource_GetGPUVirtualAddress(dynamic_buffer_buf) + dynamic_buffer_buf_offset;
         struct dxil_spirv_bindless_entry *map;
         ID3D12Resource_Map(dynamic_buffer_buf, 0, NULL, (void **)&map);
         map += (dynamic_buffer_buf_offset / sizeof(*map));

         for (uint32_t s = 0; s < MAX_SETS; ++s) {
            const struct dzn_descriptor_set *set = desc_state->sets[s].set;
            if (!set)
               continue;

            uint32_t dynamic_buffer_count = pipeline->sets[s].dynamic_buffer_count;
            for (uint32_t o = 0; o < dynamic_buffer_count; o++) {
               const struct dzn_buffer_desc *bdesc = &set->dynamic_buffers[o];
               /* volatile: entry lives in write-combined upload memory. */
               volatile struct dxil_spirv_bindless_entry *map_entry = &map[pipeline->sets[s].dynamic_buffer_heap_offsets[o].primary];
               struct dzn_buffer_desc bdesc_updated = *bdesc;
               bdesc_updated.offset += cmdbuf->state.bindpoint[bindpoint].desc_state.sets[s].dynamic_offsets[o];
               dzn_buffer_get_bindless_buffer_descriptor(device, &bdesc_updated, map_entry);
            }
         }

         ID3D12Resource_Unmap(dynamic_buffer_buf, 0, NULL);
         if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS)
            ID3D12GraphicsCommandList1_SetGraphicsRootShaderResourceView(cmdbuf->cmdlist,
                                                                         pipeline->root.dynamic_buffer_bindless_param_idx,
                                                                         gpuva);
         else
            ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist,
                                                                        pipeline->root.dynamic_buffer_bindless_param_idx,
                                                                        gpuva);
      }
   }

   cmdbuf->state.bindpoint[bindpoint].dirty &= ~dirty_bits;
}
3427
3428 static void
dzn_cmd_buffer_update_sysvals(struct dzn_cmd_buffer * cmdbuf,uint32_t bindpoint)3429 dzn_cmd_buffer_update_sysvals(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint)
3430 {
3431 if (!(cmdbuf->state.bindpoint[bindpoint].dirty & DZN_CMD_BINDPOINT_DIRTY_SYSVALS))
3432 return;
3433
3434 const struct dzn_pipeline *pipeline = cmdbuf->state.bindpoint[bindpoint].pipeline;
3435 uint32_t sysval_cbv_param_idx = pipeline->root.sysval_cbv_param_idx;
3436
3437 if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
3438 ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, sysval_cbv_param_idx,
3439 sizeof(cmdbuf->state.sysvals.gfx) / 4,
3440 &cmdbuf->state.sysvals.gfx, 0);
3441 } else {
3442 ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, sysval_cbv_param_idx,
3443 sizeof(cmdbuf->state.sysvals.compute) / 4,
3444 &cmdbuf->state.sysvals.compute, 0);
3445 }
3446
3447 cmdbuf->state.bindpoint[bindpoint].dirty &= ~DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
3448 }
3449
3450 static void
dzn_cmd_buffer_update_viewports(struct dzn_cmd_buffer * cmdbuf)3451 dzn_cmd_buffer_update_viewports(struct dzn_cmd_buffer *cmdbuf)
3452 {
3453 const struct dzn_graphics_pipeline *pipeline =
3454 (const struct dzn_graphics_pipeline *)cmdbuf->state.pipeline;
3455
3456 if (!(cmdbuf->state.dirty & DZN_CMD_DIRTY_VIEWPORTS) ||
3457 !pipeline->vp.count)
3458 return;
3459
3460 ID3D12GraphicsCommandList1_RSSetViewports(cmdbuf->cmdlist, pipeline->vp.count, cmdbuf->state.viewports);
3461 }
3462
3463 static void
dzn_cmd_buffer_update_scissors(struct dzn_cmd_buffer * cmdbuf)3464 dzn_cmd_buffer_update_scissors(struct dzn_cmd_buffer *cmdbuf)
3465 {
3466 const struct dzn_graphics_pipeline *pipeline =
3467 (const struct dzn_graphics_pipeline *)cmdbuf->state.pipeline;
3468
3469 if (!(cmdbuf->state.dirty & DZN_CMD_DIRTY_SCISSORS))
3470 return;
3471
3472 if (!pipeline->scissor.count) {
3473 /* Apply a scissor delimiting the render area. */
3474 ID3D12GraphicsCommandList1_RSSetScissorRects(cmdbuf->cmdlist, 1, &cmdbuf->state.render.area);
3475 return;
3476 }
3477
3478 D3D12_RECT scissors[MAX_SCISSOR];
3479
3480 memcpy(scissors, cmdbuf->state.scissors, sizeof(D3D12_RECT) * pipeline->scissor.count);
3481 for (uint32_t i = 0; i < pipeline->scissor.count; i++) {
3482 scissors[i].left = MAX2(scissors[i].left, cmdbuf->state.render.area.left);
3483 scissors[i].top = MAX2(scissors[i].top, cmdbuf->state.render.area.top);
3484 scissors[i].right = MIN2(scissors[i].right, cmdbuf->state.render.area.right);
3485 scissors[i].bottom = MIN2(scissors[i].bottom, cmdbuf->state.render.area.bottom);
3486 }
3487
3488 ID3D12GraphicsCommandList1_RSSetScissorRects(cmdbuf->cmdlist, pipeline->scissor.count, scissors);
3489 }
3490
3491 static void
dzn_cmd_buffer_update_vbviews(struct dzn_cmd_buffer * cmdbuf)3492 dzn_cmd_buffer_update_vbviews(struct dzn_cmd_buffer *cmdbuf)
3493 {
3494 unsigned start, end;
3495
3496 BITSET_FOREACH_RANGE(start, end, cmdbuf->state.vb.dirty, MAX_VBS)
3497 ID3D12GraphicsCommandList1_IASetVertexBuffers(cmdbuf->cmdlist, start, end - start, &cmdbuf->state.vb.views[start]);
3498
3499 BITSET_CLEAR_RANGE(cmdbuf->state.vb.dirty, 0, MAX_VBS);
3500 }
3501
3502 static void
dzn_cmd_buffer_update_ibview(struct dzn_cmd_buffer * cmdbuf)3503 dzn_cmd_buffer_update_ibview(struct dzn_cmd_buffer *cmdbuf)
3504 {
3505 if (!(cmdbuf->state.dirty & DZN_CMD_DIRTY_IB))
3506 return;
3507
3508 ID3D12GraphicsCommandList1_IASetIndexBuffer(cmdbuf->cmdlist, &cmdbuf->state.ib.view);
3509 }
3510
3511 static void
dzn_cmd_buffer_update_push_constants(struct dzn_cmd_buffer * cmdbuf,uint32_t bindpoint)3512 dzn_cmd_buffer_update_push_constants(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint)
3513 {
3514 struct dzn_cmd_buffer_push_constant_state *state =
3515 bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS ?
3516 &cmdbuf->state.push_constant.gfx : &cmdbuf->state.push_constant.compute;
3517
3518 uint32_t offset = state->offset / 4;
3519 uint32_t end = ALIGN(state->end, 4) / 4;
3520 uint32_t count = end - offset;
3521
3522 if (!count)
3523 return;
3524
3525 uint32_t slot = cmdbuf->state.pipeline->root.push_constant_cbv_param_idx;
3526 uint32_t *vals = state->values + offset;
3527
3528 if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS)
3529 ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, slot, count, vals, offset);
3530 else
3531 ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, slot, count, vals, offset);
3532
3533 state->offset = 0;
3534 state->end = 0;
3535 }
3536
3537 static void
dzn_cmd_buffer_update_zsa(struct dzn_cmd_buffer * cmdbuf)3538 dzn_cmd_buffer_update_zsa(struct dzn_cmd_buffer *cmdbuf)
3539 {
3540 struct dzn_physical_device *pdev =
3541 container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk);
3542 if (cmdbuf->state.dirty & DZN_CMD_DIRTY_STENCIL_REF) {
3543 const struct dzn_graphics_pipeline *gfx = (const struct dzn_graphics_pipeline *)
3544 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
3545 if (cmdbuf->cmdlist8 &&
3546 pdev->options14.IndependentFrontAndBackStencilRefMaskSupported) {
3547 ID3D12GraphicsCommandList8_OMSetFrontAndBackStencilRef(cmdbuf->cmdlist8,
3548 cmdbuf->state.zsa.stencil_test.front.ref,
3549 cmdbuf->state.zsa.stencil_test.back.ref);
3550 } else {
3551 uint32_t ref =
3552 gfx->zsa.stencil_test.front.uses_ref ?
3553 cmdbuf->state.zsa.stencil_test.front.ref :
3554 cmdbuf->state.zsa.stencil_test.back.ref;
3555 ID3D12GraphicsCommandList1_OMSetStencilRef(cmdbuf->cmdlist, ref);
3556 }
3557 }
3558 }
3559
3560 static void
dzn_cmd_buffer_update_blend_constants(struct dzn_cmd_buffer * cmdbuf)3561 dzn_cmd_buffer_update_blend_constants(struct dzn_cmd_buffer *cmdbuf)
3562 {
3563 if (cmdbuf->state.dirty & DZN_CMD_DIRTY_BLEND_CONSTANTS)
3564 ID3D12GraphicsCommandList1_OMSetBlendFactor(cmdbuf->cmdlist,
3565 cmdbuf->state.blend.constants);
3566 }
3567
3568 static void
dzn_cmd_buffer_update_depth_bounds(struct dzn_cmd_buffer * cmdbuf)3569 dzn_cmd_buffer_update_depth_bounds(struct dzn_cmd_buffer *cmdbuf)
3570 {
3571 if (cmdbuf->state.dirty & DZN_CMD_DIRTY_DEPTH_BOUNDS) {
3572 ID3D12GraphicsCommandList1_OMSetDepthBounds(cmdbuf->cmdlist,
3573 cmdbuf->state.zsa.depth_bounds.min,
3574 cmdbuf->state.zsa.depth_bounds.max);
3575 }
3576 }
3577
3578 static void
dzn_cmd_buffer_update_depth_bias(struct dzn_cmd_buffer * cmdbuf)3579 dzn_cmd_buffer_update_depth_bias(struct dzn_cmd_buffer *cmdbuf)
3580 {
3581 if (cmdbuf->state.dirty & DZN_CMD_DIRTY_DEPTH_BIAS) {
3582 assert(cmdbuf->cmdlist9);
3583 ID3D12GraphicsCommandList9_RSSetDepthBias(cmdbuf->cmdlist9,
3584 cmdbuf->state.pipeline_variant.depth_bias.constant_factor,
3585 cmdbuf->state.pipeline_variant.depth_bias.clamp,
3586 cmdbuf->state.pipeline_variant.depth_bias.slope_factor);
3587 }
3588 }
3589
3590 static VkResult
dzn_cmd_buffer_triangle_fan_create_index(struct dzn_cmd_buffer * cmdbuf,uint32_t * vertex_count)3591 dzn_cmd_buffer_triangle_fan_create_index(struct dzn_cmd_buffer *cmdbuf, uint32_t *vertex_count)
3592 {
3593 uint8_t index_size = *vertex_count <= 0xffff ? 2 : 4;
3594 uint32_t triangle_count = MAX2(*vertex_count, 2) - 2;
3595
3596 *vertex_count = triangle_count * 3;
3597 if (!*vertex_count)
3598 return VK_SUCCESS;
3599
3600 ID3D12Resource *index_buf;
3601 uint64_t index_offset;
3602 VkResult result =
3603 dzn_cmd_buffer_alloc_internal_buf(cmdbuf, *vertex_count * index_size,
3604 DZN_INTERNAL_BUF_UPLOAD,
3605 D3D12_RESOURCE_STATE_GENERIC_READ,
3606 index_size,
3607 &index_buf,
3608 &index_offset);
3609 if (result != VK_SUCCESS)
3610 return result;
3611
3612 void *cpu_ptr;
3613 ID3D12Resource_Map(index_buf, 0, NULL, &cpu_ptr);
3614 cpu_ptr = (uint8_t *)cpu_ptr + index_offset;
3615
3616 /* TODO: VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT */
3617 if (index_size == 2) {
3618 uint16_t *indices = (uint16_t *)cpu_ptr;
3619 for (uint32_t t = 0; t < triangle_count; t++) {
3620 indices[t * 3] = t + 1;
3621 indices[(t * 3) + 1] = t + 2;
3622 indices[(t * 3) + 2] = 0;
3623 }
3624 cmdbuf->state.ib.view.Format = DXGI_FORMAT_R16_UINT;
3625 } else {
3626 uint32_t *indices = (uint32_t *)cpu_ptr;
3627 for (uint32_t t = 0; t < triangle_count; t++) {
3628 indices[t * 3] = t + 1;
3629 indices[(t * 3) + 1] = t + 2;
3630 indices[(t * 3) + 2] = 0;
3631 }
3632 cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT;
3633 }
3634
3635 cmdbuf->state.ib.view.SizeInBytes = *vertex_count * index_size;
3636 cmdbuf->state.ib.view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(index_buf) + index_offset;
3637 cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
3638 return VK_SUCCESS;
3639 }
3640
/* Rewrite an indexed triangle-fan draw into an indexed triangle-list draw by
 * dispatching an internal compute shader that expands the bound index buffer
 * into a new 32-bit index buffer. Updates *index_count, zeroes *first_index,
 * and repoints the command buffer's IB state at the new buffer.
 */
static VkResult
dzn_cmd_buffer_triangle_fan_rewrite_index(struct dzn_cmd_buffer *cmdbuf,
                                          uint32_t *index_count,
                                          uint32_t *first_index)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   uint32_t triangle_count = MAX2(*index_count, 2) - 2;

   /* Each fan triangle becomes one list triangle. */
   *index_count = triangle_count * 3;
   if (!*index_count)
      return VK_SUCCESS;

   /* New index is always 32bit to make the compute shader rewriting the
    * index simpler */
   ID3D12Resource *new_index_buf;
   VkResult result =
      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, *index_count * 4,
                                        DZN_INTERNAL_BUF_DEFAULT,
                                        D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
                                        4,
                                        &new_index_buf,
                                        NULL);
   if (result != VK_SUCCESS)
      return result;

   D3D12_GPU_VIRTUAL_ADDRESS old_index_buf_gpu =
      cmdbuf->state.ib.view.BufferLocation;

   /* Primitive restart combined with triangle fans is handled elsewhere;
    * this path assumes no strip-cut value is active. */
   ASSERTED const struct dzn_graphics_pipeline *gfx_pipeline = (const struct dzn_graphics_pipeline *)
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
   ASSERTED bool prim_restart =
      dzn_graphics_pipeline_get_desc_template(gfx_pipeline, ib_strip_cut) != NULL;

   assert(!prim_restart);

   enum dzn_index_type index_type =
      dzn_index_type_from_dxgi_format(cmdbuf->state.ib.view.Format, false);
   const struct dzn_meta_triangle_fan_rewrite_index *rewrite_index =
      &device->triangle_fan[index_type];

   struct dzn_triangle_fan_rewrite_index_params params = {
      .first_index = *first_index,
   };

   /* The rewrite shader uses its own root signature/PSO, so compute state
    * must be re-emitted on the next regular dispatch. */
   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].root_sig = NULL;
   ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, rewrite_index->root_sig);
   ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, rewrite_index->pipeline_state);
   ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist, 0, ID3D12Resource_GetGPUVirtualAddress(new_index_buf));
   ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, 1, sizeof(params) / 4,
                                                           &params, 0);
   ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist, 2, old_index_buf_gpu);
   /* One thread group per output triangle. */
   ID3D12GraphicsCommandList1_Dispatch(cmdbuf->cmdlist, triangle_count, 1, 1);

   /* Make the compute-written indices visible to the index-input stage. */
   if (cmdbuf->enhanced_barriers) {
      dzn_cmd_buffer_buffer_barrier(cmdbuf, new_index_buf,
                                    D3D12_BARRIER_SYNC_COMPUTE_SHADING, D3D12_BARRIER_SYNC_INDEX_INPUT,
                                    D3D12_BARRIER_ACCESS_UNORDERED_ACCESS, D3D12_BARRIER_ACCESS_INDEX_BUFFER);
   } else {
      dzn_cmd_buffer_queue_transition_barriers(cmdbuf, new_index_buf, 0, 1,
                                               D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
                                               D3D12_RESOURCE_STATE_INDEX_BUFFER,
                                               DZN_QUEUE_TRANSITION_FLUSH);
   }

   /* We don't mess up with the driver state when executing our internal
    * compute shader, but we still change the D3D12 state, so let's mark
    * things dirty if needed.
    */
   cmdbuf->state.pipeline = NULL;
   if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline) {
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |=
         DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
   }

   cmdbuf->state.ib.view.SizeInBytes = *index_count * 4;
   cmdbuf->state.ib.view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(new_index_buf);
   cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT;
   cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
   *first_index = 0;
   return VK_SUCCESS;
}
3723
/* Flush all pending graphics state to the D3D12 command list right before a
 * draw. The pipeline must be flushed first (it may re-dirty sysvals and
 * heaps); the remaining updates each check their own dirty bits.
 */
static void
dzn_cmd_buffer_prepare_draw(struct dzn_cmd_buffer *cmdbuf, bool indexed)
{
   if (indexed)
      dzn_cmd_buffer_update_ibview(cmdbuf);

   dzn_cmd_buffer_update_pipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS);
   dzn_cmd_buffer_update_heaps(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS);
   dzn_cmd_buffer_update_sysvals(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS);
   dzn_cmd_buffer_update_viewports(cmdbuf);
   dzn_cmd_buffer_update_scissors(cmdbuf);
   dzn_cmd_buffer_update_vbviews(cmdbuf);
   dzn_cmd_buffer_update_push_constants(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS);
   dzn_cmd_buffer_update_zsa(cmdbuf);
   dzn_cmd_buffer_update_blend_constants(cmdbuf);
   dzn_cmd_buffer_update_depth_bounds(cmdbuf);
   dzn_cmd_buffer_update_depth_bias(cmdbuf);

   /* Reset the dirty states */
   /* NOTE(review): the mask keeps only the HEAPS bit set across draws —
    * presumably because heap re-binding is tracked separately; confirm
    * against the DZN_CMD_BINDPOINT_DIRTY_* consumers. */
   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty &= DZN_CMD_BINDPOINT_DIRTY_HEAPS;
   cmdbuf->state.dirty = 0;
}
3746
3747 static uint32_t
dzn_cmd_buffer_triangle_fan_get_max_index_buf_size(struct dzn_cmd_buffer * cmdbuf,bool indexed)3748 dzn_cmd_buffer_triangle_fan_get_max_index_buf_size(struct dzn_cmd_buffer *cmdbuf, bool indexed)
3749 {
3750 struct dzn_graphics_pipeline *pipeline = (struct dzn_graphics_pipeline *)
3751 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
3752
3753 if (!pipeline->ia.triangle_fan)
3754 return 0;
3755
3756 uint32_t max_triangles;
3757
3758 if (indexed) {
3759 uint32_t index_size = cmdbuf->state.ib.view.Format == DXGI_FORMAT_R32_UINT ? 4 : 2;
3760 uint32_t max_indices = cmdbuf->state.ib.view.SizeInBytes / index_size;
3761
3762 max_triangles = MAX2(max_indices, 2) - 2;
3763 } else {
3764 uint32_t max_vertex = 0;
3765 for (uint32_t i = 0; i < pipeline->vb.count; i++) {
3766 max_vertex =
3767 MAX2(max_vertex,
3768 cmdbuf->state.vb.views[i].SizeInBytes / cmdbuf->state.vb.views[i].StrideInBytes);
3769 }
3770
3771 max_triangles = MAX2(max_vertex, 2) - 2;
3772 }
3773
3774 return max_triangles * 3;
3775 }
3776
/* Rewrite an application-provided indirect draw buffer into an internal
 * "exec" buffer consumed by a custom command signature. This is needed when:
 * - the shader consumes draw parameters (first_vertex/base_instance sysvals),
 * - per-draw IDs must be emitted (multi-draw without HW support),
 * - the pipeline uses a triangle-fan topology that must be lowered to
 *   triangle lists.
 * All inout_* parameters are redirected to the patched buffers on success.
 * NOTE(review): on internal-buffer allocation failure this returns early
 * without touching the inout parameters — presumably
 * dzn_cmd_buffer_alloc_internal_buf() records the error on the command
 * buffer; confirm against its implementation.
 */
static void
dzn_cmd_buffer_patch_indirect_draw(struct dzn_cmd_buffer *cmdbuf,
                                   struct dzn_indirect_draw_type draw_type,
                                   ID3D12Resource **inout_draw_buf,
                                   size_t *inout_draw_buf_offset,
                                   ID3D12Resource **inout_count_buf,
                                   size_t *inout_count_buf_offset,
                                   uint32_t max_draw_count,
                                   uint32_t *inout_draw_buf_stride,
                                   bool *inout_indexed)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   /* Zero when no triangle-fan lowering is needed (see helper). */
   uint32_t triangle_fan_index_buf_stride =
      dzn_cmd_buffer_triangle_fan_get_max_index_buf_size(cmdbuf, *inout_indexed) *
      sizeof(uint32_t);

   uint32_t min_draw_buf_stride = *inout_indexed ? sizeof(D3D12_DRAW_INDEXED_ARGUMENTS) : sizeof(D3D12_DRAW_ARGUMENTS);

   /* Per-draw exec-buffer entry: optional IB view (triangle-fan), optional
    * first_vertex/base_instance pair, optional draw ID, then the native
    * D3D12 draw arguments. */
   uint32_t exec_buf_stride =
      (draw_type.triangle_fan ? sizeof(D3D12_INDEX_BUFFER_VIEW) : 0) +
      (draw_type.draw_params ? sizeof(uint32_t) * 2 : 0) +
      (draw_type.draw_id ? sizeof(uint32_t) : 0) +
      min_draw_buf_stride;
   uint32_t triangle_fan_exec_buf_stride =
      sizeof(struct dzn_indirect_triangle_fan_rewrite_index_exec_params);
   uint32_t exec_buf_size = max_draw_count * exec_buf_stride;
   uint32_t exec_buf_draw_offset = 0;

   // We reserve the first slot for the draw_count value when indirect count is
   // involved.
   if (*inout_count_buf != NULL) {
      exec_buf_size += exec_buf_stride;
      exec_buf_draw_offset = exec_buf_stride;
   }

   ID3D12Resource *exec_buf;
   VkResult result =
      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, exec_buf_size,
                                        DZN_INTERNAL_BUF_DEFAULT,
                                        D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
                                        0,
                                        &exec_buf, NULL);
   if (result != VK_SUCCESS)
      return;

   D3D12_GPU_VIRTUAL_ADDRESS draw_buf_gpu =
      ID3D12Resource_GetGPUVirtualAddress(*inout_draw_buf) + *inout_draw_buf_offset;
   ID3D12Resource *triangle_fan_index_buf = NULL;
   ID3D12Resource *triangle_fan_exec_buf = NULL;

   /* Triangle-fan lowering needs two extra buffers: one holding the
    * rewritten indices, and one holding the indirect-dispatch args of the
    * index-rewrite shader (one entry per draw). */
   if (triangle_fan_index_buf_stride) {
      result =
         dzn_cmd_buffer_alloc_internal_buf(cmdbuf,
                                           max_draw_count * triangle_fan_index_buf_stride,
                                           DZN_INTERNAL_BUF_DEFAULT,
                                           D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
                                           0,
                                           &triangle_fan_index_buf, NULL);
      if (result != VK_SUCCESS)
         return;

      result =
         dzn_cmd_buffer_alloc_internal_buf(cmdbuf,
                                           max_draw_count * triangle_fan_exec_buf_stride,
                                           DZN_INTERNAL_BUF_DEFAULT,
                                           D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
                                           0,
                                           &triangle_fan_exec_buf, NULL);
      if (result != VK_SUCCESS)
         return;
   }

   /* Root constants for the draw-patching compute shader. The struct is the
    * largest of the three variants; params_size below trims it to what the
    * selected shader actually consumes. */
   struct dzn_indirect_draw_triangle_fan_prim_restart_rewrite_params params = {
      .draw_buf_stride = *inout_draw_buf_stride,
      .triangle_fan_index_buf_stride = triangle_fan_index_buf_stride,
      .triangle_fan_index_buf_start =
         triangle_fan_index_buf ?
         ID3D12Resource_GetGPUVirtualAddress(triangle_fan_index_buf) : 0,
      .exec_buf_start =
         draw_type.triangle_fan_primitive_restart ?
         ID3D12Resource_GetGPUVirtualAddress(exec_buf) + exec_buf_draw_offset : 0,
   };
   uint32_t params_size;
   if (draw_type.triangle_fan_primitive_restart)
      params_size = sizeof(struct dzn_indirect_draw_triangle_fan_prim_restart_rewrite_params);
   else if (draw_type.triangle_fan)
      params_size = sizeof(struct dzn_indirect_draw_triangle_fan_rewrite_params);
   else
      params_size = sizeof(struct dzn_indirect_draw_rewrite_params);

   /* Bind the internal draw-patching compute pipeline and dispatch one
    * thread(group) per draw. This clobbers the app's compute state, hence
    * the dirty-bit/root-sig invalidation. */
   struct dzn_meta_indirect_draw *indirect_draw = &device->indirect_draws[draw_type.value];
   uint32_t root_param_idx = 0;

   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].root_sig = NULL;
   ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, indirect_draw->root_sig);
   ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, indirect_draw->pipeline_state);
   ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, root_param_idx++,
                                                           params_size / 4, (const void *)¶ms, 0);
   ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist, root_param_idx++,
                                                               draw_buf_gpu);
   ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist, root_param_idx++,
                                                                ID3D12Resource_GetGPUVirtualAddress(exec_buf));
   if (*inout_count_buf) {
      ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist,
                                                                  root_param_idx++,
                                                                  ID3D12Resource_GetGPUVirtualAddress(*inout_count_buf) +
                                                                  *inout_count_buf_offset);
   }

   if (triangle_fan_exec_buf) {
      ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist,
                                                                   root_param_idx++,
                                                                   ID3D12Resource_GetGPUVirtualAddress(triangle_fan_exec_buf));
   }

   ID3D12GraphicsCommandList1_Dispatch(cmdbuf->cmdlist, max_draw_count, 1, 1);

   /* Enhanced-barrier path batches buffer barriers into one group; legacy
    * path issues transition barriers immediately (FLUSH flag). */
   D3D12_BUFFER_BARRIER buf_barriers[2];
   D3D12_BARRIER_GROUP enhanced_barriers = {
      .NumBarriers = 0,
      .Type = D3D12_BARRIER_TYPE_BUFFER,
      .pBufferBarriers = buf_barriers
   };

   if (triangle_fan_exec_buf) {
      /* Second pass: run the index-rewrite shader once per draw through
       * ExecuteIndirect, consuming the args the patch shader just wrote. */
      enum dzn_index_type index_type =
         *inout_indexed ?
         dzn_index_type_from_dxgi_format(cmdbuf->state.ib.view.Format, draw_type.triangle_fan_primitive_restart) :
         DZN_NO_INDEX;
      struct dzn_meta_triangle_fan_rewrite_index *rewrite_index =
         &device->triangle_fan[index_type];

      struct dzn_triangle_fan_rewrite_index_params rewrite_index_params = { 0 };

      assert(rewrite_index->root_sig);
      assert(rewrite_index->pipeline_state);
      assert(rewrite_index->cmd_sig);

      /* The rewrite-exec buffer switches from UAV (written above) to
       * indirect-argument usage. */
      if (cmdbuf->enhanced_barriers) {
         dzn_cmd_buffer_buffer_barrier(cmdbuf, triangle_fan_exec_buf,
                                       D3D12_BARRIER_SYNC_COMPUTE_SHADING, D3D12_BARRIER_SYNC_EXECUTE_INDIRECT,
                                       D3D12_BARRIER_ACCESS_UNORDERED_ACCESS, D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT);
      }
      else {
         dzn_cmd_buffer_queue_transition_barriers(cmdbuf, triangle_fan_exec_buf, 0, 1,
                                                  D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
                                                  D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT,
                                                  DZN_QUEUE_TRANSITION_FLUSH);
      }

      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].root_sig = NULL;
      ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, rewrite_index->root_sig);
      ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, rewrite_index->pipeline_state);
      root_param_idx = 0;
      ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist, root_param_idx++,
                                                                   ID3D12Resource_GetGPUVirtualAddress(triangle_fan_index_buf));
      ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, root_param_idx++,
                                                              sizeof(rewrite_index_params) / 4,
                                                              (const void *)&rewrite_index_params, 0);

      if (*inout_indexed) {
         ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist,
                                                                     root_param_idx++,
                                                                     cmdbuf->state.ib.view.BufferLocation);
      }

      /* With indirect count, exec_buf slot 0 holds the actual draw count. */
      ID3D12GraphicsCommandList1_ExecuteIndirect(cmdbuf->cmdlist, rewrite_index->cmd_sig,
                                                 max_draw_count, triangle_fan_exec_buf, 0,
                                                 *inout_count_buf ? exec_buf : NULL, 0);

      /* The rewritten index buffer moves from UAV to index-buffer usage. */
      if (cmdbuf->enhanced_barriers) {
         buf_barriers[enhanced_barriers.NumBarriers++] = (D3D12_BUFFER_BARRIER){
            .SyncBefore = D3D12_BARRIER_SYNC_COMPUTE_SHADING,
            .SyncAfter = D3D12_BARRIER_SYNC_INDEX_INPUT,
            .AccessBefore = D3D12_BARRIER_ACCESS_UNORDERED_ACCESS,
            .AccessAfter = D3D12_BARRIER_ACCESS_INDEX_BUFFER,
            .pResource = triangle_fan_index_buf,
            .Offset = 0, .Size = UINT64_MAX
         };
      }
      else {
         dzn_cmd_buffer_queue_transition_barriers(cmdbuf, triangle_fan_index_buf, 0, 1,
                                                  D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
                                                  D3D12_RESOURCE_STATE_INDEX_BUFFER,
                                                  DZN_QUEUE_TRANSITION_FLUSH);
      }

      /* After our triangle-fan lowering the draw is indexed */
      *inout_indexed = true;
      cmdbuf->state.ib.view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(triangle_fan_index_buf);
      cmdbuf->state.ib.view.SizeInBytes = triangle_fan_index_buf_stride;
      cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT;
      cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
   }

   /* exec_buf switches from UAV (patch-shader output) to indirect-argument
    * usage for the final ExecuteIndirect issued by the caller. */
   if (cmdbuf->enhanced_barriers) {
      buf_barriers[enhanced_barriers.NumBarriers++] = (D3D12_BUFFER_BARRIER){
         .SyncBefore = D3D12_BARRIER_SYNC_COMPUTE_SHADING,
         .SyncAfter = D3D12_BARRIER_SYNC_EXECUTE_INDIRECT,
         .AccessBefore = D3D12_BARRIER_ACCESS_UNORDERED_ACCESS,
         .AccessAfter = D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT,
         .pResource = exec_buf,
         .Offset = 0, .Size = UINT64_MAX
      };
      ID3D12GraphicsCommandList8_Barrier(cmdbuf->cmdlist8, 1, &enhanced_barriers);
   }
   else {
      dzn_cmd_buffer_queue_transition_barriers(cmdbuf, exec_buf, 0, 1,
                                               D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
                                               D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT,
                                               DZN_QUEUE_TRANSITION_FLUSH);
   }

   /* We don't mess up with the driver state when executing our internal
    * compute shader, but we still change the D3D12 state, so let's mark
    * things dirty if needed.
    */
   cmdbuf->state.pipeline = NULL;
   if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline) {
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |=
         DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
   }

   /* Redirect the caller's draw (and count) buffers to the patched
    * exec buffer. */
   *inout_draw_buf = exec_buf;
   *inout_draw_buf_offset = exec_buf_draw_offset;
   if (*inout_count_buf) {
      *inout_count_buf = exec_buf;
      *inout_count_buf_offset = 0;
   }
   *inout_draw_buf_stride = exec_buf_stride;
}
4010
/* Common implementation of vkCmdDrawIndirect/DrawIndexedIndirect and their
 * *Count variants. Patches the draw buffer through a compute pass when
 * draw-parameter sysvals, draw IDs or triangle-fan lowering are needed,
 * then issues one ExecuteIndirect per emulated view.
 */
static void
dzn_cmd_buffer_indirect_draw(struct dzn_cmd_buffer *cmdbuf,
                             ID3D12Resource *draw_buf,
                             size_t draw_buf_offset,
                             ID3D12Resource *count_buf,
                             size_t count_buf_offset,
                             uint32_t max_draw_count,
                             uint32_t draw_buf_stride,
                             bool indexed)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk);
   struct dzn_graphics_pipeline *pipeline = (struct dzn_graphics_pipeline *)
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
   uint32_t min_draw_buf_stride =
      indexed ?
      sizeof(D3D12_DRAW_INDEXED_ARGUMENTS) :
      sizeof(D3D12_DRAW_ARGUMENTS);
   bool prim_restart =
      dzn_graphics_pipeline_get_desc_template(pipeline, ib_strip_cut) != NULL;

   /* Vulkan allows a zero stride, meaning tightly-packed arguments. */
   draw_buf_stride = draw_buf_stride ? draw_buf_stride : min_draw_buf_stride;
   assert(draw_buf_stride >= min_draw_buf_stride);
   assert((draw_buf_stride & 3) == 0);

   /* Saved so it can be restored after triangle-fan lowering overwrites it. */
   D3D12_INDEX_BUFFER_VIEW ib_view = cmdbuf->state.ib.view;

   /* Describe which emulation paths this draw needs; draw_type.value indexes
    * the matching internal patch shader. */
   struct dzn_indirect_draw_type draw_type;
   draw_type.value = 0;
   draw_type.indexed = indexed;
   draw_type.indirect_count = count_buf != NULL;
   draw_type.draw_params = pipeline->needs_draw_sysvals && !pdev->options21.ExtendedCommandInfoSupported;
   draw_type.draw_id = max_draw_count > 1 && pdev->options21.ExecuteIndirectTier < D3D12_EXECUTE_INDIRECT_TIER_1_1;
   draw_type.triangle_fan = pipeline->ia.triangle_fan;
   draw_type.triangle_fan_primitive_restart = draw_type.triangle_fan && prim_restart;

   /* May redirect draw_buf/count_buf to an internal exec buffer and flip
    * 'indexed' to true (triangle-fan lowering produces indexed draws). */
   if (draw_type.draw_params || draw_type.draw_id || draw_type.triangle_fan) {
      dzn_cmd_buffer_patch_indirect_draw(cmdbuf, draw_type,
                                         &draw_buf, &draw_buf_offset,
                                         &count_buf, &count_buf_offset,
                                         max_draw_count, &draw_buf_stride, &indexed);
   }


   struct dzn_indirect_draw_cmd_sig_key cmd_sig_key;
   memset(&cmd_sig_key, 0, sizeof(cmd_sig_key));
   cmd_sig_key.indexed = indexed;
   cmd_sig_key.triangle_fan = draw_type.triangle_fan;
   cmd_sig_key.draw_params = draw_type.draw_params;
   cmd_sig_key.draw_id = max_draw_count > 1;
   cmd_sig_key.custom_stride = draw_buf_stride;
   ID3D12CommandSignature *cmdsig =
      dzn_graphics_pipeline_get_indirect_cmd_sig(pipeline, cmd_sig_key);

   if (!cmdsig) {
      vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_DEVICE_MEMORY);
      return;
   }

   cmdbuf->state.sysvals.gfx.first_vertex = 0;
   cmdbuf->state.sysvals.gfx.base_instance = 0;
   cmdbuf->state.sysvals.gfx.is_indexed_draw = indexed;

   /* Without native view instancing, replay the draw once per view with the
    * view index passed through sysvals. */
   uint32_t view_mask = pipeline->multiview.native_view_instancing ?
      1 : pipeline->multiview.view_mask;
   u_foreach_bit(view, view_mask) {
      cmdbuf->state.sysvals.gfx.view_index = view;
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
         DZN_CMD_BINDPOINT_DIRTY_SYSVALS;

      dzn_cmd_buffer_prepare_draw(cmdbuf, indexed);

      ID3D12GraphicsCommandList1_ExecuteIndirect(cmdbuf->cmdlist, cmdsig,
                                                 max_draw_count,
                                                 draw_buf, draw_buf_offset,
                                                 count_buf, count_buf_offset);
   }

   /* Restore the old IB view if we modified it during the triangle fan lowering */
   if (draw_type.triangle_fan) {
      cmdbuf->state.ib.view = ib_view;
      cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
   }
}
4095
static void
dzn_cmd_buffer_prepare_dispatch(struct dzn_cmd_buffer *cmdbuf)
{
   /* Flush all dirty compute-bindpoint state (pipeline, descriptor heaps,
    * sysvals, push constants) to the D3D12 command list before a dispatch.
    */
   dzn_cmd_buffer_update_pipeline(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE);
   dzn_cmd_buffer_update_heaps(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE);
   dzn_cmd_buffer_update_sysvals(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE);
   dzn_cmd_buffer_update_push_constants(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE);

   /* Reset the dirty states; only DZN_CMD_BINDPOINT_DIRTY_HEAPS survives. */
   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty &= DZN_CMD_BINDPOINT_DIRTY_HEAPS;
}
4107
4108 VKAPI_ATTR void VKAPI_CALL
dzn_CmdCopyBuffer2(VkCommandBuffer commandBuffer,const VkCopyBufferInfo2 * info)4109 dzn_CmdCopyBuffer2(VkCommandBuffer commandBuffer,
4110 const VkCopyBufferInfo2 *info)
4111 {
4112 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4113 VK_FROM_HANDLE(dzn_buffer, src_buffer, info->srcBuffer);
4114 VK_FROM_HANDLE(dzn_buffer, dst_buffer, info->dstBuffer);
4115
4116 for (int i = 0; i < info->regionCount; i++) {
4117 const VkBufferCopy2 *region = info->pRegions + i;
4118
4119 ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, dst_buffer->res, region->dstOffset,
4120 src_buffer->res, region->srcOffset,
4121 region->size);
4122 }
4123 }
4124
4125 VKAPI_ATTR void VKAPI_CALL
dzn_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer,const VkCopyBufferToImageInfo2 * info)4126 dzn_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer,
4127 const VkCopyBufferToImageInfo2 *info)
4128 {
4129 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4130
4131 for (int i = 0; i < info->regionCount; i++) {
4132 const VkBufferImageCopy2 *region = info->pRegions + i;
4133
4134 dzn_foreach_aspect(aspect, region->imageSubresource.aspectMask) {
4135 for (uint32_t l = 0; l < region->imageSubresource.layerCount; l++)
4136 dzn_cmd_buffer_copy_buf2img_region(cmdbuf, info, i, aspect, l);
4137 }
4138 }
4139 }
4140
4141 VKAPI_ATTR void VKAPI_CALL
dzn_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer,const VkCopyImageToBufferInfo2 * info)4142 dzn_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer,
4143 const VkCopyImageToBufferInfo2 *info)
4144 {
4145 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4146
4147 for (int i = 0; i < info->regionCount; i++) {
4148 const VkBufferImageCopy2 *region = info->pRegions + i;
4149
4150 dzn_foreach_aspect(aspect, region->imageSubresource.aspectMask) {
4151 for (uint32_t l = 0; l < region->imageSubresource.layerCount; l++)
4152 dzn_cmd_buffer_copy_img2buf_region(cmdbuf, info, i, aspect, l);
4153 }
4154 }
4155 }
4156
4157 VKAPI_ATTR void VKAPI_CALL
dzn_CmdCopyImage2(VkCommandBuffer commandBuffer,const VkCopyImageInfo2 * info)4158 dzn_CmdCopyImage2(VkCommandBuffer commandBuffer,
4159 const VkCopyImageInfo2 *info)
4160 {
4161 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4162 struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
4163 struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk);
4164 VK_FROM_HANDLE(dzn_image, src, info->srcImage);
4165 VK_FROM_HANDLE(dzn_image, dst, info->dstImage);
4166
4167 assert(src->vk.samples == dst->vk.samples);
4168
4169 bool requires_temp_res = false;
4170
4171 for (uint32_t i = 0; i < info->regionCount && !requires_temp_res; i++) {
4172 const VkImageCopy2 *region = &info->pRegions[i];
4173
4174 dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) {
4175 assert(aspect & region->dstSubresource.aspectMask);
4176
4177 if (!dzn_image_formats_are_compatible(device, src->vk.format, dst->vk.format,
4178 VK_IMAGE_USAGE_TRANSFER_SRC_BIT, aspect) &&
4179 src->vk.tiling != VK_IMAGE_TILING_LINEAR &&
4180 dst->vk.tiling != VK_IMAGE_TILING_LINEAR) {
4181 requires_temp_res = true;
4182 break;
4183 }
4184 }
4185 }
4186
4187 bool use_blit = false;
4188 if (src->vk.samples > 1) {
4189 use_blit = requires_temp_res;
4190
4191 for (int i = 0; i < info->regionCount; i++) {
4192 const VkImageCopy2 *region = info->pRegions + i;
4193 if (region->srcOffset.x != 0 || region->srcOffset.y != 0 ||
4194 region->extent.width != u_minify(src->vk.extent.width, region->srcSubresource.mipLevel) ||
4195 region->extent.height != u_minify(src->vk.extent.height, region->srcSubresource.mipLevel) ||
4196 region->dstOffset.x != 0 || region->dstOffset.y != 0 ||
4197 region->extent.width != u_minify(dst->vk.extent.width, region->dstSubresource.mipLevel) ||
4198 region->extent.height != u_minify(dst->vk.extent.height, region->dstSubresource.mipLevel))
4199 use_blit = true;
4200 }
4201 }
4202
4203 if (use_blit) {
4204 /* This copy -> blit lowering doesn't work if the vkCmdCopyImage[2]() is
4205 * is issued on a transfer queue, but we don't have any better option
4206 * right now...
4207 */
4208 STACK_ARRAY(VkImageBlit2, blit_regions, info->regionCount);
4209
4210 VkBlitImageInfo2 blit_info = {
4211 .sType = VK_STRUCTURE_TYPE_BLIT_IMAGE_INFO_2,
4212 .srcImage = info->srcImage,
4213 .srcImageLayout = info->srcImageLayout,
4214 .dstImage = info->dstImage,
4215 .dstImageLayout = info->dstImageLayout,
4216 .regionCount = info->regionCount,
4217 .pRegions = blit_regions,
4218 .filter = VK_FILTER_NEAREST,
4219 };
4220
4221 for (uint32_t r = 0; r < info->regionCount; r++) {
4222 blit_regions[r] = (VkImageBlit2) {
4223 .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2,
4224 .srcSubresource = info->pRegions[r].srcSubresource,
4225 .srcOffsets = {
4226 info->pRegions[r].srcOffset,
4227 info->pRegions[r].srcOffset,
4228 },
4229 .dstSubresource = info->pRegions[r].dstSubresource,
4230 .dstOffsets = {
4231 info->pRegions[r].dstOffset,
4232 info->pRegions[r].dstOffset,
4233 },
4234 };
4235
4236 blit_regions[r].srcOffsets[1].x += info->pRegions[r].extent.width;
4237 blit_regions[r].srcOffsets[1].y += info->pRegions[r].extent.height;
4238 blit_regions[r].srcOffsets[1].z += info->pRegions[r].extent.depth;
4239 blit_regions[r].dstOffsets[1].x += info->pRegions[r].extent.width;
4240 blit_regions[r].dstOffsets[1].y += info->pRegions[r].extent.height;
4241 blit_regions[r].dstOffsets[1].z += info->pRegions[r].extent.depth;
4242 }
4243
4244 dzn_CmdBlitImage2(commandBuffer, &blit_info);
4245
4246 STACK_ARRAY_FINISH(blit_regions);
4247 return;
4248 }
4249
4250 D3D12_TEXTURE_COPY_LOCATION tmp_loc = { 0 };
4251 D3D12_RESOURCE_DESC tmp_desc = {
4252 .Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D,
4253 .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
4254 .DepthOrArraySize = 1,
4255 .MipLevels = 1,
4256 .Format = src->desc.Format,
4257 .SampleDesc = { .Count = 1, .Quality = 0 },
4258 .Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN,
4259 .Flags = D3D12_RESOURCE_FLAG_NONE,
4260 };
4261
4262 if (requires_temp_res) {
4263 ID3D12Device4 *dev = device->dev;
4264 VkImageAspectFlags aspect = 0;
4265 uint64_t max_size = 0;
4266
4267 if (vk_format_has_depth(src->vk.format))
4268 aspect = VK_IMAGE_ASPECT_DEPTH_BIT;
4269 else if (vk_format_has_stencil(src->vk.format))
4270 aspect = VK_IMAGE_ASPECT_DEPTH_BIT;
4271 else
4272 aspect = VK_IMAGE_ASPECT_COLOR_BIT;
4273
4274 for (uint32_t i = 0; i < info->regionCount; i++) {
4275 const VkImageCopy2 *region = &info->pRegions[i];
4276 uint64_t region_size = 0;
4277
4278 tmp_desc.Format =
4279 dzn_image_get_dxgi_format(pdev, src->vk.format,
4280 VK_IMAGE_USAGE_TRANSFER_DST_BIT,
4281 aspect);
4282 tmp_desc.Width = region->extent.width;
4283 tmp_desc.Height = region->extent.height;
4284
4285 ID3D12Device1_GetCopyableFootprints(dev, &src->desc,
4286 0, 1, 0,
4287 NULL, NULL, NULL,
4288 ®ion_size);
4289 max_size = MAX2(max_size, region_size * region->extent.depth);
4290 }
4291
4292 VkResult result =
4293 dzn_cmd_buffer_alloc_internal_buf(cmdbuf, max_size,
4294 DZN_INTERNAL_BUF_DEFAULT,
4295 D3D12_RESOURCE_STATE_COPY_DEST,
4296 0,
4297 &tmp_loc.pResource, NULL);
4298 if (result != VK_SUCCESS)
4299 return;
4300
4301 tmp_loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
4302 }
4303
4304 for (int i = 0; i < info->regionCount; i++) {
4305 const VkImageCopy2 *region = &info->pRegions[i];
4306
4307 dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) {
4308 for (uint32_t l = 0; l < MAX2(region->srcSubresource.layerCount, region->dstSubresource.layerCount); l++)
4309 dzn_cmd_buffer_copy_img_chunk(cmdbuf, info, &tmp_desc, &tmp_loc, i, aspect, l);
4310 }
4311 }
4312 }
4313
4314 static VkResult
dzn_alloc_and_bind_blit_heap_slots(struct dzn_cmd_buffer * cmdbuf,uint32_t num_view_slots,D3D12_FILTER sampler_filter,struct dzn_descriptor_heap ** view_heap,uint32_t * view_heap_slot,struct dzn_descriptor_heap ** sampler_heap,uint32_t * sampler_heap_slot)4315 dzn_alloc_and_bind_blit_heap_slots(struct dzn_cmd_buffer *cmdbuf,
4316 uint32_t num_view_slots, D3D12_FILTER sampler_filter,
4317 struct dzn_descriptor_heap **view_heap, uint32_t *view_heap_slot,
4318 struct dzn_descriptor_heap **sampler_heap, uint32_t *sampler_heap_slot)
4319 {
4320 struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
4321
4322 VkResult result =
4323 dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->cbv_srv_uav_pool, device,
4324 num_view_slots, view_heap, view_heap_slot);
4325
4326 if (result != VK_SUCCESS) {
4327 vk_command_buffer_set_error(&cmdbuf->vk, result);
4328 return result;
4329 }
4330
4331 if (!device->support_static_samplers) {
4332 result =
4333 dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->sampler_pool, device,
4334 1, sampler_heap, sampler_heap_slot);
4335
4336 if (result != VK_SUCCESS) {
4337 vk_command_buffer_set_error(&cmdbuf->vk, result);
4338 return result;
4339 }
4340
4341 D3D12_SAMPLER_DESC sampler_desc = {
4342 .Filter = sampler_filter,
4343 .AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP,
4344 .AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP,
4345 .AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP,
4346 .MipLODBias = 0,
4347 .MaxAnisotropy = 0,
4348 .MinLOD = 0,
4349 .MaxLOD = D3D12_FLOAT32_MAX,
4350 };
4351 ID3D12Device4_CreateSampler(device->dev, &sampler_desc,
4352 dzn_descriptor_heap_get_cpu_handle(*sampler_heap, *sampler_heap_slot));
4353 }
4354
4355 if (*view_heap != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] ||
4356 (*sampler_heap && *sampler_heap != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER])) {
4357 ID3D12DescriptorHeap * const heaps[] = { (*view_heap)->heap, *sampler_heap ? (*sampler_heap)->heap : NULL };
4358 cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = *view_heap;
4359 cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] = *sampler_heap;
4360 ID3D12GraphicsCommandList1_SetDescriptorHeaps(cmdbuf->cmdlist, *sampler_heap ? 2 : 1, heaps);
4361 }
4362
4363 return VK_SUCCESS;
4364 }
4365
4366 VKAPI_ATTR void VKAPI_CALL
dzn_CmdBlitImage2(VkCommandBuffer commandBuffer,const VkBlitImageInfo2 * info)4367 dzn_CmdBlitImage2(VkCommandBuffer commandBuffer,
4368 const VkBlitImageInfo2 *info)
4369 {
4370 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4371
4372 if (info->regionCount == 0)
4373 return;
4374
4375 uint32_t desc_count = 0;
4376 for (uint32_t r = 0; r < info->regionCount; r++)
4377 desc_count += util_bitcount(info->pRegions[r].srcSubresource.aspectMask);
4378
4379 struct dzn_descriptor_heap *heap;
4380 uint32_t heap_slot;
4381 struct dzn_descriptor_heap *sampler_heap = NULL;
4382 uint32_t sampler_heap_slot = 0;
4383 VkResult result = dzn_alloc_and_bind_blit_heap_slots(cmdbuf, desc_count,
4384 info->filter == VK_FILTER_LINEAR ?
4385 D3D12_FILTER_MIN_MAG_MIP_LINEAR :
4386 D3D12_FILTER_MIN_MAG_MIP_POINT,
4387 &heap, &heap_slot, &sampler_heap, &sampler_heap_slot);
4388
4389 if (result != VK_SUCCESS)
4390 return;
4391
4392 ID3D12GraphicsCommandList1_IASetPrimitiveTopology(cmdbuf->cmdlist, D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
4393
4394 for (uint32_t r = 0; r < info->regionCount; r++)
4395 dzn_cmd_buffer_blit_region(cmdbuf, info, heap, &heap_slot, sampler_heap, sampler_heap_slot, r);
4396
4397 cmdbuf->state.pipeline = NULL;
4398 cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS | DZN_CMD_DIRTY_SCISSORS;
4399 if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline) {
4400 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
4401 DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
4402 }
4403 }
4404
4405 VKAPI_ATTR void VKAPI_CALL
dzn_CmdResolveImage2(VkCommandBuffer commandBuffer,const VkResolveImageInfo2 * info)4406 dzn_CmdResolveImage2(VkCommandBuffer commandBuffer,
4407 const VkResolveImageInfo2 *info)
4408 {
4409 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4410
4411 if (info->regionCount == 0)
4412 return;
4413
4414 uint32_t desc_count = 0;
4415 for (uint32_t r = 0; r < info->regionCount; r++)
4416 desc_count += util_bitcount(info->pRegions[r].srcSubresource.aspectMask);
4417
4418 struct dzn_descriptor_heap *heap;
4419 uint32_t heap_slot;
4420 struct dzn_descriptor_heap *sampler_heap = NULL;
4421 uint32_t sampler_heap_slot = 0;
4422 VkResult result = dzn_alloc_and_bind_blit_heap_slots(cmdbuf, desc_count,
4423 D3D12_FILTER_MIN_MAG_MIP_POINT,
4424 &heap, &heap_slot, &sampler_heap, &sampler_heap_slot);
4425 if (result != VK_SUCCESS)
4426 return;
4427
4428 ID3D12GraphicsCommandList1_IASetPrimitiveTopology(cmdbuf->cmdlist, D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
4429
4430 for (uint32_t r = 0; r < info->regionCount; r++)
4431 dzn_cmd_buffer_resolve_region(cmdbuf, info, VK_RESOLVE_MODE_AVERAGE_BIT, heap, &heap_slot, sampler_heap, sampler_heap_slot, r);
4432
4433 cmdbuf->state.pipeline = NULL;
4434 cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS | DZN_CMD_DIRTY_SCISSORS;
4435 if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline) {
4436 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
4437 DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
4438 }
4439 }
4440
/* vkCmdClearColorImage: thin wrapper around the shared color-clear helper. */
VKAPI_ATTR void VKAPI_CALL
dzn_CmdClearColorImage(VkCommandBuffer commandBuffer,
                       VkImage image,
                       VkImageLayout imageLayout,
                       const VkClearColorValue *pColor,
                       uint32_t rangeCount,
                       const VkImageSubresourceRange *pRanges)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(dzn_image, img, image);

   dzn_cmd_buffer_clear_color(cmdbuf, img, imageLayout, pColor, rangeCount, pRanges);
}
4454
/* vkCmdClearDepthStencilImage: thin wrapper around the shared
 * depth/stencil-clear helper.
 */
VKAPI_ATTR void VKAPI_CALL
dzn_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
                              VkImage image,
                              VkImageLayout imageLayout,
                              const VkClearDepthStencilValue *pDepthStencil,
                              uint32_t rangeCount,
                              const VkImageSubresourceRange *pRanges)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(dzn_image, img, image);

   dzn_cmd_buffer_clear_zs(cmdbuf, img, imageLayout, pDepthStencil, rangeCount, pRanges);
}
4468
/* vkCmdDispatchBase (and vkCmdDispatch via base 0,0,0): publish the group
 * counts and base-group offsets through compute sysvals, flush dirty
 * compute state, then issue the D3D12 dispatch.
 */
VKAPI_ATTR void VKAPI_CALL
dzn_CmdDispatchBase(VkCommandBuffer commandBuffer,
                    uint32_t baseGroupX,
                    uint32_t baseGroupY,
                    uint32_t baseGroupZ,
                    uint32_t groupCountX,
                    uint32_t groupCountY,
                    uint32_t groupCountZ)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   /* Shaders read these through the sysvals CBV, so mark sysvals dirty. */
   cmdbuf->state.sysvals.compute.group_count_x = groupCountX;
   cmdbuf->state.sysvals.compute.group_count_y = groupCountY;
   cmdbuf->state.sysvals.compute.group_count_z = groupCountZ;
   cmdbuf->state.sysvals.compute.base_group_x = baseGroupX;
   cmdbuf->state.sysvals.compute.base_group_y = baseGroupY;
   cmdbuf->state.sysvals.compute.base_group_z = baseGroupZ;
   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |=
      DZN_CMD_BINDPOINT_DIRTY_SYSVALS;

   dzn_cmd_buffer_prepare_dispatch(cmdbuf);
   ID3D12GraphicsCommandList1_Dispatch(cmdbuf->cmdlist, groupCountX, groupCountY, groupCountZ);
}
4492
4493 VKAPI_ATTR void VKAPI_CALL
dzn_CmdFillBuffer(VkCommandBuffer commandBuffer,VkBuffer dstBuffer,VkDeviceSize dstOffset,VkDeviceSize size,uint32_t data)4494 dzn_CmdFillBuffer(VkCommandBuffer commandBuffer,
4495 VkBuffer dstBuffer,
4496 VkDeviceSize dstOffset,
4497 VkDeviceSize size,
4498 uint32_t data)
4499 {
4500 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4501 VK_FROM_HANDLE(dzn_buffer, buf, dstBuffer);
4502
4503 if (size == VK_WHOLE_SIZE)
4504 size = buf->size - dstOffset;
4505
4506 size &= ~3ULL;
4507
4508 ID3D12Resource *src_res;
4509 uint64_t src_offset;
4510 VkResult result =
4511 dzn_cmd_buffer_alloc_internal_buf(cmdbuf, size,
4512 DZN_INTERNAL_BUF_UPLOAD,
4513 D3D12_RESOURCE_STATE_GENERIC_READ,
4514 4,
4515 &src_res,
4516 &src_offset);
4517 if (result != VK_SUCCESS)
4518 return;
4519
4520 uint32_t *cpu_ptr;
4521 ID3D12Resource_Map(src_res, 0, NULL, (void **)&cpu_ptr);
4522 cpu_ptr += src_offset / sizeof(uint32_t);
4523 for (uint32_t i = 0; i < size / 4; i++)
4524 cpu_ptr[i] = data;
4525
4526 ID3D12Resource_Unmap(src_res, 0, NULL);
4527
4528 ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset, src_res, src_offset, size);
4529 }
4530
4531 VKAPI_ATTR void VKAPI_CALL
dzn_CmdUpdateBuffer(VkCommandBuffer commandBuffer,VkBuffer dstBuffer,VkDeviceSize dstOffset,VkDeviceSize size,const void * data)4532 dzn_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
4533 VkBuffer dstBuffer,
4534 VkDeviceSize dstOffset,
4535 VkDeviceSize size,
4536 const void *data)
4537 {
4538 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4539 VK_FROM_HANDLE(dzn_buffer, buf, dstBuffer);
4540
4541 if (size == VK_WHOLE_SIZE)
4542 size = buf->size - dstOffset;
4543
4544 /*
4545 * The spec says:
4546 * 4, or VK_WHOLE_SIZE to fill the range from offset to the end of the
4547 * buffer. If VK_WHOLE_SIZE is used and the remaining size of the buffer
4548 * is not a multiple of 4, then the nearest smaller multiple is used."
4549 */
4550 size &= ~3ULL;
4551
4552 ID3D12Resource *src_res;
4553 uint64_t src_offset;
4554 VkResult result =
4555 dzn_cmd_buffer_alloc_internal_buf(cmdbuf, size,
4556 DZN_INTERNAL_BUF_UPLOAD,
4557 D3D12_RESOURCE_STATE_GENERIC_READ,
4558 4,
4559 &src_res, &src_offset);
4560 if (result != VK_SUCCESS)
4561 return;
4562
4563 void *cpu_ptr;
4564 ID3D12Resource_Map(src_res, 0, NULL, &cpu_ptr);
4565 memcpy((uint8_t *)cpu_ptr + src_offset, data, size),
4566 ID3D12Resource_Unmap(src_res, 0, NULL);
4567
4568 ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset, src_res, src_offset, size);
4569 }
4570
VKAPI_ATTR void VKAPI_CALL
dzn_CmdClearAttachments(VkCommandBuffer commandBuffer,
                        uint32_t attachmentCount,
                        const VkClearAttachment *pAttachments,
                        uint32_t rectCount,
                        const VkClearRect *pRects)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   /* For each requested attachment, look up the currently-bound render
    * target (color, or depth and/or stencil) and emit one clear per rect,
    * replicated per active view when multiview is enabled.
    */
   for (unsigned i = 0; i < attachmentCount; i++) {
      VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED;
      struct dzn_image_view *view = NULL;

      if (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
         assert(pAttachments[i].colorAttachment < cmdbuf->state.render.attachments.color_count);
         view = cmdbuf->state.render.attachments.colors[pAttachments[i].colorAttachment].iview;
         layout = cmdbuf->state.render.attachments.colors[pAttachments[i].colorAttachment].layout;
      } else {
         /* Depth and stencil may be requested together; when both views
          * are bound they must refer to the same image view (asserted
          * below), so the last assignment wins harmlessly.
          */
         if (cmdbuf->state.render.attachments.depth.iview &&
             (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)) {
            view = cmdbuf->state.render.attachments.depth.iview;
            layout = cmdbuf->state.render.attachments.depth.layout;
         }

         if (cmdbuf->state.render.attachments.stencil.iview &&
             (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)) {
            assert(!view || view == cmdbuf->state.render.attachments.depth.iview);
            view = cmdbuf->state.render.attachments.stencil.iview;
            layout = cmdbuf->state.render.attachments.stencil.layout;
         }
      }

      /* Nothing bound for this aspect: nothing to clear. */
      if (!view)
         continue;

      for (uint32_t j = 0; j < rectCount; j++) {
         D3D12_RECT rect;
         dzn_translate_rect(&rect, &pRects[j].rect);

         uint32_t view_mask = cmdbuf->state.multiview.view_mask;
         if (view_mask != 0) {
            /* Multiview: one clear per active view, offsetting the base
             * layer by the view index.
             */
            u_foreach_bit(layer, view_mask) {
               dzn_cmd_buffer_clear_attachment(cmdbuf, view, layout,
                                               &pAttachments[i].clearValue,
                                               pAttachments[i].aspectMask,
                                               pRects[j].baseArrayLayer + layer,
                                               pRects[j].layerCount,
                                               1, &rect);
            }
         } else {
            dzn_cmd_buffer_clear_attachment(cmdbuf, view, layout,
                                            &pAttachments[i].clearValue,
                                            pAttachments[i].aspectMask,
                                            pRects[j].baseArrayLayer,
                                            pRects[j].layerCount,
                                            1, &rect);
         }
      }
   }
}
4631
4632 static D3D12_RESOLVE_MODE
dzn_get_resolve_mode(VkResolveModeFlags mode)4633 dzn_get_resolve_mode(VkResolveModeFlags mode)
4634 {
4635 switch (mode) {
4636 case VK_RESOLVE_MODE_AVERAGE_BIT: return D3D12_RESOLVE_MODE_AVERAGE;
4637 case VK_RESOLVE_MODE_MAX_BIT: return D3D12_RESOLVE_MODE_MAX;
4638 case VK_RESOLVE_MODE_MIN_BIT: return D3D12_RESOLVE_MODE_MIN;
4639 /* TODO */
4640 case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT: return D3D12_RESOLVE_MODE_MIN;
4641 default: return D3D12_RESOLVE_MODE_AVERAGE;
4642 }
4643 }
4644
4645 static void
dzn_cmd_buffer_resolve_rendering_attachment_via_blit(struct dzn_cmd_buffer * cmdbuf,const struct dzn_rendering_attachment * att,VkImageAspectFlagBits aspect,const VkImageSubresourceRange * src_range,const VkImageSubresourceRange * dst_range)4646 dzn_cmd_buffer_resolve_rendering_attachment_via_blit(struct dzn_cmd_buffer *cmdbuf,
4647 const struct dzn_rendering_attachment *att,
4648 VkImageAspectFlagBits aspect,
4649 const VkImageSubresourceRange *src_range,
4650 const VkImageSubresourceRange *dst_range)
4651 {
4652 uint32_t desc_count = util_bitcount(aspect) * src_range->levelCount * src_range->layerCount;
4653
4654 struct dzn_descriptor_heap *heap;
4655 uint32_t heap_slot;
4656 struct dzn_descriptor_heap *sampler_heap = NULL;
4657 uint32_t sampler_heap_slot = 0;
4658 VkResult result = dzn_alloc_and_bind_blit_heap_slots(cmdbuf, desc_count,
4659 D3D12_FILTER_MIN_MAG_MIP_POINT,
4660 &heap, &heap_slot, &sampler_heap, &sampler_heap_slot);
4661 if (result != VK_SUCCESS)
4662 return;
4663
4664 ID3D12GraphicsCommandList1_IASetPrimitiveTopology(cmdbuf->cmdlist, D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
4665
4666 VkImageResolve2 region = {
4667 .sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2,
4668 .srcSubresource = {
4669 .aspectMask = aspect,
4670 .baseArrayLayer = src_range->baseArrayLayer,
4671 .layerCount = src_range->layerCount,
4672 },
4673 .dstSubresource = {
4674 .aspectMask = aspect,
4675 .baseArrayLayer = dst_range->baseArrayLayer,
4676 .layerCount = dst_range->layerCount,
4677 },
4678 };
4679 VkResolveImageInfo2 resolve_info = {
4680 .sType = VK_STRUCTURE_TYPE_RESOLVE_IMAGE_INFO_2,
4681 .srcImage = vk_image_to_handle(att->iview->vk.image),
4682 .dstImage = vk_image_to_handle(att->resolve.iview->vk.image),
4683 .srcImageLayout = att->layout,
4684 .dstImageLayout = att->resolve.layout,
4685 .regionCount = 1,
4686 .pRegions = ®ion
4687 };
4688 for (uint32_t level = 0; level < src_range->levelCount; ++level) {
4689 region.srcSubresource.mipLevel = level + src_range->baseMipLevel;
4690 region.dstSubresource.mipLevel = level + dst_range->baseMipLevel;
4691 region.extent = (VkExtent3D){
4692 u_minify(att->iview->vk.image->extent.width, region.srcSubresource.mipLevel),
4693 u_minify(att->iview->vk.image->extent.height, region.srcSubresource.mipLevel),
4694 u_minify(att->iview->vk.image->extent.depth, region.srcSubresource.mipLevel),
4695 };
4696 dzn_cmd_buffer_resolve_region(cmdbuf, &resolve_info, att->resolve.mode, heap, &heap_slot, sampler_heap, sampler_heap_slot, 0);
4697 }
4698
4699 cmdbuf->state.pipeline = NULL;
4700 cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS | DZN_CMD_DIRTY_SCISSORS;
4701 if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline) {
4702 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
4703 DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
4704 }
4705 }
4706
/* Resolve a multisampled rendering attachment into its resolve target,
 * preferring the native D3D12 resolve path and falling back to the
 * blit-based path when the hardware API can't express the request.
 */
static void
dzn_cmd_buffer_resolve_rendering_attachment(struct dzn_cmd_buffer *cmdbuf,
                                            const struct dzn_rendering_attachment *att,
                                            VkImageAspectFlagBits aspect,
                                            bool force_blit_resolve)
{
   struct dzn_image_view *src = att->iview;
   struct dzn_image_view *dst = att->resolve.iview;

   /* No source/destination view or resolve disabled: nothing to do. */
   if (!src || !dst || att->resolve.mode == VK_RESOLVE_MODE_NONE)
      return;

   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   struct dzn_physical_device *pdev =
      container_of(device->vk.physical, struct dzn_physical_device, vk);

   struct dzn_image *src_img = container_of(src->vk.image, struct dzn_image, vk);
   struct dzn_image *dst_img = container_of(dst->vk.image, struct dzn_image, vk);

   /* Clamp level/layer counts to what both views actually share. */
   VkImageSubresourceRange src_range = {
      .aspectMask = (VkImageAspectFlags)aspect,
      .baseMipLevel = src->vk.base_mip_level,
      .levelCount = MIN2(src->vk.level_count, dst->vk.level_count),
      .baseArrayLayer = src->vk.base_array_layer,
      .layerCount = MIN2(src->vk.layer_count, dst->vk.layer_count),
   };
   /* 3D images have no array layers on the D3D12 side. */
   if (src_img->desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D) {
      src_range.baseArrayLayer = 0;
      src_range.layerCount = 1;
   }

   VkImageSubresourceRange dst_range = {
      .aspectMask = (VkImageAspectFlags)aspect,
      .baseMipLevel = dst->vk.base_mip_level,
      .levelCount = MIN2(src->vk.level_count, dst->vk.level_count),
      .baseArrayLayer = dst->vk.base_array_layer,
      .layerCount = MIN2(src->vk.layer_count, dst->vk.layer_count),
   };
   if (dst_img->desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D) {
      dst_range.baseArrayLayer = 0;
      dst_range.layerCount = 1;
   }

   if (force_blit_resolve ||
       /* Resolve modes other than average are poorly tested / buggy */
       att->resolve.mode != VK_RESOLVE_MODE_AVERAGE_BIT ||
       /* D3D resolve API can't go from (e.g.) D32S8X24 to D32 */
       src->vk.view_format != dst->vk.view_format) {
      dzn_cmd_buffer_resolve_rendering_attachment_via_blit(cmdbuf, att, aspect, &src_range, &dst_range);
      return;
   }

   VkImageLayout src_layout = att->layout;
   VkImageLayout dst_layout = att->resolve.layout;

   /* Transition src to RESOLVE_SOURCE and dst to RESOLVE_DEST, using
    * enhanced barriers when available, legacy transitions otherwise.
    */
   D3D12_RESOURCE_STATES src_state = dzn_image_layout_to_state(src_img, src_layout, aspect, cmdbuf->type);
   D3D12_RESOURCE_STATES dst_state = dzn_image_layout_to_state(dst_img, dst_layout, aspect, cmdbuf->type);
   D3D12_BARRIER_LAYOUT src_restore_layout = D3D12_BARRIER_LAYOUT_COMMON,
      src_needed_layout = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ;
   D3D12_BARRIER_LAYOUT dst_restore_layout = D3D12_BARRIER_LAYOUT_COMMON,
      dst_needed_layout = D3D12_BARRIER_LAYOUT_RESOLVE_DEST;
   if (cmdbuf->enhanced_barriers) {
      src_restore_layout = dzn_cmd_buffer_require_layout(cmdbuf, src_img,
                                                         src_layout, src_needed_layout,
                                                         &src_range);
      dst_restore_layout = dzn_cmd_buffer_require_layout(cmdbuf, dst_img,
                                                         dst_layout, dst_needed_layout,
                                                         &dst_range);
   } else {
      dzn_cmd_buffer_queue_image_range_state_transition(cmdbuf, src_img, &src_range,
                                                        src_state,
                                                        D3D12_RESOURCE_STATE_RESOLVE_SOURCE,
                                                        DZN_QUEUE_TRANSITION_FLUSH);
      dzn_cmd_buffer_queue_image_range_state_transition(cmdbuf, dst_img, &dst_range,
                                                        dst_state,
                                                        D3D12_RESOURCE_STATE_RESOLVE_DEST,
                                                        DZN_QUEUE_TRANSITION_FLUSH);
   }

   /* Resolve each (level, layer) subresource pair individually. */
   for (uint32_t level = 0; level < src_range.levelCount; level++) {
      for (uint32_t layer = 0; layer < src_range.layerCount; layer++) {
         uint32_t src_subres =
            dzn_image_range_get_subresource_index(src_img, &src_range, aspect, level, layer);
         uint32_t dst_subres =
            dzn_image_range_get_subresource_index(dst_img, &dst_range, aspect, level, layer);

         DXGI_FORMAT format =
            dzn_image_get_dxgi_format(pdev, dst->vk.format,
                                      dst->vk.usage & ~VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
                                      aspect);

         /* Prefer ResolveSubresourceRegion when the runtime and hardware
          * support it, since it honors the requested resolve mode; the
          * plain ResolveSubresource fallback always averages.
          */
         if (cmdbuf->cmdlist8 &&
             pdev->options2.ProgrammableSamplePositionsTier > D3D12_PROGRAMMABLE_SAMPLE_POSITIONS_TIER_NOT_SUPPORTED) {
            ID3D12GraphicsCommandList8_ResolveSubresourceRegion(cmdbuf->cmdlist8,
                                                                dst_img->res, dst_subres,
                                                                0, 0,
                                                                src_img->res, src_subres,
                                                                NULL,
                                                                format,
                                                                dzn_get_resolve_mode(att->resolve.mode));
         } else {
            ID3D12GraphicsCommandList1_ResolveSubresource(cmdbuf->cmdlist,
                                                          dst_img->res, dst_subres,
                                                          src_img->res, src_subres,
                                                          format);
         }
      }
   }

   /* Restore the original layouts/states now that the resolve is queued. */
   if (cmdbuf->enhanced_barriers) {
      dzn_cmd_buffer_restore_layout(cmdbuf, src_img,
                                    D3D12_BARRIER_SYNC_RESOLVE, D3D12_BARRIER_ACCESS_RESOLVE_SOURCE,
                                    src_needed_layout, src_restore_layout,
                                    &src_range);
      dzn_cmd_buffer_restore_layout(cmdbuf, dst_img,
                                    D3D12_BARRIER_SYNC_RESOLVE, D3D12_BARRIER_ACCESS_RESOLVE_DEST,
                                    dst_needed_layout, dst_restore_layout,
                                    &dst_range);
   } else {
      dzn_cmd_buffer_queue_image_range_state_transition(cmdbuf, src_img, &src_range,
                                                        D3D12_RESOURCE_STATE_RESOLVE_SOURCE,
                                                        src_state,
                                                        DZN_QUEUE_TRANSITION_FLUSH);
      dzn_cmd_buffer_queue_image_range_state_transition(cmdbuf, dst_img, &dst_range,
                                                        D3D12_RESOURCE_STATE_RESOLVE_DEST,
                                                        dst_state,
                                                        DZN_QUEUE_TRANSITION_FLUSH);
   }
}
4836
/* Apply the initial layout transition requested via
 * VkRenderingAttachmentInitialLayoutInfoMESA for an attachment of a
 * dynamic-rendering pass. No-op when the struct is absent or the
 * attachment has no image view.
 */
static void
dzn_rendering_attachment_initial_transition(struct dzn_cmd_buffer *cmdbuf,
                                            const VkRenderingAttachmentInfo *att,
                                            VkImageAspectFlagBits aspect)
{
   const VkRenderingAttachmentInitialLayoutInfoMESA *initial_layout =
      vk_find_struct_const(att->pNext, RENDERING_ATTACHMENT_INITIAL_LAYOUT_INFO_MESA);
   VK_FROM_HANDLE(dzn_image_view, iview, att->imageView);

   if (!initial_layout || !iview)
      return;

   struct dzn_image *image = container_of(iview->vk.image, struct dzn_image, vk);
   /* Transition exactly the subresources covered by the view. */
   VkImageSubresourceRange range = {
      .aspectMask = aspect,
      .baseMipLevel = iview->vk.base_mip_level,
      .levelCount = iview->vk.level_count,
      .baseArrayLayer = iview->vk.base_array_layer,
      .layerCount = iview->vk.layer_count,
   };
   /* 3D images have no array layers on the D3D12 side. */
   if (image->desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D) {
      range.baseArrayLayer = 0;
      range.layerCount = 1;
   }

   if (cmdbuf->enhanced_barriers) {
      /* UNDEFINED means the content can be discarded: no sync/access
       * needed before the barrier.
       */
      D3D12_BARRIER_SYNC sync_before = D3D12_BARRIER_SYNC_ALL;
      D3D12_BARRIER_ACCESS access_before = D3D12_BARRIER_ACCESS_COMMON;
      if (initial_layout->initialLayout == VK_IMAGE_LAYOUT_UNDEFINED) {
         sync_before = D3D12_BARRIER_SYNC_NONE;
         access_before = D3D12_BARRIER_ACCESS_NO_ACCESS;
      }

      D3D12_BARRIER_LAYOUT layout_before = dzn_vk_layout_to_d3d_layout(initial_layout->initialLayout, cmdbuf->type, aspect);
      D3D12_BARRIER_LAYOUT layout_after = dzn_vk_layout_to_d3d_layout(att->imageLayout, cmdbuf->type, aspect);
      /* Simultaneous-access resources don't use layouts. */
      if (image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS) {
         layout_before = D3D12_BARRIER_LAYOUT_UNDEFINED;
         layout_after = D3D12_BARRIER_LAYOUT_UNDEFINED;
      }

      dzn_cmd_buffer_image_barrier(cmdbuf, image,
                                   sync_before, D3D12_BARRIER_SYNC_DRAW,
                                   access_before, D3D12_BARRIER_ACCESS_COMMON,
                                   layout_before,
                                   layout_after,
                                   &range);
   } else {
      /* Legacy resource-state path. */
      dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range,
                                                         initial_layout->initialLayout,
                                                         att->imageLayout,
                                                         DZN_QUEUE_TRANSITION_FLUSH);
   }
}
4890
VKAPI_ATTR void VKAPI_CALL
dzn_CmdBeginRendering(VkCommandBuffer commandBuffer,
                      const VkRenderingInfo *pRenderingInfo)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   D3D12_RECT new_render_area = {
      .left = pRenderingInfo->renderArea.offset.x,
      .top = pRenderingInfo->renderArea.offset.y,
      .right = (LONG)(pRenderingInfo->renderArea.offset.x + pRenderingInfo->renderArea.extent.width),
      .bottom = (LONG)(pRenderingInfo->renderArea.offset.y + pRenderingInfo->renderArea.extent.height),
   };

   // The render area has an impact on the scissor state.
   if (memcmp(&cmdbuf->state.render.area, &new_render_area, sizeof(new_render_area))) {
      cmdbuf->state.dirty |= DZN_CMD_DIRTY_SCISSORS;
      cmdbuf->state.render.area = new_render_area;
   }

   cmdbuf->state.render.flags = pRenderingInfo->flags;
   cmdbuf->state.render.layer_count = pRenderingInfo->layerCount;
   cmdbuf->state.render.view_mask = pRenderingInfo->viewMask;

   D3D12_CPU_DESCRIPTOR_HANDLE rt_handles[MAX_RTS] = { 0 };
   D3D12_CPU_DESCRIPTOR_HANDLE zs_handle = { 0 };

   /* Record the color attachments in the command-buffer state and gather
    * their RTV handles; unbound slots get a null RTV.
    */
   cmdbuf->state.render.attachments.color_count = pRenderingInfo->colorAttachmentCount;
   for (uint32_t i = 0; i < pRenderingInfo->colorAttachmentCount; i++) {
      const VkRenderingAttachmentInfo *att = &pRenderingInfo->pColorAttachments[i];
      VK_FROM_HANDLE(dzn_image_view, iview, att->imageView);

      cmdbuf->state.render.attachments.colors[i].iview = iview;
      cmdbuf->state.render.attachments.colors[i].layout = att->imageLayout;
      cmdbuf->state.render.attachments.colors[i].resolve.mode = att->resolveMode;
      cmdbuf->state.render.attachments.colors[i].resolve.iview =
         dzn_image_view_from_handle(att->resolveImageView);
      cmdbuf->state.render.attachments.colors[i].resolve.layout =
         att->resolveImageLayout;
      cmdbuf->state.render.attachments.colors[i].store_op = att->storeOp;

      if (!iview) {
         rt_handles[i] = dzn_cmd_buffer_get_null_rtv(cmdbuf);
         continue;
      }

      struct dzn_image *img = container_of(iview->vk.image, struct dzn_image, vk);
      rt_handles[i] = dzn_cmd_buffer_get_rtv(cmdbuf, img, &iview->rtv_desc);
      /* Honor an initial-layout transition request, if any. */
      dzn_rendering_attachment_initial_transition(cmdbuf, att,
                                                  VK_IMAGE_ASPECT_COLOR_BIT);
   }

   /* Record depth attachment state, if present. */
   if (pRenderingInfo->pDepthAttachment) {
      const VkRenderingAttachmentInfo *att = pRenderingInfo->pDepthAttachment;

      cmdbuf->state.render.attachments.depth.iview =
         dzn_image_view_from_handle(att->imageView);
      cmdbuf->state.render.attachments.depth.layout = att->imageLayout;
      cmdbuf->state.render.attachments.depth.resolve.mode = att->resolveMode;
      cmdbuf->state.render.attachments.depth.resolve.iview =
         dzn_image_view_from_handle(att->resolveImageView);
      cmdbuf->state.render.attachments.depth.resolve.layout =
         att->resolveImageLayout;
      cmdbuf->state.render.attachments.depth.store_op = att->storeOp;
      dzn_rendering_attachment_initial_transition(cmdbuf, att,
                                                  VK_IMAGE_ASPECT_DEPTH_BIT);
   }

   /* Record stencil attachment state, if present. */
   if (pRenderingInfo->pStencilAttachment) {
      const VkRenderingAttachmentInfo *att = pRenderingInfo->pStencilAttachment;

      cmdbuf->state.render.attachments.stencil.iview =
         dzn_image_view_from_handle(att->imageView);
      cmdbuf->state.render.attachments.stencil.layout = att->imageLayout;
      cmdbuf->state.render.attachments.stencil.resolve.mode = att->resolveMode;
      cmdbuf->state.render.attachments.stencil.resolve.iview =
         dzn_image_view_from_handle(att->resolveImageView);
      cmdbuf->state.render.attachments.stencil.resolve.layout =
         att->resolveImageLayout;
      cmdbuf->state.render.attachments.stencil.store_op = att->storeOp;
      dzn_rendering_attachment_initial_transition(cmdbuf, att,
                                                  VK_IMAGE_ASPECT_STENCIL_BIT);
   }

   /* Depth and stencil share a single DSV; when both are supplied they
    * must be views of the same image.
    */
   if (pRenderingInfo->pDepthAttachment || pRenderingInfo->pStencilAttachment) {
      struct dzn_image_view *z_iview =
         pRenderingInfo->pDepthAttachment ?
         dzn_image_view_from_handle(pRenderingInfo->pDepthAttachment->imageView) :
         NULL;
      struct dzn_image_view *s_iview =
         pRenderingInfo->pStencilAttachment ?
         dzn_image_view_from_handle(pRenderingInfo->pStencilAttachment->imageView) :
         NULL;
      struct dzn_image_view *iview = z_iview ? z_iview : s_iview;
      assert(!z_iview || !s_iview || z_iview == s_iview);

      if (iview) {
         struct dzn_image *img = container_of(iview->vk.image, struct dzn_image, vk);

         zs_handle = dzn_cmd_buffer_get_dsv(cmdbuf, img, &iview->dsv_desc);
      }
   }

   ID3D12GraphicsCommandList1_OMSetRenderTargets(cmdbuf->cmdlist,
                                                 pRenderingInfo->colorAttachmentCount,
                                                 pRenderingInfo->colorAttachmentCount ? rt_handles : NULL,
                                                 false, zs_handle.ptr ? &zs_handle : NULL);

   /* Execute LOAD_OP_CLEAR for color attachments, unless this pass is
    * resuming a suspended one (the clear already happened).
    */
   for (uint32_t a = 0; a < pRenderingInfo->colorAttachmentCount; a++) {
      const VkRenderingAttachmentInfo *att = &pRenderingInfo->pColorAttachments[a];
      VK_FROM_HANDLE(dzn_image_view, iview, att->imageView);

      if (iview != NULL && att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR &&
          !(pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT)) {
         if (pRenderingInfo->viewMask != 0) {
            /* Multiview: clear each active view's layer. */
            u_foreach_bit(layer, pRenderingInfo->viewMask) {
               dzn_cmd_buffer_clear_attachment(cmdbuf, iview, att->imageLayout,
                                               &att->clearValue,
                                               VK_IMAGE_ASPECT_COLOR_BIT, layer,
                                               1, 1, &cmdbuf->state.render.area);
            }
         } else {
            dzn_cmd_buffer_clear_attachment(cmdbuf, iview, att->imageLayout,
                                            &att->clearValue,
                                            VK_IMAGE_ASPECT_COLOR_BIT, 0,
                                            pRenderingInfo->layerCount, 1,
                                            &cmdbuf->state.render.area);
         }
      }
   }

   /* Execute LOAD_OP_CLEAR for depth/stencil, combining both aspects into
    * a single clear call when possible.
    */
   if ((pRenderingInfo->pDepthAttachment || pRenderingInfo->pStencilAttachment) &&
       !(pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT)) {
      const VkRenderingAttachmentInfo *z_att = pRenderingInfo->pDepthAttachment;
      const VkRenderingAttachmentInfo *s_att = pRenderingInfo->pStencilAttachment;
      struct dzn_image_view *z_iview = z_att ? dzn_image_view_from_handle(z_att->imageView) : NULL;
      struct dzn_image_view *s_iview = s_att ? dzn_image_view_from_handle(s_att->imageView) : NULL;
      struct dzn_image_view *iview = z_iview ? z_iview : s_iview;
      VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED;

      assert(!z_iview || !s_iview || z_iview == s_iview);

      VkImageAspectFlags aspects = 0;
      VkClearValue clear_val;

      if (z_iview && z_att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
         aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
         clear_val.depthStencil.depth = z_att->clearValue.depthStencil.depth;
         layout = z_att->imageLayout;
      }

      if (s_iview && s_att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
         aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
         clear_val.depthStencil.stencil = s_att->clearValue.depthStencil.stencil;
         layout = s_att->imageLayout;
      }

      if (aspects != 0) {
         if (pRenderingInfo->viewMask != 0) {
            u_foreach_bit(layer, pRenderingInfo->viewMask) {
               dzn_cmd_buffer_clear_attachment(cmdbuf, iview, layout,
                                               &clear_val, aspects, layer,
                                               1, 1, &cmdbuf->state.render.area);
            }
         } else {
            dzn_cmd_buffer_clear_attachment(cmdbuf, iview, layout,
                                            &clear_val, aspects, 0,
                                            VK_REMAINING_ARRAY_LAYERS, 1,
                                            &cmdbuf->state.render.area);
         }
      }
   }

   /* Non-multiview passes behave like a single-view mask of 0x1. */
   cmdbuf->state.multiview.num_views = MAX2(util_bitcount(pRenderingInfo->viewMask), 1);
   cmdbuf->state.multiview.view_mask = MAX2(pRenderingInfo->viewMask, 1);
}
5066
5067 VKAPI_ATTR void VKAPI_CALL
dzn_CmdEndRendering(VkCommandBuffer commandBuffer)5068 dzn_CmdEndRendering(VkCommandBuffer commandBuffer)
5069 {
5070 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5071
5072 if (!(cmdbuf->state.render.flags & VK_RENDERING_SUSPENDING_BIT)) {
5073 for (uint32_t i = 0; i < cmdbuf->state.render.attachments.color_count; i++) {
5074 dzn_cmd_buffer_resolve_rendering_attachment(cmdbuf,
5075 &cmdbuf->state.render.attachments.colors[i],
5076 VK_IMAGE_ASPECT_COLOR_BIT, false);
5077 }
5078
5079 bool separate_stencil_resolve =
5080 cmdbuf->state.render.attachments.depth.resolve.mode !=
5081 cmdbuf->state.render.attachments.stencil.resolve.mode;
5082 dzn_cmd_buffer_resolve_rendering_attachment(cmdbuf,
5083 &cmdbuf->state.render.attachments.depth,
5084 VK_IMAGE_ASPECT_DEPTH_BIT,
5085 separate_stencil_resolve);
5086 dzn_cmd_buffer_resolve_rendering_attachment(cmdbuf,
5087 &cmdbuf->state.render.attachments.stencil,
5088 VK_IMAGE_ASPECT_STENCIL_BIT,
5089 separate_stencil_resolve);
5090 }
5091
5092 memset(&cmdbuf->state.render, 0, sizeof(cmdbuf->state.render));
5093 }
5094
VKAPI_ATTR void VKAPI_CALL
dzn_CmdBindPipeline(VkCommandBuffer commandBuffer,
                    VkPipelineBindPoint pipelineBindPoint,
                    VkPipeline pipe)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(dzn_pipeline, pipeline, pipe);

   cmdbuf->state.bindpoint[pipelineBindPoint].pipeline = pipeline;
   cmdbuf->state.bindpoint[pipelineBindPoint].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
   /* Graphics pipelines may bake in state that Vulkan allows to be
    * static: copy any non-dynamic state into the command-buffer state and
    * flag it dirty so it gets re-emitted before the next draw.
    */
   if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
      const struct dzn_graphics_pipeline *gfx = (const struct dzn_graphics_pipeline *)pipeline;

      /* Static viewports also feed the viewport-size sysvals. */
      if (!gfx->vp.dynamic) {
         memcpy(cmdbuf->state.viewports, gfx->vp.desc,
                gfx->vp.count * sizeof(cmdbuf->state.viewports[0]));
         cmdbuf->state.sysvals.gfx.viewport_width = cmdbuf->state.viewports[0].Width;
         cmdbuf->state.sysvals.gfx.viewport_height = cmdbuf->state.viewports[0].Height;
         cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS;
         cmdbuf->state.bindpoint[pipelineBindPoint].dirty |= DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
      }

      if (!gfx->scissor.dynamic) {
         memcpy(cmdbuf->state.scissors, gfx->scissor.desc,
                gfx->scissor.count * sizeof(cmdbuf->state.scissors[0]));
         cmdbuf->state.dirty |= DZN_CMD_DIRTY_SCISSORS;
      }

      /* Static stencil reference values. */
      if (gfx->zsa.stencil_test.enable && !gfx->zsa.stencil_test.dynamic_ref) {
         cmdbuf->state.zsa.stencil_test.front.ref = gfx->zsa.stencil_test.front.ref;
         cmdbuf->state.zsa.stencil_test.back.ref = gfx->zsa.stencil_test.back.ref;
         cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF;
      }

      /* Static depth-bounds range. */
      if (gfx->zsa.depth_bounds.enable && !gfx->zsa.depth_bounds.dynamic) {
         cmdbuf->state.zsa.depth_bounds.min = gfx->zsa.depth_bounds.min;
         cmdbuf->state.zsa.depth_bounds.max = gfx->zsa.depth_bounds.max;
         cmdbuf->state.dirty |= DZN_CMD_DIRTY_DEPTH_BOUNDS;
      }

      /* Static blend constants. */
      if (!gfx->blend.dynamic_constants) {
         memcpy(cmdbuf->state.blend.constants, gfx->blend.constants,
                sizeof(cmdbuf->state.blend.constants));
         cmdbuf->state.dirty |= DZN_CMD_DIRTY_BLEND_CONSTANTS;
      }

      /* Vertex-buffer strides come from the pipeline in D3D12, so
       * re-stamp the bound views and mark them dirty.
       */
      for (uint32_t vb = 0; vb < gfx->vb.count; vb++)
         cmdbuf->state.vb.views[vb].StrideInBytes = gfx->vb.strides[vb];

      if (gfx->vb.count > 0)
         BITSET_SET_RANGE(cmdbuf->state.vb.dirty, 0, gfx->vb.count - 1);
   }
}
5148
5149 VKAPI_ATTR void VKAPI_CALL
dzn_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,VkPipelineBindPoint pipelineBindPoint,VkPipelineLayout layout,uint32_t firstSet,uint32_t descriptorSetCount,const VkDescriptorSet * pDescriptorSets,uint32_t dynamicOffsetCount,const uint32_t * pDynamicOffsets)5150 dzn_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
5151 VkPipelineBindPoint pipelineBindPoint,
5152 VkPipelineLayout layout,
5153 uint32_t firstSet,
5154 uint32_t descriptorSetCount,
5155 const VkDescriptorSet *pDescriptorSets,
5156 uint32_t dynamicOffsetCount,
5157 const uint32_t *pDynamicOffsets)
5158 {
5159 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5160 VK_FROM_HANDLE(dzn_pipeline_layout, playout, layout);
5161
5162 struct dzn_descriptor_state *desc_state =
5163 &cmdbuf->state.bindpoint[pipelineBindPoint].desc_state;
5164 uint32_t dirty = 0;
5165
5166 for (uint32_t i = 0; i < descriptorSetCount; i++) {
5167 uint32_t idx = firstSet + i;
5168 VK_FROM_HANDLE(dzn_descriptor_set, set, pDescriptorSets[i]);
5169
5170 if (desc_state->sets[idx].set != set) {
5171 desc_state->sets[idx].set = set;
5172 dirty |= DZN_CMD_BINDPOINT_DIRTY_DESC_SET0 << idx;
5173 }
5174
5175 uint32_t dynamic_buffer_count = playout->sets[idx].dynamic_buffer_count;
5176 if (dynamic_buffer_count) {
5177 assert(dynamicOffsetCount >= dynamic_buffer_count);
5178
5179 for (uint32_t j = 0; j < dynamic_buffer_count; j++)
5180 desc_state->sets[idx].dynamic_offsets[j] = pDynamicOffsets[j];
5181
5182 dynamicOffsetCount -= dynamic_buffer_count;
5183 pDynamicOffsets += dynamic_buffer_count;
5184 dirty |= DZN_CMD_BINDPOINT_DIRTY_DYNAMIC_BUFFERS;
5185 }
5186 }
5187
5188 cmdbuf->state.bindpoint[pipelineBindPoint].dirty |= dirty;
5189 }
5190
5191 VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetViewport(VkCommandBuffer commandBuffer,uint32_t firstViewport,uint32_t viewportCount,const VkViewport * pViewports)5192 dzn_CmdSetViewport(VkCommandBuffer commandBuffer,
5193 uint32_t firstViewport,
5194 uint32_t viewportCount,
5195 const VkViewport *pViewports)
5196 {
5197 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5198
5199 STATIC_ASSERT(MAX_VP <= DXIL_SPIRV_MAX_VIEWPORT);
5200
5201 for (uint32_t i = 0; i < viewportCount; i++) {
5202 uint32_t vp = i + firstViewport;
5203
5204 dzn_translate_viewport(&cmdbuf->state.viewports[vp], &pViewports[i]);
5205
5206 if (pViewports[i].minDepth > pViewports[i].maxDepth)
5207 cmdbuf->state.sysvals.gfx.yz_flip_mask |= BITFIELD_BIT(vp + DXIL_SPIRV_Z_FLIP_SHIFT);
5208 else
5209 cmdbuf->state.sysvals.gfx.yz_flip_mask &= ~BITFIELD_BIT(vp + DXIL_SPIRV_Z_FLIP_SHIFT);
5210
5211 if (pViewports[i].height > 0)
5212 cmdbuf->state.sysvals.gfx.yz_flip_mask |= BITFIELD_BIT(vp);
5213 else
5214 cmdbuf->state.sysvals.gfx.yz_flip_mask &= ~BITFIELD_BIT(vp);
5215 }
5216
5217 cmdbuf->state.sysvals.gfx.viewport_width = cmdbuf->state.viewports[0].Width;
5218 cmdbuf->state.sysvals.gfx.viewport_height = cmdbuf->state.viewports[0].Height;
5219
5220 if (viewportCount) {
5221 cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS;
5222 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
5223 DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
5224 }
5225 }
5226
5227 VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetScissor(VkCommandBuffer commandBuffer,uint32_t firstScissor,uint32_t scissorCount,const VkRect2D * pScissors)5228 dzn_CmdSetScissor(VkCommandBuffer commandBuffer,
5229 uint32_t firstScissor,
5230 uint32_t scissorCount,
5231 const VkRect2D *pScissors)
5232 {
5233 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5234
5235 for (uint32_t i = 0; i < scissorCount; i++)
5236 dzn_translate_rect(&cmdbuf->state.scissors[i + firstScissor], &pScissors[i]);
5237
5238 if (scissorCount)
5239 cmdbuf->state.dirty |= DZN_CMD_DIRTY_SCISSORS;
5240 }
5241
5242 VKAPI_ATTR void VKAPI_CALL
dzn_CmdPushConstants(VkCommandBuffer commandBuffer,VkPipelineLayout layout,VkShaderStageFlags stageFlags,uint32_t offset,uint32_t size,const void * pValues)5243 dzn_CmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout,
5244 VkShaderStageFlags stageFlags, uint32_t offset, uint32_t size,
5245 const void *pValues)
5246 {
5247 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5248 struct dzn_cmd_buffer_push_constant_state *states[2];
5249 uint32_t num_states = 0;
5250
5251 if (stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS)
5252 states[num_states++] = &cmdbuf->state.push_constant.gfx;
5253
5254 if (stageFlags & VK_SHADER_STAGE_COMPUTE_BIT)
5255 states[num_states++] = &cmdbuf->state.push_constant.compute;
5256
5257 for (uint32_t i = 0; i < num_states; i++) {
5258 memcpy(((char *)states[i]->values) + offset, pValues, size);
5259 states[i]->offset =
5260 states[i]->end > 0 ? MIN2(states[i]->offset, offset) : offset;
5261 states[i]->end = MAX2(states[i]->end, offset + size);
5262 }
5263 }
5264
VKAPI_ATTR void VKAPI_CALL
dzn_CmdDraw(VkCommandBuffer commandBuffer,
            uint32_t vertexCount,
            uint32_t instanceCount,
            uint32_t firstVertex,
            uint32_t firstInstance)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   const struct dzn_graphics_pipeline *pipeline = (const struct dzn_graphics_pipeline *)
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;

   cmdbuf->state.sysvals.gfx.first_vertex = firstVertex;
   cmdbuf->state.sysvals.gfx.base_instance = firstInstance;

   /* With native view instancing the GPU iterates the views itself, so we
    * emit a single draw; otherwise we loop over the view mask below.
    */
   uint32_t view_mask = pipeline->multiview.native_view_instancing ?
      1 : pipeline->multiview.view_mask;

   if (pipeline->ia.triangle_fan) {
      /* D3D12 has no triangle fans: lower to an indexed triangle-list
       * draw using a generated index buffer. Save the current IB view so
       * it can be restored afterwards.
       */
      D3D12_INDEX_BUFFER_VIEW ib_view = cmdbuf->state.ib.view;

      VkResult result =
         dzn_cmd_buffer_triangle_fan_create_index(cmdbuf, &vertexCount);
      if (result != VK_SUCCESS || !vertexCount)
         return;

      cmdbuf->state.sysvals.gfx.is_indexed_draw = true;
      u_foreach_bit(view, view_mask) {
         /* view_index is a sysval, so each view needs a re-prepare. */
         cmdbuf->state.sysvals.gfx.view_index = view;
         cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
            DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
         dzn_cmd_buffer_prepare_draw(cmdbuf, true);
         ID3D12GraphicsCommandList1_DrawIndexedInstanced(cmdbuf->cmdlist, vertexCount, instanceCount, 0,
                                                         firstVertex, firstInstance);
      }

      /* Restore the IB view if we modified it when lowering triangle fans. */
      if (ib_view.SizeInBytes > 0) {
         cmdbuf->state.ib.view = ib_view;
         cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
      }
   } else {
      cmdbuf->state.sysvals.gfx.is_indexed_draw = false;
      u_foreach_bit(view, view_mask) {
         cmdbuf->state.sysvals.gfx.view_index = view;
         cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
            DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
         dzn_cmd_buffer_prepare_draw(cmdbuf, false);
         ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, vertexCount, instanceCount,
                                                  firstVertex, firstInstance);
      }
   }
}
5318
5319 VKAPI_ATTR void VKAPI_CALL
dzn_CmdDrawIndexed(VkCommandBuffer commandBuffer,uint32_t indexCount,uint32_t instanceCount,uint32_t firstIndex,int32_t vertexOffset,uint32_t firstInstance)5320 dzn_CmdDrawIndexed(VkCommandBuffer commandBuffer,
5321 uint32_t indexCount,
5322 uint32_t instanceCount,
5323 uint32_t firstIndex,
5324 int32_t vertexOffset,
5325 uint32_t firstInstance)
5326 {
5327 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5328
5329 const struct dzn_graphics_pipeline *pipeline = (const struct dzn_graphics_pipeline *)
5330 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
5331
5332 if (pipeline->ia.triangle_fan &&
5333 dzn_graphics_pipeline_get_desc_template(pipeline, ib_strip_cut)) {
5334 /* The indexed+primitive-restart+triangle-fan combination is a mess,
5335 * since we have to walk the index buffer, skip entries with the
5336 * special 0xffff/0xffffffff values, and push triangle list indices
5337 * for the remaining values. All of this has an impact on the index
5338 * count passed to the draw call, which forces us to use the indirect
5339 * path.
5340 */
5341 D3D12_DRAW_INDEXED_ARGUMENTS params = {
5342 .IndexCountPerInstance = indexCount,
5343 .InstanceCount = instanceCount,
5344 .StartIndexLocation = firstIndex,
5345 .BaseVertexLocation = vertexOffset,
5346 .StartInstanceLocation = firstInstance,
5347 };
5348
5349 ID3D12Resource *draw_buf;
5350 uint64_t offset;
5351 VkResult result =
5352 dzn_cmd_buffer_alloc_internal_buf(cmdbuf, sizeof(params),
5353 DZN_INTERNAL_BUF_UPLOAD,
5354 D3D12_RESOURCE_STATE_GENERIC_READ,
5355 4,
5356 &draw_buf, &offset);
5357 if (result != VK_SUCCESS)
5358 return;
5359
5360 void *cpu_ptr;
5361 ID3D12Resource_Map(draw_buf, 0, NULL, &cpu_ptr);
5362 memcpy((uint8_t *)cpu_ptr + offset, ¶ms, sizeof(params));
5363
5364 ID3D12Resource_Unmap(draw_buf, 0, NULL);
5365
5366 dzn_cmd_buffer_indirect_draw(cmdbuf, draw_buf, offset, NULL, 0, 1, sizeof(params), true);
5367 return;
5368 }
5369
5370 cmdbuf->state.sysvals.gfx.first_vertex = vertexOffset;
5371 cmdbuf->state.sysvals.gfx.base_instance = firstInstance;
5372 cmdbuf->state.sysvals.gfx.is_indexed_draw = true;
5373
5374 D3D12_INDEX_BUFFER_VIEW ib_view = cmdbuf->state.ib.view;
5375
5376 if (pipeline->ia.triangle_fan) {
5377 VkResult result =
5378 dzn_cmd_buffer_triangle_fan_rewrite_index(cmdbuf, &indexCount, &firstIndex);
5379 if (result != VK_SUCCESS || !indexCount)
5380 return;
5381 }
5382
5383 uint32_t view_mask = pipeline->multiview.native_view_instancing ?
5384 1 : pipeline->multiview.view_mask;
5385 u_foreach_bit(view, view_mask) {
5386 cmdbuf->state.sysvals.gfx.view_index = view;
5387 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
5388 DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
5389
5390 dzn_cmd_buffer_prepare_draw(cmdbuf, true);
5391 ID3D12GraphicsCommandList1_DrawIndexedInstanced(cmdbuf->cmdlist, indexCount, instanceCount, firstIndex,
5392 vertexOffset, firstInstance);
5393 }
5394
5395 /* Restore the IB view if we modified it when lowering triangle fans. */
5396 if (pipeline->ia.triangle_fan && ib_view.SizeInBytes) {
5397 cmdbuf->state.ib.view = ib_view;
5398 cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
5399 }
5400 }
5401
5402 VKAPI_ATTR void VKAPI_CALL
dzn_CmdDrawIndirect(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,uint32_t drawCount,uint32_t stride)5403 dzn_CmdDrawIndirect(VkCommandBuffer commandBuffer,
5404 VkBuffer buffer,
5405 VkDeviceSize offset,
5406 uint32_t drawCount,
5407 uint32_t stride)
5408 {
5409 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5410 VK_FROM_HANDLE(dzn_buffer, buf, buffer);
5411
5412 dzn_cmd_buffer_indirect_draw(cmdbuf, buf->res, offset, NULL, 0, drawCount, stride, false);
5413 }
5414
5415 VKAPI_ATTR void VKAPI_CALL
dzn_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,uint32_t drawCount,uint32_t stride)5416 dzn_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,
5417 VkBuffer buffer,
5418 VkDeviceSize offset,
5419 uint32_t drawCount,
5420 uint32_t stride)
5421 {
5422 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5423 VK_FROM_HANDLE(dzn_buffer, buf, buffer);
5424
5425 dzn_cmd_buffer_indirect_draw(cmdbuf, buf->res, offset, NULL, 0, drawCount, stride, true);
5426 }
5427
5428 VKAPI_ATTR void VKAPI_CALL
dzn_CmdDrawIndirectCount(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,VkBuffer countBuffer,VkDeviceSize countBufferOffset,uint32_t maxDrawCount,uint32_t stride)5429 dzn_CmdDrawIndirectCount(VkCommandBuffer commandBuffer,
5430 VkBuffer buffer,
5431 VkDeviceSize offset,
5432 VkBuffer countBuffer,
5433 VkDeviceSize countBufferOffset,
5434 uint32_t maxDrawCount,
5435 uint32_t stride)
5436 {
5437 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5438 VK_FROM_HANDLE(dzn_buffer, buf, buffer);
5439 VK_FROM_HANDLE(dzn_buffer, count_buf, countBuffer);
5440
5441 dzn_cmd_buffer_indirect_draw(cmdbuf, buf->res, offset,
5442 count_buf->res, countBufferOffset,
5443 maxDrawCount, stride, false);
5444 }
5445
5446 VKAPI_ATTR void VKAPI_CALL
dzn_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,VkBuffer countBuffer,VkDeviceSize countBufferOffset,uint32_t maxDrawCount,uint32_t stride)5447 dzn_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer,
5448 VkBuffer buffer,
5449 VkDeviceSize offset,
5450 VkBuffer countBuffer,
5451 VkDeviceSize countBufferOffset,
5452 uint32_t maxDrawCount,
5453 uint32_t stride)
5454 {
5455 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5456 VK_FROM_HANDLE(dzn_buffer, buf, buffer);
5457 VK_FROM_HANDLE(dzn_buffer, count_buf, countBuffer);
5458
5459 dzn_cmd_buffer_indirect_draw(cmdbuf, buf->res, offset,
5460 count_buf->res, countBufferOffset,
5461 maxDrawCount, stride, true);
5462 }
5463
5464 VKAPI_ATTR void VKAPI_CALL
dzn_CmdBindVertexBuffers(VkCommandBuffer commandBuffer,uint32_t firstBinding,uint32_t bindingCount,const VkBuffer * pBuffers,const VkDeviceSize * pOffsets)5465 dzn_CmdBindVertexBuffers(VkCommandBuffer commandBuffer,
5466 uint32_t firstBinding,
5467 uint32_t bindingCount,
5468 const VkBuffer *pBuffers,
5469 const VkDeviceSize *pOffsets)
5470 {
5471 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5472
5473 if (!bindingCount)
5474 return;
5475
5476 D3D12_VERTEX_BUFFER_VIEW *vbviews = cmdbuf->state.vb.views;
5477
5478 for (uint32_t i = 0; i < bindingCount; i++) {
5479 VK_FROM_HANDLE(dzn_buffer, buf, pBuffers[i]);
5480
5481 vbviews[firstBinding + i].BufferLocation = buf->gpuva + pOffsets[i];
5482 vbviews[firstBinding + i].SizeInBytes = buf->size - pOffsets[i];
5483 }
5484
5485 BITSET_SET_RANGE(cmdbuf->state.vb.dirty, firstBinding,
5486 firstBinding + bindingCount - 1);
5487 }
5488
5489 VKAPI_ATTR void VKAPI_CALL
dzn_CmdBindIndexBuffer(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,VkIndexType indexType)5490 dzn_CmdBindIndexBuffer(VkCommandBuffer commandBuffer,
5491 VkBuffer buffer,
5492 VkDeviceSize offset,
5493 VkIndexType indexType)
5494 {
5495 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5496 VK_FROM_HANDLE(dzn_buffer, buf, buffer);
5497
5498 cmdbuf->state.ib.view.BufferLocation = buf->gpuva + offset;
5499 cmdbuf->state.ib.view.SizeInBytes = buf->size - offset;
5500 switch (indexType) {
5501 case VK_INDEX_TYPE_UINT16:
5502 cmdbuf->state.ib.view.Format = DXGI_FORMAT_R16_UINT;
5503 cmdbuf->state.pipeline_variant.ib_strip_cut = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF;
5504 break;
5505 case VK_INDEX_TYPE_UINT32:
5506 cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT;
5507 cmdbuf->state.pipeline_variant.ib_strip_cut = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF;
5508 break;
5509 default: unreachable("Invalid index type");
5510 }
5511
5512 cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
5513
5514 const struct dzn_graphics_pipeline *pipeline =
5515 (const struct dzn_graphics_pipeline *)cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
5516
5517 if (pipeline &&
5518 dzn_graphics_pipeline_get_desc_template(pipeline, ib_strip_cut))
5519 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
5520 }
5521
5522 VKAPI_ATTR void VKAPI_CALL
dzn_CmdResetEvent2(VkCommandBuffer commandBuffer,VkEvent event,VkPipelineStageFlags2 stageMask)5523 dzn_CmdResetEvent2(VkCommandBuffer commandBuffer,
5524 VkEvent event,
5525 VkPipelineStageFlags2 stageMask)
5526 {
5527 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5528 VK_FROM_HANDLE(dzn_event, evt, event);
5529
5530 if (!_mesa_hash_table_insert(cmdbuf->events.ht, evt, (void *)(uintptr_t)DZN_EVENT_STATE_RESET))
5531 vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
5532 }
5533
5534 VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetEvent2(VkCommandBuffer commandBuffer,VkEvent event,const VkDependencyInfo * pDependencyInfo)5535 dzn_CmdSetEvent2(VkCommandBuffer commandBuffer,
5536 VkEvent event,
5537 const VkDependencyInfo *pDependencyInfo)
5538 {
5539 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5540 VK_FROM_HANDLE(dzn_event, evt, event);
5541
5542 if (!_mesa_hash_table_insert(cmdbuf->events.ht, evt, (void *)(uintptr_t)DZN_EVENT_STATE_SET))
5543 vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
5544 }
5545
VKAPI_ATTR void VKAPI_CALL
dzn_CmdWaitEvents2(VkCommandBuffer commandBuffer,
                   uint32_t eventCount,
                   const VkEvent *pEvents,
                   const VkDependencyInfo *pDependencyInfo)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   /* Intra-command list wait is handle by this pipeline flush, which is
    * overkill, but that's the best we can do with the standard D3D12 barrier
    * API.
    *
    * Inter-command list is taken care of by the serialization done at the
    * ExecuteCommandList() level:
    * "Calling ExecuteCommandLists twice in succession (from the same thread,
    * or different threads) guarantees that the first workload (A) finishes
    * before the second workload (B)"
    *
    * HOST -> DEVICE signaling is ignored and we assume events are always
    * signaled when we reach the vkCmdWaitEvents() point.:
    * "Command buffers in the submission can include vkCmdWaitEvents commands
    * that wait on events that will not be signaled by earlier commands in the
    * queue. Such events must be signaled by the application using vkSetEvent,
    * and the vkCmdWaitEvents commands that wait upon them must not be inside
    * a render pass instance.
    * The event must be set before the vkCmdWaitEvents command is executed."
    */
   bool flush_pipeline = false;

   /* Check whether any awaited event was recorded earlier in this command
    * buffer; only then does an intra-command-list flush become necessary. */
   for (uint32_t i = 0; i < eventCount; i++) {
      VK_FROM_HANDLE(dzn_event, event, pEvents[i]);

      struct hash_entry *he =
         _mesa_hash_table_search(cmdbuf->events.ht, event);
      if (he) {
         enum dzn_event_state state = (uintptr_t)he->data;
         /* Waiting on an event reset earlier in the same command buffer
          * would never complete, hence the assert. */
         assert(state != DZN_EVENT_STATE_RESET);
         flush_pipeline = state == DZN_EVENT_STATE_SET;
      }
   }

   if (flush_pipeline) {
      if (cmdbuf->enhanced_barriers) {
         /* Full sync/access global barrier: conservative pipeline flush. */
         dzn_cmd_buffer_global_barrier(cmdbuf,
                                       D3D12_BARRIER_SYNC_ALL, D3D12_BARRIER_SYNC_ALL,
                                       D3D12_BARRIER_ACCESS_COMMON, D3D12_BARRIER_ACCESS_COMMON);
      } else {
         /* A NULL-resource UAV barrier is the legacy-API idiom for an
          * all-UAV-accesses flush. */
         D3D12_RESOURCE_BARRIER barrier = {
            .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV,
            .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
            .UAV = {.pResource = NULL },
         };

         ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier);
      }
   }
   /* The memory-dependency part of the wait is a regular pipeline barrier. */
   cmdbuf->vk.base.device->dispatch_table.CmdPipelineBarrier2(
      vk_command_buffer_to_handle(&cmdbuf->vk),
      pDependencyInfo);
}
5606
5607 VKAPI_ATTR void VKAPI_CALL
dzn_CmdBeginQuery(VkCommandBuffer commandBuffer,VkQueryPool queryPool,uint32_t query,VkQueryControlFlags flags)5608 dzn_CmdBeginQuery(VkCommandBuffer commandBuffer,
5609 VkQueryPool queryPool,
5610 uint32_t query,
5611 VkQueryControlFlags flags)
5612 {
5613 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5614 VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
5615
5616 struct dzn_cmd_buffer_query_pool_state *state =
5617 dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
5618 if (!state)
5619 return;
5620
5621 for (uint32_t i = 0; i < cmdbuf->state.multiview.num_views; ++i)
5622 qpool->queries[query + i].type = dzn_query_pool_get_query_type(qpool, flags);
5623
5624 ID3D12GraphicsCommandList1_BeginQuery(cmdbuf->cmdlist, qpool->heap, qpool->queries[query].type, query);
5625
5626 dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, &state->collect, query, cmdbuf->state.multiview.num_views);
5627 dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, &state->zero, query, cmdbuf->state.multiview.num_views);
5628 }
5629
5630 VKAPI_ATTR void VKAPI_CALL
dzn_CmdEndQuery(VkCommandBuffer commandBuffer,VkQueryPool queryPool,uint32_t query)5631 dzn_CmdEndQuery(VkCommandBuffer commandBuffer,
5632 VkQueryPool queryPool,
5633 uint32_t query)
5634 {
5635 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5636 VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
5637
5638 struct dzn_cmd_buffer_query_pool_state *state =
5639 dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
5640 if (!state)
5641 return;
5642
5643 ID3D12GraphicsCommandList1_EndQuery(cmdbuf->cmdlist, qpool->heap, qpool->queries[query].type, query);
5644
5645 dzn_cmd_buffer_dynbitset_set(cmdbuf, &state->collect, query);
5646 if (cmdbuf->state.multiview.num_views > 1)
5647 dzn_cmd_buffer_dynbitset_set_range(cmdbuf, &state->zero, query + 1, cmdbuf->state.multiview.num_views - 1);
5648 }
5649
VKAPI_ATTR void VKAPI_CALL
dzn_CmdWriteTimestamp2(VkCommandBuffer commandBuffer,
                       VkPipelineStageFlags2 stage,
                       VkQueryPool queryPool,
                       uint32_t query)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);

   struct dzn_cmd_buffer_query_pool_state *state =
      dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
   if (!state)
      return;

   /* Execution barrier so the timestamp gets written after the pipeline flush. */
   D3D12_RESOURCE_BARRIER barrier = {
      .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV,
      .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
      .UAV = { .pResource = NULL },
   };

   ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier);

   /* With multiview, one Vulkan query spans num_views consecutive slots; the
    * timestamp is only recorded in the first one, the rest get zero-filled
    * via the 'zero' bitset below. */
   for (uint32_t i = 0; i < cmdbuf->state.multiview.num_views; ++i)
      qpool->queries[query + i].type = D3D12_QUERY_TYPE_TIMESTAMP;
   ID3D12GraphicsCommandList1_EndQuery(cmdbuf->cmdlist, qpool->heap, qpool->queries[query].type, query);

   /* Flag the first slot for collection into the result buffer. */
   dzn_cmd_buffer_dynbitset_set(cmdbuf, &state->collect, query);
   if (cmdbuf->state.multiview.num_views > 1)
      dzn_cmd_buffer_dynbitset_set_range(cmdbuf, &state->zero, query + 1, cmdbuf->state.multiview.num_views - 1);
}
5681
5682
5683 VKAPI_ATTR void VKAPI_CALL
dzn_CmdResetQueryPool(VkCommandBuffer commandBuffer,VkQueryPool queryPool,uint32_t firstQuery,uint32_t queryCount)5684 dzn_CmdResetQueryPool(VkCommandBuffer commandBuffer,
5685 VkQueryPool queryPool,
5686 uint32_t firstQuery,
5687 uint32_t queryCount)
5688 {
5689 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5690 struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
5691 VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
5692
5693 struct dzn_cmd_buffer_query_pool_state *state =
5694 dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
5695
5696 if (!state)
5697 return;
5698
5699 uint32_t q_step = DZN_QUERY_REFS_SECTION_SIZE / sizeof(uint64_t);
5700
5701 for (uint32_t q = 0; q < queryCount; q += q_step) {
5702 uint32_t q_count = MIN2(queryCount - q, q_step);
5703
5704 ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, qpool->collect_buffer,
5705 dzn_query_pool_get_availability_offset(qpool, firstQuery + q),
5706 device->queries.refs,
5707 DZN_QUERY_REFS_ALL_ZEROS_OFFSET,
5708 q_count * sizeof(uint64_t));
5709 }
5710
5711 q_step = DZN_QUERY_REFS_SECTION_SIZE / qpool->query_size;
5712
5713 for (uint32_t q = 0; q < queryCount; q += q_step) {
5714 ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, qpool->collect_buffer,
5715 dzn_query_pool_get_result_offset(qpool, firstQuery + q),
5716 device->queries.refs,
5717 DZN_QUERY_REFS_ALL_ZEROS_OFFSET,
5718 qpool->query_size);
5719 }
5720
5721 dzn_cmd_buffer_dynbitset_set_range(cmdbuf, &state->reset, firstQuery, queryCount);
5722 dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, &state->collect, firstQuery, queryCount);
5723 dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, &state->zero, firstQuery, queryCount);
5724 }
5725
VKAPI_ATTR void VKAPI_CALL
dzn_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer,
                            VkQueryPool queryPool,
                            uint32_t firstQuery,
                            uint32_t queryCount,
                            VkBuffer dstBuffer,
                            VkDeviceSize dstOffset,
                            VkDeviceSize stride,
                            VkQueryResultFlags flags)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
   VK_FROM_HANDLE(dzn_buffer, buf, dstBuffer);

   struct dzn_cmd_buffer_query_pool_state *qpstate =
      dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
   if (!qpstate)
      return;

   /* Resolve any pending queries in the range into the collect buffer
    * before copying out of it. */
   VkResult result =
      dzn_cmd_buffer_collect_queries(cmdbuf, qpool, qpstate, firstQuery, queryCount);
   if (result != VK_SUCCESS)
      return;

   /* A single bulk copy is possible when the destination layout matches the
    * collect buffer exactly: 64-bit results, tightly packed at query_size
    * stride, and no per-query availability word appended. */
   bool raw_copy = (flags & VK_QUERY_RESULT_64_BIT) &&
                   stride == qpool->query_size &&
                   !(flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT);
#define ALL_STATS \
   (VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT | \
    VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT | \
    VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT | \
    VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT | \
    VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT | \
    VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT | \
    VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT | \
    VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT | \
    VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT | \
    VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT | \
    VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT)
   /* D3D12 pipeline-statistics queries always record every counter; if the
    * pool only exposes a subset, counters must be copied one by one. */
   if (qpool->heap_type == D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS &&
       qpool->pipeline_statistics != ALL_STATS)
      raw_copy = false;
#undef ALL_STATS

   /* Make the collect buffer readable as a copy source. */
   if (cmdbuf->enhanced_barriers) {
      if (flags & VK_QUERY_RESULT_WAIT_BIT) {
         dzn_cmd_buffer_buffer_barrier(cmdbuf, qpool->collect_buffer,
                                       D3D12_BARRIER_SYNC_COPY, D3D12_BARRIER_SYNC_COPY,
                                       D3D12_BARRIER_ACCESS_COPY_DEST, D3D12_BARRIER_ACCESS_COPY_SOURCE);
      }
   } else {
      dzn_cmd_buffer_queue_transition_barriers(cmdbuf, qpool->collect_buffer, 0, 1,
                                               D3D12_RESOURCE_STATE_COPY_DEST,
                                               D3D12_RESOURCE_STATE_COPY_SOURCE,
                                               DZN_QUEUE_TRANSITION_FLUSH);
   }

   if (raw_copy) {
      ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset,
                                                  qpool->collect_buffer,
                                                  dzn_query_pool_get_result_offset(qpool, firstQuery),
                                                  dzn_query_pool_get_result_size(qpool, queryCount));
   } else {
      /* Per-query path: copy each requested counter (and optionally the
       * availability word) at the caller's stride/word size. */
      uint32_t step = flags & VK_QUERY_RESULT_64_BIT ? sizeof(uint64_t) : sizeof(uint32_t);

      for (uint32_t q = 0; q < queryCount; q++) {
         uint32_t res_offset = dzn_query_pool_get_result_offset(qpool, firstQuery + q);
         uint32_t dst_counter_offset = 0;

         if (qpool->heap_type == D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS) {
            /* Only copy the counters the pool was created with; source slots
             * are laid out as the full D3D12 statistics struct. */
            for (uint32_t c = 0; c < sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS) / sizeof(uint64_t); c++) {
               if (!(BITFIELD_BIT(c) & qpool->pipeline_statistics))
                  continue;

               ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset + dst_counter_offset,
                                                           qpool->collect_buffer,
                                                           res_offset + (c * sizeof(uint64_t)),
                                                           step);
               dst_counter_offset += step;
            }
         } else {
            ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset,
                                                        qpool->collect_buffer,
                                                        res_offset, step);
            dst_counter_offset += step;
         }

         if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
            ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset + dst_counter_offset,
                                                        qpool->collect_buffer,
                                                        dzn_query_pool_get_availability_offset(qpool, firstQuery + q),
                                                        step);
         }

         dstOffset += stride;
      }
   }

   /* Legacy barriers: return the collect buffer to its COPY_DEST state. */
   if (!cmdbuf->enhanced_barriers) {
      dzn_cmd_buffer_queue_transition_barriers(cmdbuf, qpool->collect_buffer, 0, 1,
                                               D3D12_RESOURCE_STATE_COPY_SOURCE,
                                               D3D12_RESOURCE_STATE_COPY_DEST,
                                               0);
   }
}
5831
VKAPI_ATTR void VKAPI_CALL
dzn_CmdDispatchIndirect(VkCommandBuffer commandBuffer,
                        VkBuffer buffer,
                        VkDeviceSize offset)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(dzn_buffer, buf, buffer);

   /* Group counts are not known on the CPU for an indirect dispatch: zero
    * the compute sysvals and mark them dirty so stale values from a previous
    * direct dispatch are not reused. */
   cmdbuf->state.sysvals.compute.group_count_x = 0;
   cmdbuf->state.sysvals.compute.group_count_y = 0;
   cmdbuf->state.sysvals.compute.group_count_z = 0;
   cmdbuf->state.sysvals.compute.base_group_x = 0;
   cmdbuf->state.sysvals.compute.base_group_y = 0;
   cmdbuf->state.sysvals.compute.base_group_z = 0;
   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |=
      DZN_CMD_BINDPOINT_DIRTY_SYSVALS;

   dzn_cmd_buffer_prepare_dispatch(cmdbuf);

   struct dzn_compute_pipeline *pipeline = (struct dzn_compute_pipeline *)
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline;
   ID3D12CommandSignature *cmdsig =
      dzn_compute_pipeline_get_indirect_cmd_sig(pipeline);

   if (!cmdsig) {
      vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
      return;
   }

   /* Internal exec buffer holding TWO copies of the dispatch arguments.
    * NOTE(review): presumably one copy is consumed by the command signature
    * as root constants (GPU-side group-count sysvals) and the other as the
    * actual dispatch args — confirm against
    * dzn_compute_pipeline_get_indirect_cmd_sig(). */
   ID3D12Resource *exec_buf;
   VkResult result =
      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, sizeof(D3D12_DISPATCH_ARGUMENTS) * 2,
                                        DZN_INTERNAL_BUF_DEFAULT,
                                        D3D12_RESOURCE_STATE_COPY_DEST,
                                        0,
                                        &exec_buf, NULL);
   if (result != VK_SUCCESS)
      return;

   /* Make the app's argument buffer readable as a copy source. */
   if (cmdbuf->enhanced_barriers) {
      dzn_cmd_buffer_buffer_barrier(cmdbuf, buf->res,
                                    D3D12_BARRIER_SYNC_EXECUTE_INDIRECT, D3D12_BARRIER_SYNC_COPY,
                                    D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT, D3D12_BARRIER_ACCESS_COPY_SOURCE);
   } else {
      dzn_cmd_buffer_queue_transition_barriers(cmdbuf, buf->res, 0, 1,
                                               D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT,
                                               D3D12_RESOURCE_STATE_COPY_SOURCE,
                                               DZN_QUEUE_TRANSITION_FLUSH);
   }

   /* Duplicate the app-provided D3D12_DISPATCH_ARGUMENTS into both halves of
    * the exec buffer. */
   ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, exec_buf, 0,
                                               buf->res,
                                               offset,
                                               sizeof(D3D12_DISPATCH_ARGUMENTS));
   ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, exec_buf, sizeof(D3D12_DISPATCH_ARGUMENTS),
                                               buf->res,
                                               offset,
                                               sizeof(D3D12_DISPATCH_ARGUMENTS));

   /* Flip the exec buffer from copy target to indirect-argument source. */
   if (cmdbuf->enhanced_barriers) {
      dzn_cmd_buffer_buffer_barrier(cmdbuf, exec_buf,
                                    D3D12_BARRIER_SYNC_COPY, D3D12_BARRIER_SYNC_EXECUTE_INDIRECT,
                                    D3D12_BARRIER_ACCESS_COPY_DEST, D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT);
   } else {
      dzn_cmd_buffer_queue_transition_barriers(cmdbuf, exec_buf, 0, 1,
                                               D3D12_RESOURCE_STATE_COPY_DEST,
                                               D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT,
                                               DZN_QUEUE_TRANSITION_FLUSH);
   }

   ID3D12GraphicsCommandList1_ExecuteIndirect(cmdbuf->cmdlist, cmdsig, 1, exec_buf, 0, NULL, 0);
}
5904
5905 VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetLineWidth(VkCommandBuffer commandBuffer,float lineWidth)5906 dzn_CmdSetLineWidth(VkCommandBuffer commandBuffer,
5907 float lineWidth)
5908 {
5909 assert(lineWidth == 1.0f);
5910 }
5911
5912 VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetDepthBias(VkCommandBuffer commandBuffer,float depthBiasConstantFactor,float depthBiasClamp,float depthBiasSlopeFactor)5913 dzn_CmdSetDepthBias(VkCommandBuffer commandBuffer,
5914 float depthBiasConstantFactor,
5915 float depthBiasClamp,
5916 float depthBiasSlopeFactor)
5917 {
5918 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5919 struct dzn_physical_device *pdev = container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk);
5920
5921 cmdbuf->state.pipeline_variant.depth_bias.constant_factor = depthBiasConstantFactor;
5922 cmdbuf->state.pipeline_variant.depth_bias.clamp = depthBiasClamp;
5923 cmdbuf->state.pipeline_variant.depth_bias.slope_factor = depthBiasSlopeFactor;
5924 cmdbuf->state.sysvals.gfx.depth_bias = depthBiasConstantFactor;
5925 if (pdev->options16.DynamicDepthBiasSupported)
5926 cmdbuf->state.dirty |= DZN_CMD_DIRTY_DEPTH_BIAS;
5927 else
5928 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
5929 }
5930
5931 VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetBlendConstants(VkCommandBuffer commandBuffer,const float blendConstants[4])5932 dzn_CmdSetBlendConstants(VkCommandBuffer commandBuffer,
5933 const float blendConstants[4])
5934 {
5935 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5936
5937 memcpy(cmdbuf->state.blend.constants, blendConstants,
5938 sizeof(cmdbuf->state.blend.constants));
5939 cmdbuf->state.dirty |= DZN_CMD_DIRTY_BLEND_CONSTANTS;
5940 }
5941
5942 VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetDepthBounds(VkCommandBuffer commandBuffer,float minDepthBounds,float maxDepthBounds)5943 dzn_CmdSetDepthBounds(VkCommandBuffer commandBuffer,
5944 float minDepthBounds,
5945 float maxDepthBounds)
5946 {
5947 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5948 struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
5949 struct dzn_physical_device *pdev =
5950 container_of(device->vk.physical, struct dzn_physical_device, vk);
5951
5952 if (pdev->options2.DepthBoundsTestSupported) {
5953 cmdbuf->state.zsa.depth_bounds.min = minDepthBounds;
5954 cmdbuf->state.zsa.depth_bounds.max = maxDepthBounds;
5955 cmdbuf->state.dirty |= DZN_CMD_DIRTY_DEPTH_BOUNDS;
5956 }
5957 }
5958
5959 VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer,VkStencilFaceFlags faceMask,uint32_t compareMask)5960 dzn_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer,
5961 VkStencilFaceFlags faceMask,
5962 uint32_t compareMask)
5963 {
5964 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5965
5966 if (faceMask & VK_STENCIL_FACE_FRONT_BIT) {
5967 cmdbuf->state.zsa.stencil_test.front.compare_mask = compareMask;
5968 cmdbuf->state.pipeline_variant.stencil_test.front.compare_mask = compareMask;
5969 }
5970
5971 if (faceMask & VK_STENCIL_FACE_BACK_BIT) {
5972 cmdbuf->state.zsa.stencil_test.back.compare_mask = compareMask;
5973 cmdbuf->state.pipeline_variant.stencil_test.back.compare_mask = compareMask;
5974 }
5975
5976 cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_COMPARE_MASK;
5977 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
5978 }
5979
5980 VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer,VkStencilFaceFlags faceMask,uint32_t writeMask)5981 dzn_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer,
5982 VkStencilFaceFlags faceMask,
5983 uint32_t writeMask)
5984 {
5985 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
5986
5987 if (faceMask & VK_STENCIL_FACE_FRONT_BIT) {
5988 cmdbuf->state.zsa.stencil_test.front.write_mask = writeMask;
5989 cmdbuf->state.pipeline_variant.stencil_test.front.write_mask = writeMask;
5990 }
5991
5992 if (faceMask & VK_STENCIL_FACE_BACK_BIT) {
5993 cmdbuf->state.zsa.stencil_test.back.write_mask = writeMask;
5994 cmdbuf->state.pipeline_variant.stencil_test.back.write_mask = writeMask;
5995 }
5996
5997 cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_WRITE_MASK;
5998 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
5999 }
6000
6001 VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetStencilReference(VkCommandBuffer commandBuffer,VkStencilFaceFlags faceMask,uint32_t reference)6002 dzn_CmdSetStencilReference(VkCommandBuffer commandBuffer,
6003 VkStencilFaceFlags faceMask,
6004 uint32_t reference)
6005 {
6006 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
6007
6008 if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
6009 cmdbuf->state.zsa.stencil_test.front.ref = reference;
6010
6011 if (faceMask & VK_STENCIL_FACE_BACK_BIT)
6012 cmdbuf->state.zsa.stencil_test.back.ref = reference;
6013
6014 cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF;
6015 }
6016