1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <inttypes.h>
26 #include <stdbool.h>
27 #include <fcntl.h>
28 #include "drm-uapi/drm_fourcc.h"
29 #include "drm-uapi/drm.h"
30 #include <xf86drm.h>
31
32 #include "anv_private.h"
33 #include "anv_measure.h"
34 #include "util/u_debug.h"
35 #include "util/os_file.h"
36 #include "util/os_misc.h"
37 #include "util/u_atomic.h"
38 #if DETECT_OS_ANDROID
39 #include "util/u_gralloc/u_gralloc.h"
40 #endif
41 #include "util/u_string.h"
42 #include "vk_common_entrypoints.h"
43 #include "vk_util.h"
44 #include "vk_deferred_operation.h"
45 #include "vk_drm_syncobj.h"
46 #include "common/intel_aux_map.h"
47 #include "common/intel_common.h"
48 #include "common/intel_debug_identifier.h"
49
50 #include "i915/anv_device.h"
51 #include "xe/anv_device.h"
52
53 #include "genxml/gen7_pack.h"
54 #include "genxml/genX_bits.h"
55
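/* Upload the table of standard Vulkan border colors into the dynamic state
 * pool, 64-byte aligned, so samplers can reference them at fixed offsets.
 */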
56 static void
anv_device_init_border_colors(struct anv_device *device)
58 {
59 static const struct gfx8_border_color border_colors[] = {
60 [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
61 [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
62 [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
63 [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .uint32 = { 0, 0, 0, 0 } },
64 [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .uint32 = { 0, 0, 0, 1 } },
65 [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .uint32 = { 1, 1, 1, 1 } },
66 };
67
68 device->border_colors =
69 anv_state_pool_emit_data(&device->dynamic_state_pool,
70 sizeof(border_colors), 64, border_colors);
71 }
72
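/* Allocate a 4KB BO holding just MI_BATCH_BUFFER_END (plus a NOOP), giving
 * the driver a valid no-op batch to submit when nothing else is needed.
 */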
73 static VkResult
anv_device_init_trivial_batch(struct anv_device *device)
75 {
76 VkResult result = anv_device_alloc_bo(device, "trivial-batch", 4096,
77 ANV_BO_ALLOC_MAPPED |
78 ANV_BO_ALLOC_HOST_COHERENT |
79 ANV_BO_ALLOC_INTERNAL |
80 ANV_BO_ALLOC_CAPTURE,
81 0 /* explicit_address */,
82 &device->trivial_batch_bo);
83 if (result != VK_SUCCESS)
84 return result;
85
86 struct anv_batch batch = {
87 .start = device->trivial_batch_bo->map,
88 .next = device->trivial_batch_bo->map,
89 .end = device->trivial_batch_bo->map + 4096,
90 };
91
92 anv_batch_emit(&batch, GFX7_MI_BATCH_BUFFER_END, bbe);
93 anv_batch_emit(&batch, GFX7_MI_NOOP, noop);
94
95 return VK_SUCCESS;
96 }
97
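/* If the given GPU address falls inside one of the pool's BOs, fill *ret
 * with that BO's 48-bit base address, size and CPU mapping.
 */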
98 static bool
get_bo_from_pool(struct intel_batch_decode_bo *ret,
                 struct anv_block_pool *pool,
                 uint64_t address)
102 {
103 anv_block_pool_foreach_bo(bo, pool) {
104 uint64_t bo_address = intel_48b_address(bo->offset);
105 if (address >= bo_address && address < (bo_address + bo->size)) {
106 *ret = (struct intel_batch_decode_bo) {
107 .addr = bo_address,
108 .size = bo->size,
109 .map = bo->map,
110 };
111 return true;
112 }
113 }
114 return false;
115 }
116
117 /* Finding a buffer for batch decoding */
118 static struct intel_batch_decode_bo
decode_get_bo(void *v_batch, bool ppgtt, uint64_t address)
120 {
121 struct anv_device *device = v_batch;
122 struct intel_batch_decode_bo ret_bo = {};
123
124 assert(ppgtt);
125
126 if (get_bo_from_pool(&ret_bo, &device->dynamic_state_pool.block_pool, address))
127 return ret_bo;
128 if (get_bo_from_pool(&ret_bo, &device->instruction_state_pool.block_pool, address))
129 return ret_bo;
130 if (get_bo_from_pool(&ret_bo, &device->binding_table_pool.block_pool, address))
131 return ret_bo;
132 if (get_bo_from_pool(&ret_bo, &device->scratch_surface_state_pool.block_pool, address))
133 return ret_bo;
134 if (device->physical->indirect_descriptors &&
135 get_bo_from_pool(&ret_bo, &device->bindless_surface_state_pool.block_pool, address))
136 return ret_bo;
137 if (get_bo_from_pool(&ret_bo, &device->internal_surface_state_pool.block_pool, address))
138 return ret_bo;
139 if (device->physical->indirect_descriptors &&
140 get_bo_from_pool(&ret_bo, &device->indirect_push_descriptor_pool.block_pool, address))
141 return ret_bo;
142 if (device->info->has_aux_map &&
143 get_bo_from_pool(&ret_bo, &device->aux_tt_pool.block_pool, address))
144 return ret_bo;
145
146 if (!device->cmd_buffer_being_decoded)
147 return (struct intel_batch_decode_bo) { };
148
149 struct anv_batch_bo **bbo;
150 u_vector_foreach(bbo, &device->cmd_buffer_being_decoded->seen_bbos) {
151 /* The decoder zeroes out the top 16 bits, so we need to as well */
152 uint64_t bo_address = (*bbo)->bo->offset & (~0ull >> 16);
153
154 if (address >= bo_address && address < bo_address + (*bbo)->bo->size) {
155 return (struct intel_batch_decode_bo) {
156 .addr = bo_address,
157 .size = (*bbo)->bo->size,
158 .map = (*bbo)->bo->map,
159 };
160 }
161
162 uint32_t dep_words = (*bbo)->relocs.dep_words;
163 BITSET_WORD *deps = (*bbo)->relocs.deps;
164 for (uint32_t w = 0; w < dep_words; w++) {
165 BITSET_WORD mask = deps[w];
166 while (mask) {
167 int i = u_bit_scan(&mask);
168 uint32_t gem_handle = w * BITSET_WORDBITS + i;
169 struct anv_bo *bo = anv_device_lookup_bo(device, gem_handle);
170 assert(bo->refcount > 0);
171 bo_address = bo->offset & (~0ull >> 16);
172 if (address >= bo_address && address < bo_address + bo->size) {
173 return (struct intel_batch_decode_bo) {
174 .addr = bo_address,
175 .size = bo->size,
176 .map = bo->map,
177 };
178 }
179 }
180 }
181 }
182
183 return (struct intel_batch_decode_bo) { };
184 }
185
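/* Allocator callbacks handed to the common intel aux-map code: aux
 * translation table buffers are carved out of the device's aux_tt_pool.
 */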
186 struct intel_aux_map_buffer {
187 struct intel_buffer base;
188 struct anv_state state;
189 };
190
191 static struct intel_buffer *
intel_aux_map_buffer_alloc(void *driver_ctx, uint32_t size)
193 {
194 struct intel_aux_map_buffer *buf = malloc(sizeof(struct intel_aux_map_buffer));
195 if (!buf)
196 return NULL;
197
198 struct anv_device *device = (struct anv_device*)driver_ctx;
199
200 struct anv_state_pool *pool = &device->aux_tt_pool;
201 buf->state = anv_state_pool_alloc(pool, size, size);
202
203 buf->base.gpu = pool->block_pool.bo->offset + buf->state.offset;
204 buf->base.gpu_end = buf->base.gpu + buf->state.alloc_size;
205 buf->base.map = buf->state.map;
206 buf->base.driver_bo = &buf->state;
207 return &buf->base;
208 }
209
210 static void
intel_aux_map_buffer_free(void *driver_ctx, struct intel_buffer *buffer)
212 {
213 struct intel_aux_map_buffer *buf = (struct intel_aux_map_buffer*)buffer;
214 struct anv_device *device = (struct anv_device*)driver_ctx;
215 struct anv_state_pool *pool = &device->aux_tt_pool;
216 anv_state_pool_free(pool, buf->state);
217 free(buf);
218 }
219
220 static struct intel_mapped_pinned_buffer_alloc aux_map_allocator = {
221 .alloc = intel_aux_map_buffer_alloc,
222 .free = intel_aux_map_buffer_free,
223 };
224
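/* KMD-specific device setup: i915 gets a GEM context, Xe gets a VM. The
 * matching teardown is anv_device_destroy_context_or_vm() below.
 */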
225 static VkResult
anv_device_setup_context_or_vm(struct anv_device *device,
                               const VkDeviceCreateInfo *pCreateInfo,
                               const uint32_t num_queues)
229 {
230 switch (device->info->kmd_type) {
231 case INTEL_KMD_TYPE_I915:
232 return anv_i915_device_setup_context(device, pCreateInfo, num_queues);
233 case INTEL_KMD_TYPE_XE:
234 return anv_xe_device_setup_vm(device);
235 default:
236 unreachable("Missing");
237 return VK_ERROR_UNKNOWN;
238 }
239 }
240
241 static bool
anv_device_destroy_context_or_vm(struct anv_device *device)
243 {
244 switch (device->info->kmd_type) {
245 case INTEL_KMD_TYPE_I915:
246 if (device->physical->has_vm_control)
247 return anv_i915_device_destroy_vm(device);
248 else
249 return intel_gem_destroy_context(device->fd, device->context_id);
250 case INTEL_KMD_TYPE_XE:
251 return anv_xe_device_destroy_vm(device);
252 default:
253 unreachable("Missing");
254 return false;
255 }
256 }
257
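/* TR-TT backs sparse binding on devices using the ANV_SPARSE_TYPE_TRTT
 * path: set up the timeline sync, mutex and in-flight batch list used to
 * track page-table update batches.
 */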
258 static VkResult
anv_device_init_trtt(struct anv_device *device)
260 {
261 if (device->physical->sparse_type != ANV_SPARSE_TYPE_TRTT ||
262 !device->vk.enabled_features.sparseBinding)
263 return VK_SUCCESS;
264
265 struct anv_trtt *trtt = &device->trtt;
266
267 VkResult result =
268 vk_sync_create(&device->vk,
269 &device->physical->sync_syncobj_type,
270 VK_SYNC_IS_TIMELINE,
271 0 /* initial_value */,
272 &trtt->timeline);
273 if (result != VK_SUCCESS)
274 return result;
275
276 simple_mtx_init(&trtt->mutex, mtx_plain);
277
278 list_inithead(&trtt->in_flight_batches);
279
280 return VK_SUCCESS;
281 }
282
283 static void
anv_device_finish_trtt(struct anv_device *device)
285 {
286 if (device->physical->sparse_type != ANV_SPARSE_TYPE_TRTT ||
287 !device->vk.enabled_features.sparseBinding)
288 return;
289
290 struct anv_trtt *trtt = &device->trtt;
291
292 anv_sparse_trtt_garbage_collect_batches(device, true);
293
294 vk_sync_destroy(&device->vk, trtt->timeline);
295
296 simple_mtx_destroy(&trtt->mutex);
297
298 vk_free(&device->vk.alloc, trtt->l3_mirror);
299 vk_free(&device->vk.alloc, trtt->l2_mirror);
300
301 for (int i = 0; i < trtt->num_page_table_bos; i++)
302 anv_device_release_bo(device, trtt->page_table_bos[i]);
303
304 vk_free(&device->vk.alloc, trtt->page_table_bos);
305 }
306
VkResult anv_CreateDevice(
    VkPhysicalDevice                            physicalDevice,
    const VkDeviceCreateInfo*                   pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkDevice*                                   pDevice)
312 {
313 ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
314 VkResult result;
315 struct anv_device *device;
316
317 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
318
319 /* Check requested queues and fail if we are requested to create any
320 * queues with flags we don't support.
321 */
322 assert(pCreateInfo->queueCreateInfoCount > 0);
323 for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
324 if (pCreateInfo->pQueueCreateInfos[i].flags & ~VK_DEVICE_QUEUE_CREATE_PROTECTED_BIT)
325 return vk_error(physical_device, VK_ERROR_INITIALIZATION_FAILED);
326 }
327
328 device = vk_zalloc2(&physical_device->instance->vk.alloc, pAllocator,
329 sizeof(*device), 8,
330 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
331 if (!device)
332 return vk_error(physical_device, VK_ERROR_OUT_OF_HOST_MEMORY);
333
334 struct vk_device_dispatch_table dispatch_table;
335
336 bool override_initial_entrypoints = true;
337 if (physical_device->instance->vk.app_info.app_name &&
338 !strcmp(physical_device->instance->vk.app_info.app_name, "HITMAN3.exe")) {
339 vk_device_dispatch_table_from_entrypoints(&dispatch_table,
340 &anv_hitman3_device_entrypoints,
341 true);
342 override_initial_entrypoints = false;
343 }
344 if (physical_device->info.ver < 12 &&
345 physical_device->instance->vk.app_info.app_name &&
346 !strcmp(physical_device->instance->vk.app_info.app_name, "DOOM 64")) {
347 vk_device_dispatch_table_from_entrypoints(&dispatch_table,
348 &anv_doom64_device_entrypoints,
349 true);
350 override_initial_entrypoints = false;
351 }
352 #if DETECT_OS_ANDROID
353 vk_device_dispatch_table_from_entrypoints(&dispatch_table,
354 &anv_android_device_entrypoints,
355 true);
356 override_initial_entrypoints = false;
357 #endif
358 if (physical_device->instance->vk.trace_mode & VK_TRACE_MODE_RMV) {
359 vk_device_dispatch_table_from_entrypoints(&dispatch_table,
360 &anv_rmv_device_entrypoints,
361 true);
362 override_initial_entrypoints = false;
363 }
364 vk_device_dispatch_table_from_entrypoints(&dispatch_table,
365 anv_genX(&physical_device->info, device_entrypoints),
366 override_initial_entrypoints);
367 vk_device_dispatch_table_from_entrypoints(&dispatch_table,
368 &anv_device_entrypoints, false);
369 vk_device_dispatch_table_from_entrypoints(&dispatch_table,
370 &wsi_device_entrypoints, false);
371
372
373 result = vk_device_init(&device->vk, &physical_device->vk,
374 &dispatch_table, pCreateInfo, pAllocator);
375 if (result != VK_SUCCESS)
376 goto fail_alloc;
377
378 if (INTEL_DEBUG(DEBUG_BATCH | DEBUG_BATCH_STATS)) {
379 for (unsigned i = 0; i < physical_device->queue.family_count; i++) {
380 struct intel_batch_decode_ctx *decoder = &device->decoder[i];
381
382 const unsigned decode_flags = INTEL_BATCH_DECODE_DEFAULT_FLAGS;
383
384 intel_batch_decode_ctx_init_brw(decoder,
385 &physical_device->compiler->isa,
386 &physical_device->info,
387 stderr, decode_flags, NULL,
388 decode_get_bo, NULL, device);
389 intel_batch_stats_reset(decoder);
390
391 decoder->engine = physical_device->queue.families[i].engine_class;
392 decoder->dynamic_base = physical_device->va.dynamic_state_pool.addr;
393 decoder->surface_base = physical_device->va.internal_surface_state_pool.addr;
394 decoder->instruction_base = physical_device->va.instruction_state_pool.addr;
395 }
396 }
397
398 anv_device_set_physical(device, physical_device);
399 device->kmd_backend = anv_kmd_backend_get(device->info->kmd_type);
400
401 /* XXX(chadv): Can we dup() physicalDevice->fd here? */
402 device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC);
403 if (device->fd == -1) {
404 result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
405 goto fail_device;
406 }
407
408 switch (device->info->kmd_type) {
409 case INTEL_KMD_TYPE_I915:
410 device->vk.check_status = anv_i915_device_check_status;
411 break;
412 case INTEL_KMD_TYPE_XE:
413 device->vk.check_status = anv_xe_device_check_status;
414 break;
415 default:
416 unreachable("Missing");
417 }
418
419 device->vk.command_buffer_ops = &anv_cmd_buffer_ops;
420 device->vk.create_sync_for_memory = anv_create_sync_for_memory;
421 if (physical_device->info.kmd_type == INTEL_KMD_TYPE_I915)
422 device->vk.create_sync_for_memory = anv_create_sync_for_memory;
423 vk_device_set_drm_fd(&device->vk, device->fd);
424
425 uint32_t num_queues = 0;
426 for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++)
427 num_queues += pCreateInfo->pQueueCreateInfos[i].queueCount;
428
429 result = anv_device_setup_context_or_vm(device, pCreateInfo, num_queues);
430 if (result != VK_SUCCESS)
431 goto fail_fd;
432
433 device->queues =
434 vk_zalloc(&device->vk.alloc, num_queues * sizeof(*device->queues), 8,
435 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
436 if (device->queues == NULL) {
437 result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
438 goto fail_context_id;
439 }
440
441 if (pthread_mutex_init(&device->vma_mutex, NULL) != 0) {
442 result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
443 goto fail_queues_alloc;
444 }
445
446 /* keep the page with address zero out of the allocator */
447 util_vma_heap_init(&device->vma_lo,
448 device->physical->va.low_heap.addr,
449 device->physical->va.low_heap.size);
450
451 util_vma_heap_init(&device->vma_hi,
452 device->physical->va.high_heap.addr,
453 device->physical->va.high_heap.size);
454
455 if (device->physical->indirect_descriptors) {
456 util_vma_heap_init(&device->vma_desc,
457 device->physical->va.indirect_descriptor_pool.addr,
458 device->physical->va.indirect_descriptor_pool.size);
459 } else {
460 util_vma_heap_init(&device->vma_desc,
461 device->physical->va.bindless_surface_state_pool.addr,
462 device->physical->va.bindless_surface_state_pool.size);
463 }
464
/* Always initialized because the memory types point to this and they are
 * on the physical device.
 */
468 util_vma_heap_init(&device->vma_dynamic_visible,
469 device->physical->va.dynamic_visible_pool.addr,
470 device->physical->va.dynamic_visible_pool.size);
471 util_vma_heap_init(&device->vma_trtt,
472 device->physical->va.trtt.addr,
473 device->physical->va.trtt.size);
474
475 list_inithead(&device->memory_objects);
476 list_inithead(&device->image_private_objects);
477
478 if (pthread_mutex_init(&device->mutex, NULL) != 0) {
479 result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
480 goto fail_vmas;
481 }
482
483 pthread_condattr_t condattr;
484 if (pthread_condattr_init(&condattr) != 0) {
485 result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
486 goto fail_mutex;
487 }
488 if (pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC) != 0) {
489 pthread_condattr_destroy(&condattr);
490 result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
491 goto fail_mutex;
492 }
493 if (pthread_cond_init(&device->queue_submit, &condattr) != 0) {
494 pthread_condattr_destroy(&condattr);
495 result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
496 goto fail_mutex;
497 }
498 pthread_condattr_destroy(&condattr);
499
500 if (physical_device->instance->vk.trace_mode & VK_TRACE_MODE_RMV)
501 anv_memory_trace_init(device);
502
503 result = anv_bo_cache_init(&device->bo_cache, device);
504 if (result != VK_SUCCESS)
505 goto fail_queue_cond;
506
507 anv_bo_pool_init(&device->batch_bo_pool, device, "batch",
508 ANV_BO_ALLOC_MAPPED |
509 ANV_BO_ALLOC_HOST_CACHED_COHERENT |
510 ANV_BO_ALLOC_CAPTURE);
511 if (device->vk.enabled_extensions.KHR_acceleration_structure) {
512 anv_bo_pool_init(&device->bvh_bo_pool, device, "bvh build",
513 0 /* alloc_flags */);
514 }
515
516 /* Because scratch is also relative to General State Base Address, we leave
517 * the base address 0 and start the pool memory at an offset. This way we
518 * get the correct offsets in the anv_states that get allocated from it.
519 */
520 result = anv_state_pool_init(&device->general_state_pool, device,
521 &(struct anv_state_pool_params) {
522 .name = "general pool",
523 .base_address = 0,
524 .start_offset = device->physical->va.general_state_pool.addr,
525 .block_size = 16384,
526 .max_size = device->physical->va.general_state_pool.size
527 });
528 if (result != VK_SUCCESS)
529 goto fail_batch_bo_pool;
530
531 result = anv_state_pool_init(&device->dynamic_state_pool, device,
532 &(struct anv_state_pool_params) {
533 .name = "dynamic pool",
534 .base_address = device->physical->va.dynamic_state_pool.addr,
535 .block_size = 16384,
536 .max_size = device->physical->va.dynamic_state_pool.size,
537 });
538 if (result != VK_SUCCESS)
539 goto fail_general_state_pool;
540
541 /* The border color pointer is limited to 24 bits, so we need to make
542 * sure that any such color used at any point in the program doesn't
543 * exceed that limit.
544 * We achieve that by reserving all the custom border colors we support
545 * right off the bat, so they are close to the base address.
546 */
547 result = anv_state_reserved_array_pool_init(&device->custom_border_colors,
548 &device->dynamic_state_pool,
549 MAX_CUSTOM_BORDER_COLORS,
550 sizeof(struct gfx8_border_color), 64);
551 if (result != VK_SUCCESS)
552 goto fail_dynamic_state_pool;
553
554 result = anv_state_pool_init(&device->instruction_state_pool, device,
555 &(struct anv_state_pool_params) {
556 .name = "instruction pool",
557 .base_address = device->physical->va.instruction_state_pool.addr,
558 .block_size = 16384,
559 .max_size = device->physical->va.instruction_state_pool.size,
560 });
561 if (result != VK_SUCCESS)
562 goto fail_custom_border_color_pool;
563
564 if (device->info->verx10 >= 125) {
565 /* Put the scratch surface states at the beginning of the internal
566 * surface state pool.
567 */
568 result = anv_state_pool_init(&device->scratch_surface_state_pool, device,
569 &(struct anv_state_pool_params) {
570 .name = "scratch surface state pool",
571 .base_address = device->physical->va.scratch_surface_state_pool.addr,
572 .block_size = 4096,
573 .max_size = device->physical->va.scratch_surface_state_pool.size,
574 });
575 if (result != VK_SUCCESS)
576 goto fail_instruction_state_pool;
577
578 result = anv_state_pool_init(&device->internal_surface_state_pool, device,
579 &(struct anv_state_pool_params) {
580 .name = "internal surface state pool",
581 .base_address = device->physical->va.internal_surface_state_pool.addr,
582 .start_offset = device->physical->va.scratch_surface_state_pool.size,
583 .block_size = 4096,
584 .max_size = device->physical->va.internal_surface_state_pool.size,
585 });
586 } else {
587 result = anv_state_pool_init(&device->internal_surface_state_pool, device,
588 &(struct anv_state_pool_params) {
589 .name = "internal surface state pool",
590 .base_address = device->physical->va.internal_surface_state_pool.addr,
591 .block_size = 4096,
592 .max_size = device->physical->va.internal_surface_state_pool.size,
593 });
594 }
595 if (result != VK_SUCCESS)
596 goto fail_scratch_surface_state_pool;
597
598 if (device->physical->indirect_descriptors) {
599 result = anv_state_pool_init(&device->bindless_surface_state_pool, device,
600 &(struct anv_state_pool_params) {
601 .name = "bindless surface state pool",
602 .base_address = device->physical->va.bindless_surface_state_pool.addr,
603 .block_size = 4096,
604 .max_size = device->physical->va.bindless_surface_state_pool.size,
605 });
606 if (result != VK_SUCCESS)
607 goto fail_internal_surface_state_pool;
608 }
609
610 if (device->info->verx10 >= 125) {
611 /* We're using 3DSTATE_BINDING_TABLE_POOL_ALLOC to give the binding
612 * table its own base address separately from surface state base.
613 */
614 result = anv_state_pool_init(&device->binding_table_pool, device,
615 &(struct anv_state_pool_params) {
616 .name = "binding table pool",
617 .base_address = device->physical->va.binding_table_pool.addr,
618 .block_size = BINDING_TABLE_POOL_BLOCK_SIZE,
619 .max_size = device->physical->va.binding_table_pool.size,
620 });
621 } else {
622 /* The binding table should be in front of the surface states in virtual
* address space so that all surface states can be expressed as relative
624 * offsets from the binding table location.
625 */
626 assert(device->physical->va.binding_table_pool.addr <
627 device->physical->va.internal_surface_state_pool.addr);
628 int64_t bt_pool_offset = (int64_t)device->physical->va.binding_table_pool.addr -
629 (int64_t)device->physical->va.internal_surface_state_pool.addr;
630 assert(INT32_MIN < bt_pool_offset && bt_pool_offset < 0);
631 result = anv_state_pool_init(&device->binding_table_pool, device,
632 &(struct anv_state_pool_params) {
633 .name = "binding table pool",
634 .base_address = device->physical->va.internal_surface_state_pool.addr,
635 .start_offset = bt_pool_offset,
636 .block_size = BINDING_TABLE_POOL_BLOCK_SIZE,
637 .max_size = device->physical->va.internal_surface_state_pool.size,
638 });
639 }
640 if (result != VK_SUCCESS)
641 goto fail_bindless_surface_state_pool;
642
643 if (device->physical->indirect_descriptors) {
644 result = anv_state_pool_init(&device->indirect_push_descriptor_pool, device,
645 &(struct anv_state_pool_params) {
646 .name = "indirect push descriptor pool",
647 .base_address = device->physical->va.indirect_push_descriptor_pool.addr,
648 .block_size = 4096,
649 .max_size = device->physical->va.indirect_push_descriptor_pool.size,
650 });
651 if (result != VK_SUCCESS)
652 goto fail_binding_table_pool;
653 }
654
655 if (device->vk.enabled_extensions.EXT_descriptor_buffer &&
656 device->info->verx10 >= 125) {
/* On Gfx12.5+, because of the bindless stages (Mesh, Task, RT), the only
 * way we can wire up push descriptors is through the bindless heap. This
 * state pool is a 1GB carve-out of the 4GB HW heap.
 */
661 result = anv_state_pool_init(&device->push_descriptor_buffer_pool, device,
662 &(struct anv_state_pool_params) {
663 .name = "push descriptor buffer state pool",
664 .base_address = device->physical->va.push_descriptor_buffer_pool.addr,
665 .block_size = 4096,
666 .max_size = device->physical->va.push_descriptor_buffer_pool.size,
667 });
668 if (result != VK_SUCCESS)
669 goto fail_indirect_push_descriptor_pool;
670 }
671
672 if (device->info->has_aux_map) {
673 result = anv_state_pool_init(&device->aux_tt_pool, device,
674 &(struct anv_state_pool_params) {
675 .name = "aux-tt pool",
676 .base_address = device->physical->va.aux_tt_pool.addr,
677 .block_size = 16384,
678 .max_size = device->physical->va.aux_tt_pool.size,
679 });
680 if (result != VK_SUCCESS)
681 goto fail_push_descriptor_buffer_pool;
682
683 device->aux_map_ctx = intel_aux_map_init(device, &aux_map_allocator,
684 &physical_device->info);
685 if (!device->aux_map_ctx)
686 goto fail_aux_tt_pool;
687 }
688
689 result = anv_device_alloc_bo(device, "workaround", 8192,
690 ANV_BO_ALLOC_CAPTURE |
691 ANV_BO_ALLOC_HOST_COHERENT |
692 ANV_BO_ALLOC_MAPPED |
693 ANV_BO_ALLOC_INTERNAL,
694 0 /* explicit_address */,
695 &device->workaround_bo);
696 if (result != VK_SUCCESS)
697 goto fail_surface_aux_map_pool;
698
699 if (intel_needs_workaround(device->info, 14019708328)) {
700 result = anv_device_alloc_bo(device, "dummy_aux", 4096,
701 0 /* alloc_flags */,
702 0 /* explicit_address */,
703 &device->dummy_aux_bo);
704 if (result != VK_SUCCESS)
705 goto fail_workaround_bo;
706
707 device->isl_dev.dummy_aux_address = device->dummy_aux_bo->offset;
708 }
709
710 struct anv_address wa_addr = (struct anv_address) {
711 .bo = device->workaround_bo,
712 };
713
714 wa_addr = anv_address_add_aligned(wa_addr,
715 intel_debug_write_identifiers(
716 device->workaround_bo->map,
717 device->workaround_bo->size,
718 "Anv"), 32);
719
720 device->rt_uuid_addr = wa_addr;
721 memcpy(device->rt_uuid_addr.bo->map + device->rt_uuid_addr.offset,
722 physical_device->rt_uuid,
723 sizeof(physical_device->rt_uuid));
724
725 /* Make sure the workaround address is the last one in the workaround BO,
726 * so that writes never overwrite other bits of data stored in the
727 * workaround BO.
728 */
729 wa_addr = anv_address_add_aligned(wa_addr,
730 sizeof(physical_device->rt_uuid), 64);
731 device->workaround_address = wa_addr;
732
/* Make sure we don't overflow the allocated BO. */
734 assert(device->workaround_address.offset < device->workaround_bo->size);
/* We also need 64B (the maximum GRF size) available from the workaround
 * address (see the TBIMR workaround).
 */
738 assert((device->workaround_bo->size -
739 device->workaround_address.offset) >= 64);
740
741 device->workarounds.doom64_images = NULL;
742
743
744 device->debug_frame_desc =
745 intel_debug_get_identifier_block(device->workaround_bo->map,
746 device->workaround_bo->size,
747 INTEL_DEBUG_BLOCK_TYPE_FRAME);
748
749 if (device->vk.enabled_extensions.KHR_ray_query) {
750 uint32_t ray_queries_size =
751 align(brw_rt_ray_queries_hw_stacks_size(device->info), 4096);
752
753 result = anv_device_alloc_bo(device, "ray queries",
754 ray_queries_size,
755 ANV_BO_ALLOC_INTERNAL,
756 0 /* explicit_address */,
757 &device->ray_query_bo);
758 if (result != VK_SUCCESS)
759 goto fail_dummy_aux_bo;
760 }
761
762 result = anv_device_init_trivial_batch(device);
763 if (result != VK_SUCCESS)
764 goto fail_ray_query_bo;
765
/* Emit the CPS states before running the initialization batch, as those
 * structures are referenced by it.
 */
769 if (device->info->ver >= 12) {
uint32_t n_cps_states = 3 * 3; /* All combinations of X by Y CP sizes (1, 2, 4) */
771
772 if (device->info->has_coarse_pixel_primitive_and_cb)
n_cps_states *= 5 * 5; /* 5 combiner modes for each of X and Y */
774
775 n_cps_states += 1; /* Disable CPS */
776
/* Each combination must be replicated for all viewports */
778 n_cps_states *= MAX_VIEWPORTS;
779
780 device->cps_states =
781 anv_state_pool_alloc(&device->dynamic_state_pool,
782 n_cps_states * CPS_STATE_length(device->info) * 4,
783 32);
784 if (device->cps_states.map == NULL)
785 goto fail_trivial_batch;
786
787 anv_genX(device->info, init_cps_device_state)(device);
788 }
789
790 if (device->physical->indirect_descriptors) {
791 /* Allocate a null surface state at surface state offset 0. This makes
792 * NULL descriptor handling trivial because we can just memset
793 * structures to zero and they have a valid descriptor.
794 */
795 device->null_surface_state =
796 anv_state_pool_alloc(&device->bindless_surface_state_pool,
797 device->isl_dev.ss.size,
798 device->isl_dev.ss.align);
799 isl_null_fill_state(&device->isl_dev, device->null_surface_state.map,
800 .size = isl_extent3d(1, 1, 1) /* This shouldn't matter */);
801 assert(device->null_surface_state.offset == 0);
802 } else {
/* When using direct descriptors, those can hold the null surface state
 * directly. We still need a null surface for the binding table entries,
 * but this one can live anywhere in the internal surface state pool.
 */
808 device->null_surface_state =
809 anv_state_pool_alloc(&device->internal_surface_state_pool,
810 device->isl_dev.ss.size,
811 device->isl_dev.ss.align);
812 isl_null_fill_state(&device->isl_dev, device->null_surface_state.map,
813 .size = isl_extent3d(1, 1, 1) /* This shouldn't matter */);
814 }
815
816 isl_null_fill_state(&device->isl_dev, &device->host_null_surface_state,
817 .size = isl_extent3d(1, 1, 1) /* This shouldn't matter */);
818
819 anv_scratch_pool_init(device, &device->scratch_pool, false);
820 anv_scratch_pool_init(device, &device->protected_scratch_pool, true);
821
822 /* TODO(RT): Do we want some sort of data structure for this? */
823 memset(device->rt_scratch_bos, 0, sizeof(device->rt_scratch_bos));
824
825 if (ANV_SUPPORT_RT && device->info->has_ray_tracing) {
826 /* The docs say to always allocate 128KB per DSS */
827 const uint32_t btd_fifo_bo_size =
828 128 * 1024 * intel_device_info_dual_subslice_id_bound(device->info);
829 result = anv_device_alloc_bo(device,
830 "rt-btd-fifo",
831 btd_fifo_bo_size,
832 ANV_BO_ALLOC_INTERNAL,
833 0 /* explicit_address */,
834 &device->btd_fifo_bo);
835 if (result != VK_SUCCESS)
836 goto fail_trivial_batch_bo_and_scratch_pool;
837 }
838
839 struct vk_pipeline_cache_create_info pcc_info = { .weak_ref = true, };
840 device->vk.mem_cache =
841 vk_pipeline_cache_create(&device->vk, &pcc_info, NULL);
842 if (!device->vk.mem_cache) {
843 result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
844 goto fail_btd_fifo_bo;
845 }
846
/* Internal shaders need their own pipeline cache because, unlike the rest
 * of ANV, they won't work at all without a cache: they depend on it to
 * keep their shaders resident while they run. Therefore, we need a special
 * cache just for BLORP/RT that's forced to always be enabled.
 */
852 struct vk_pipeline_cache_create_info internal_pcc_info = {
853 .force_enable = true,
854 .weak_ref = false,
855 };
856 device->internal_cache =
857 vk_pipeline_cache_create(&device->vk, &internal_pcc_info, NULL);
858 if (device->internal_cache == NULL) {
859 result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
860 goto fail_default_pipeline_cache;
861 }
862
/* The device (currently ICL/TGL) does not have float64 support. */
864 if (!device->info->has_64bit_float &&
865 device->physical->instance->fp64_workaround_enabled)
866 anv_load_fp64_shader(device);
867
868 if (INTEL_DEBUG(DEBUG_SHADER_PRINT)) {
869 result = anv_device_print_init(device);
870 if (result != VK_SUCCESS)
871 goto fail_internal_cache;
872 }
873
874 #if DETECT_OS_ANDROID
875 device->u_gralloc = u_gralloc_create(U_GRALLOC_TYPE_AUTO);
876 #endif
877
878 device->robust_buffer_access =
879 device->vk.enabled_features.robustBufferAccess ||
880 device->vk.enabled_features.nullDescriptor;
881
882 device->breakpoint = anv_state_pool_alloc(&device->dynamic_state_pool, 4,
883 4);
884 p_atomic_set(&device->draw_call_count, 0);
885
/* Create a separate command pool for the companion RCS command buffers. */
887 if (device->info->verx10 >= 125) {
888 VkCommandPoolCreateInfo pool_info = {
889 .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
890 .queueFamilyIndex =
891 anv_get_first_render_queue_index(device->physical),
892 };
893
894 result = vk_common_CreateCommandPool(anv_device_to_handle(device),
895 &pool_info, NULL,
896 &device->companion_rcs_cmd_pool);
897 if (result != VK_SUCCESS) {
898 goto fail_print;
899 }
900 }
901
902 result = anv_device_init_trtt(device);
903 if (result != VK_SUCCESS)
904 goto fail_companion_cmd_pool;
905
906 result = anv_device_init_rt_shaders(device);
907 if (result != VK_SUCCESS) {
908 result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
909 goto fail_trtt;
910 }
911
912 anv_device_init_blorp(device);
913
914 anv_device_init_border_colors(device);
915
916 anv_device_init_internal_kernels(device);
917
918 anv_device_init_astc_emu(device);
919
920 anv_device_perf_init(device);
921
922 anv_device_init_embedded_samplers(device);
923
924 BITSET_ONES(device->gfx_dirty_state);
925 BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_INDEX_BUFFER);
926 BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_SO_DECL_LIST);
927 if (device->info->ver < 11)
928 BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_VF_SGVS_2);
929 if (device->info->ver < 12) {
930 BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_PRIMITIVE_REPLICATION);
931 BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_DEPTH_BOUNDS);
932 }
933 if (!device->vk.enabled_extensions.EXT_sample_locations)
934 BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_SAMPLE_PATTERN);
935 if (!device->vk.enabled_extensions.KHR_fragment_shading_rate)
936 BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_CPS);
937 if (!device->vk.enabled_extensions.EXT_mesh_shader) {
938 BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_SBE_MESH);
939 BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_CLIP_MESH);
940 BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_MESH_CONTROL);
941 BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_MESH_SHADER);
942 BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_MESH_DISTRIB);
943 BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_TASK_CONTROL);
944 BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_TASK_SHADER);
945 BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_TASK_REDISTRIB);
946 }
947 if (!intel_needs_workaround(device->info, 18019816803))
948 BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_WA_18019816803);
949 if (!intel_needs_workaround(device->info, 14018283232))
950 BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_WA_14018283232);
951 if (device->info->ver > 9)
952 BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_PMA_FIX);
953
954 device->queue_count = 0;
955 for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
956 const VkDeviceQueueCreateInfo *queueCreateInfo =
957 &pCreateInfo->pQueueCreateInfos[i];
958
959 for (uint32_t j = 0; j < queueCreateInfo->queueCount; j++) {
960 result = anv_queue_init(device, &device->queues[device->queue_count],
961 queueCreateInfo, j);
962 if (result != VK_SUCCESS)
963 goto fail_queues;
964
965 device->queue_count++;
966 }
967 }
968
969 anv_device_utrace_init(device);
970
971 result = anv_genX(device->info, init_device_state)(device);
972 if (result != VK_SUCCESS)
973 goto fail_utrace;
974
975 *pDevice = anv_device_to_handle(device);
976
977 return VK_SUCCESS;
978
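/* Error unwinding: each label below releases everything initialized after
 * the point it names, in reverse order of creation.
 */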
979 fail_utrace:
980 anv_device_utrace_finish(device);
981 fail_queues:
982 for (uint32_t i = 0; i < device->queue_count; i++)
983 anv_queue_finish(&device->queues[i]);
984 anv_device_finish_embedded_samplers(device);
985 anv_device_finish_blorp(device);
986 anv_device_finish_astc_emu(device);
987 anv_device_finish_internal_kernels(device);
988 anv_device_finish_rt_shaders(device);
989 fail_trtt:
990 anv_device_finish_trtt(device);
991 fail_companion_cmd_pool:
992 if (device->info->verx10 >= 125) {
993 vk_common_DestroyCommandPool(anv_device_to_handle(device),
994 device->companion_rcs_cmd_pool, NULL);
995 }
996 fail_print:
997 if (INTEL_DEBUG(DEBUG_SHADER_PRINT))
998 anv_device_print_fini(device);
999 fail_internal_cache:
1000 vk_pipeline_cache_destroy(device->internal_cache, NULL);
1001 fail_default_pipeline_cache:
1002 vk_pipeline_cache_destroy(device->vk.mem_cache, NULL);
1003 fail_btd_fifo_bo:
1004 if (ANV_SUPPORT_RT && device->info->has_ray_tracing)
1005 anv_device_release_bo(device, device->btd_fifo_bo);
1006 fail_trivial_batch_bo_and_scratch_pool:
1007 anv_scratch_pool_finish(device, &device->scratch_pool);
1008 anv_scratch_pool_finish(device, &device->protected_scratch_pool);
1009 fail_trivial_batch:
1010 anv_device_release_bo(device, device->trivial_batch_bo);
1011 fail_ray_query_bo:
1012 if (device->ray_query_bo)
1013 anv_device_release_bo(device, device->ray_query_bo);
1014 fail_dummy_aux_bo:
1015 if (device->dummy_aux_bo)
1016 anv_device_release_bo(device, device->dummy_aux_bo);
1017 fail_workaround_bo:
1018 anv_device_release_bo(device, device->workaround_bo);
1019 fail_surface_aux_map_pool:
1020 if (device->info->has_aux_map) {
1021 intel_aux_map_finish(device->aux_map_ctx);
1022 device->aux_map_ctx = NULL;
1023 }
1024 fail_aux_tt_pool:
1025 if (device->info->has_aux_map)
1026 anv_state_pool_finish(&device->aux_tt_pool);
1027 fail_push_descriptor_buffer_pool:
1028 if (device->vk.enabled_extensions.EXT_descriptor_buffer &&
1029 device->info->verx10 >= 125)
1030 anv_state_pool_finish(&device->push_descriptor_buffer_pool);
1031 fail_indirect_push_descriptor_pool:
1032 if (device->physical->indirect_descriptors)
1033 anv_state_pool_finish(&device->indirect_push_descriptor_pool);
1034 fail_binding_table_pool:
1035 anv_state_pool_finish(&device->binding_table_pool);
1036 fail_bindless_surface_state_pool:
1037 if (device->physical->indirect_descriptors)
1038 anv_state_pool_finish(&device->bindless_surface_state_pool);
1039 fail_internal_surface_state_pool:
1040 anv_state_pool_finish(&device->internal_surface_state_pool);
1041 fail_scratch_surface_state_pool:
1042 if (device->info->verx10 >= 125)
1043 anv_state_pool_finish(&device->scratch_surface_state_pool);
1044 fail_instruction_state_pool:
1045 anv_state_pool_finish(&device->instruction_state_pool);
1046 fail_custom_border_color_pool:
1047 anv_state_reserved_array_pool_finish(&device->custom_border_colors);
1048 fail_dynamic_state_pool:
1049 anv_state_pool_finish(&device->dynamic_state_pool);
1050 fail_general_state_pool:
1051 anv_state_pool_finish(&device->general_state_pool);
1052 fail_batch_bo_pool:
1053 if (device->vk.enabled_extensions.KHR_acceleration_structure)
1054 anv_bo_pool_finish(&device->bvh_bo_pool);
1055 anv_bo_pool_finish(&device->batch_bo_pool);
1056 anv_bo_cache_finish(&device->bo_cache);
1057 fail_queue_cond:
1058 pthread_cond_destroy(&device->queue_submit);
1059 fail_mutex:
1060 pthread_mutex_destroy(&device->mutex);
1061 fail_vmas:
1062 util_vma_heap_finish(&device->vma_trtt);
1063 util_vma_heap_finish(&device->vma_dynamic_visible);
1064 util_vma_heap_finish(&device->vma_desc);
1065 util_vma_heap_finish(&device->vma_hi);
1066 util_vma_heap_finish(&device->vma_lo);
1067 pthread_mutex_destroy(&device->vma_mutex);
1068 fail_queues_alloc:
1069 vk_free(&device->vk.alloc, device->queues);
1070 fail_context_id:
1071 anv_device_destroy_context_or_vm(device);
1072 fail_fd:
1073 close(device->fd);
1074 fail_device:
1075 vk_device_finish(&device->vk);
1076 fail_alloc:
1077 vk_free(&device->vk.alloc, device);
1078
1079 return result;
1080 }
1081
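/* Device teardown mirrors anv_CreateDevice: queues and higher-level
 * objects first, then caches, pools and heaps, and finally the KMD
 * context/VM and the device fd.
 */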
void anv_DestroyDevice(
    VkDevice                                    _device,
    const VkAllocationCallbacks*                pAllocator)
1085 {
1086 ANV_FROM_HANDLE(anv_device, device, _device);
1087
1088 if (!device)
1089 return;
1090
1091 #if DETECT_OS_ANDROID
1092 u_gralloc_destroy(&device->u_gralloc);
1093 #endif
1094
1095 anv_memory_trace_finish(device);
1096
1097 struct anv_physical_device *pdevice = device->physical;
1098
1099 /* Do TRTT batch garbage collection before destroying queues. */
1100 anv_device_finish_trtt(device);
1101
1102 anv_device_utrace_finish(device);
1103
1104 for (uint32_t i = 0; i < device->queue_count; i++)
1105 anv_queue_finish(&device->queues[i]);
1106 vk_free(&device->vk.alloc, device->queues);
1107
1108 anv_device_finish_blorp(device);
1109
1110 anv_device_finish_rt_shaders(device);
1111
1112 anv_device_finish_astc_emu(device);
1113
1114 anv_device_finish_internal_kernels(device);
1115
1116 if (INTEL_DEBUG(DEBUG_SHADER_PRINT))
1117 anv_device_print_fini(device);
1118
1119 vk_pipeline_cache_destroy(device->internal_cache, NULL);
1120 vk_pipeline_cache_destroy(device->vk.mem_cache, NULL);
1121
1122 anv_device_finish_embedded_samplers(device);
1123
1124 if (ANV_SUPPORT_RT && device->info->has_ray_tracing)
1125 anv_device_release_bo(device, device->btd_fifo_bo);
1126
1127 if (device->info->verx10 >= 125) {
1128 vk_common_DestroyCommandPool(anv_device_to_handle(device),
1129 device->companion_rcs_cmd_pool, NULL);
1130 }
1131
1132 anv_state_reserved_array_pool_finish(&device->custom_border_colors);
1133 #ifdef HAVE_VALGRIND
1134 /* We only need to free these to prevent valgrind errors. The backing
1135 * BO will go away in a couple of lines so we don't actually leak.
1136 */
1137 anv_state_pool_free(&device->dynamic_state_pool, device->border_colors);
1138 anv_state_pool_free(&device->dynamic_state_pool, device->slice_hash);
1139 anv_state_pool_free(&device->dynamic_state_pool, device->cps_states);
1140 anv_state_pool_free(&device->dynamic_state_pool, device->breakpoint);
1141 #endif
1142
1143 for (unsigned i = 0; i < ARRAY_SIZE(device->rt_scratch_bos); i++) {
1144 if (device->rt_scratch_bos[i] != NULL)
1145 anv_device_release_bo(device, device->rt_scratch_bos[i]);
1146 }
1147
1148 anv_scratch_pool_finish(device, &device->scratch_pool);
1149 anv_scratch_pool_finish(device, &device->protected_scratch_pool);
1150
1151 if (device->vk.enabled_extensions.KHR_ray_query) {
1152 for (unsigned i = 0; i < ARRAY_SIZE(device->ray_query_shadow_bos); i++) {
1153 if (device->ray_query_shadow_bos[i] != NULL)
1154 anv_device_release_bo(device, device->ray_query_shadow_bos[i]);
1155 }
1156 anv_device_release_bo(device, device->ray_query_bo);
1157 }
1158 anv_device_release_bo(device, device->workaround_bo);
1159 if (device->dummy_aux_bo)
1160 anv_device_release_bo(device, device->dummy_aux_bo);
1161 anv_device_release_bo(device, device->trivial_batch_bo);
1162
1163 if (device->info->has_aux_map) {
1164 intel_aux_map_finish(device->aux_map_ctx);
1165 device->aux_map_ctx = NULL;
1166 anv_state_pool_finish(&device->aux_tt_pool);
1167 }
1168 if (device->vk.enabled_extensions.EXT_descriptor_buffer &&
1169 device->info->verx10 >= 125)
1170 anv_state_pool_finish(&device->push_descriptor_buffer_pool);
1171 if (device->physical->indirect_descriptors)
1172 anv_state_pool_finish(&device->indirect_push_descriptor_pool);
1173 anv_state_pool_finish(&device->binding_table_pool);
1174 if (device->info->verx10 >= 125)
1175 anv_state_pool_finish(&device->scratch_surface_state_pool);
1176 anv_state_pool_finish(&device->internal_surface_state_pool);
1177 if (device->physical->indirect_descriptors)
1178 anv_state_pool_finish(&device->bindless_surface_state_pool);
1179 anv_state_pool_finish(&device->instruction_state_pool);
1180 anv_state_pool_finish(&device->dynamic_state_pool);
1181 anv_state_pool_finish(&device->general_state_pool);
1182
1183 if (device->vk.enabled_extensions.KHR_acceleration_structure)
1184 anv_bo_pool_finish(&device->bvh_bo_pool);
1185 anv_bo_pool_finish(&device->batch_bo_pool);
1186
1187 anv_bo_cache_finish(&device->bo_cache);
1188
1189 util_vma_heap_finish(&device->vma_trtt);
1190 util_vma_heap_finish(&device->vma_dynamic_visible);
1191 util_vma_heap_finish(&device->vma_desc);
1192 util_vma_heap_finish(&device->vma_hi);
1193 util_vma_heap_finish(&device->vma_lo);
1194 pthread_mutex_destroy(&device->vma_mutex);
1195
1196 pthread_cond_destroy(&device->queue_submit);
1197 pthread_mutex_destroy(&device->mutex);
1198
1199 ralloc_free(device->fp64_nir);
1200
1201 anv_device_destroy_context_or_vm(device);
1202
1203 if (INTEL_DEBUG(DEBUG_BATCH | DEBUG_BATCH_STATS)) {
1204 for (unsigned i = 0; i < pdevice->queue.family_count; i++) {
1205 if (INTEL_DEBUG(DEBUG_BATCH_STATS))
1206 intel_batch_print_stats(&device->decoder[i]);
1207 intel_batch_decode_ctx_finish(&device->decoder[i]);
1208 }
1209 }
1210
1211 close(device->fd);
1212
1213 vk_device_finish(&device->vk);
1214 vk_free(&device->vk.alloc, device);
1215 }
1216
VkResult anv_EnumerateInstanceLayerProperties(
    uint32_t*                                   pPropertyCount,
    VkLayerProperties*                          pProperties)
1220 {
1221 if (pProperties == NULL) {
1222 *pPropertyCount = 0;
1223 return VK_SUCCESS;
1224 }
1225
1226 /* None supported at this time */
1227 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
1228 }
1229
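/* Wait for a BO and translate the result: ETIME maps to VK_TIMEOUT, any
 * other failure marks the device lost since the real error is unknown.
 */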
1230 VkResult
anv_device_wait(struct anv_device *device, struct anv_bo *bo,
                int64_t timeout)
1233 {
1234 int ret = anv_gem_wait(device, bo->gem_handle, &timeout);
1235 if (ret == -1 && errno == ETIME) {
1236 return VK_TIMEOUT;
1237 } else if (ret == -1) {
1238 /* We don't know the real error. */
1239 return vk_device_set_lost(&device->vk, "gem wait failed: %m");
1240 } else {
1241 return VK_SUCCESS;
1242 }
1243 }
1244
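/* Pick the VMA heap for the allocation flags; anything without a more
 * specific flag (TRTT, 32-bit, descriptor pool, dynamic-visible) comes
 * from the high heap.
 */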
1245 static struct util_vma_heap *
anv_vma_heap_for_flags(struct anv_device *device,
                       enum anv_bo_alloc_flags alloc_flags)
1248 {
1249 if (alloc_flags & ANV_BO_ALLOC_TRTT)
1250 return &device->vma_trtt;
1251
1252 if (alloc_flags & ANV_BO_ALLOC_32BIT_ADDRESS)
1253 return &device->vma_lo;
1254
1255 if (alloc_flags & ANV_BO_ALLOC_DESCRIPTOR_POOL)
1256 return &device->vma_desc;
1257
1258 if (alloc_flags & ANV_BO_ALLOC_DYNAMIC_VISIBLE_POOL)
1259 return &device->vma_dynamic_visible;
1260
1261 return &device->vma_hi;
1262 }
1263
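/* Allocate a GPU virtual address range from the heap picked above. Client
 * visible allocations either take the exact client_address or are carved
 * from the bottom of the heap, and never fall back to another heap; the
 * returned address is in canonical (sign-extended) form.
 */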
1264 uint64_t
anv_vma_alloc(struct anv_device *device,
              uint64_t size, uint64_t align,
              enum anv_bo_alloc_flags alloc_flags,
              uint64_t client_address,
              struct util_vma_heap **out_vma_heap)
1270 {
1271 pthread_mutex_lock(&device->vma_mutex);
1272
1273 uint64_t addr = 0;
1274 *out_vma_heap = anv_vma_heap_for_flags(device, alloc_flags);
1275
1276 if (alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) {
1277 assert(*out_vma_heap == &device->vma_hi ||
1278 *out_vma_heap == &device->vma_dynamic_visible ||
1279 *out_vma_heap == &device->vma_trtt);
1280
1281 if (client_address) {
1282 if (util_vma_heap_alloc_addr(*out_vma_heap,
1283 client_address, size)) {
1284 addr = client_address;
1285 }
1286 } else {
1287 (*out_vma_heap)->alloc_high = false;
1288 addr = util_vma_heap_alloc(*out_vma_heap, size, align);
1289 (*out_vma_heap)->alloc_high = true;
1290 }
1291 /* We don't want to fall back to other heaps */
1292 goto done;
1293 }
1294
1295 assert(client_address == 0);
1296
1297 addr = util_vma_heap_alloc(*out_vma_heap, size, align);
1298
1299 done:
1300 pthread_mutex_unlock(&device->vma_mutex);
1301
1302 assert(addr == intel_48b_address(addr));
1303 return intel_canonical_address(addr);
1304 }
1305
1306 void
anv_vma_free(struct anv_device *device,
             struct util_vma_heap *vma_heap,
             uint64_t address, uint64_t size)
1310 {
1311 assert(vma_heap == &device->vma_lo ||
1312 vma_heap == &device->vma_hi ||
1313 vma_heap == &device->vma_desc ||
1314 vma_heap == &device->vma_dynamic_visible ||
1315 vma_heap == &device->vma_trtt);
1316
1317 const uint64_t addr_48b = intel_48b_address(address);
1318
1319 pthread_mutex_lock(&device->vma_mutex);
1320
1321 util_vma_heap_free(vma_heap, addr_48b, size);
1322
1323 pthread_mutex_unlock(&device->vma_mutex);
1324 }
1325
VkResult anv_AllocateMemory(
    VkDevice                                    _device,
    const VkMemoryAllocateInfo*                 pAllocateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkDeviceMemory*                             pMem)
1331 {
1332 ANV_FROM_HANDLE(anv_device, device, _device);
1333 struct anv_physical_device *pdevice = device->physical;
1334 struct anv_device_memory *mem;
1335 VkResult result = VK_SUCCESS;
1336
1337 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
1338
1339 VkDeviceSize aligned_alloc_size =
1340 align64(pAllocateInfo->allocationSize, 4096);
1341
1342 assert(pAllocateInfo->memoryTypeIndex < pdevice->memory.type_count);
1343 const struct anv_memory_type *mem_type =
1344 &pdevice->memory.types[pAllocateInfo->memoryTypeIndex];
1345 assert(mem_type->heapIndex < pdevice->memory.heap_count);
1346 struct anv_memory_heap *mem_heap =
1347 &pdevice->memory.heaps[mem_type->heapIndex];
1348
1349 if (aligned_alloc_size > mem_heap->size)
1350 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
1351
1352 uint64_t mem_heap_used = p_atomic_read(&mem_heap->used);
1353 if (mem_heap_used + aligned_alloc_size > mem_heap->size)
1354 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
1355
1356 mem = vk_device_memory_create(&device->vk, pAllocateInfo,
1357 pAllocator, sizeof(*mem));
1358 if (mem == NULL)
1359 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1360
1361 mem->type = mem_type;
1362 mem->map = NULL;
1363 mem->map_size = 0;
1364 mem->map_delta = 0;
1365
1366 enum anv_bo_alloc_flags alloc_flags = 0;
1367
1368 const VkImportMemoryFdInfoKHR *fd_info = NULL;
1369 const VkMemoryDedicatedAllocateInfo *dedicated_info = NULL;
1370 const struct wsi_memory_allocate_info *wsi_info = NULL;
1371 uint64_t client_address = 0;
1372
1373 vk_foreach_struct_const(ext, pAllocateInfo->pNext) {
/* VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA isn't a real enum
 * value, so use a cast to avoid a compiler warning.
 */
1377 switch ((uint32_t)ext->sType) {
1378 case VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO:
1379 case VK_STRUCTURE_TYPE_IMPORT_ANDROID_HARDWARE_BUFFER_INFO_ANDROID:
1380 case VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT:
1381 case VK_STRUCTURE_TYPE_IMPORT_MEMORY_WIN32_HANDLE_INFO_KHR:
1382 case VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO:
1383 /* handled by vk_device_memory_create */
1384 break;
1385
1386 case VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR:
1387 fd_info = (void *)ext;
1388 break;
1389
1390 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO:
1391 dedicated_info = (void *)ext;
1392 break;
1393
1394 case VK_STRUCTURE_TYPE_MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO: {
1395 const VkMemoryOpaqueCaptureAddressAllocateInfo *addr_info =
1396 (const VkMemoryOpaqueCaptureAddressAllocateInfo *)ext;
1397 client_address = addr_info->opaqueCaptureAddress;
1398 break;
1399 }
1400
1401 case VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA:
1402 wsi_info = (void *)ext;
1403 break;
1404
1405 default:
1406 vk_debug_ignored_stype(ext->sType);
1407 break;
1408 }
1409 }
1410
/* If i915 reported mappable/non-mappable vram regions and the
 * application wants lmem to be mappable, then we need to use the
 * I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS flag to create our BO.
 */
1415 if (pdevice->vram_mappable.size > 0 &&
1416 pdevice->vram_non_mappable.size > 0 &&
1417 (mem_type->propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) &&
1418 (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT))
1419 alloc_flags |= ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE;
1420
1421 if (!mem_heap->is_local_mem)
1422 alloc_flags |= ANV_BO_ALLOC_NO_LOCAL_MEM;
1423
1424 if (mem->vk.alloc_flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT)
1425 alloc_flags |= ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS;
1426
1427 if (mem_type->propertyFlags & VK_MEMORY_PROPERTY_PROTECTED_BIT)
1428 alloc_flags |= ANV_BO_ALLOC_PROTECTED;
1429
/* For now, always allocate AUX-TT aligned memory, regardless of dedicated
 * allocations. An application can, for example, suballocate a large
 * VkDeviceMemory and try to bind an image created with a CCS modifier. In
 * that case we cannot disable CCS if the alignment doesn't meet the AUX-TT
 * requirements, so we need to ensure both the VkDeviceMemory and the
 * alignment reported through vkGetImageMemoryRequirements() meet the
 * AUX-TT requirement.
 *
 * Allocations with the special dynamic_visible mem type are for things like
 * descriptor buffers, so AUX-TT alignment is not needed here.
 */
1441 if (device->info->has_aux_map && !mem_type->dynamic_visible)
1442 alloc_flags |= ANV_BO_ALLOC_AUX_TT_ALIGNED;
1443
/* If the allocation is neither dedicated nor a host pointer, allocate
 * additional CCS space.
 *
 * Allocations with the special dynamic_visible mem type are for things like
 * descriptor buffers, which don't need any compression.
 */
1450 if (device->physical->alloc_aux_tt_mem &&
1451 dedicated_info == NULL &&
1452 mem->vk.host_ptr == NULL &&
1453 !mem_type->dynamic_visible)
1454 alloc_flags |= ANV_BO_ALLOC_AUX_CCS;
1455
/* TODO: Android, ChromeOS and other applications may need another way to
 * allocate buffers that can be scanned out to display, but it should be
 * pretty easy to catch those, as the Xe KMD will print warnings in dmesg
 * when scanning out buffers allocated without the proper flag set.
 */
1461 if (wsi_info)
1462 alloc_flags |= ANV_BO_ALLOC_SCANOUT;
1463
1464 /* Anything imported or exported is EXTERNAL */
1465 if (mem->vk.export_handle_types || mem->vk.import_handle_type) {
1466 alloc_flags |= ANV_BO_ALLOC_EXTERNAL;
1467
1468 /* wsi has its own way of synchronizing with the compositor */
1469 if (pdevice->instance->external_memory_implicit_sync &&
1470 !wsi_info && dedicated_info &&
1471 dedicated_info->image != VK_NULL_HANDLE) {
1472 ANV_FROM_HANDLE(anv_image, image, dedicated_info->image);
1473
1474 /* Apply implicit sync to be compatible with clients relying on
1475 * implicit fencing. This matches the behavior in iris i915_batch
1476 * submit. An example client is VA-API (iHD), so only dedicated
1477 * image scenario has to be covered.
1478 */
1479 alloc_flags |= ANV_BO_ALLOC_IMPLICIT_SYNC;
1480
1481 /* For color attachment, apply IMPLICIT_WRITE so a client on the
1482 * consumer side relying on implicit fencing can have a fence to
1483 * wait for render complete.
1484 */
1485 if (image->vk.usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)
1486 alloc_flags |= ANV_BO_ALLOC_IMPLICIT_WRITE;
1487 }
1488 }
1489
1490 /* TODO: Disabling compression on external bos will cause problems once we
1491 * have a modifier that supports compression (Xe2+).
1492 */
1493 if (!(alloc_flags & ANV_BO_ALLOC_EXTERNAL) && mem_type->compressed)
1494 alloc_flags |= ANV_BO_ALLOC_COMPRESSED;
1495
1496 if (mem_type->dynamic_visible)
1497 alloc_flags |= ANV_BO_ALLOC_DYNAMIC_VISIBLE_POOL;
1498
1499 if (mem->vk.ahardware_buffer) {
1500 result = anv_import_ahw_memory(_device, mem);
1501 if (result != VK_SUCCESS)
1502 goto fail;
1503
1504 goto success;
1505 }
1506
1507 /* The Vulkan spec permits handleType to be 0, in which case the struct is
1508 * ignored.
1509 */
1510 if (fd_info && fd_info->handleType) {
1511 /* At the moment, we support only the below handle types. */
1512 assert(fd_info->handleType ==
1513 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
1514 fd_info->handleType ==
1515 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
1516
1517 result = anv_device_import_bo(device, fd_info->fd, alloc_flags,
1518 client_address, &mem->bo);
1519 if (result != VK_SUCCESS)
1520 goto fail;
1521
1522 /* For security purposes, we reject importing the bo if it's smaller
1523 * than the requested allocation size. This prevents a malicious client
1524 * from passing a buffer to a trusted client, lying about the size, and
1525 * telling the trusted client to try and texture from an image that goes
1526 * out-of-bounds. This sort of thing could lead to GPU hangs or worse
1527 * in the trusted client. The trusted client can protect itself against
1528 * this sort of attack but only if it can trust the buffer size.
1529 */
1530 if (mem->bo->size < aligned_alloc_size) {
1531 result = vk_errorf(device, VK_ERROR_INVALID_EXTERNAL_HANDLE,
1532 "aligned allocationSize too large for "
1533 "VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT: "
1534 "%"PRIu64"B > %"PRIu64"B",
1535 aligned_alloc_size, mem->bo->size);
1536 anv_device_release_bo(device, mem->bo);
1537 goto fail;
1538 }
1539
1540 /* From the Vulkan spec:
1541 *
1542 * "Importing memory from a file descriptor transfers ownership of
1543 * the file descriptor from the application to the Vulkan
1544 * implementation. The application must not perform any operations on
1545 * the file descriptor after a successful import."
1546 *
1547 * If the import fails, we leave the file descriptor open.
1548 */
1549 close(fd_info->fd);
1550 goto success;
1551 }
1552
1553 if (mem->vk.host_ptr) {
1554 if (mem->vk.import_handle_type ==
1555 VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_MAPPED_FOREIGN_MEMORY_BIT_EXT) {
1556 result = vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
1557 goto fail;
1558 }
1559
1560 assert(mem->vk.import_handle_type ==
1561 VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
1562
1563 result = anv_device_import_bo_from_host_ptr(device,
1564 mem->vk.host_ptr,
1565 mem->vk.size,
1566 alloc_flags,
1567 client_address,
1568 &mem->bo);
1569 if (result != VK_SUCCESS)
1570 goto fail;
1571
1572 goto success;
1573 }
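   /* A minimal application-side sketch (hypothetical names) of the host-pointer
    * import handled above, using VK_EXT_external_memory_host. The pointer must
    * be aligned to minImportedHostPointerAlignment, as reported in
    * VkPhysicalDeviceExternalMemoryHostPropertiesEXT.
    *
    *    void *host_mem;
    *    posix_memalign(&host_mem, host_props.minImportedHostPointerAlignment, size);
    *
    *    VkImportMemoryHostPointerInfoEXT import = {
    *       .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
    *       .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
    *       .pHostPointer = host_mem,
    *    };
    *    VkMemoryAllocateInfo alloc_info = {
    *       .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
    *       .pNext = &import,
    *       .allocationSize = size,
    *       .memoryTypeIndex = mem_type_index, // from vkGetMemoryHostPointerPropertiesEXT
    *    };
    *    vkAllocateMemory(vk_device, &alloc_info, NULL, &memory);
    */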
1574
1575 if (alloc_flags & (ANV_BO_ALLOC_EXTERNAL | ANV_BO_ALLOC_SCANOUT)) {
1576 alloc_flags |= ANV_BO_ALLOC_HOST_COHERENT;
1577 } else if (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
1578 if (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
1579 alloc_flags |= ANV_BO_ALLOC_HOST_COHERENT;
1580 if (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT)
1581 alloc_flags |= ANV_BO_ALLOC_HOST_CACHED;
1582 } else {
1583 /* Some host caching mode is required in order to pick a valid PAT index. */
1584 alloc_flags |= ANV_BO_ALLOC_HOST_COHERENT;
1585 }
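   /* To summarize the branch above for integrated parts (derived from the
    * checks, not an exhaustive spec): external or scanout BOs are forced
    * HOST_COHERENT; a HOST_VISIBLE memory type picks up HOST_COHERENT and/or
    * HOST_CACHED to mirror its property flags (both bits for a cached,
    * coherent type); and a device-local, non-host-visible type still gets
    * HOST_COHERENT so that a valid PAT index can be chosen.
    */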
1586
1587 /* Regular allocation path (not imported memory). */
1588
1589 result = anv_device_alloc_bo(device, "user", pAllocateInfo->allocationSize,
1590 alloc_flags, client_address, &mem->bo);
1591 if (result != VK_SUCCESS)
1592 goto fail;
1593
1594 if (dedicated_info && dedicated_info->image != VK_NULL_HANDLE) {
1595 ANV_FROM_HANDLE(anv_image, image, dedicated_info->image);
1596
1597 /* Some legacy (non-modifier) consumers need the tiling to be set on
1598 * the BO; in this case, we have a dedicated allocation.
1599 */
1600 if (image->vk.wsi_legacy_scanout) {
1601 const struct isl_surf *surf = &image->planes[0].primary_surface.isl;
1602 result = anv_device_set_bo_tiling(device, mem->bo,
1603 surf->row_pitch_B,
1604 surf->tiling);
1605 if (result != VK_SUCCESS) {
1606 anv_device_release_bo(device, mem->bo);
1607 goto fail;
1608 }
1609 }
1610 }
1611
1612 success:
1613 mem_heap_used = p_atomic_add_return(&mem_heap->used, mem->bo->size);
1614 if (mem_heap_used > mem_heap->size) {
1615 p_atomic_add(&mem_heap->used, -mem->bo->size);
1616 anv_device_release_bo(device, mem->bo);
1617 result = vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
1618 "Out of heap memory");
1619 goto fail;
1620 }
1621
1622 pthread_mutex_lock(&device->mutex);
1623 list_addtail(&mem->link, &device->memory_objects);
1624 pthread_mutex_unlock(&device->mutex);
1625
1626 ANV_RMV(heap_create, device, mem, false, 0);
1627
1628 *pMem = anv_device_memory_to_handle(mem);
1629
1630 return VK_SUCCESS;
1631
1632 fail:
1633 vk_device_memory_destroy(&device->vk, pAllocator, &mem->vk);
1634
1635 return result;
1636 }
1637
1638 VkResult anv_GetMemoryFdKHR(
1639 VkDevice device_h,
1640 const VkMemoryGetFdInfoKHR* pGetFdInfo,
1641 int* pFd)
1642 {
1643 ANV_FROM_HANDLE(anv_device, dev, device_h);
1644 ANV_FROM_HANDLE(anv_device_memory, mem, pGetFdInfo->memory);
1645
1646 assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
1647
1648 assert(pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
1649 pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
1650
1651 return anv_device_export_bo(dev, mem->bo, pFd);
1652 }
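/* A minimal application-side sketch (hypothetical handles) of calling the
 * entry point above to export an allocation as a dma-buf. Per the spec,
 * ownership of the returned fd transfers to the application, which is then
 * responsible for closing it.
 *
 *    VkMemoryGetFdInfoKHR get_fd = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
 *       .memory = memory,
 *       .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
 *    };
 *    int fd = -1;
 *    vkGetMemoryFdKHR(vk_device, &get_fd, &fd);
 *    // ... hand fd to another process/API, then close(fd) when done.
 */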
1653
1654 VkResult anv_GetMemoryFdPropertiesKHR(
1655 VkDevice _device,
1656 VkExternalMemoryHandleTypeFlagBits handleType,
1657 int fd,
1658 VkMemoryFdPropertiesKHR* pMemoryFdProperties)
1659 {
1660 ANV_FROM_HANDLE(anv_device, device, _device);
1661
1662 switch (handleType) {
1663 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
1664 /* dma-buf can be imported as any memory type */
1665 pMemoryFdProperties->memoryTypeBits =
1666 (1 << device->physical->memory.type_count) - 1;
1667 return VK_SUCCESS;
1668
1669 default:
1670 /* The valid usage section for this function says:
1671 *
1672 * "handleType must not be one of the handle types defined as
1673 * opaque."
1674 *
1675 * So opaque handle types fall into the default "unsupported" case.
1676 */
1677 return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
1678 }
1679 }
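/* A minimal application-side sketch (hypothetical fd and image requirements)
 * of the intended dma-buf import flow: query which memory types accept the
 * fd, then import it. The import consumes the fd, so dup() it first if it is
 * still needed afterwards.
 *
 *    VkMemoryFdPropertiesKHR fd_props = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR,
 *    };
 *    vkGetMemoryFdPropertiesKHR(vk_device,
 *                               VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
 *                               fd, &fd_props);
 *
 *    VkImportMemoryFdInfoKHR import = {
 *       .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
 *       .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
 *       .fd = dup(fd),
 *    };
 *    VkMemoryAllocateInfo alloc_info = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
 *       .pNext = &import,
 *       .allocationSize = mem_reqs.size,
 *       .memoryTypeIndex = ffs(fd_props.memoryTypeBits & mem_reqs.memoryTypeBits) - 1,
 *    };
 *    vkAllocateMemory(vk_device, &alloc_info, NULL, &memory);
 */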
1680
1681 VkResult anv_GetMemoryHostPointerPropertiesEXT(
1682 VkDevice _device,
1683 VkExternalMemoryHandleTypeFlagBits handleType,
1684 const void* pHostPointer,
1685 VkMemoryHostPointerPropertiesEXT* pMemoryHostPointerProperties)
1686 {
1687 ANV_FROM_HANDLE(anv_device, device, _device);
1688
1689 assert(pMemoryHostPointerProperties->sType ==
1690 VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT);
1691
1692 switch (handleType) {
1693 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT:
1694 /* Host memory can be imported as any memory type. */
1695 pMemoryHostPointerProperties->memoryTypeBits =
1696 (1ull << device->physical->memory.type_count) - 1;
1697
1698 return VK_SUCCESS;
1699
1700 default:
1701 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
1702 }
1703 }
1704
1705 void anv_FreeMemory(
1706 VkDevice _device,
1707 VkDeviceMemory _mem,
1708 const VkAllocationCallbacks* pAllocator)
1709 {
1710 ANV_FROM_HANDLE(anv_device, device, _device);
1711 ANV_FROM_HANDLE(anv_device_memory, mem, _mem);
1712
1713 if (mem == NULL)
1714 return;
1715
1716 pthread_mutex_lock(&device->mutex);
1717 list_del(&mem->link);
1718 pthread_mutex_unlock(&device->mutex);
1719
1720 if (mem->map) {
1721 const VkMemoryUnmapInfoKHR unmap = {
1722 .sType = VK_STRUCTURE_TYPE_MEMORY_UNMAP_INFO_KHR,
1723 .memory = _mem,
1724 };
1725 anv_UnmapMemory2KHR(_device, &unmap);
1726 }
1727
1728 p_atomic_add(&device->physical->memory.heaps[mem->type->heapIndex].used,
1729 -mem->bo->size);
1730
1731 anv_device_release_bo(device, mem->bo);
1732
1733 ANV_RMV(resource_destroy, device, mem);
1734
1735 vk_device_memory_destroy(&device->vk, pAllocator, &mem->vk);
1736 }
1737
1738 VkResult anv_MapMemory2KHR(
1739 VkDevice _device,
1740 const VkMemoryMapInfoKHR* pMemoryMapInfo,
1741 void** ppData)
1742 {
1743 ANV_FROM_HANDLE(anv_device, device, _device);
1744 ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryMapInfo->memory);
1745
1746 if (mem == NULL) {
1747 *ppData = NULL;
1748 return VK_SUCCESS;
1749 }
1750
1751 if (mem->vk.host_ptr) {
1752 *ppData = mem->vk.host_ptr + pMemoryMapInfo->offset;
1753 return VK_SUCCESS;
1754 }
1755
1756 /* From the Vulkan spec version 1.0.32 docs for MapMemory:
1757 *
1758 * * memory must have been created with a memory type that reports
1759 * VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
1760 */
1761 if (!(mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) {
1762 return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
1763 "Memory object not mappable.");
1764 }
1765
1766 assert(pMemoryMapInfo->size > 0);
1767 const VkDeviceSize offset = pMemoryMapInfo->offset;
1768 const VkDeviceSize size =
1769 vk_device_memory_range(&mem->vk, pMemoryMapInfo->offset,
1770 pMemoryMapInfo->size);
1771
1772 if (size != (size_t)size) {
1773 return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
1774 "requested size 0x%"PRIx64" does not fit in %u bits",
1775 size, (unsigned)(sizeof(size_t) * 8));
1776 }
1777
1778 /* From the Vulkan 1.2.194 spec:
1779 *
1780 * "memory must not be currently host mapped"
1781 */
1782 if (mem->map != NULL) {
1783 return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
1784 "Memory object already mapped.");
1785 }
1786
1787 void *placed_addr = NULL;
1788 if (pMemoryMapInfo->flags & VK_MEMORY_MAP_PLACED_BIT_EXT) {
1789 const VkMemoryMapPlacedInfoEXT *placed_info =
1790 vk_find_struct_const(pMemoryMapInfo->pNext, MEMORY_MAP_PLACED_INFO_EXT);
1791 assert(placed_info != NULL);
1792 placed_addr = placed_info->pPlacedAddress;
1793 }
1794
1795 /* GEM will fail to map if the offset isn't 4k-aligned. Round down. */
1796 uint64_t map_offset;
1797 if (!device->physical->info.has_mmap_offset)
1798 map_offset = offset & ~4095ull;
1799 else
1800 map_offset = 0;
1801 assert(offset >= map_offset);
1802 uint64_t map_size = (offset + size) - map_offset;
1803
1804 /* Let's map whole pages */
1805 map_size = align64(map_size, 4096);
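   /* Worked example of the rounding above (hypothetical values): with
    * offset = 5000 and size = 100 on a kernel without mmap_offset,
    * map_offset = 5000 & ~4095 = 4096 and map_size = (5000 + 100) - 4096 =
    * 1004, rounded up to 4096. map_delta below becomes 5000 - 4096 = 904, so
    * the pointer returned to the application is map + 904.
    */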
1806
1807 void *map;
1808 VkResult result = anv_device_map_bo(device, mem->bo, map_offset,
1809 map_size, placed_addr, &map);
1810 if (result != VK_SUCCESS)
1811 return result;
1812
1813 mem->map = map;
1814 mem->map_size = map_size;
1815 mem->map_delta = (offset - map_offset);
1816 *ppData = mem->map + mem->map_delta;
1817
1818 return VK_SUCCESS;
1819 }
1820
1821 VkResult anv_UnmapMemory2KHR(
1822 VkDevice _device,
1823 const VkMemoryUnmapInfoKHR* pMemoryUnmapInfo)
1824 {
1825 ANV_FROM_HANDLE(anv_device, device, _device);
1826 ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryUnmapInfo->memory);
1827
1828 if (mem == NULL || mem->vk.host_ptr)
1829 return VK_SUCCESS;
1830
1831 VkResult result =
1832 anv_device_unmap_bo(device, mem->bo, mem->map, mem->map_size,
1833 pMemoryUnmapInfo->flags & VK_MEMORY_UNMAP_RESERVE_BIT_EXT);
1834 if (result != VK_SUCCESS)
1835 return result;
1836
1837 mem->map = NULL;
1838 mem->map_size = 0;
1839 mem->map_delta = 0;
1840
1841 return VK_SUCCESS;
1842 }
1843
1844 VkResult anv_FlushMappedMemoryRanges(
1845 VkDevice _device,
1846 uint32_t memoryRangeCount,
1847 const VkMappedMemoryRange* pMemoryRanges)
1848 {
1849 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
1850 ANV_FROM_HANDLE(anv_device, device, _device);
1851
1852 if (!device->physical->memory.need_flush)
1853 return VK_SUCCESS;
1854
1855 /* Make sure the writes we're flushing have landed. */
1856 __builtin_ia32_mfence();
1857
1858 for (uint32_t i = 0; i < memoryRangeCount; i++) {
1859 ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryRanges[i].memory);
1860 if (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
1861 continue;
1862
1863 uint64_t map_offset = pMemoryRanges[i].offset + mem->map_delta;
1864 if (map_offset >= mem->map_size)
1865 continue;
1866
1867 intel_flush_range(mem->map + map_offset,
1868 MIN2(pMemoryRanges[i].size,
1869 mem->map_size - map_offset));
1870 }
1871 #endif
1872 return VK_SUCCESS;
1873 }
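/* A minimal application-side sketch (hypothetical mapping) of when the flush
 * above matters: after CPU writes through a mapping of a HOST_VISIBLE but
 * non-HOST_COHERENT memory type. Range offset/size should respect
 * nonCoherentAtomSize (or use VK_WHOLE_SIZE). The mirror operation,
 * vkInvalidateMappedMemoryRanges() below, is needed before CPU reads of
 * GPU-written data on such memory types.
 *
 *    memcpy(mapped_ptr, src, len);
 *    VkMappedMemoryRange range = {
 *       .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
 *       .memory = memory,
 *       .offset = 0,
 *       .size = VK_WHOLE_SIZE,
 *    };
 *    vkFlushMappedMemoryRanges(vk_device, 1, &range);
 */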
1874
1875 VkResult anv_InvalidateMappedMemoryRanges(
1876 VkDevice _device,
1877 uint32_t memoryRangeCount,
1878 const VkMappedMemoryRange* pMemoryRanges)
1879 {
1880 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
1881 ANV_FROM_HANDLE(anv_device, device, _device);
1882
1883 if (!device->physical->memory.need_flush)
1884 return VK_SUCCESS;
1885
1886 for (uint32_t i = 0; i < memoryRangeCount; i++) {
1887 ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryRanges[i].memory);
1888 if (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
1889 continue;
1890
1891 uint64_t map_offset = pMemoryRanges[i].offset + mem->map_delta;
1892 if (map_offset >= mem->map_size)
1893 continue;
1894
1895 intel_invalidate_range(mem->map + map_offset,
1896 MIN2(pMemoryRanges[i].size,
1897 mem->map_size - map_offset));
1898 }
1899
1900 /* Make sure no reads get moved up above the invalidate. */
1901 __builtin_ia32_mfence();
1902 #endif
1903 return VK_SUCCESS;
1904 }
1905
1906 void anv_GetDeviceMemoryCommitment(
1907 VkDevice device,
1908 VkDeviceMemory memory,
1909 VkDeviceSize* pCommittedMemoryInBytes)
1910 {
1911 *pCommittedMemoryInBytes = 0;
1912 }
1913
1914 static inline clockid_t
1915 anv_get_default_cpu_clock_id(void)
1916 {
1917 #ifdef CLOCK_MONOTONIC_RAW
1918 return CLOCK_MONOTONIC_RAW;
1919 #else
1920 return CLOCK_MONOTONIC;
1921 #endif
1922 }
1923
1924 static inline clockid_t
1925 vk_time_domain_to_clockid(VkTimeDomainKHR domain)
1926 {
1927 switch (domain) {
1928 #ifdef CLOCK_MONOTONIC_RAW
1929 case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR:
1930 return CLOCK_MONOTONIC_RAW;
1931 #endif
1932 case VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR:
1933 return CLOCK_MONOTONIC;
1934 default:
1935 unreachable("Missing");
1936 return CLOCK_MONOTONIC;
1937 }
1938 }
1939
1940 static inline bool
1941 is_cpu_time_domain(VkTimeDomainKHR domain)
1942 {
1943 return domain == VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR ||
1944 domain == VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR;
1945 }
1946
1947 static inline bool
1948 is_gpu_time_domain(VkTimeDomainKHR domain)
1949 {
1950 return domain == VK_TIME_DOMAIN_DEVICE_KHR;
1951 }
1952
1953 VkResult anv_GetCalibratedTimestampsKHR(
1954 VkDevice _device,
1955 uint32_t timestampCount,
1956 const VkCalibratedTimestampInfoKHR *pTimestampInfos,
1957 uint64_t *pTimestamps,
1958 uint64_t *pMaxDeviation)
1959 {
1960 ANV_FROM_HANDLE(anv_device, device, _device);
1961 const uint64_t timestamp_frequency = device->info->timestamp_frequency;
1962 const uint64_t device_period = DIV_ROUND_UP(1000000000, timestamp_frequency);
1963 uint32_t d, increment;
1964 uint64_t begin, end;
1965 uint64_t max_clock_period = 0;
1966 const enum intel_kmd_type kmd_type = device->physical->info.kmd_type;
1967 const bool has_correlate_timestamp = kmd_type == INTEL_KMD_TYPE_XE;
1968 clockid_t cpu_clock_id = -1;
1969
1970 begin = end = vk_clock_gettime(anv_get_default_cpu_clock_id());
1971
1972 for (d = 0, increment = 1; d < timestampCount; d += increment) {
1973 const VkTimeDomainKHR current = pTimestampInfos[d].timeDomain;
1974 /* If we have a request pattern like this:
1975 * - domain0 = VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR or VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR
1976 * - domain1 = VK_TIME_DOMAIN_DEVICE_KHR
1977 * - domain2 = domain0 (optional)
1978 *
1979 * We can combine all of those into a single ioctl for maximum accuracy.
1980 */
1981 if (has_correlate_timestamp && (d + 1) < timestampCount) {
1982 const VkTimeDomainKHR next = pTimestampInfos[d + 1].timeDomain;
1983
1984 if ((is_cpu_time_domain(current) && is_gpu_time_domain(next)) ||
1985 (is_gpu_time_domain(current) && is_cpu_time_domain(next))) {
1986 /* We'll consume at least 2 elements. */
1987 increment = 2;
1988
1989 if (is_cpu_time_domain(current))
1990 cpu_clock_id = vk_time_domain_to_clockid(current);
1991 else
1992 cpu_clock_id = vk_time_domain_to_clockid(next);
1993
1994 uint64_t cpu_timestamp, gpu_timestamp, cpu_delta_timestamp, cpu_end_timestamp;
1995 if (!intel_gem_read_correlate_cpu_gpu_timestamp(device->fd,
1996 kmd_type,
1997 INTEL_ENGINE_CLASS_RENDER,
1998 0 /* engine_instance */,
1999 cpu_clock_id,
2000 &cpu_timestamp,
2001 &gpu_timestamp,
2002 &cpu_delta_timestamp))
2003 return vk_device_set_lost(&device->vk, "Failed to read correlate timestamp %m");
2004
2005 cpu_end_timestamp = cpu_timestamp + cpu_delta_timestamp;
2006 if (is_cpu_time_domain(current)) {
2007 pTimestamps[d] = cpu_timestamp;
2008 pTimestamps[d + 1] = gpu_timestamp;
2009 } else {
2010 pTimestamps[d] = gpu_timestamp;
2011 pTimestamps[d + 1] = cpu_end_timestamp;
2012 }
2013 max_clock_period = MAX2(max_clock_period, device_period);
2014
2015 /* If we can consume a third element */
2016 if ((d + 2) < timestampCount &&
2017 is_cpu_time_domain(current) &&
2018 current == pTimestampInfos[d + 2].timeDomain) {
2019 pTimestamps[d + 2] = cpu_end_timestamp;
2020 increment++;
2021 }
2022
2023 /* If we're the first element, we can replace begin */
2024 if (d == 0 && cpu_clock_id == anv_get_default_cpu_clock_id())
2025 begin = cpu_timestamp;
2026
2027 /* If we're in the same clock domain as begin/end, we can set the end. */
2028 if (cpu_clock_id == anv_get_default_cpu_clock_id())
2029 end = cpu_end_timestamp;
2030
2031 continue;
2032 }
2033 }
2034
2035 /* Fall back to the regular method. */
2036 increment = 1;
2037 switch (current) {
2038 case VK_TIME_DOMAIN_DEVICE_KHR:
2039 if (!intel_gem_read_render_timestamp(device->fd,
2040 device->info->kmd_type,
2041 &pTimestamps[d])) {
2042 return vk_device_set_lost(&device->vk, "Failed to read the "
2043 "TIMESTAMP register: %m");
2044 }
2045 max_clock_period = MAX2(max_clock_period, device_period);
2046 break;
2047 case VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR:
2048 pTimestamps[d] = vk_clock_gettime(CLOCK_MONOTONIC);
2049 max_clock_period = MAX2(max_clock_period, 1);
2050 break;
2051
2052 #ifdef CLOCK_MONOTONIC_RAW
2053 case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR:
2054 pTimestamps[d] = begin;
2055 break;
2056 #endif
2057 default:
2058 pTimestamps[d] = 0;
2059 break;
2060 }
2061 }
2062
2063 /* If the last timestamp was not obtained with the correlated-timestamp
2064 * method, or if it was but the last CPU clock is not the default one,
2065 * read the time again. */
2066 if (increment == 1 || cpu_clock_id != anv_get_default_cpu_clock_id())
2067 end = vk_clock_gettime(anv_get_default_cpu_clock_id());
2068
2069 *pMaxDeviation = vk_time_max_deviation(begin, end, max_clock_period);
2070
2071 return VK_SUCCESS;
2072 }
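/* A minimal application-side sketch (hypothetical ordering) of a request that
 * can take the single-ioctl correlated path above on Xe: a CPU domain
 * immediately followed by the GPU domain.
 *
 *    VkCalibratedTimestampInfoKHR infos[2] = {
 *       { .sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_KHR,
 *         .timeDomain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR },
 *       { .sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_KHR,
 *         .timeDomain = VK_TIME_DOMAIN_DEVICE_KHR },
 *    };
 *    uint64_t timestamps[2], max_deviation;
 *    vkGetCalibratedTimestampsKHR(vk_device, 2, infos, timestamps, &max_deviation);
 */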
2073
2074 const struct intel_device_info_pat_entry *
2075 anv_device_get_pat_entry(struct anv_device *device,
2076 enum anv_bo_alloc_flags alloc_flags)
2077 {
2078 if (alloc_flags & ANV_BO_ALLOC_IMPORTED)
2079 return &device->info->pat.cached_coherent;
2080
2081 if (alloc_flags & ANV_BO_ALLOC_COMPRESSED)
2082 return &device->info->pat.compressed;
2083
2084 /* PAT indices have no actual effect on DG2 and DG1: smem will always be
2085 * snooped by the GPU and lmem will always be WC.
2086 * This might change on future discrete platforms.
2087 */
2088 if (anv_physical_device_has_vram(device->physical)) {
2089 if (alloc_flags & ANV_BO_ALLOC_NO_LOCAL_MEM)
2090 return &device->info->pat.cached_coherent;
2091 return &device->info->pat.writecombining;
2092 }
2093
2094 /* Integrated platform handling only from here on. */
2095 if ((alloc_flags & (ANV_BO_ALLOC_HOST_CACHED_COHERENT)) == ANV_BO_ALLOC_HOST_CACHED_COHERENT)
2096 return &device->info->pat.cached_coherent;
2097 else if (alloc_flags & (ANV_BO_ALLOC_EXTERNAL | ANV_BO_ALLOC_SCANOUT))
2098 return &device->info->pat.scanout;
2099 else if (alloc_flags & ANV_BO_ALLOC_HOST_CACHED)
2100 return &device->info->pat.writeback_incoherent;
2101 else
2102 return &device->info->pat.writecombining;
2103 }
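/* Summary of the selection above on integrated platforms, in priority order
 * (derived from the checks in anv_device_get_pat_entry(), not a hardware
 * spec): IMPORTED -> cached_coherent; else COMPRESSED -> compressed; else
 * HOST_CACHED_COHERENT (both bits set) -> cached_coherent; else EXTERNAL or
 * SCANOUT -> scanout; else HOST_CACHED alone -> writeback_incoherent;
 * everything else -> writecombining.
 */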
2104