1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <inttypes.h>
26 #include <stdbool.h>
27 #include <fcntl.h>
28 #include "drm-uapi/drm_fourcc.h"
29 #include "drm-uapi/drm.h"
30 #include <xf86drm.h>
31 
32 #include "anv_private.h"
33 #include "anv_measure.h"
34 #include "util/u_debug.h"
35 #include "util/os_file.h"
36 #include "util/os_misc.h"
37 #include "util/u_atomic.h"
38 #if DETECT_OS_ANDROID
39 #include "util/u_gralloc/u_gralloc.h"
40 #endif
41 #include "util/u_string.h"
42 #include "vk_common_entrypoints.h"
43 #include "vk_util.h"
44 #include "vk_deferred_operation.h"
45 #include "vk_drm_syncobj.h"
46 #include "common/intel_aux_map.h"
47 #include "common/intel_common.h"
48 #include "common/intel_debug_identifier.h"
49 
50 #include "i915/anv_device.h"
51 #include "xe/anv_device.h"
52 
53 #include "genxml/gen7_pack.h"
54 #include "genxml/genX_bits.h"
55 
static void
anv_device_init_border_colors(struct anv_device *device)
{
59    static const struct gfx8_border_color border_colors[] = {
60       [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] =  { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
61       [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] =       { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
62       [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] =       { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
63       [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] =    { .uint32 = { 0, 0, 0, 0 } },
64       [VK_BORDER_COLOR_INT_OPAQUE_BLACK] =         { .uint32 = { 0, 0, 0, 1 } },
65       [VK_BORDER_COLOR_INT_OPAQUE_WHITE] =         { .uint32 = { 1, 1, 1, 1 } },
66    };
67 
68    device->border_colors =
69       anv_state_pool_emit_data(&device->dynamic_state_pool,
70                                sizeof(border_colors), 64, border_colors);
71 }
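/* Note that the table above is indexed directly by VkBorderColor, hence the
 * designated initializers.  Custom border colors get their own reserved pool
 * set up later in anv_CreateDevice() (see anv_state_reserved_array_pool_init
 * below).
 */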
72 
static VkResult
anv_device_init_trivial_batch(struct anv_device *device)
{
76    VkResult result = anv_device_alloc_bo(device, "trivial-batch", 4096,
77                                          ANV_BO_ALLOC_MAPPED |
78                                          ANV_BO_ALLOC_HOST_COHERENT |
79                                          ANV_BO_ALLOC_INTERNAL |
80                                          ANV_BO_ALLOC_CAPTURE,
81                                          0 /* explicit_address */,
82                                          &device->trivial_batch_bo);
83    if (result != VK_SUCCESS)
84       return result;
85 
86    struct anv_batch batch = {
87       .start = device->trivial_batch_bo->map,
88       .next = device->trivial_batch_bo->map,
89       .end = device->trivial_batch_bo->map + 4096,
90    };
91 
92    anv_batch_emit(&batch, GFX7_MI_BATCH_BUFFER_END, bbe);
93    anv_batch_emit(&batch, GFX7_MI_NOOP, noop);
94 
95    return VK_SUCCESS;
96 }
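/* The trivial batch is a 4 KiB BO containing nothing but MI_BATCH_BUFFER_END
 * followed by MI_NOOP padding; the assumption is that it gets submitted
 * wherever a batch buffer is required but there is no actual work to run.
 */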
97 
static bool
get_bo_from_pool(struct intel_batch_decode_bo *ret,
                 struct anv_block_pool *pool,
                 uint64_t address)
{
103    anv_block_pool_foreach_bo(bo, pool) {
104       uint64_t bo_address = intel_48b_address(bo->offset);
105       if (address >= bo_address && address < (bo_address + bo->size)) {
106          *ret = (struct intel_batch_decode_bo) {
107             .addr = bo_address,
108             .size = bo->size,
109             .map = bo->map,
110          };
111          return true;
112       }
113    }
114    return false;
115 }
116 
/* Find a BO for batch decoding: search the driver-internal state pools
 * first, then the command buffer currently being decoded and the BOs it
 * depends on.
 */
static struct intel_batch_decode_bo
decode_get_bo(void *v_batch, bool ppgtt, uint64_t address)
{
121    struct anv_device *device = v_batch;
122    struct intel_batch_decode_bo ret_bo = {};
123 
124    assert(ppgtt);
125 
126    if (get_bo_from_pool(&ret_bo, &device->dynamic_state_pool.block_pool, address))
127       return ret_bo;
128    if (get_bo_from_pool(&ret_bo, &device->instruction_state_pool.block_pool, address))
129       return ret_bo;
130    if (get_bo_from_pool(&ret_bo, &device->binding_table_pool.block_pool, address))
131       return ret_bo;
132    if (get_bo_from_pool(&ret_bo, &device->scratch_surface_state_pool.block_pool, address))
133       return ret_bo;
134    if (device->physical->indirect_descriptors &&
135        get_bo_from_pool(&ret_bo, &device->bindless_surface_state_pool.block_pool, address))
136       return ret_bo;
137    if (get_bo_from_pool(&ret_bo, &device->internal_surface_state_pool.block_pool, address))
138       return ret_bo;
139    if (device->physical->indirect_descriptors &&
140        get_bo_from_pool(&ret_bo, &device->indirect_push_descriptor_pool.block_pool, address))
141       return ret_bo;
142    if (device->info->has_aux_map &&
143        get_bo_from_pool(&ret_bo, &device->aux_tt_pool.block_pool, address))
144       return ret_bo;
145 
146    if (!device->cmd_buffer_being_decoded)
147       return (struct intel_batch_decode_bo) { };
148 
149    struct anv_batch_bo **bbo;
150    u_vector_foreach(bbo, &device->cmd_buffer_being_decoded->seen_bbos) {
151       /* The decoder zeroes out the top 16 bits, so we need to as well */
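      /* (~0ull >> 16) keeps the low 48 bits, matching the truncation that
       * intel_48b_address() applied to the pool BOs above.
       */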
152       uint64_t bo_address = (*bbo)->bo->offset & (~0ull >> 16);
153 
154       if (address >= bo_address && address < bo_address + (*bbo)->bo->size) {
155          return (struct intel_batch_decode_bo) {
156             .addr = bo_address,
157             .size = (*bbo)->bo->size,
158             .map = (*bbo)->bo->map,
159          };
160       }
161 
162       uint32_t dep_words = (*bbo)->relocs.dep_words;
163       BITSET_WORD *deps = (*bbo)->relocs.deps;
164       for (uint32_t w = 0; w < dep_words; w++) {
165          BITSET_WORD mask = deps[w];
166          while (mask) {
167             int i = u_bit_scan(&mask);
168             uint32_t gem_handle = w * BITSET_WORDBITS + i;
169             struct anv_bo *bo = anv_device_lookup_bo(device, gem_handle);
170             assert(bo->refcount > 0);
171             bo_address = bo->offset & (~0ull >> 16);
172             if (address >= bo_address && address < bo_address + bo->size) {
173                return (struct intel_batch_decode_bo) {
174                   .addr = bo_address,
175                   .size = bo->size,
176                   .map = bo->map,
177                };
178             }
179          }
180       }
181    }
182 
183    return (struct intel_batch_decode_bo) { };
184 }
185 
186 struct intel_aux_map_buffer {
187    struct intel_buffer base;
188    struct anv_state state;
189 };
190 
static struct intel_buffer *
intel_aux_map_buffer_alloc(void *driver_ctx, uint32_t size)
{
194    struct intel_aux_map_buffer *buf = malloc(sizeof(struct intel_aux_map_buffer));
195    if (!buf)
196       return NULL;
197 
198    struct anv_device *device = (struct anv_device*)driver_ctx;
199 
200    struct anv_state_pool *pool = &device->aux_tt_pool;
201    buf->state = anv_state_pool_alloc(pool, size, size);
202 
203    buf->base.gpu = pool->block_pool.bo->offset + buf->state.offset;
204    buf->base.gpu_end = buf->base.gpu + buf->state.alloc_size;
205    buf->base.map = buf->state.map;
206    buf->base.driver_bo = &buf->state;
207    return &buf->base;
208 }
209 
static void
intel_aux_map_buffer_free(void *driver_ctx, struct intel_buffer *buffer)
{
213    struct intel_aux_map_buffer *buf = (struct intel_aux_map_buffer*)buffer;
214    struct anv_device *device = (struct anv_device*)driver_ctx;
215    struct anv_state_pool *pool = &device->aux_tt_pool;
216    anv_state_pool_free(pool, buf->state);
217    free(buf);
218 }
219 
220 static struct intel_mapped_pinned_buffer_alloc aux_map_allocator = {
221    .alloc = intel_aux_map_buffer_alloc,
222    .free = intel_aux_map_buffer_free,
223 };
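/* intel_aux_map_init() (called below when device->info->has_aux_map) keeps a
 * pointer to this vtable and calls back into .alloc/.free whenever the aux
 * translation tables need more backing memory; the exact call sites live in
 * common/intel_aux_map.c and are assumed here rather than shown.
 */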
224 
static VkResult
anv_device_setup_context_or_vm(struct anv_device *device,
                               const VkDeviceCreateInfo *pCreateInfo,
                               const uint32_t num_queues)
{
230    switch (device->info->kmd_type) {
231    case INTEL_KMD_TYPE_I915:
232       return anv_i915_device_setup_context(device, pCreateInfo, num_queues);
233    case INTEL_KMD_TYPE_XE:
234       return anv_xe_device_setup_vm(device);
235    default:
236       unreachable("Missing");
237       return VK_ERROR_UNKNOWN;
238    }
239 }
240 
static bool
anv_device_destroy_context_or_vm(struct anv_device *device)
{
244    switch (device->info->kmd_type) {
245    case INTEL_KMD_TYPE_I915:
246       if (device->physical->has_vm_control)
247          return anv_i915_device_destroy_vm(device);
248       else
249          return intel_gem_destroy_context(device->fd, device->context_id);
250    case INTEL_KMD_TYPE_XE:
251       return anv_xe_device_destroy_vm(device);
252    default:
253       unreachable("Missing");
254       return false;
255    }
256 }
257 
static VkResult
anv_device_init_trtt(struct anv_device *device)
{
261    if (device->physical->sparse_type != ANV_SPARSE_TYPE_TRTT ||
262        !device->vk.enabled_features.sparseBinding)
263       return VK_SUCCESS;
264 
265    struct anv_trtt *trtt = &device->trtt;
266 
267    VkResult result =
268       vk_sync_create(&device->vk,
269                      &device->physical->sync_syncobj_type,
270                      VK_SYNC_IS_TIMELINE,
271                      0 /* initial_value */,
272                      &trtt->timeline);
273    if (result != VK_SUCCESS)
274       return result;
275 
276    simple_mtx_init(&trtt->mutex, mtx_plain);
277 
278    list_inithead(&trtt->in_flight_batches);
279 
280    return VK_SUCCESS;
281 }
282 
static void
anv_device_finish_trtt(struct anv_device *device)
{
286    if (device->physical->sparse_type != ANV_SPARSE_TYPE_TRTT ||
287        !device->vk.enabled_features.sparseBinding)
288       return;
289 
290    struct anv_trtt *trtt = &device->trtt;
291 
292    anv_sparse_trtt_garbage_collect_batches(device, true);
293 
294    vk_sync_destroy(&device->vk, trtt->timeline);
295 
296    simple_mtx_destroy(&trtt->mutex);
297 
298    vk_free(&device->vk.alloc, trtt->l3_mirror);
299    vk_free(&device->vk.alloc, trtt->l2_mirror);
300 
301    for (int i = 0; i < trtt->num_page_table_bos; i++)
302       anv_device_release_bo(device, trtt->page_table_bos[i]);
303 
304    vk_free(&device->vk.alloc, trtt->page_table_bos);
305 }
306 
VkResult anv_CreateDevice(
    VkPhysicalDevice                            physicalDevice,
    const VkDeviceCreateInfo*                   pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkDevice*                                   pDevice)
{
313    ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
314    VkResult result;
315    struct anv_device *device;
316 
317    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
318 
319    /* Check requested queues and fail if we are requested to create any
320     * queues with flags we don't support.
321     */
322    assert(pCreateInfo->queueCreateInfoCount > 0);
323    for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
324       if (pCreateInfo->pQueueCreateInfos[i].flags & ~VK_DEVICE_QUEUE_CREATE_PROTECTED_BIT)
325          return vk_error(physical_device, VK_ERROR_INITIALIZATION_FAILED);
326    }
327 
328    device = vk_zalloc2(&physical_device->instance->vk.alloc, pAllocator,
329                        sizeof(*device), 8,
330                        VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
331    if (!device)
332       return vk_error(physical_device, VK_ERROR_OUT_OF_HOST_MEMORY);
333 
334    struct vk_device_dispatch_table dispatch_table;
335 
336    bool override_initial_entrypoints = true;
337    if (physical_device->instance->vk.app_info.app_name &&
338        !strcmp(physical_device->instance->vk.app_info.app_name, "HITMAN3.exe")) {
339       vk_device_dispatch_table_from_entrypoints(&dispatch_table,
340                                                 &anv_hitman3_device_entrypoints,
341                                                 true);
342       override_initial_entrypoints = false;
343    }
344    if (physical_device->info.ver < 12 &&
345        physical_device->instance->vk.app_info.app_name &&
346        !strcmp(physical_device->instance->vk.app_info.app_name, "DOOM 64")) {
347       vk_device_dispatch_table_from_entrypoints(&dispatch_table,
348                                                 &anv_doom64_device_entrypoints,
349                                                 true);
350       override_initial_entrypoints = false;
351    }
352 #if DETECT_OS_ANDROID
353    vk_device_dispatch_table_from_entrypoints(&dispatch_table,
354                                              &anv_android_device_entrypoints,
355                                              true);
356    override_initial_entrypoints = false;
357 #endif
358    if (physical_device->instance->vk.trace_mode & VK_TRACE_MODE_RMV) {
359       vk_device_dispatch_table_from_entrypoints(&dispatch_table,
360                                                 &anv_rmv_device_entrypoints,
361                                                 true);
362       override_initial_entrypoints = false;
363    }
364    vk_device_dispatch_table_from_entrypoints(&dispatch_table,
365       anv_genX(&physical_device->info, device_entrypoints),
366       override_initial_entrypoints);
367    vk_device_dispatch_table_from_entrypoints(&dispatch_table,
368       &anv_device_entrypoints, false);
369    vk_device_dispatch_table_from_entrypoints(&dispatch_table,
370       &wsi_device_entrypoints, false);
371 
372 
373    result = vk_device_init(&device->vk, &physical_device->vk,
374                            &dispatch_table, pCreateInfo, pAllocator);
375    if (result != VK_SUCCESS)
376       goto fail_alloc;
377 
378    if (INTEL_DEBUG(DEBUG_BATCH | DEBUG_BATCH_STATS)) {
379       for (unsigned i = 0; i < physical_device->queue.family_count; i++) {
380          struct intel_batch_decode_ctx *decoder = &device->decoder[i];
381 
382          const unsigned decode_flags = INTEL_BATCH_DECODE_DEFAULT_FLAGS;
383 
384          intel_batch_decode_ctx_init_brw(decoder,
385                                          &physical_device->compiler->isa,
386                                          &physical_device->info,
387                                          stderr, decode_flags, NULL,
388                                          decode_get_bo, NULL, device);
389          intel_batch_stats_reset(decoder);
390 
391          decoder->engine = physical_device->queue.families[i].engine_class;
392          decoder->dynamic_base = physical_device->va.dynamic_state_pool.addr;
393          decoder->surface_base = physical_device->va.internal_surface_state_pool.addr;
394          decoder->instruction_base = physical_device->va.instruction_state_pool.addr;
395       }
396    }
397 
398    anv_device_set_physical(device, physical_device);
399    device->kmd_backend = anv_kmd_backend_get(device->info->kmd_type);
400 
401    /* XXX(chadv): Can we dup() physicalDevice->fd here? */
402    device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC);
403    if (device->fd == -1) {
404       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
405       goto fail_device;
406    }
407 
408    switch (device->info->kmd_type) {
409    case INTEL_KMD_TYPE_I915:
410       device->vk.check_status = anv_i915_device_check_status;
411       break;
412    case INTEL_KMD_TYPE_XE:
413       device->vk.check_status = anv_xe_device_check_status;
414       break;
415    default:
416       unreachable("Missing");
417    }
418 
419    device->vk.command_buffer_ops = &anv_cmd_buffer_ops;
420    device->vk.create_sync_for_memory = anv_create_sync_for_memory;
421    if (physical_device->info.kmd_type == INTEL_KMD_TYPE_I915)
422       device->vk.create_sync_for_memory = anv_create_sync_for_memory;
423    vk_device_set_drm_fd(&device->vk, device->fd);
424 
425    uint32_t num_queues = 0;
426    for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++)
427       num_queues += pCreateInfo->pQueueCreateInfos[i].queueCount;
428 
429    result = anv_device_setup_context_or_vm(device, pCreateInfo, num_queues);
430    if (result != VK_SUCCESS)
431       goto fail_fd;
432 
433    device->queues =
434       vk_zalloc(&device->vk.alloc, num_queues * sizeof(*device->queues), 8,
435                 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
436    if (device->queues == NULL) {
437       result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
438       goto fail_context_id;
439    }
440 
441    if (pthread_mutex_init(&device->vma_mutex, NULL) != 0) {
442       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
443       goto fail_queues_alloc;
444    }
445 
446    /* keep the page with address zero out of the allocator */
447    util_vma_heap_init(&device->vma_lo,
448                       device->physical->va.low_heap.addr,
449                       device->physical->va.low_heap.size);
450 
451    util_vma_heap_init(&device->vma_hi,
452                       device->physical->va.high_heap.addr,
453                       device->physical->va.high_heap.size);
454 
455    if (device->physical->indirect_descriptors) {
456       util_vma_heap_init(&device->vma_desc,
457                          device->physical->va.indirect_descriptor_pool.addr,
458                          device->physical->va.indirect_descriptor_pool.size);
459    } else {
460       util_vma_heap_init(&device->vma_desc,
461                          device->physical->va.bindless_surface_state_pool.addr,
462                          device->physical->va.bindless_surface_state_pool.size);
463    }
464 
   /* Always initialized because the memory types point to this and they
    * are on the physical device.
    */
468    util_vma_heap_init(&device->vma_dynamic_visible,
469                       device->physical->va.dynamic_visible_pool.addr,
470                       device->physical->va.dynamic_visible_pool.size);
471    util_vma_heap_init(&device->vma_trtt,
472                       device->physical->va.trtt.addr,
473                       device->physical->va.trtt.size);
474 
475    list_inithead(&device->memory_objects);
476    list_inithead(&device->image_private_objects);
477 
478    if (pthread_mutex_init(&device->mutex, NULL) != 0) {
479       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
480       goto fail_vmas;
481    }
482 
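   /* Use a CLOCK_MONOTONIC condition variable so that any timed waits on
    * queue_submit use the monotonic clock and are unaffected by wall-clock
    * changes (standard pthread behavior, noted here for clarity).
    */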
483    pthread_condattr_t condattr;
484    if (pthread_condattr_init(&condattr) != 0) {
485       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
486       goto fail_mutex;
487    }
488    if (pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC) != 0) {
489       pthread_condattr_destroy(&condattr);
490       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
491       goto fail_mutex;
492    }
493    if (pthread_cond_init(&device->queue_submit, &condattr) != 0) {
494       pthread_condattr_destroy(&condattr);
495       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
496       goto fail_mutex;
497    }
498    pthread_condattr_destroy(&condattr);
499 
500    if (physical_device->instance->vk.trace_mode & VK_TRACE_MODE_RMV)
501       anv_memory_trace_init(device);
502 
503    result = anv_bo_cache_init(&device->bo_cache, device);
504    if (result != VK_SUCCESS)
505       goto fail_queue_cond;
506 
507    anv_bo_pool_init(&device->batch_bo_pool, device, "batch",
508                     ANV_BO_ALLOC_MAPPED |
509                     ANV_BO_ALLOC_HOST_CACHED_COHERENT |
510                     ANV_BO_ALLOC_CAPTURE);
511    if (device->vk.enabled_extensions.KHR_acceleration_structure) {
512       anv_bo_pool_init(&device->bvh_bo_pool, device, "bvh build",
513                        0 /* alloc_flags */);
514    }
515 
516    /* Because scratch is also relative to General State Base Address, we leave
517     * the base address 0 and start the pool memory at an offset.  This way we
518     * get the correct offsets in the anv_states that get allocated from it.
519     */
520    result = anv_state_pool_init(&device->general_state_pool, device,
521                                 &(struct anv_state_pool_params) {
522                                    .name         = "general pool",
523                                    .base_address = 0,
524                                    .start_offset = device->physical->va.general_state_pool.addr,
525                                    .block_size   = 16384,
526                                    .max_size     = device->physical->va.general_state_pool.size
527                                 });
528    if (result != VK_SUCCESS)
529       goto fail_batch_bo_pool;
530 
531    result = anv_state_pool_init(&device->dynamic_state_pool, device,
532                                 &(struct anv_state_pool_params) {
533                                    .name         = "dynamic pool",
534                                    .base_address = device->physical->va.dynamic_state_pool.addr,
535                                    .block_size   = 16384,
536                                    .max_size     = device->physical->va.dynamic_state_pool.size,
537                                 });
538    if (result != VK_SUCCESS)
539       goto fail_general_state_pool;
540 
541    /* The border color pointer is limited to 24 bits, so we need to make
542     * sure that any such color used at any point in the program doesn't
543     * exceed that limit.
544     * We achieve that by reserving all the custom border colors we support
545     * right off the bat, so they are close to the base address.
546     */
547    result = anv_state_reserved_array_pool_init(&device->custom_border_colors,
548                                                &device->dynamic_state_pool,
549                                                MAX_CUSTOM_BORDER_COLORS,
550                                                sizeof(struct gfx8_border_color), 64);
551    if (result != VK_SUCCESS)
552       goto fail_dynamic_state_pool;
553 
554    result = anv_state_pool_init(&device->instruction_state_pool, device,
555                                 &(struct anv_state_pool_params) {
556                                    .name         = "instruction pool",
557                                    .base_address = device->physical->va.instruction_state_pool.addr,
558                                    .block_size   = 16384,
559                                    .max_size     = device->physical->va.instruction_state_pool.size,
560                                 });
561    if (result != VK_SUCCESS)
562       goto fail_custom_border_color_pool;
563 
564    if (device->info->verx10 >= 125) {
565       /* Put the scratch surface states at the beginning of the internal
566        * surface state pool.
567        */
568       result = anv_state_pool_init(&device->scratch_surface_state_pool, device,
569                                    &(struct anv_state_pool_params) {
570                                       .name         = "scratch surface state pool",
571                                       .base_address = device->physical->va.scratch_surface_state_pool.addr,
572                                       .block_size   = 4096,
573                                       .max_size     = device->physical->va.scratch_surface_state_pool.size,
574                                    });
575       if (result != VK_SUCCESS)
576          goto fail_instruction_state_pool;
577 
578       result = anv_state_pool_init(&device->internal_surface_state_pool, device,
579                                    &(struct anv_state_pool_params) {
580                                       .name         = "internal surface state pool",
581                                       .base_address = device->physical->va.internal_surface_state_pool.addr,
582                                       .start_offset = device->physical->va.scratch_surface_state_pool.size,
583                                       .block_size   = 4096,
584                                       .max_size     = device->physical->va.internal_surface_state_pool.size,
585                                    });
586    } else {
587       result = anv_state_pool_init(&device->internal_surface_state_pool, device,
588                                    &(struct anv_state_pool_params) {
589                                       .name         = "internal surface state pool",
590                                       .base_address = device->physical->va.internal_surface_state_pool.addr,
591                                       .block_size   = 4096,
592                                       .max_size     = device->physical->va.internal_surface_state_pool.size,
593                                    });
594    }
595    if (result != VK_SUCCESS)
596       goto fail_scratch_surface_state_pool;
597 
598    if (device->physical->indirect_descriptors) {
599       result = anv_state_pool_init(&device->bindless_surface_state_pool, device,
600                                    &(struct anv_state_pool_params) {
601                                       .name         = "bindless surface state pool",
602                                       .base_address = device->physical->va.bindless_surface_state_pool.addr,
603                                       .block_size   = 4096,
604                                       .max_size     = device->physical->va.bindless_surface_state_pool.size,
605                                    });
606       if (result != VK_SUCCESS)
607          goto fail_internal_surface_state_pool;
608    }
609 
610    if (device->info->verx10 >= 125) {
611       /* We're using 3DSTATE_BINDING_TABLE_POOL_ALLOC to give the binding
612        * table its own base address separately from surface state base.
613        */
614       result = anv_state_pool_init(&device->binding_table_pool, device,
615                                    &(struct anv_state_pool_params) {
616                                       .name         = "binding table pool",
617                                       .base_address = device->physical->va.binding_table_pool.addr,
618                                       .block_size   = BINDING_TABLE_POOL_BLOCK_SIZE,
619                                       .max_size     = device->physical->va.binding_table_pool.size,
620                                    });
621    } else {
      /* The binding table should be in front of the surface states in virtual
       * address space so that all surface states can be expressed as relative
       * offsets from the binding table location.
       */
626       assert(device->physical->va.binding_table_pool.addr <
627              device->physical->va.internal_surface_state_pool.addr);
628       int64_t bt_pool_offset = (int64_t)device->physical->va.binding_table_pool.addr -
629                                (int64_t)device->physical->va.internal_surface_state_pool.addr;
630       assert(INT32_MIN < bt_pool_offset && bt_pool_offset < 0);
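      /* Illustration with made-up addresses: if the binding table pool sat
       * 1GiB below the internal surface state pool, bt_pool_offset would be
       * -0x40000000, which satisfies the assert above and makes binding
       * table allocations come out as negative offsets from the surface
       * state base address used below.
       */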
631       result = anv_state_pool_init(&device->binding_table_pool, device,
632                                    &(struct anv_state_pool_params) {
633                                       .name         = "binding table pool",
634                                       .base_address = device->physical->va.internal_surface_state_pool.addr,
635                                       .start_offset = bt_pool_offset,
636                                       .block_size   = BINDING_TABLE_POOL_BLOCK_SIZE,
637                                       .max_size     = device->physical->va.internal_surface_state_pool.size,
638                                    });
639    }
640    if (result != VK_SUCCESS)
641       goto fail_bindless_surface_state_pool;
642 
643    if (device->physical->indirect_descriptors) {
644       result = anv_state_pool_init(&device->indirect_push_descriptor_pool, device,
645                                    &(struct anv_state_pool_params) {
646                                       .name         = "indirect push descriptor pool",
647                                       .base_address = device->physical->va.indirect_push_descriptor_pool.addr,
648                                       .block_size   = 4096,
649                                       .max_size     = device->physical->va.indirect_push_descriptor_pool.size,
650                                    });
651       if (result != VK_SUCCESS)
652          goto fail_binding_table_pool;
653    }
654 
655    if (device->vk.enabled_extensions.EXT_descriptor_buffer &&
656        device->info->verx10 >= 125) {
      /* On Gfx12.5+, because of the bindless stages (Mesh, Task, RT), the
       * only way we can wire push descriptors is through the bindless heap.
       * This state pool is a 1GB carve-out of the 4GB HW heap.
       */
661       result = anv_state_pool_init(&device->push_descriptor_buffer_pool, device,
662                                    &(struct anv_state_pool_params) {
663                                       .name         = "push descriptor buffer state pool",
664                                       .base_address = device->physical->va.push_descriptor_buffer_pool.addr,
665                                       .block_size   = 4096,
666                                       .max_size     = device->physical->va.push_descriptor_buffer_pool.size,
667                                    });
668       if (result != VK_SUCCESS)
669          goto fail_indirect_push_descriptor_pool;
670    }
671 
672    if (device->info->has_aux_map) {
673       result = anv_state_pool_init(&device->aux_tt_pool, device,
674                                    &(struct anv_state_pool_params) {
675                                       .name         = "aux-tt pool",
676                                       .base_address = device->physical->va.aux_tt_pool.addr,
677                                       .block_size   = 16384,
678                                       .max_size     = device->physical->va.aux_tt_pool.size,
679                                    });
680       if (result != VK_SUCCESS)
681          goto fail_push_descriptor_buffer_pool;
682 
683       device->aux_map_ctx = intel_aux_map_init(device, &aux_map_allocator,
684                                                &physical_device->info);
685       if (!device->aux_map_ctx)
686          goto fail_aux_tt_pool;
687    }
688 
689    result = anv_device_alloc_bo(device, "workaround", 8192,
690                                 ANV_BO_ALLOC_CAPTURE |
691                                 ANV_BO_ALLOC_HOST_COHERENT |
692                                 ANV_BO_ALLOC_MAPPED |
693                                 ANV_BO_ALLOC_INTERNAL,
694                                 0 /* explicit_address */,
695                                 &device->workaround_bo);
696    if (result != VK_SUCCESS)
697       goto fail_surface_aux_map_pool;
698 
699    if (intel_needs_workaround(device->info, 14019708328)) {
700       result = anv_device_alloc_bo(device, "dummy_aux", 4096,
701                                    0 /* alloc_flags */,
702                                    0 /* explicit_address */,
703                                    &device->dummy_aux_bo);
704       if (result != VK_SUCCESS)
705          goto fail_workaround_bo;
706 
707       device->isl_dev.dummy_aux_address = device->dummy_aux_bo->offset;
708    }
709 
710    struct anv_address wa_addr = (struct anv_address) {
711       .bo = device->workaround_bo,
712    };
713 
714    wa_addr = anv_address_add_aligned(wa_addr,
715                                      intel_debug_write_identifiers(
716                                         device->workaround_bo->map,
717                                         device->workaround_bo->size,
718                                         "Anv"), 32);
719 
720    device->rt_uuid_addr = wa_addr;
721    memcpy(device->rt_uuid_addr.bo->map + device->rt_uuid_addr.offset,
722           physical_device->rt_uuid,
723           sizeof(physical_device->rt_uuid));
724 
725    /* Make sure the workaround address is the last one in the workaround BO,
726     * so that writes never overwrite other bits of data stored in the
727     * workaround BO.
728     */
729    wa_addr = anv_address_add_aligned(wa_addr,
730                                      sizeof(physical_device->rt_uuid), 64);
731    device->workaround_address = wa_addr;
732 
   /* Make sure we don't go past the end of the allocated BO. */
734    assert(device->workaround_address.offset < device->workaround_bo->size);
   /* We also need 64B (the maximum GRF size) available from the workaround
    * address (see the TBIMR workaround).
    */
738    assert((device->workaround_bo->size -
739            device->workaround_address.offset) >= 64);
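   /* Rough layout of the workaround BO at this point:
    *   [0 .. N)             intel_debug_write_identifiers() data
    *   [align(N, 32) .. )   copy of physical_device->rt_uuid
    *   [align(.., 64) .. )  workaround_address scratch (>= 64B available)
    */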
740 
741    device->workarounds.doom64_images = NULL;
742 
743 
744    device->debug_frame_desc =
745       intel_debug_get_identifier_block(device->workaround_bo->map,
746                                        device->workaround_bo->size,
747                                        INTEL_DEBUG_BLOCK_TYPE_FRAME);
748 
749    if (device->vk.enabled_extensions.KHR_ray_query) {
750       uint32_t ray_queries_size =
751          align(brw_rt_ray_queries_hw_stacks_size(device->info), 4096);
752 
753       result = anv_device_alloc_bo(device, "ray queries",
754                                    ray_queries_size,
755                                    ANV_BO_ALLOC_INTERNAL,
756                                    0 /* explicit_address */,
757                                    &device->ray_query_bo);
758       if (result != VK_SUCCESS)
759          goto fail_dummy_aux_bo;
760    }
761 
762    result = anv_device_init_trivial_batch(device);
763    if (result != VK_SUCCESS)
764       goto fail_ray_query_bo;
765 
766    /* Emit the CPS states before running the initialization batch as those
767     * structures are referenced.
768     */
769    if (device->info->ver >= 12) {
      uint32_t n_cps_states = 3 * 3; /* All combinations of X by Y CP sizes (1, 2, 4) */
771 
772       if (device->info->has_coarse_pixel_primitive_and_cb)
773          n_cps_states *= 5 * 5; /* 5 combiners by 2 operators */
774 
775       n_cps_states += 1; /* Disable CPS */
776 
      /* Each of the combinations must be replicated on all viewports */
778       n_cps_states *= MAX_VIEWPORTS;
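      /* Worked example, assuming MAX_VIEWPORTS == 16: without the coarse
       * pixel combiner expansion this is (3 * 3 + 1) * 16 = 160 states, with
       * it (3 * 3 * 5 * 5 + 1) * 16 = 3616, each CPS_STATE_length(info) * 4
       * bytes long.
       */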
779 
780       device->cps_states =
781          anv_state_pool_alloc(&device->dynamic_state_pool,
782                               n_cps_states * CPS_STATE_length(device->info) * 4,
783                               32);
784       if (device->cps_states.map == NULL)
785          goto fail_trivial_batch;
786 
787       anv_genX(device->info, init_cps_device_state)(device);
788    }
789 
790    if (device->physical->indirect_descriptors) {
791       /* Allocate a null surface state at surface state offset 0. This makes
792        * NULL descriptor handling trivial because we can just memset
793        * structures to zero and they have a valid descriptor.
794        */
795       device->null_surface_state =
796          anv_state_pool_alloc(&device->bindless_surface_state_pool,
797                               device->isl_dev.ss.size,
798                               device->isl_dev.ss.align);
799       isl_null_fill_state(&device->isl_dev, device->null_surface_state.map,
800                           .size = isl_extent3d(1, 1, 1) /* This shouldn't matter */);
801       assert(device->null_surface_state.offset == 0);
802    } else {
      /* When using direct descriptors, those can hold the null surface state
       * directly. We still need a null surface for the binding table entries
       * though, but this one can live anywhere in the internal surface state
       * pool.
       */
808       device->null_surface_state =
809          anv_state_pool_alloc(&device->internal_surface_state_pool,
810                               device->isl_dev.ss.size,
811                               device->isl_dev.ss.align);
812       isl_null_fill_state(&device->isl_dev, device->null_surface_state.map,
813                           .size = isl_extent3d(1, 1, 1) /* This shouldn't matter */);
814    }
815 
816    isl_null_fill_state(&device->isl_dev, &device->host_null_surface_state,
817                        .size = isl_extent3d(1, 1, 1) /* This shouldn't matter */);
818 
819    anv_scratch_pool_init(device, &device->scratch_pool, false);
820    anv_scratch_pool_init(device, &device->protected_scratch_pool, true);
821 
822    /* TODO(RT): Do we want some sort of data structure for this? */
823    memset(device->rt_scratch_bos, 0, sizeof(device->rt_scratch_bos));
824 
825    if (ANV_SUPPORT_RT && device->info->has_ray_tracing) {
826       /* The docs say to always allocate 128KB per DSS */
827       const uint32_t btd_fifo_bo_size =
828          128 * 1024 * intel_device_info_dual_subslice_id_bound(device->info);
829       result = anv_device_alloc_bo(device,
830                                    "rt-btd-fifo",
831                                    btd_fifo_bo_size,
832                                    ANV_BO_ALLOC_INTERNAL,
833                                    0 /* explicit_address */,
834                                    &device->btd_fifo_bo);
835       if (result != VK_SUCCESS)
836          goto fail_trivial_batch_bo_and_scratch_pool;
837    }
838 
839    struct vk_pipeline_cache_create_info pcc_info = { .weak_ref = true, };
840    device->vk.mem_cache =
841       vk_pipeline_cache_create(&device->vk, &pcc_info, NULL);
842    if (!device->vk.mem_cache) {
843       result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
844       goto fail_btd_fifo_bo;
845    }
846 
   /* Internal shaders need their own pipeline cache because, unlike the rest
    * of ANV, they won't work at all without the cache: they depend on it to
    * keep their shaders resident while they run. Therefore, we need a
    * special cache just for BLORP/RT that's forced to always be enabled.
    */
852    struct vk_pipeline_cache_create_info internal_pcc_info = {
853       .force_enable = true,
854       .weak_ref = false,
855    };
856    device->internal_cache =
857       vk_pipeline_cache_create(&device->vk, &internal_pcc_info, NULL);
858    if (device->internal_cache == NULL) {
859       result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
860       goto fail_default_pipeline_cache;
861    }
862 
   /* The device (currently ICL/TGL) does not have float64 support. */
864    if (!device->info->has_64bit_float &&
865       device->physical->instance->fp64_workaround_enabled)
866       anv_load_fp64_shader(device);
867 
868    if (INTEL_DEBUG(DEBUG_SHADER_PRINT)) {
869       result = anv_device_print_init(device);
870       if (result != VK_SUCCESS)
871          goto fail_internal_cache;
872    }
873 
874 #if DETECT_OS_ANDROID
875    device->u_gralloc = u_gralloc_create(U_GRALLOC_TYPE_AUTO);
876 #endif
877 
878    device->robust_buffer_access =
879       device->vk.enabled_features.robustBufferAccess ||
880       device->vk.enabled_features.nullDescriptor;
881 
882    device->breakpoint = anv_state_pool_alloc(&device->dynamic_state_pool, 4,
883                                              4);
884    p_atomic_set(&device->draw_call_count, 0);
885 
   /* Create a separate command pool for the companion RCS command buffer. */
887    if (device->info->verx10 >= 125) {
888       VkCommandPoolCreateInfo pool_info = {
889          .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
890          .queueFamilyIndex =
891              anv_get_first_render_queue_index(device->physical),
892       };
893 
894       result = vk_common_CreateCommandPool(anv_device_to_handle(device),
895                                            &pool_info, NULL,
896                                            &device->companion_rcs_cmd_pool);
897       if (result != VK_SUCCESS) {
898          goto fail_print;
899       }
900    }
901 
902    result = anv_device_init_trtt(device);
903    if (result != VK_SUCCESS)
904       goto fail_companion_cmd_pool;
905 
906    result = anv_device_init_rt_shaders(device);
907    if (result != VK_SUCCESS) {
908       result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
909       goto fail_trtt;
910    }
911 
912    anv_device_init_blorp(device);
913 
914    anv_device_init_border_colors(device);
915 
916    anv_device_init_internal_kernels(device);
917 
918    anv_device_init_astc_emu(device);
919 
920    anv_device_perf_init(device);
921 
922    anv_device_init_embedded_samplers(device);
923 
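   /* Start with every gfx state marked dirty, then clear the bits for
    * packets this device/configuration will never emit.
    */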
924    BITSET_ONES(device->gfx_dirty_state);
925    BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_INDEX_BUFFER);
926    BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_SO_DECL_LIST);
927    if (device->info->ver < 11)
928       BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_VF_SGVS_2);
929    if (device->info->ver < 12) {
930       BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_PRIMITIVE_REPLICATION);
931       BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_DEPTH_BOUNDS);
932    }
933    if (!device->vk.enabled_extensions.EXT_sample_locations)
934       BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_SAMPLE_PATTERN);
935    if (!device->vk.enabled_extensions.KHR_fragment_shading_rate)
936       BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_CPS);
937    if (!device->vk.enabled_extensions.EXT_mesh_shader) {
938       BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_SBE_MESH);
939       BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_CLIP_MESH);
940       BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_MESH_CONTROL);
941       BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_MESH_SHADER);
942       BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_MESH_DISTRIB);
943       BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_TASK_CONTROL);
944       BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_TASK_SHADER);
945       BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_TASK_REDISTRIB);
946    }
947    if (!intel_needs_workaround(device->info, 18019816803))
948       BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_WA_18019816803);
949    if (!intel_needs_workaround(device->info, 14018283232))
950       BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_WA_14018283232);
951    if (device->info->ver > 9)
952       BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_PMA_FIX);
953 
954    device->queue_count = 0;
955    for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
956       const VkDeviceQueueCreateInfo *queueCreateInfo =
957          &pCreateInfo->pQueueCreateInfos[i];
958 
959       for (uint32_t j = 0; j < queueCreateInfo->queueCount; j++) {
960          result = anv_queue_init(device, &device->queues[device->queue_count],
961                                  queueCreateInfo, j);
962          if (result != VK_SUCCESS)
963             goto fail_queues;
964 
965          device->queue_count++;
966       }
967    }
968 
969    anv_device_utrace_init(device);
970 
971    result = anv_genX(device->info, init_device_state)(device);
972    if (result != VK_SUCCESS)
973       goto fail_utrace;
974 
975    *pDevice = anv_device_to_handle(device);
976 
977    return VK_SUCCESS;
978 
979  fail_utrace:
980    anv_device_utrace_finish(device);
981  fail_queues:
982    for (uint32_t i = 0; i < device->queue_count; i++)
983       anv_queue_finish(&device->queues[i]);
984    anv_device_finish_embedded_samplers(device);
985    anv_device_finish_blorp(device);
986    anv_device_finish_astc_emu(device);
987    anv_device_finish_internal_kernels(device);
988    anv_device_finish_rt_shaders(device);
989  fail_trtt:
990    anv_device_finish_trtt(device);
991  fail_companion_cmd_pool:
992    if (device->info->verx10 >= 125) {
993       vk_common_DestroyCommandPool(anv_device_to_handle(device),
994                                    device->companion_rcs_cmd_pool, NULL);
995    }
996  fail_print:
997    if (INTEL_DEBUG(DEBUG_SHADER_PRINT))
998       anv_device_print_fini(device);
999  fail_internal_cache:
1000    vk_pipeline_cache_destroy(device->internal_cache, NULL);
1001  fail_default_pipeline_cache:
1002    vk_pipeline_cache_destroy(device->vk.mem_cache, NULL);
1003  fail_btd_fifo_bo:
1004    if (ANV_SUPPORT_RT && device->info->has_ray_tracing)
1005       anv_device_release_bo(device, device->btd_fifo_bo);
1006  fail_trivial_batch_bo_and_scratch_pool:
1007    anv_scratch_pool_finish(device, &device->scratch_pool);
1008    anv_scratch_pool_finish(device, &device->protected_scratch_pool);
1009  fail_trivial_batch:
1010    anv_device_release_bo(device, device->trivial_batch_bo);
1011  fail_ray_query_bo:
1012    if (device->ray_query_bo)
1013       anv_device_release_bo(device, device->ray_query_bo);
1014  fail_dummy_aux_bo:
1015    if (device->dummy_aux_bo)
1016       anv_device_release_bo(device, device->dummy_aux_bo);
1017  fail_workaround_bo:
1018    anv_device_release_bo(device, device->workaround_bo);
1019  fail_surface_aux_map_pool:
1020    if (device->info->has_aux_map) {
1021       intel_aux_map_finish(device->aux_map_ctx);
1022       device->aux_map_ctx = NULL;
1023    }
1024  fail_aux_tt_pool:
1025    if (device->info->has_aux_map)
1026       anv_state_pool_finish(&device->aux_tt_pool);
1027  fail_push_descriptor_buffer_pool:
1028    if (device->vk.enabled_extensions.EXT_descriptor_buffer &&
1029        device->info->verx10 >= 125)
1030       anv_state_pool_finish(&device->push_descriptor_buffer_pool);
1031  fail_indirect_push_descriptor_pool:
1032    if (device->physical->indirect_descriptors)
1033       anv_state_pool_finish(&device->indirect_push_descriptor_pool);
1034  fail_binding_table_pool:
1035    anv_state_pool_finish(&device->binding_table_pool);
1036  fail_bindless_surface_state_pool:
1037    if (device->physical->indirect_descriptors)
1038       anv_state_pool_finish(&device->bindless_surface_state_pool);
1039  fail_internal_surface_state_pool:
1040    anv_state_pool_finish(&device->internal_surface_state_pool);
1041  fail_scratch_surface_state_pool:
1042    if (device->info->verx10 >= 125)
1043       anv_state_pool_finish(&device->scratch_surface_state_pool);
1044  fail_instruction_state_pool:
1045    anv_state_pool_finish(&device->instruction_state_pool);
1046  fail_custom_border_color_pool:
1047    anv_state_reserved_array_pool_finish(&device->custom_border_colors);
1048  fail_dynamic_state_pool:
1049    anv_state_pool_finish(&device->dynamic_state_pool);
1050  fail_general_state_pool:
1051    anv_state_pool_finish(&device->general_state_pool);
1052  fail_batch_bo_pool:
1053    if (device->vk.enabled_extensions.KHR_acceleration_structure)
1054       anv_bo_pool_finish(&device->bvh_bo_pool);
1055    anv_bo_pool_finish(&device->batch_bo_pool);
1056    anv_bo_cache_finish(&device->bo_cache);
1057  fail_queue_cond:
1058    pthread_cond_destroy(&device->queue_submit);
1059  fail_mutex:
1060    pthread_mutex_destroy(&device->mutex);
1061  fail_vmas:
1062    util_vma_heap_finish(&device->vma_trtt);
1063    util_vma_heap_finish(&device->vma_dynamic_visible);
1064    util_vma_heap_finish(&device->vma_desc);
1065    util_vma_heap_finish(&device->vma_hi);
1066    util_vma_heap_finish(&device->vma_lo);
1067    pthread_mutex_destroy(&device->vma_mutex);
1068  fail_queues_alloc:
1069    vk_free(&device->vk.alloc, device->queues);
1070  fail_context_id:
1071    anv_device_destroy_context_or_vm(device);
1072  fail_fd:
1073    close(device->fd);
1074  fail_device:
1075    vk_device_finish(&device->vk);
1076  fail_alloc:
1077    vk_free(&device->vk.alloc, device);
1078 
1079    return result;
1080 }
1081 
void anv_DestroyDevice(
    VkDevice                                    _device,
    const VkAllocationCallbacks*                pAllocator)
{
1086    ANV_FROM_HANDLE(anv_device, device, _device);
1087 
1088    if (!device)
1089       return;
1090 
1091 #if DETECT_OS_ANDROID
1092    u_gralloc_destroy(&device->u_gralloc);
1093 #endif
1094 
1095    anv_memory_trace_finish(device);
1096 
1097    struct anv_physical_device *pdevice = device->physical;
1098 
1099    /* Do TRTT batch garbage collection before destroying queues. */
1100    anv_device_finish_trtt(device);
1101 
1102    anv_device_utrace_finish(device);
1103 
1104    for (uint32_t i = 0; i < device->queue_count; i++)
1105       anv_queue_finish(&device->queues[i]);
1106    vk_free(&device->vk.alloc, device->queues);
1107 
1108    anv_device_finish_blorp(device);
1109 
1110    anv_device_finish_rt_shaders(device);
1111 
1112    anv_device_finish_astc_emu(device);
1113 
1114    anv_device_finish_internal_kernels(device);
1115 
1116    if (INTEL_DEBUG(DEBUG_SHADER_PRINT))
1117       anv_device_print_fini(device);
1118 
1119    vk_pipeline_cache_destroy(device->internal_cache, NULL);
1120    vk_pipeline_cache_destroy(device->vk.mem_cache, NULL);
1121 
1122    anv_device_finish_embedded_samplers(device);
1123 
1124    if (ANV_SUPPORT_RT && device->info->has_ray_tracing)
1125       anv_device_release_bo(device, device->btd_fifo_bo);
1126 
1127    if (device->info->verx10 >= 125) {
1128       vk_common_DestroyCommandPool(anv_device_to_handle(device),
1129                                    device->companion_rcs_cmd_pool, NULL);
1130    }
1131 
1132    anv_state_reserved_array_pool_finish(&device->custom_border_colors);
1133 #ifdef HAVE_VALGRIND
1134    /* We only need to free these to prevent valgrind errors.  The backing
1135     * BO will go away in a couple of lines so we don't actually leak.
1136     */
1137    anv_state_pool_free(&device->dynamic_state_pool, device->border_colors);
1138    anv_state_pool_free(&device->dynamic_state_pool, device->slice_hash);
1139    anv_state_pool_free(&device->dynamic_state_pool, device->cps_states);
1140    anv_state_pool_free(&device->dynamic_state_pool, device->breakpoint);
1141 #endif
1142 
1143    for (unsigned i = 0; i < ARRAY_SIZE(device->rt_scratch_bos); i++) {
1144       if (device->rt_scratch_bos[i] != NULL)
1145          anv_device_release_bo(device, device->rt_scratch_bos[i]);
1146    }
1147 
1148    anv_scratch_pool_finish(device, &device->scratch_pool);
1149    anv_scratch_pool_finish(device, &device->protected_scratch_pool);
1150 
1151    if (device->vk.enabled_extensions.KHR_ray_query) {
1152       for (unsigned i = 0; i < ARRAY_SIZE(device->ray_query_shadow_bos); i++) {
1153          if (device->ray_query_shadow_bos[i] != NULL)
1154             anv_device_release_bo(device, device->ray_query_shadow_bos[i]);
1155       }
1156       anv_device_release_bo(device, device->ray_query_bo);
1157    }
1158    anv_device_release_bo(device, device->workaround_bo);
1159    if (device->dummy_aux_bo)
1160       anv_device_release_bo(device, device->dummy_aux_bo);
1161    anv_device_release_bo(device, device->trivial_batch_bo);
1162 
1163    if (device->info->has_aux_map) {
1164       intel_aux_map_finish(device->aux_map_ctx);
1165       device->aux_map_ctx = NULL;
1166       anv_state_pool_finish(&device->aux_tt_pool);
1167    }
1168    if (device->vk.enabled_extensions.EXT_descriptor_buffer &&
1169        device->info->verx10 >= 125)
1170       anv_state_pool_finish(&device->push_descriptor_buffer_pool);
1171    if (device->physical->indirect_descriptors)
1172       anv_state_pool_finish(&device->indirect_push_descriptor_pool);
1173    anv_state_pool_finish(&device->binding_table_pool);
1174    if (device->info->verx10 >= 125)
1175       anv_state_pool_finish(&device->scratch_surface_state_pool);
1176    anv_state_pool_finish(&device->internal_surface_state_pool);
1177    if (device->physical->indirect_descriptors)
1178       anv_state_pool_finish(&device->bindless_surface_state_pool);
1179    anv_state_pool_finish(&device->instruction_state_pool);
1180    anv_state_pool_finish(&device->dynamic_state_pool);
1181    anv_state_pool_finish(&device->general_state_pool);
1182 
1183    if (device->vk.enabled_extensions.KHR_acceleration_structure)
1184       anv_bo_pool_finish(&device->bvh_bo_pool);
1185    anv_bo_pool_finish(&device->batch_bo_pool);
1186 
1187    anv_bo_cache_finish(&device->bo_cache);
1188 
1189    util_vma_heap_finish(&device->vma_trtt);
1190    util_vma_heap_finish(&device->vma_dynamic_visible);
1191    util_vma_heap_finish(&device->vma_desc);
1192    util_vma_heap_finish(&device->vma_hi);
1193    util_vma_heap_finish(&device->vma_lo);
1194    pthread_mutex_destroy(&device->vma_mutex);
1195 
1196    pthread_cond_destroy(&device->queue_submit);
1197    pthread_mutex_destroy(&device->mutex);
1198 
1199    ralloc_free(device->fp64_nir);
1200 
1201    anv_device_destroy_context_or_vm(device);
1202 
1203    if (INTEL_DEBUG(DEBUG_BATCH | DEBUG_BATCH_STATS)) {
1204       for (unsigned i = 0; i < pdevice->queue.family_count; i++) {
1205          if (INTEL_DEBUG(DEBUG_BATCH_STATS))
1206             intel_batch_print_stats(&device->decoder[i]);
1207          intel_batch_decode_ctx_finish(&device->decoder[i]);
1208       }
1209    }
1210 
1211    close(device->fd);
1212 
1213    vk_device_finish(&device->vk);
1214    vk_free(&device->vk.alloc, device);
1215 }
1216 
VkResult anv_EnumerateInstanceLayerProperties(
    uint32_t*                                   pPropertyCount,
    VkLayerProperties*                          pProperties)
{
1221    if (pProperties == NULL) {
1222       *pPropertyCount = 0;
1223       return VK_SUCCESS;
1224    }
1225 
1226    /* None supported at this time */
1227    return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
1228 }
1229 
VkResult
anv_device_wait(struct anv_device *device, struct anv_bo *bo,
                int64_t timeout)
{
1234    int ret = anv_gem_wait(device, bo->gem_handle, &timeout);
1235    if (ret == -1 && errno == ETIME) {
1236       return VK_TIMEOUT;
1237    } else if (ret == -1) {
1238       /* We don't know the real error. */
1239       return vk_device_set_lost(&device->vk, "gem wait failed: %m");
1240    } else {
1241       return VK_SUCCESS;
1242    }
1243 }
1244 
static struct util_vma_heap *
anv_vma_heap_for_flags(struct anv_device *device,
                       enum anv_bo_alloc_flags alloc_flags)
{
1249    if (alloc_flags & ANV_BO_ALLOC_TRTT)
1250       return &device->vma_trtt;
1251 
1252    if (alloc_flags & ANV_BO_ALLOC_32BIT_ADDRESS)
1253       return &device->vma_lo;
1254 
1255    if (alloc_flags & ANV_BO_ALLOC_DESCRIPTOR_POOL)
1256       return &device->vma_desc;
1257 
1258    if (alloc_flags & ANV_BO_ALLOC_DYNAMIC_VISIBLE_POOL)
1259       return &device->vma_dynamic_visible;
1260 
1261    return &device->vma_hi;
1262 }
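/* A note on the precedence encoded above: TR-TT allocations always take the
 * TR-TT heap, 32-bit-address allocations the low heap, descriptor-pool and
 * dynamic-visible allocations their dedicated heaps, and everything else
 * falls through to the high heap.
 */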
1263 
1264 uint64_t
1265 anv_vma_alloc(struct anv_device *device,
1266               uint64_t size, uint64_t align,
1267               enum anv_bo_alloc_flags alloc_flags,
1268               uint64_t client_address,
1269               struct util_vma_heap **out_vma_heap)
1270 {
1271    pthread_mutex_lock(&device->vma_mutex);
1272 
1273    uint64_t addr = 0;
1274    *out_vma_heap = anv_vma_heap_for_flags(device, alloc_flags);
1275 
1276    if (alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) {
1277       assert(*out_vma_heap == &device->vma_hi ||
1278              *out_vma_heap == &device->vma_dynamic_visible ||
1279              *out_vma_heap == &device->vma_trtt);
1280 
1281       if (client_address) {
1282          if (util_vma_heap_alloc_addr(*out_vma_heap,
1283                                       client_address, size)) {
1284             addr = client_address;
1285          }
1286       } else {
1287          (*out_vma_heap)->alloc_high = false;
1288          addr = util_vma_heap_alloc(*out_vma_heap, size, align);
1289          (*out_vma_heap)->alloc_high = true;
1290       }
1291       /* We don't want to fall back to other heaps */
1292       goto done;
1293    }
1294 
1295    assert(client_address == 0);
1296 
1297    addr = util_vma_heap_alloc(*out_vma_heap, size, align);
1298 
1299 done:
1300    pthread_mutex_unlock(&device->vma_mutex);
1301 
1302    assert(addr == intel_48b_address(addr));
1303    return intel_canonical_address(addr);
1304 }
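/* A quick illustration of the address forms used above, with made-up values
 * (not taken from the driver): intel_48b_address() truncates an address to
 * the 48 bits the GPU VMA actually uses, while intel_canonical_address()
 * sign-extends bit 47 into bits 63:48, much like x86-64 canonical pointers:
 *
 *    intel_48b_address(0xffff800000001000ull)       == 0x0000800000001000
 *    intel_canonical_address(0x0000800000001000ull) == 0xffff800000001000
 */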
1305 
1306 void
1307 anv_vma_free(struct anv_device *device,
1308              struct util_vma_heap *vma_heap,
1309              uint64_t address, uint64_t size)
1310 {
1311    assert(vma_heap == &device->vma_lo ||
1312           vma_heap == &device->vma_hi ||
1313           vma_heap == &device->vma_desc ||
1314           vma_heap == &device->vma_dynamic_visible ||
1315           vma_heap == &device->vma_trtt);
1316 
1317    const uint64_t addr_48b = intel_48b_address(address);
1318 
1319    pthread_mutex_lock(&device->vma_mutex);
1320 
1321    util_vma_heap_free(vma_heap, addr_48b, size);
1322 
1323    pthread_mutex_unlock(&device->vma_mutex);
1324 }
1325 
1326 VkResult anv_AllocateMemory(
1327     VkDevice                                    _device,
1328     const VkMemoryAllocateInfo*                 pAllocateInfo,
1329     const VkAllocationCallbacks*                pAllocator,
1330     VkDeviceMemory*                             pMem)
1331 {
1332    ANV_FROM_HANDLE(anv_device, device, _device);
1333    struct anv_physical_device *pdevice = device->physical;
1334    struct anv_device_memory *mem;
1335    VkResult result = VK_SUCCESS;
1336 
1337    assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
1338 
1339    VkDeviceSize aligned_alloc_size =
1340       align64(pAllocateInfo->allocationSize, 4096);
1341 
1342    assert(pAllocateInfo->memoryTypeIndex < pdevice->memory.type_count);
1343    const struct anv_memory_type *mem_type =
1344       &pdevice->memory.types[pAllocateInfo->memoryTypeIndex];
1345    assert(mem_type->heapIndex < pdevice->memory.heap_count);
1346    struct anv_memory_heap *mem_heap =
1347       &pdevice->memory.heaps[mem_type->heapIndex];
1348 
1349    if (aligned_alloc_size > mem_heap->size)
1350       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
1351 
1352    uint64_t mem_heap_used = p_atomic_read(&mem_heap->used);
1353    if (mem_heap_used + aligned_alloc_size > mem_heap->size)
1354       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
1355 
1356    mem = vk_device_memory_create(&device->vk, pAllocateInfo,
1357                                  pAllocator, sizeof(*mem));
1358    if (mem == NULL)
1359       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1360 
1361    mem->type = mem_type;
1362    mem->map = NULL;
1363    mem->map_size = 0;
1364    mem->map_delta = 0;
1365 
1366    enum anv_bo_alloc_flags alloc_flags = 0;
1367 
1368    const VkImportMemoryFdInfoKHR *fd_info = NULL;
1369    const VkMemoryDedicatedAllocateInfo *dedicated_info = NULL;
1370    const struct wsi_memory_allocate_info *wsi_info = NULL;
1371    uint64_t client_address = 0;
1372 
1373    vk_foreach_struct_const(ext, pAllocateInfo->pNext) {
1374       /* VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA isn't a real enum
1375        * value, so use a cast to avoid a compiler warning
1376        */
1377       switch ((uint32_t)ext->sType) {
1378       case VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO:
1379       case VK_STRUCTURE_TYPE_IMPORT_ANDROID_HARDWARE_BUFFER_INFO_ANDROID:
1380       case VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT:
1381       case VK_STRUCTURE_TYPE_IMPORT_MEMORY_WIN32_HANDLE_INFO_KHR:
1382       case VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO:
1383          /* handled by vk_device_memory_create */
1384          break;
1385 
1386       case VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR:
1387          fd_info = (void *)ext;
1388          break;
1389 
1390       case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO:
1391          dedicated_info = (void *)ext;
1392          break;
1393 
1394       case VK_STRUCTURE_TYPE_MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO: {
1395          const VkMemoryOpaqueCaptureAddressAllocateInfo *addr_info =
1396             (const VkMemoryOpaqueCaptureAddressAllocateInfo *)ext;
1397          client_address = addr_info->opaqueCaptureAddress;
1398          break;
1399       }
1400 
1401       case VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA:
1402          wsi_info = (void *)ext;
1403          break;
1404 
1405       default:
1406          vk_debug_ignored_stype(ext->sType);
1407          break;
1408       }
1409    }
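   /* For reference, client_address above comes from the capture/replay path
    * of buffer device address. A hypothetical application would chain
    * something like the following into VkMemoryAllocateInfo::pNext, where
    * captured_address stands in for a value previously recorded with
    * vkGetDeviceMemoryOpaqueCaptureAddress() (illustrative only, not driver
    * code):
    *
    *    VkMemoryOpaqueCaptureAddressAllocateInfo addr_info = {
    *       .sType = VK_STRUCTURE_TYPE_MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO,
    *       .opaqueCaptureAddress = captured_address,
    *    };
    *    VkMemoryAllocateFlagsInfo flags_info = {
    *       .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
    *       .pNext = &addr_info,
    *       .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT |
    *                VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT,
    *    };
    */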
1410 
1411    /* If i915 reported mappable/non_mappable vram regions and the
1412     * application wants lmem to be mappable, then we need to use the
1413     * I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS flag to create our BO.
1414     */
1415    if (pdevice->vram_mappable.size > 0 &&
1416        pdevice->vram_non_mappable.size > 0 &&
1417        (mem_type->propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) &&
1418        (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT))
1419       alloc_flags |= ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE;
1420 
1421    if (!mem_heap->is_local_mem)
1422       alloc_flags |= ANV_BO_ALLOC_NO_LOCAL_MEM;
1423 
1424    if (mem->vk.alloc_flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT)
1425       alloc_flags |= ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS;
1426 
1427    if (mem_type->propertyFlags & VK_MEMORY_PROPERTY_PROTECTED_BIT)
1428       alloc_flags |= ANV_BO_ALLOC_PROTECTED;
1429 
1430    /* For now, always allocate AUX-TT aligned memory, regardless of dedicated
1431     * allocations. An application can, for example, suballocate a large
1432     * VkDeviceMemory and try to bind an image created with a CCS modifier. In
1433     * that case we cannot disable CCS if the alignment doesn't meet the AUX-TT
1434     * requirements, so we need to ensure both the VkDeviceMemory and the
1435     * alignment reported through vkGetImageMemoryRequirements() meet the
1436     * AUX-TT requirement.
1437     *
1438     * Allocations with the special dynamic_visible mem type are for things like
1439     * descriptor buffers, so AUX-TT alignment is not needed here.
1440     */
1441    if (device->info->has_aux_map && !mem_type->dynamic_visible)
1442       alloc_flags |= ANV_BO_ALLOC_AUX_TT_ALIGNED;
1443 
1444    /* If the allocation is neither dedicated nor a host pointer, allocate
1445     * additional CCS space.
1446     *
1447     * Allocations with the special dynamic_visible mem type are for things like
1448     * descriptor buffers, which don't need any compression.
1449     */
1450    if (device->physical->alloc_aux_tt_mem &&
1451        dedicated_info == NULL &&
1452        mem->vk.host_ptr == NULL &&
1453        !mem_type->dynamic_visible)
1454       alloc_flags |= ANV_BO_ALLOC_AUX_CCS;
1455 
1456    /* TODO: Android, ChromeOS and other applications may need another way to
1457     * allocate buffers that can be scanned out to the display, but it should
1458     * be pretty easy to catch those, as the Xe KMD driver will print warnings
1459     * in dmesg when scanning out buffers allocated without the proper flag set.
1460     */
1461    if (wsi_info)
1462       alloc_flags |= ANV_BO_ALLOC_SCANOUT;
1463 
1464    /* Anything imported or exported is EXTERNAL */
1465    if (mem->vk.export_handle_types || mem->vk.import_handle_type) {
1466       alloc_flags |= ANV_BO_ALLOC_EXTERNAL;
1467 
1468       /* wsi has its own way of synchronizing with the compositor */
1469       if (pdevice->instance->external_memory_implicit_sync &&
1470           !wsi_info && dedicated_info &&
1471           dedicated_info->image != VK_NULL_HANDLE) {
1472          ANV_FROM_HANDLE(anv_image, image, dedicated_info->image);
1473 
1474          /* Apply implicit sync to be compatible with clients relying on
1475           * implicit fencing. This matches the behavior in iris i915_batch
1476           * submit. An example client is VA-API (iHD), so only the dedicated
1477           * image scenario has to be covered.
1478           */
1479          alloc_flags |= ANV_BO_ALLOC_IMPLICIT_SYNC;
1480 
1481          /* For color attachment, apply IMPLICIT_WRITE so a client on the
1482           * consumer side relying on implicit fencing can have a fence to
1483           * wait for render complete.
1484           */
1485          if (image->vk.usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)
1486             alloc_flags |= ANV_BO_ALLOC_IMPLICIT_WRITE;
1487       }
1488    }
1489 
1490    /* TODO: Disabling compression on external bos will cause problems once we
1491     * have a modifier that supports compression (Xe2+).
1492     */
1493    if (!(alloc_flags & ANV_BO_ALLOC_EXTERNAL) && mem_type->compressed)
1494       alloc_flags |= ANV_BO_ALLOC_COMPRESSED;
1495 
1496    if (mem_type->dynamic_visible)
1497       alloc_flags |= ANV_BO_ALLOC_DYNAMIC_VISIBLE_POOL;
1498 
1499    if (mem->vk.ahardware_buffer) {
1500       result = anv_import_ahw_memory(_device, mem);
1501       if (result != VK_SUCCESS)
1502          goto fail;
1503 
1504       goto success;
1505    }
1506 
1507    /* The Vulkan spec permits handleType to be 0, in which case the struct is
1508     * ignored.
1509     */
1510    if (fd_info && fd_info->handleType) {
1511       /* At the moment, we support only the below handle types. */
1512       assert(fd_info->handleType ==
1513                VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
1514              fd_info->handleType ==
1515                VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
1516 
1517       result = anv_device_import_bo(device, fd_info->fd, alloc_flags,
1518                                     client_address, &mem->bo);
1519       if (result != VK_SUCCESS)
1520          goto fail;
1521 
1522       /* For security purposes, we reject importing the bo if it's smaller
1523        * than the requested allocation size.  This prevents a malicious client
1524        * from passing a buffer to a trusted client, lying about the size, and
1525        * telling the trusted client to try and texture from an image that goes
1526        * out-of-bounds.  This sort of thing could lead to GPU hangs or worse
1527        * in the trusted client.  The trusted client can protect itself against
1528        * this sort of attack but only if it can trust the buffer size.
1529        */
1530       if (mem->bo->size < aligned_alloc_size) {
1531          result = vk_errorf(device, VK_ERROR_INVALID_EXTERNAL_HANDLE,
1532                             "aligned allocationSize too large for "
1533                             "VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT: "
1534                             "%"PRIu64"B > %"PRIu64"B",
1535                             aligned_alloc_size, mem->bo->size);
1536          anv_device_release_bo(device, mem->bo);
1537          goto fail;
1538       }
1539 
1540       /* From the Vulkan spec:
1541        *
1542        *    "Importing memory from a file descriptor transfers ownership of
1543        *    the file descriptor from the application to the Vulkan
1544        *    implementation. The application must not perform any operations on
1545        *    the file descriptor after a successful import."
1546        *
1547        * If the import fails, we leave the file descriptor open.
1548        */
1549       close(fd_info->fd);
1550       goto success;
1551    }
1552 
1553    if (mem->vk.host_ptr) {
1554       if (mem->vk.import_handle_type ==
1555           VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_MAPPED_FOREIGN_MEMORY_BIT_EXT) {
1556          result = vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
1557          goto fail;
1558       }
1559 
1560       assert(mem->vk.import_handle_type ==
1561              VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
1562 
1563       result = anv_device_import_bo_from_host_ptr(device,
1564                                                   mem->vk.host_ptr,
1565                                                   mem->vk.size,
1566                                                   alloc_flags,
1567                                                   client_address,
1568                                                   &mem->bo);
1569       if (result != VK_SUCCESS)
1570          goto fail;
1571 
1572       goto success;
1573    }
1574 
1575    if (alloc_flags & (ANV_BO_ALLOC_EXTERNAL | ANV_BO_ALLOC_SCANOUT)) {
1576       alloc_flags |= ANV_BO_ALLOC_HOST_COHERENT;
1577    } else if (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
1578       if (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
1579          alloc_flags |= ANV_BO_ALLOC_HOST_COHERENT;
1580       if (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT)
1581          alloc_flags |= ANV_BO_ALLOC_HOST_CACHED;
1582    } else {
1583       /* Some host mode is required in order to get a valid PAT index set. */
1584       alloc_flags |= ANV_BO_ALLOC_HOST_COHERENT;
1585    }
1586 
1587    /* Regular allocation path (not importing memory). */
1588 
1589    result = anv_device_alloc_bo(device, "user", pAllocateInfo->allocationSize,
1590                                 alloc_flags, client_address, &mem->bo);
1591    if (result != VK_SUCCESS)
1592       goto fail;
1593 
1594    if (dedicated_info && dedicated_info->image != VK_NULL_HANDLE) {
1595       ANV_FROM_HANDLE(anv_image, image, dedicated_info->image);
1596 
1597       /* Some legacy (non-modifiers) consumers need the tiling to be set on
1598        * the BO.  In this case, we have a dedicated allocation.
1599        */
1600       if (image->vk.wsi_legacy_scanout) {
1601          const struct isl_surf *surf = &image->planes[0].primary_surface.isl;
1602          result = anv_device_set_bo_tiling(device, mem->bo,
1603                                            surf->row_pitch_B,
1604                                            surf->tiling);
1605          if (result != VK_SUCCESS) {
1606             anv_device_release_bo(device, mem->bo);
1607             goto fail;
1608          }
1609       }
1610    }
1611 
1612  success:
1613    mem_heap_used = p_atomic_add_return(&mem_heap->used, mem->bo->size);
1614    if (mem_heap_used > mem_heap->size) {
1615       p_atomic_add(&mem_heap->used, -mem->bo->size);
1616       anv_device_release_bo(device, mem->bo);
1617       result = vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
1618                          "Out of heap memory");
1619       goto fail;
1620    }
1621 
1622    pthread_mutex_lock(&device->mutex);
1623    list_addtail(&mem->link, &device->memory_objects);
1624    pthread_mutex_unlock(&device->mutex);
1625 
1626    ANV_RMV(heap_create, device, mem, false, 0);
1627 
1628    *pMem = anv_device_memory_to_handle(mem);
1629 
1630    return VK_SUCCESS;
1631 
1632  fail:
1633    vk_device_memory_destroy(&device->vk, pAllocator, &mem->vk);
1634 
1635    return result;
1636 }
1637 
1638 VkResult anv_GetMemoryFdKHR(
1639     VkDevice                                    device_h,
1640     const VkMemoryGetFdInfoKHR*                 pGetFdInfo,
1641     int*                                        pFd)
1642 {
1643    ANV_FROM_HANDLE(anv_device, dev, device_h);
1644    ANV_FROM_HANDLE(anv_device_memory, mem, pGetFdInfo->memory);
1645 
1646    assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
1647 
1648    assert(pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
1649           pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
1650 
1651    return anv_device_export_bo(dev, mem->bo, pFd);
1652 }
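/* A sketch of how an application pairs the export above with the import path
 * in anv_AllocateMemory() (hypothetical application code; exported_memory is
 * assumed to have been allocated with the matching export handle type):
 *
 *    VkMemoryGetFdInfoKHR get_fd = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
 *       .memory = exported_memory,
 *       .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
 *    };
 *    int fd;
 *    vkGetMemoryFdKHR(device, &get_fd, &fd);
 *
 *    VkImportMemoryFdInfoKHR import_fd = {
 *       .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
 *       .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
 *       .fd = fd,
 *    };
 *
 * import_fd is then chained into VkMemoryAllocateInfo::pNext of the importing
 * vkAllocateMemory() call; on success the fd is owned by the implementation.
 */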
1653 
1654 VkResult anv_GetMemoryFdPropertiesKHR(
1655     VkDevice                                    _device,
1656     VkExternalMemoryHandleTypeFlagBits          handleType,
1657     int                                         fd,
1658     VkMemoryFdPropertiesKHR*                    pMemoryFdProperties)
1659 {
1660    ANV_FROM_HANDLE(anv_device, device, _device);
1661 
1662    switch (handleType) {
1663    case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
1664       /* dma-buf can be imported as any memory type */
1665       pMemoryFdProperties->memoryTypeBits =
1666          (1 << device->physical->memory.type_count) - 1;
1667       return VK_SUCCESS;
1668 
1669    default:
1670       /* The valid usage section for this function says:
1671        *
1672        *    "handleType must not be one of the handle types defined as
1673        *    opaque."
1674        *
1675        * So opaque handle types fall into the default "unsupported" case.
1676        */
1677       return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
1678    }
1679 }
1680 
1681 VkResult anv_GetMemoryHostPointerPropertiesEXT(
1682    VkDevice                                    _device,
1683    VkExternalMemoryHandleTypeFlagBits          handleType,
1684    const void*                                 pHostPointer,
1685    VkMemoryHostPointerPropertiesEXT*           pMemoryHostPointerProperties)
1686 {
1687    ANV_FROM_HANDLE(anv_device, device, _device);
1688 
1689    assert(pMemoryHostPointerProperties->sType ==
1690           VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT);
1691 
1692    switch (handleType) {
1693    case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT:
1694       /* Host memory can be imported as any memory type. */
1695       pMemoryHostPointerProperties->memoryTypeBits =
1696          (1ull << device->physical->memory.type_count) - 1;
1697 
1698       return VK_SUCCESS;
1699 
1700    default:
1701       return VK_ERROR_INVALID_EXTERNAL_HANDLE;
1702    }
1703 }
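/* The application-side counterpart is a host-pointer import (hypothetical
 * code; host_alloc must respect
 * VkPhysicalDeviceExternalMemoryHostPropertiesEXT::minImportedHostPointerAlignment):
 *
 *    VkImportMemoryHostPointerInfoEXT host_ptr_info = {
 *       .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
 *       .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
 *       .pHostPointer = host_alloc,
 *    };
 *
 * Chained into VkMemoryAllocateInfo::pNext, this sends anv_AllocateMemory()
 * down the anv_device_import_bo_from_host_ptr() path above.
 */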
1704 
1705 void anv_FreeMemory(
1706     VkDevice                                    _device,
1707     VkDeviceMemory                              _mem,
1708     const VkAllocationCallbacks*                pAllocator)
1709 {
1710    ANV_FROM_HANDLE(anv_device, device, _device);
1711    ANV_FROM_HANDLE(anv_device_memory, mem, _mem);
1712 
1713    if (mem == NULL)
1714       return;
1715 
1716    pthread_mutex_lock(&device->mutex);
1717    list_del(&mem->link);
1718    pthread_mutex_unlock(&device->mutex);
1719 
1720    if (mem->map) {
1721       const VkMemoryUnmapInfoKHR unmap = {
1722          .sType = VK_STRUCTURE_TYPE_MEMORY_UNMAP_INFO_KHR,
1723          .memory = _mem,
1724       };
1725       anv_UnmapMemory2KHR(_device, &unmap);
1726    }
1727 
1728    p_atomic_add(&device->physical->memory.heaps[mem->type->heapIndex].used,
1729                 -mem->bo->size);
1730 
1731    anv_device_release_bo(device, mem->bo);
1732 
1733    ANV_RMV(resource_destroy, device, mem);
1734 
1735    vk_device_memory_destroy(&device->vk, pAllocator, &mem->vk);
1736 }
1737 
1738 VkResult anv_MapMemory2KHR(
1739     VkDevice                                    _device,
1740     const VkMemoryMapInfoKHR*                   pMemoryMapInfo,
1741     void**                                      ppData)
1742 {
1743    ANV_FROM_HANDLE(anv_device, device, _device);
1744    ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryMapInfo->memory);
1745 
1746    if (mem == NULL) {
1747       *ppData = NULL;
1748       return VK_SUCCESS;
1749    }
1750 
1751    if (mem->vk.host_ptr) {
1752       *ppData = mem->vk.host_ptr + pMemoryMapInfo->offset;
1753       return VK_SUCCESS;
1754    }
1755 
1756    /* From the Vulkan spec version 1.0.32 docs for MapMemory:
1757     *
1758     *  * memory must have been created with a memory type that reports
1759     *    VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
1760     */
1761    if (!(mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) {
1762       return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
1763                        "Memory object not mappable.");
1764    }
1765 
1766    assert(pMemoryMapInfo->size > 0);
1767    const VkDeviceSize offset = pMemoryMapInfo->offset;
1768    const VkDeviceSize size =
1769       vk_device_memory_range(&mem->vk, pMemoryMapInfo->offset,
1770                                        pMemoryMapInfo->size);
1771 
1772    if (size != (size_t)size) {
1773       return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
1774                        "requested size 0x%"PRIx64" does not fit in %u bits",
1775                        size, (unsigned)(sizeof(size_t) * 8));
1776    }
1777 
1778    /* From the Vulkan 1.2.194 spec:
1779     *
1780     *    "memory must not be currently host mapped"
1781     */
1782    if (mem->map != NULL) {
1783       return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
1784                        "Memory object already mapped.");
1785    }
1786 
1787    void *placed_addr = NULL;
1788    if (pMemoryMapInfo->flags & VK_MEMORY_MAP_PLACED_BIT_EXT) {
1789       const VkMemoryMapPlacedInfoEXT *placed_info =
1790          vk_find_struct_const(pMemoryMapInfo->pNext, MEMORY_MAP_PLACED_INFO_EXT);
1791       assert(placed_info != NULL);
1792       placed_addr = placed_info->pPlacedAddress;
1793    }
1794 
1795    /* GEM will fail to map if the offset isn't 4k-aligned.  Round down. */
1796    uint64_t map_offset;
1797    if (!device->physical->info.has_mmap_offset)
1798       map_offset = offset & ~4095ull;
1799    else
1800       map_offset = 0;
1801    assert(offset >= map_offset);
1802    uint64_t map_size = (offset + size) - map_offset;
1803 
1804    /* Let's map whole pages */
1805    map_size = align64(map_size, 4096);
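   /* Worked example for the legacy (!has_mmap_offset) path, with illustrative
    * numbers: offset = 5000 and size = 100 give map_offset = 4096,
    * map_size = align64(5100 - 4096, 4096) = 4096 and map_delta (set below)
    * = 904, so the pointer returned to the application is map + 904.
    */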
1806 
1807    void *map;
1808    VkResult result = anv_device_map_bo(device, mem->bo, map_offset,
1809                                        map_size, placed_addr, &map);
1810    if (result != VK_SUCCESS)
1811       return result;
1812 
1813    mem->map = map;
1814    mem->map_size = map_size;
1815    mem->map_delta = (offset - map_offset);
1816    *ppData = mem->map + mem->map_delta;
1817 
1818    return VK_SUCCESS;
1819 }
1820 
1821 VkResult anv_UnmapMemory2KHR(
1822     VkDevice                                    _device,
1823     const VkMemoryUnmapInfoKHR*                 pMemoryUnmapInfo)
1824 {
1825    ANV_FROM_HANDLE(anv_device, device, _device);
1826    ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryUnmapInfo->memory);
1827 
1828    if (mem == NULL || mem->vk.host_ptr)
1829       return VK_SUCCESS;
1830 
1831    VkResult result =
1832       anv_device_unmap_bo(device, mem->bo, mem->map, mem->map_size,
1833                           pMemoryUnmapInfo->flags & VK_MEMORY_UNMAP_RESERVE_BIT_EXT);
1834    if (result != VK_SUCCESS)
1835       return result;
1836 
1837    mem->map = NULL;
1838    mem->map_size = 0;
1839    mem->map_delta = 0;
1840 
1841    return VK_SUCCESS;
1842 }
1843 
1844 VkResult anv_FlushMappedMemoryRanges(
1845     VkDevice                                    _device,
1846     uint32_t                                    memoryRangeCount,
1847     const VkMappedMemoryRange*                  pMemoryRanges)
1848 {
1849 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
1850    ANV_FROM_HANDLE(anv_device, device, _device);
1851 
1852    if (!device->physical->memory.need_flush)
1853       return VK_SUCCESS;
1854 
1855    /* Make sure the writes we're flushing have landed. */
1856    __builtin_ia32_mfence();
1857 
1858    for (uint32_t i = 0; i < memoryRangeCount; i++) {
1859       ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryRanges[i].memory);
1860       if (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
1861          continue;
1862 
1863       uint64_t map_offset = pMemoryRanges[i].offset + mem->map_delta;
1864       if (map_offset >= mem->map_size)
1865          continue;
1866 
1867       intel_flush_range(mem->map + map_offset,
1868                         MIN2(pMemoryRanges[i].size,
1869                              mem->map_size - map_offset));
1870    }
1871 #endif
1872    return VK_SUCCESS;
1873 }
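/* When device->physical->memory.need_flush is set, the flush above backs the
 * usual non-coherent mapping pattern on the application side (hypothetical
 * code, assuming a HOST_VISIBLE but not HOST_COHERENT memory type):
 *
 *    void *ptr;
 *    vkMapMemory(device, memory, 0, VK_WHOLE_SIZE, 0, &ptr);
 *    memcpy(ptr, data, data_size);
 *    VkMappedMemoryRange range = {
 *       .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
 *       .memory = memory,
 *       .offset = 0,
 *       .size = VK_WHOLE_SIZE,
 *    };
 *    vkFlushMappedMemoryRanges(device, 1, &range);
 */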
1874 
1875 VkResult anv_InvalidateMappedMemoryRanges(
1876     VkDevice                                    _device,
1877     uint32_t                                    memoryRangeCount,
1878     const VkMappedMemoryRange*                  pMemoryRanges)
1879 {
1880 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
1881    ANV_FROM_HANDLE(anv_device, device, _device);
1882 
1883    if (!device->physical->memory.need_flush)
1884       return VK_SUCCESS;
1885 
1886    for (uint32_t i = 0; i < memoryRangeCount; i++) {
1887       ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryRanges[i].memory);
1888       if (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
1889          continue;
1890 
1891       uint64_t map_offset = pMemoryRanges[i].offset + mem->map_delta;
1892       if (map_offset >= mem->map_size)
1893          continue;
1894 
1895       intel_invalidate_range(mem->map + map_offset,
1896                              MIN2(pMemoryRanges[i].size,
1897                                   mem->map_size - map_offset));
1898    }
1899 
1900    /* Make sure no reads get moved up above the invalidate. */
1901    __builtin_ia32_mfence();
1902 #endif
1903    return VK_SUCCESS;
1904 }
1905 
1906 void anv_GetDeviceMemoryCommitment(
1907     VkDevice                                    device,
1908     VkDeviceMemory                              memory,
1909     VkDeviceSize*                               pCommittedMemoryInBytes)
1910 {
1911    *pCommittedMemoryInBytes = 0;
1912 }
1913 
1914 static inline clockid_t
1915 anv_get_default_cpu_clock_id(void)
1916 {
1917 #ifdef CLOCK_MONOTONIC_RAW
1918    return CLOCK_MONOTONIC_RAW;
1919 #else
1920    return CLOCK_MONOTONIC;
1921 #endif
1922 }
1923 
1924 static inline clockid_t
1925 vk_time_domain_to_clockid(VkTimeDomainKHR domain)
1926 {
1927    switch (domain) {
1928 #ifdef CLOCK_MONOTONIC_RAW
1929    case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR:
1930       return CLOCK_MONOTONIC_RAW;
1931 #endif
1932    case VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR:
1933       return CLOCK_MONOTONIC;
1934    default:
1935       unreachable("Missing");
1936       return CLOCK_MONOTONIC;
1937    }
1938 }
1939 
1940 static inline bool
1941 is_cpu_time_domain(VkTimeDomainKHR domain)
1942 {
1943    return domain == VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR ||
1944           domain == VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR;
1945 }
1946 
1947 static inline bool
1948 is_gpu_time_domain(VkTimeDomainKHR domain)
1949 {
1950    return domain == VK_TIME_DOMAIN_DEVICE_KHR;
1951 }
1952 
1953 VkResult anv_GetCalibratedTimestampsKHR(
1954    VkDevice                                     _device,
1955    uint32_t                                     timestampCount,
1956    const VkCalibratedTimestampInfoKHR           *pTimestampInfos,
1957    uint64_t                                     *pTimestamps,
1958    uint64_t                                     *pMaxDeviation)
1959 {
1960    ANV_FROM_HANDLE(anv_device, device, _device);
1961    const uint64_t timestamp_frequency = device->info->timestamp_frequency;
1962    const uint64_t device_period = DIV_ROUND_UP(1000000000, timestamp_frequency);
1963    uint32_t d, increment;
1964    uint64_t begin, end;
1965    uint64_t max_clock_period = 0;
1966    const enum intel_kmd_type kmd_type = device->physical->info.kmd_type;
1967    const bool has_correlate_timestamp = kmd_type == INTEL_KMD_TYPE_XE;
1968    clockid_t cpu_clock_id = -1;
1969 
1970    begin = end = vk_clock_gettime(anv_get_default_cpu_clock_id());
1971 
1972    for (d = 0, increment = 1; d < timestampCount; d += increment) {
1973       const VkTimeDomainKHR current = pTimestampInfos[d].timeDomain;
1974       /* If we have a request pattern like this:
1975        * - domain0 = VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR or VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR
1976        * - domain1 = VK_TIME_DOMAIN_DEVICE_KHR
1977        * - domain2 = domain0 (optional)
1978        *
1979        * We can combine all of those into a single ioctl for maximum accuracy.
1980        */
1981       if (has_correlate_timestamp && (d + 1) < timestampCount) {
1982          const VkTimeDomainKHR next = pTimestampInfos[d + 1].timeDomain;
1983 
1984          if ((is_cpu_time_domain(current) && is_gpu_time_domain(next)) ||
1985              (is_gpu_time_domain(current) && is_cpu_time_domain(next))) {
1986             /* We'll consume at least 2 elements. */
1987             increment = 2;
1988 
1989             if (is_cpu_time_domain(current))
1990                cpu_clock_id = vk_time_domain_to_clockid(current);
1991             else
1992                cpu_clock_id = vk_time_domain_to_clockid(next);
1993 
1994             uint64_t cpu_timestamp, gpu_timestamp, cpu_delta_timestamp, cpu_end_timestamp;
1995             if (!intel_gem_read_correlate_cpu_gpu_timestamp(device->fd,
1996                                                             kmd_type,
1997                                                             INTEL_ENGINE_CLASS_RENDER,
1998                                                             0 /* engine_instance */,
1999                                                             cpu_clock_id,
2000                                                             &cpu_timestamp,
2001                                                             &gpu_timestamp,
2002                                                             &cpu_delta_timestamp))
2003                return vk_device_set_lost(&device->vk, "Failed to read correlate timestamp %m");
2004 
2005             cpu_end_timestamp = cpu_timestamp + cpu_delta_timestamp;
2006             if (is_cpu_time_domain(current)) {
2007                pTimestamps[d] = cpu_timestamp;
2008                pTimestamps[d + 1] = gpu_timestamp;
2009             } else {
2010                pTimestamps[d] = gpu_timestamp;
2011                pTimestamps[d + 1] = cpu_end_timestamp;
2012             }
2013             max_clock_period = MAX2(max_clock_period, device_period);
2014 
2015             /* If we can consume a third element */
2016             if ((d + 2) < timestampCount &&
2017                 is_cpu_time_domain(current) &&
2018                 current == pTimestampInfos[d + 2].timeDomain) {
2019                pTimestamps[d + 2] = cpu_end_timestamp;
2020                increment++;
2021             }
2022 
2023             /* If we're the first element, we can replace begin */
2024             if (d == 0 && cpu_clock_id == anv_get_default_cpu_clock_id())
2025                begin = cpu_timestamp;
2026 
2027             /* If we're in the same clock domain as begin/end, we can set the end. */
2028             if (cpu_clock_id == anv_get_default_cpu_clock_id())
2029                end = cpu_end_timestamp;
2030 
2031             continue;
2032          }
2033       }
2034 
2035       /* fallback to regular method */
2036       increment = 1;
2037       switch (current) {
2038       case VK_TIME_DOMAIN_DEVICE_KHR:
2039          if (!intel_gem_read_render_timestamp(device->fd,
2040                                               device->info->kmd_type,
2041                                               &pTimestamps[d])) {
2042             return vk_device_set_lost(&device->vk, "Failed to read the "
2043                                       "TIMESTAMP register: %m");
2044          }
2045          max_clock_period = MAX2(max_clock_period, device_period);
2046          break;
2047       case VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR:
2048          pTimestamps[d] = vk_clock_gettime(CLOCK_MONOTONIC);
2049          max_clock_period = MAX2(max_clock_period, 1);
2050          break;
2051 
2052 #ifdef CLOCK_MONOTONIC_RAW
2053       case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR:
2054          pTimestamps[d] = begin;
2055          break;
2056 #endif
2057       default:
2058          pTimestamps[d] = 0;
2059          break;
2060       }
2061    }
2062 
2063    /* If the last timestamp was not read via the has_correlate_timestamp path,
2064     * or if it was but its CPU clock is not the default one, get the time again.
2065     */
2066    if (increment == 1 || cpu_clock_id != anv_get_default_cpu_clock_id())
2067       end = vk_clock_gettime(anv_get_default_cpu_clock_id());
2068 
2069    *pMaxDeviation = vk_time_max_deviation(begin, end, max_clock_period);
2070 
2071    return VK_SUCCESS;
2072 }
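/* A sketch of the request pattern that hits the combined-read path above on
 * kernels where it is supported (hypothetical application code):
 *
 *    VkCalibratedTimestampInfoKHR infos[2] = {
 *       { .sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_KHR,
 *         .timeDomain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR },
 *       { .sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_KHR,
 *         .timeDomain = VK_TIME_DOMAIN_DEVICE_KHR },
 *    };
 *    uint64_t timestamps[2], max_deviation;
 *    vkGetCalibratedTimestampsKHR(device, 2, infos, timestamps,
 *                                 &max_deviation);
 *
 * Because the CPU and GPU domains are adjacent, both values come from a
 * single correlated read, which minimizes the reported deviation.
 */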
2073 
2074 const struct intel_device_info_pat_entry *
2075 anv_device_get_pat_entry(struct anv_device *device,
2076                          enum anv_bo_alloc_flags alloc_flags)
2077 {
2078    if (alloc_flags & ANV_BO_ALLOC_IMPORTED)
2079       return &device->info->pat.cached_coherent;
2080 
2081    if (alloc_flags & ANV_BO_ALLOC_COMPRESSED)
2082       return &device->info->pat.compressed;
2083 
2084    /* PAT indexes have no actual effect on DG2 and DG1: smem will always be
2085     * snooped by the GPU and lmem will always be WC.
2086     * This might change in future discrete platforms.
2087     */
2088    if (anv_physical_device_has_vram(device->physical)) {
2089       if (alloc_flags & ANV_BO_ALLOC_NO_LOCAL_MEM)
2090          return &device->info->pat.cached_coherent;
2091       return &device->info->pat.writecombining;
2092    }
2093 
2094    /* Integrated-platform handling only from here on. */
2095    if ((alloc_flags & (ANV_BO_ALLOC_HOST_CACHED_COHERENT)) == ANV_BO_ALLOC_HOST_CACHED_COHERENT)
2096       return &device->info->pat.cached_coherent;
2097    else if (alloc_flags & (ANV_BO_ALLOC_EXTERNAL | ANV_BO_ALLOC_SCANOUT))
2098       return &device->info->pat.scanout;
2099    else if (alloc_flags & ANV_BO_ALLOC_HOST_CACHED)
2100       return &device->info->pat.writeback_incoherent;
2101    else
2102       return &device->info->pat.writecombining;
2103 }
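/* Summary of the selection above: imported BOs and fully cached+coherent
 * integrated allocations use the cached_coherent entry, compressed BOs the
 * compressed entry, allocations on discrete cards use writecombining (or
 * cached_coherent when forced into smem), external/scanout integrated
 * allocations use scanout, host-cached-but-incoherent ones
 * writeback_incoherent, and everything else writecombining.
 */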
2104