#include "zink_query.h" #include "zink_context.h" #include "zink_clear.h" #include "zink_program.h" #include "zink_resource.h" #include "zink_screen.h" #include "util/u_dump.h" #include "util/u_inlines.h" #include "util/u_memory.h" #define NUM_QUERIES 500 #define ZINK_QUERY_RENDER_PASSES (PIPE_QUERY_DRIVER_SPECIFIC + 0) struct zink_query_pool { struct list_head list; VkQueryType vk_query_type; VkQueryPipelineStatisticFlags pipeline_stats; VkQueryPool query_pool; unsigned last_range; unsigned refcount; }; struct zink_query_buffer { struct list_head list; unsigned num_results; struct pipe_resource *buffers[PIPE_MAX_VERTEX_STREAMS]; }; struct zink_vk_query { struct zink_query_pool *pool; unsigned query_id; bool needs_reset; bool started; uint32_t refcount; }; struct zink_query_start { union { struct { bool have_gs; bool have_xfb; bool was_line_loop; }; uint32_t data; }; struct zink_vk_query *vkq[PIPE_MAX_VERTEX_STREAMS]; }; struct zink_query { struct threaded_query base; enum pipe_query_type type; /* Everytime the gallium query needs * another vulkan query, add a new start. */ struct util_dynarray starts; unsigned start_offset; VkQueryType vkqtype; unsigned index; bool precise; bool active; /* query is considered active by vk */ bool needs_reset; /* query is considered active by vk and cannot be destroyed */ bool dead; /* query should be destroyed when its fence finishes */ bool needs_update; /* query needs to update its qbos */ bool needs_rast_discard_workaround; /* query needs discard disabled */ bool suspended; bool started_in_rp; //needs to be stopped in rp struct list_head active_list; struct list_head stats_list; /* when active, statistics queries are added to ctx->primitives_generated_queries */ bool has_draws; /* have_gs and have_xfb are valid for idx=curr_query */ struct zink_batch_usage *batch_uses; //batch that the query was started in struct list_head buffers; union { struct zink_query_buffer *curr_qbo; struct pipe_fence_handle *fence; //PIPE_QUERY_GPU_FINISHED }; struct zink_resource *predicate; bool predicate_dirty; }; static const struct pipe_driver_query_info zink_specific_queries[] = { {"render-passes", ZINK_QUERY_RENDER_PASSES, { 0 }}, }; static inline int get_num_starts(struct zink_query *q) { return util_dynarray_num_elements(&q->starts, struct zink_query_start); } static void update_query_id(struct zink_context *ctx, struct zink_query *q); static VkQueryPipelineStatisticFlags pipeline_statistic_convert(enum pipe_statistics_query_index idx) { unsigned map[] = { [PIPE_STAT_QUERY_IA_VERTICES] = VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT, [PIPE_STAT_QUERY_IA_PRIMITIVES] = VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT, [PIPE_STAT_QUERY_VS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT, [PIPE_STAT_QUERY_GS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT, [PIPE_STAT_QUERY_GS_PRIMITIVES] = VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT, [PIPE_STAT_QUERY_C_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT, [PIPE_STAT_QUERY_C_PRIMITIVES] = VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT, [PIPE_STAT_QUERY_PS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT, [PIPE_STAT_QUERY_HS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT, [PIPE_STAT_QUERY_DS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT, [PIPE_STAT_QUERY_CS_INVOCATIONS] = 
static void
begin_vk_query_indexed(struct zink_context *ctx, struct zink_vk_query *vkq, int index,
                       VkQueryControlFlags flags)
{
   if (!vkq->started) {
      VKCTX(CmdBeginQueryIndexedEXT)(ctx->bs->cmdbuf,
                                     vkq->pool->query_pool,
                                     vkq->query_id,
                                     flags,
                                     index);
      vkq->started = true;
   }
}

static void
end_vk_query_indexed(struct zink_context *ctx, struct zink_vk_query *vkq, int index)
{
   if (vkq->started) {
      VKCTX(CmdEndQueryIndexedEXT)(ctx->bs->cmdbuf,
                                   vkq->pool->query_pool,
                                   vkq->query_id, index);
      vkq->started = false;
   }
}

static void
reset_vk_query_pool(struct zink_context *ctx, struct zink_vk_query *vkq)
{
   if (vkq->needs_reset) {
      VKCTX(CmdResetQueryPool)(ctx->bs->reordered_cmdbuf, vkq->pool->query_pool, vkq->query_id, 1);
      ctx->bs->has_reordered_work = true;
   }
   vkq->needs_reset = false;
}

void
zink_context_destroy_query_pools(struct zink_context *ctx)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   list_for_each_entry_safe(struct zink_query_pool, pool, &ctx->query_pools, list) {
      VKSCR(DestroyQueryPool)(screen->dev, pool->query_pool, NULL);
      list_del(&pool->list);
      FREE(pool);
   }
}

static struct zink_query_pool *
find_or_allocate_qp(struct zink_context *ctx, struct zink_query *q, unsigned idx)
{
   VkQueryPipelineStatisticFlags pipeline_stats = 0;
   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->vkqtype != VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT)
      pipeline_stats = VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT |
                       VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT;
   else if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE)
      pipeline_stats = pipeline_statistic_convert(q->index);

   VkQueryType vk_query_type = q->vkqtype;
   /* if xfb is active, we need to use an xfb query, otherwise we need pipeline statistics */
   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && idx == 1) {
      vk_query_type = VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT;
      pipeline_stats = 0;
   }

   struct zink_screen *screen = zink_screen(ctx->base.screen);
   list_for_each_entry(struct zink_query_pool, pool, &ctx->query_pools, list) {
      if (pool->vk_query_type == vk_query_type) {
         if (vk_query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
            if (pool->pipeline_stats == pipeline_stats)
               return pool;
         } else
            return pool;
      }
   }

   struct zink_query_pool *new_pool = CALLOC_STRUCT(zink_query_pool);
   if (!new_pool)
      return NULL;

   new_pool->vk_query_type = vk_query_type;
   new_pool->pipeline_stats = pipeline_stats;

   VkQueryPoolCreateInfo pool_create = {0};
   pool_create.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
   pool_create.queryType = vk_query_type;
   pool_create.queryCount = NUM_QUERIES;
   pool_create.pipelineStatistics = pipeline_stats;

   VkResult status = VKSCR(CreateQueryPool)(screen->dev, &pool_create, NULL, &new_pool->query_pool);
   if (status != VK_SUCCESS) {
      mesa_loge("ZINK: vkCreateQueryPool failed (%s)", vk_Result_to_str(status));
      FREE(new_pool);
      return NULL;
   }

   list_addtail(&new_pool->list, &ctx->query_pools);
   return new_pool;
}

static void
update_qbo(struct zink_context *ctx, struct zink_query *q);
static void
reset_qbos(struct zink_context *ctx, struct zink_query *q);

static bool
is_emulated_primgen(const struct zink_query *q)
{
   return q->type == PIPE_QUERY_PRIMITIVES_GENERATED &&
          q->vkqtype != VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT;
}

static inline unsigned
get_num_query_pools(struct zink_query *q)
{
   if (is_emulated_primgen(q))
      return 2;
   return 1;
}

static inline unsigned
get_num_queries(struct zink_query *q)
{
   if (is_emulated_primgen(q))
      return 2;
   if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      return PIPE_MAX_VERTEX_STREAMS;
   return 1;
}
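/* Query pools are shared between queries: a pool is keyed by its Vulkan query
 * type, except for pipeline-statistics pools, which must also match the exact
 * statistics flags. Each pool holds NUM_QUERIES slots; when last_range
 * reaches NUM_QUERIES, query_pool_get_range() unlinks the pool and allocates
 * a fresh one.
 */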
static inline unsigned
get_num_results(struct zink_query *q)
{
   if (q->type < PIPE_QUERY_DRIVER_SPECIFIC &&
       q->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT)
      return 1;
   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
      return 1;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      return 2;
   default:
      debug_printf("unknown query: %s\n", util_str_query_type(q->type, true));
      unreachable("zink: unknown query type");
   }
}

static void
timestamp_to_nanoseconds(struct zink_screen *screen, uint64_t *timestamp)
{
   /* The number of valid bits in a timestamp value is determined by
    * the VkQueueFamilyProperties::timestampValidBits property of the queue on which the timestamp is written.
    * - 17.5. Timestamp Queries
    */
   if (screen->timestamp_valid_bits < 64)
      *timestamp &= (1ull << screen->timestamp_valid_bits) - 1;

   /* The number of nanoseconds it takes for a timestamp value to be incremented by 1
    * can be obtained from VkPhysicalDeviceLimits::timestampPeriod
    * - 17.5. Timestamp Queries
    */
   *timestamp *= (double)screen->info.props.limits.timestampPeriod;
}

static VkQueryType
convert_query_type(struct zink_screen *screen, enum pipe_query_type query_type, bool *precise)
{
   *precise = false;
   switch (query_type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
      *precise = true;
      FALLTHROUGH;
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      return VK_QUERY_TYPE_OCCLUSION;
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
      return VK_QUERY_TYPE_TIMESTAMP;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      return screen->info.have_EXT_primitives_generated_query ?
             VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT :
             VK_QUERY_TYPE_PIPELINE_STATISTICS;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
      return VK_QUERY_TYPE_PIPELINE_STATISTICS;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      return VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT;
   default:
      debug_printf("unknown query: %s\n", util_str_query_type(query_type, true));
      unreachable("zink: unknown query type");
   }
}
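/* The two-result cases above map to Vulkan queries that write a pair of
 * values per slot: transform feedback queries capture
 * {numPrimitivesWritten, numPrimitivesNeeded}, and emulated
 * PRIMITIVES_GENERATED captures two pipeline statistics (GS primitives and
 * clipping invocations) so that check_query_results() can pick the right one
 * based on whether a geometry shader was bound.
 */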
static bool
needs_stats_list(struct zink_query *query)
{
   return is_emulated_primgen(query) ||
          query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE ||
          query->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE;
}

static bool
is_time_query(struct zink_query *query)
{
   return query->type == PIPE_QUERY_TIMESTAMP || query->type == PIPE_QUERY_TIME_ELAPSED;
}

static bool
is_so_overflow_query(struct zink_query *query)
{
   return query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE ||
          query->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE;
}

static bool
is_bool_query(struct zink_query *query)
{
   return is_so_overflow_query(query) ||
          query->type == PIPE_QUERY_OCCLUSION_PREDICATE ||
          query->type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE ||
          query->type == PIPE_QUERY_GPU_FINISHED;
}

static bool
qbo_append(struct pipe_screen *screen, struct zink_query *query)
{
   if (query->curr_qbo && query->curr_qbo->list.next)
      return true;
   struct zink_query_buffer *qbo = CALLOC_STRUCT(zink_query_buffer);
   if (!qbo)
      return false;
   int num_buffers = get_num_queries(query);

   for (unsigned i = 0; i < num_buffers; i++) {
      qbo->buffers[i] = pipe_buffer_create(screen, PIPE_BIND_QUERY_BUFFER,
                                           PIPE_USAGE_STAGING,
                                           /* this is the maximum possible size of the results in a given buffer */
                                           (query->type == PIPE_QUERY_TIMESTAMP ? 1 : NUM_QUERIES) *
                                           get_num_results(query) * sizeof(uint64_t));
      if (!qbo->buffers[i])
         goto fail;
   }
   list_addtail(&qbo->list, &query->buffers);

   return true;
fail:
   for (unsigned i = 0; i < num_buffers; i++)
      pipe_resource_reference(&qbo->buffers[i], NULL);
   FREE(qbo);
   return false;
}

static void
unref_vk_pool(struct zink_context *ctx, struct zink_query_pool *pool)
{
   if (!pool || --pool->refcount)
      return;
   util_dynarray_append(&ctx->bs->dead_querypools, VkQueryPool, pool->query_pool);
   if (list_is_linked(&pool->list))
      list_del(&pool->list);
   FREE(pool);
}

static void
unref_vk_query(struct zink_context *ctx, struct zink_vk_query *vkq)
{
   if (!vkq)
      return;

   unref_vk_pool(ctx, vkq->pool);
   vkq->refcount--;
   if (vkq->refcount == 0)
      FREE(vkq);
}

static void
destroy_query(struct zink_context *ctx, struct zink_query *query)
{
   ASSERTED struct zink_screen *screen = zink_screen(ctx->base.screen);
   assert(zink_screen_usage_check_completion(screen, query->batch_uses));
   struct zink_query_buffer *qbo, *next;

   struct zink_query_start *starts = query->starts.data;
   unsigned num_starts = query->starts.capacity / sizeof(struct zink_query_start);
   for (unsigned j = 0; j < num_starts; j++) {
      for (unsigned i = 0; i < PIPE_MAX_VERTEX_STREAMS; i++) {
         unref_vk_query(ctx, starts[j].vkq[i]);
      }
   }

   util_dynarray_fini(&query->starts);
   LIST_FOR_EACH_ENTRY_SAFE(qbo, next, &query->buffers, list) {
      for (unsigned i = 0; i < ARRAY_SIZE(qbo->buffers); i++)
         pipe_resource_reference(&qbo->buffers[i], NULL);
      FREE(qbo);
   }
   pipe_resource_reference((struct pipe_resource**)&query->predicate, NULL);
   FREE(query);
}

static void
reset_qbo(struct zink_query *q)
{
   q->curr_qbo = list_first_entry(&q->buffers, struct zink_query_buffer, list);
   q->curr_qbo->num_results = 0;
}
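/* Each query owns a list of query buffer objects (qbos) that shadow the
 * Vulkan query pool results in host-readable buffers. A qbo is sized for the
 * worst case of NUM_QUERIES * get_num_results() 64-bit values; timestamp
 * queries are the exception and only ever hold a single result at offset 0.
 */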
static void
query_pool_get_range(struct zink_context *ctx, struct zink_query *q)
{
   bool is_timestamp = q->type == PIPE_QUERY_TIMESTAMP;
   struct zink_query_start *start;
   int num_queries = get_num_queries(q);
   if (!is_timestamp || get_num_starts(q) == 0) {
      size_t size = q->starts.capacity;
      start = util_dynarray_grow(&q->starts, struct zink_query_start, 1);
      if (size != q->starts.capacity) {
         /* when resizing, always zero the new data to avoid garbage */
         uint8_t *data = q->starts.data;
         memset(data + size, 0, q->starts.capacity - size);
      }
   } else {
      start = util_dynarray_top_ptr(&q->starts, struct zink_query_start);
   }
   start->data = 0;

   unsigned num_pools = get_num_query_pools(q);
   for (unsigned i = 0; i < num_queries; i++) {
      int pool_idx = num_pools > 1 ? i : 0;
      /* try and find the active query for this */
      struct zink_vk_query *vkq;
      int xfb_idx = num_queries == 4 ? i : q->index;
      if ((q->vkqtype == VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT ||
           (pool_idx == 1)) && ctx->curr_xfb_queries[xfb_idx]) {
         vkq = ctx->curr_xfb_queries[xfb_idx];
         vkq->refcount++;
         vkq->pool->refcount++;
      } else {
         struct zink_query_pool *pool = find_or_allocate_qp(ctx, q, pool_idx);
         if (pool->last_range == NUM_QUERIES) {
            list_del(&pool->list);
            pool = find_or_allocate_qp(ctx, q, pool_idx);
         }
         vkq = CALLOC_STRUCT(zink_vk_query);
         if (!vkq) {
            mesa_loge("ZINK: failed to allocate vkq!");
            return;
         }

         pool->refcount++;
         vkq->refcount = 1;
         vkq->needs_reset = true;
         vkq->pool = pool;
         vkq->started = false;
         vkq->query_id = pool->last_range++;
      }
      unref_vk_query(ctx, start->vkq[i]);
      start->vkq[i] = vkq;
   }
}

static struct pipe_query *
zink_create_query(struct pipe_context *pctx,
                  unsigned query_type, unsigned index)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_query *query = CALLOC_STRUCT(zink_query);

   if (!query)
      return NULL;
   list_inithead(&query->buffers);

   query->index = index;
   query->type = query_type;

   if (query->type >= PIPE_QUERY_DRIVER_SPECIFIC)
      return (struct pipe_query *)query;

   if (query->type == PIPE_QUERY_GPU_FINISHED || query->type == PIPE_QUERY_TIMESTAMP_DISJOINT)
      return (struct pipe_query *)query;
   query->vkqtype = convert_query_type(screen, query_type, &query->precise);
   if (query->vkqtype == -1)
      return NULL;

   util_dynarray_init(&query->starts, NULL);

   assert(!query->precise || query->vkqtype == VK_QUERY_TYPE_OCCLUSION);

   /* use emulated path for drivers without full support */
   if (query->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT && index &&
       !screen->info.primgen_feats.primitivesGeneratedQueryWithNonZeroStreams)
      query->vkqtype = VK_QUERY_TYPE_PIPELINE_STATISTICS;

   if (query->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT) {
      query->needs_rast_discard_workaround = !screen->info.primgen_feats.primitivesGeneratedQueryWithRasterizerDiscard;
   } else if (query_type == PIPE_QUERY_PRIMITIVES_GENERATED) {
      query->needs_rast_discard_workaround = true;
   }

   if (!qbo_append(pctx->screen, query))
      goto fail;
   ctx->bs->has_work = true;
   query->needs_reset = true;
   query->predicate_dirty = true;
   if (query->type == PIPE_QUERY_TIMESTAMP) {
      query->active = true;
      /* defer pool reset until end_query since we're guaranteed to be threadsafe then */
      reset_qbo(query);
   }
   return (struct pipe_query *)query;
fail:
   destroy_query(zink_context(pctx), query);
   return NULL;
}
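/* Destruction must be deferred while a batch still references the query,
 * since the GPU may not have finished with its Vulkan queries yet:
 * zink_destroy_query() below only marks such queries as dead, and the real
 * teardown happens in zink_prune_query() once the batch usage is retired.
 */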
static void
zink_destroy_query(struct pipe_context *pctx,
                   struct pipe_query *q)
{
   struct zink_query *query = (struct zink_query *)q;

   /* only destroy if this query isn't active on any batches,
    * otherwise just mark dead and wait
    */
   if (query->batch_uses) {
      query->dead = true;
      return;
   }

   destroy_query(zink_context(pctx), query);
}

void
zink_prune_query(struct zink_batch_state *bs, struct zink_query *query)
{
   if (!zink_batch_usage_matches(query->batch_uses, bs))
      return;
   query->batch_uses = NULL;
   if (query->dead)
      destroy_query(bs->ctx, query);
}

static void
check_query_results(struct zink_query *query, union pipe_query_result *result,
                    int num_starts, uint64_t *results, uint64_t *xfb_results)
{
   uint64_t last_val = 0;
   int result_size = get_num_results(query);
   int idx = 0;
   util_dynarray_foreach(&query->starts, struct zink_query_start, start) {
      unsigned i = idx * result_size;
      idx++;
      switch (query->type) {
      case PIPE_QUERY_OCCLUSION_PREDICATE:
      case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      case PIPE_QUERY_GPU_FINISHED:
         result->b |= results[i] != 0;
         break;

      case PIPE_QUERY_TIME_ELAPSED:
      case PIPE_QUERY_TIMESTAMP:
         /* the application can sum the differences between all N queries to determine the total execution time.
          * - 17.5. Timestamp Queries
          */
         if (query->type != PIPE_QUERY_TIME_ELAPSED || i)
            result->u64 += results[i] - last_val;
         last_val = results[i];
         break;
      case PIPE_QUERY_OCCLUSION_COUNTER:
         result->u64 += results[i];
         break;
      case PIPE_QUERY_PRIMITIVES_GENERATED:
         if (query->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT)
            result->u64 += results[i];
         else if (start->have_xfb || query->index)
            result->u64 += xfb_results[i + 1];
         else
            /* if a given draw had a geometry shader, we need to use the first result */
            result->u64 += results[i + !start->have_gs];
         break;
      case PIPE_QUERY_PRIMITIVES_EMITTED:
         /* A query pool created with this type will capture 2 integers -
          * numPrimitivesWritten and numPrimitivesNeeded -
          * for the specified vertex stream output from the last vertex processing stage.
          * - from VK_EXT_transform_feedback spec
          */
         result->u64 += results[i];
         break;
      case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
         /* A query pool created with this type will capture 2 integers -
          * numPrimitivesWritten and numPrimitivesNeeded -
          * for the specified vertex stream output from the last vertex processing stage.
          * - from VK_EXT_transform_feedback spec
          */
         if (start->have_xfb)
            result->b |= results[i] != results[i + 1];
         break;
      case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
         switch (query->index) {
         case PIPE_STAT_QUERY_IA_VERTICES:
            result->u64 += start->was_line_loop ? results[i] / 2 : results[i];
            break;
         default:
            result->u64 += results[i];
            break;
         }
         break;
      default:
         debug_printf("unhandled query type: %s\n",
                      util_str_query_type(query->type, true));
         unreachable("unexpected query type");
      }
   }
}
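/* Worked example for the TIME_ELAPSED case above: such a query normally has
 * two starts, one timestamp written at begin and one at end. The loop skips
 * i == 0 and then accumulates deltas, so with results = {t0, t1} it computes
 * t1 - t0; with more starts the sum telescopes to last - first. The caller
 * converts the raw tick count to nanoseconds via timestamp_to_nanoseconds().
 */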
static bool
get_query_result(struct pipe_context *pctx,
                 struct pipe_query *q,
                 bool wait,
                 union pipe_query_result *result)
{
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_query *query = (struct zink_query *)q;
   unsigned flags = PIPE_MAP_READ;

   if (!wait)
      flags |= PIPE_MAP_DONTBLOCK;
   if (query->base.flushed)
      /* this is not a context-safe operation; ensure map doesn't use slab alloc */
      flags |= PIPE_MAP_THREAD_SAFE;

   util_query_clear_result(result, query->type);

   int num_starts = get_num_starts(query);
   /* no results: return zero */
   if (!num_starts)
      return true;

   int result_size = get_num_results(query) * sizeof(uint64_t);
   int num_maps = get_num_queries(query);

   struct zink_query_buffer *qbo;
   struct pipe_transfer *xfer[PIPE_MAX_VERTEX_STREAMS] = { 0 };
   LIST_FOR_EACH_ENTRY(qbo, &query->buffers, list) {
      uint64_t *results[PIPE_MAX_VERTEX_STREAMS] = { NULL, NULL };
      bool is_timestamp = query->type == PIPE_QUERY_TIMESTAMP;
      if (!qbo->num_results)
         continue;

      for (unsigned i = 0; i < num_maps; i++) {
         results[i] = pipe_buffer_map_range(pctx, qbo->buffers[i], 0,
                                            (is_timestamp ? 1 : qbo->num_results) * result_size,
                                            flags, &xfer[i]);
         if (!results[i]) {
            if (wait)
               debug_printf("zink: qbo read failed!");
            goto fail;
         }
      }
      if (query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
         for (unsigned i = 0; i < PIPE_MAX_VERTEX_STREAMS && !result->b; i++) {
            check_query_results(query, result, num_starts, results[i], NULL);
         }
      } else
         check_query_results(query, result, num_starts, results[0], results[1]);

      for (unsigned i = 0; i < num_maps; i++)
         pipe_buffer_unmap(pctx, xfer[i]);

      /* if overflow is detected we can stop */
      if (query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE && result->b)
         break;
   }

   if (is_time_query(query))
      timestamp_to_nanoseconds(screen, &result->u64);

   return true;
fail:
   for (unsigned i = 0; i < num_maps; i++)
      if (xfer[i])
         pipe_buffer_unmap(pctx, xfer[i]);
   return false;
}

static void
force_cpu_read(struct zink_context *ctx, struct pipe_query *pquery,
               enum pipe_query_value_type result_type, struct pipe_resource *pres, unsigned offset)
{
   struct pipe_context *pctx = &ctx->base;
   unsigned result_size = result_type <= PIPE_QUERY_TYPE_U32 ? sizeof(uint32_t) : sizeof(uint64_t);
   struct zink_query *query = (struct zink_query *)pquery;
   union pipe_query_result result = {0};

   if (query->needs_update)
      update_qbo(ctx, query);

   bool success = get_query_result(pctx, pquery, true, &result);
   if (!success) {
      debug_printf("zink: getting query result failed\n");
      return;
   }

   if (result_type <= PIPE_QUERY_TYPE_U32) {
      uint32_t u32;
      uint32_t limit;
      if (result_type == PIPE_QUERY_TYPE_I32)
         limit = INT_MAX;
      else
         limit = UINT_MAX;
      if (is_bool_query(query))
         u32 = result.b;
      else
         u32 = MIN2(limit, result.u64);
      tc_buffer_write(pctx, pres, offset, result_size, &u32);
   } else {
      uint64_t u64;
      if (is_bool_query(query))
         u64 = result.b;
      else
         u64 = result.u64;
      tc_buffer_write(pctx, pres, offset, result_size, &u64);
   }
}
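/* vkCmdCopyQueryPoolResults must be recorded outside of a render pass
 * instance, which is why copy_pool_results_to_buffer() below ends any
 * active render pass with zink_batch_no_rp() before emitting the copy.
 */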
static void
copy_pool_results_to_buffer(struct zink_context *ctx, struct zink_query *query, VkQueryPool pool,
                            unsigned query_id, struct zink_resource *res, unsigned offset,
                            int num_results, VkQueryResultFlags flags)
{
   unsigned type_size = (flags & VK_QUERY_RESULT_64_BIT) ? sizeof(uint64_t) : sizeof(uint32_t);
   unsigned base_result_size = get_num_results(query) * type_size;
   unsigned result_size = base_result_size * num_results;
   if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
      result_size += type_size;

   bool marker = zink_cmd_debug_marker_begin(ctx, VK_NULL_HANDLE, "update_qbo(%s: id=%u, num_results=%d)",
                                             vk_QueryType_to_str(query->vkqtype), query_id, num_results);

   zink_batch_no_rp(ctx);
   /* if it's a single query that doesn't need special handling, we can copy it and be done */
   zink_batch_reference_resource_rw(ctx, res, true);
   res->obj->access = VK_ACCESS_TRANSFER_WRITE_BIT;
   res->obj->access_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
   util_range_add(&res->base.b, &res->valid_buffer_range, offset, offset + result_size);
   assert(query_id < NUM_QUERIES);
   res->obj->unordered_read = res->obj->unordered_write = false;
   ctx->bs->has_work = true;
   VKCTX(CmdCopyQueryPoolResults)(ctx->bs->cmdbuf, pool, query_id, num_results, res->obj->buffer,
                                  offset, base_result_size, flags);
   zink_cmd_debug_marker_end(ctx, ctx->bs->cmdbuf, marker);
}

static void
copy_results_to_buffer(struct zink_context *ctx, struct zink_query *query, struct zink_resource *res,
                       unsigned offset, int num_results, VkQueryResultFlags flags)
{
   struct zink_query_start *start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
   copy_pool_results_to_buffer(ctx, query, start->vkq[0]->pool->query_pool, start->vkq[0]->query_id,
                               res, offset, num_results, flags);
}

static void
reset_query_range(struct zink_context *ctx, struct zink_query *q)
{
   int num_queries = get_num_queries(q);
   struct zink_query_start *start = util_dynarray_top_ptr(&q->starts, struct zink_query_start);
   for (unsigned i = 0; i < num_queries; i++) {
      reset_vk_query_pool(ctx, start->vkq[i]);
   }
}

static void
reset_qbos(struct zink_context *ctx, struct zink_query *q)
{
   if (q->needs_update)
      update_qbo(ctx, q);

   q->needs_reset = false;
   /* create new qbo for non-timestamp queries:
    * timestamp queries should never need more than 2 entries in the qbo
    */
   if (q->type == PIPE_QUERY_TIMESTAMP)
      return;
   if (qbo_append(ctx->base.screen, q))
      reset_qbo(q);
   else
      debug_printf("zink: qbo alloc failed on reset!");
}

static inline unsigned
get_buffer_offset(struct zink_query *q)
{
   return (get_num_starts(q) - 1) * get_num_results(q) * sizeof(uint64_t);
}
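/* update_qbo() below batches readbacks: starts that landed in consecutive
 * slots of the same Vulkan query pool are coalesced into a single
 * vkCmdCopyQueryPoolResults call, e.g. starts with query ids {3, 4, 5} in
 * one pool become one copy with num_merged_copies == 3.
 */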
static void
update_qbo(struct zink_context *ctx, struct zink_query *q)
{
   struct zink_query_buffer *qbo = q->curr_qbo;
   unsigned num_starts = get_num_starts(q);
   struct zink_query_start *starts = q->starts.data;
   bool is_timestamp = q->type == PIPE_QUERY_TIMESTAMP;
   /* timestamp queries just write to offset 0 always */
   int num_queries = get_num_queries(q);
   unsigned num_results = qbo->num_results;
   for (unsigned i = 0; i < num_queries; i++) {
      unsigned start_offset = q->start_offset;
      while (start_offset < num_starts) {
         unsigned num_merged_copies = 0;
         VkQueryPool qp = starts[start_offset].vkq[i]->pool->query_pool;
         unsigned base_id = starts[start_offset].vkq[i]->query_id;
         /* iterate over all the starts to see how many can be merged */
         for (unsigned j = start_offset; j < num_starts; j++, num_merged_copies++) {
            if (starts[j].vkq[i]->pool->query_pool != qp ||
                starts[j].vkq[i]->query_id != base_id + num_merged_copies)
               break;
         }
         assert(num_merged_copies);
         unsigned cur_offset = start_offset * get_num_results(q) * sizeof(uint64_t);
         unsigned offset = is_timestamp ? 0 : cur_offset;
         copy_pool_results_to_buffer(ctx, q,
                                     starts[start_offset].vkq[i]->pool->query_pool,
                                     starts[start_offset].vkq[i]->query_id,
                                     zink_resource(qbo->buffers[i]),
                                     offset,
                                     num_merged_copies,
                                     /* there is an implicit execution dependency from each such query command
                                      * to all query commands previously submitted to the same queue. There is
                                      * one significant exception to this; if the flags parameter of
                                      * vkCmdCopyQueryPoolResults does not include VK_QUERY_RESULT_WAIT_BIT,
                                      * execution of vkCmdCopyQueryPoolResults may happen-before the results
                                      * of vkCmdEndQuery are available.
                                      *
                                      * - Chapter 18. Queries
                                      */
                                     VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
         if (!is_timestamp)
            q->curr_qbo->num_results += num_merged_copies;
         start_offset += num_merged_copies;
      }
   }
   q->start_offset += q->curr_qbo->num_results - num_results;

   if (is_timestamp)
      q->curr_qbo->num_results = 1;

   q->needs_update = false;
}
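/* Compute dispatches cannot execute inside a Vulkan render pass, so
 * begin_query() refuses to start PIPE_STAT_QUERY_CS_INVOCATIONS queries
 * while a render pass is active and parks them on ctx->suspended_queries
 * instead; zink_resume_cs_query() restarts them once the render pass ends.
 */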
static void
begin_query(struct zink_context *ctx, struct zink_query *q)
{
   VkQueryControlFlags flags = 0;

   if (q->type == PIPE_QUERY_TIMESTAMP_DISJOINT || q->type >= PIPE_QUERY_DRIVER_SPECIFIC)
      return;

   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && q->index == PIPE_STAT_QUERY_CS_INVOCATIONS && ctx->in_rp) {
      /* refuse to start CS queries in renderpasses */
      if (!list_is_linked(&q->active_list))
         list_addtail(&q->active_list, &ctx->suspended_queries);
      q->suspended = true;
      return;
   }

   update_query_id(ctx, q);
   q->predicate_dirty = true;
   if (q->needs_reset)
      reset_qbos(ctx, q);
   reset_query_range(ctx, q);
   q->active = true;
   ctx->bs->has_work = true;

   struct zink_query_start *start = util_dynarray_top_ptr(&q->starts, struct zink_query_start);
   if (q->type == PIPE_QUERY_TIME_ELAPSED) {
      VKCTX(CmdWriteTimestamp)(ctx->bs->cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                               start->vkq[0]->pool->query_pool, start->vkq[0]->query_id);
      if (!ctx->in_rp)
         update_qbo(ctx, q);
      zink_batch_usage_set(&q->batch_uses, ctx->bs);
      _mesa_set_add(&ctx->bs->active_queries, q);
   }
   /* ignore the rest of begin_query for timestamps */
   if (is_time_query(q))
      return;

   /* A query must either begin and end inside the same subpass of a render pass
    * instance, or must both begin and end outside of a render pass instance
    * (i.e. contain entire render pass instances).
    * - 18.2. Query Operation
    */
   q->started_in_rp = ctx->in_rp;

   if (q->precise)
      flags |= VK_QUERY_CONTROL_PRECISE_BIT;

   if (q->type == PIPE_QUERY_PRIMITIVES_EMITTED ||
       is_emulated_primgen(q) ||
       q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) {
      struct zink_vk_query *vkq = start->vkq[1] ? start->vkq[1] : start->vkq[0];
      assert(!ctx->curr_xfb_queries[q->index] || ctx->curr_xfb_queries[q->index] == vkq);
      ctx->curr_xfb_queries[q->index] = vkq;

      begin_vk_query_indexed(ctx, vkq, q->index, flags);
   } else if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
      for (unsigned i = 0; i < PIPE_MAX_VERTEX_STREAMS; i++) {
         assert(!ctx->curr_xfb_queries[i] || ctx->curr_xfb_queries[i] == start->vkq[i]);
         ctx->curr_xfb_queries[i] = start->vkq[i];

         begin_vk_query_indexed(ctx, start->vkq[i], i, flags);
      }
   } else if (q->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT) {
      begin_vk_query_indexed(ctx, start->vkq[0], q->index, flags);
   }
   if (q->vkqtype != VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT && q->vkqtype != VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT)
      VKCTX(CmdBeginQuery)(ctx->bs->cmdbuf, start->vkq[0]->pool->query_pool, start->vkq[0]->query_id, flags);

   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && q->index == PIPE_STAT_QUERY_IA_VERTICES) {
      assert(!ctx->vertices_query);
      ctx->vertices_query = q;
   }
   if (needs_stats_list(q))
      list_addtail(&q->stats_list, &ctx->primitives_generated_queries);

   zink_batch_usage_set(&q->batch_uses, ctx->bs);
   _mesa_set_add(&ctx->bs->active_queries, q);

   if (q->needs_rast_discard_workaround) {
      ctx->primitives_generated_active = true;
      if (zink_set_rasterizer_discard(ctx, true))
         zink_set_null_fs(ctx);
   }
}

static bool
zink_begin_query(struct pipe_context *pctx,
                 struct pipe_query *q)
{
   struct zink_query *query = (struct zink_query *)q;
   struct zink_context *ctx = zink_context(pctx);

   /* drop all past results */
   reset_qbo(query);

   if (query->type < PIPE_QUERY_DRIVER_SPECIFIC && query->vkqtype == VK_QUERY_TYPE_OCCLUSION)
      ctx->occlusion_query_active = true;
   if (query->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && query->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
      ctx->fs_query_active = true;

   query->predicate_dirty = true;

   util_dynarray_clear(&query->starts);
   query->start_offset = 0;

   if (ctx->in_rp || (query->type == PIPE_QUERY_TIME_ELAPSED)) {
      begin_query(ctx, query);
   } else {
      /* never directly start queries out of renderpass, always defer */
      list_addtail(&query->active_list, &ctx->suspended_queries);
      query->suspended = true;
      if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED)
         ctx->primitives_generated_suspended = query->needs_rast_discard_workaround;
   }

   return true;
}

static void
update_query_id(struct zink_context *ctx, struct zink_query *q)
{
   query_pool_get_range(ctx, q);
   ctx->bs->has_work = true;
   q->has_draws = false;
}
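/* end_query() must mirror begin_query(): a query has to end in the same
 * render pass state it was started in (see the started_in_rp assert below),
 * so zink_end_query() breaks the render pass first when the query was
 * started outside of one.
 */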
static void
end_query(struct zink_context *ctx, struct zink_query *q)
{
   if (q->type == PIPE_QUERY_TIMESTAMP_DISJOINT || q->type >= PIPE_QUERY_DRIVER_SPECIFIC)
      return;

   ASSERTED struct zink_query_buffer *qbo = q->curr_qbo;
   assert(qbo);
   assert(!is_time_query(q));
   q->active = false;
   assert(q->started_in_rp == ctx->in_rp);
   struct zink_query_start *start = util_dynarray_top_ptr(&q->starts, struct zink_query_start);

   if (q->type == PIPE_QUERY_PRIMITIVES_EMITTED ||
       is_emulated_primgen(q) ||
       q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) {
      struct zink_vk_query *vkq = start->vkq[1] ? start->vkq[1] : start->vkq[0];

      end_vk_query_indexed(ctx, vkq, q->index);
      ctx->curr_xfb_queries[q->index] = NULL;
   } else if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
      for (unsigned i = 0; i < PIPE_MAX_VERTEX_STREAMS; i++) {
         end_vk_query_indexed(ctx, start->vkq[i], i);
         ctx->curr_xfb_queries[i] = NULL;
      }
   } else if (q->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT) {
      end_vk_query_indexed(ctx, start->vkq[0], q->index);
   }
   if (q->vkqtype != VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT &&
       q->vkqtype != VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT && !is_time_query(q))
      VKCTX(CmdEndQuery)(ctx->bs->cmdbuf, start->vkq[0]->pool->query_pool, start->vkq[0]->query_id);

   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
       q->index == PIPE_STAT_QUERY_IA_VERTICES)
      ctx->vertices_query = NULL;

   if (needs_stats_list(q))
      list_delinit(&q->stats_list);

   q->needs_update = true;
   if (q->needs_rast_discard_workaround) {
      ctx->primitives_generated_active = false;
      if (zink_set_rasterizer_discard(ctx, false))
         zink_set_null_fs(ctx);
   }
}

static bool
zink_end_query(struct pipe_context *pctx,
               struct pipe_query *q)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_query *query = (struct zink_query *)q;

   if (query->type == PIPE_QUERY_TIMESTAMP_DISJOINT || query->type >= PIPE_QUERY_DRIVER_SPECIFIC)
      return true;

   if (query->type == PIPE_QUERY_GPU_FINISHED) {
      pctx->flush(pctx, &query->fence, PIPE_FLUSH_DEFERRED);
      return true;
   }

   /* FIXME: this can be called from a thread, but it needs to write to the cmdbuf */
   threaded_context_unwrap_sync(pctx);

   /* these flags are set in zink_begin_query() and must be cleared here */
   if (query->vkqtype == VK_QUERY_TYPE_OCCLUSION)
      ctx->occlusion_query_active = false;
   if (query->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && query->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
      ctx->fs_query_active = false;

   bool unset_null_fs = query->type == PIPE_QUERY_PRIMITIVES_GENERATED &&
                        (ctx->primitives_generated_suspended || ctx->primitives_generated_active);
   if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED)
      ctx->primitives_generated_suspended = false;

   if (list_is_linked(&query->stats_list))
      list_delinit(&query->stats_list);
   if (query->suspended) {
      list_delinit(&query->active_list);
      query->suspended = false;
   }
   if (is_time_query(query)) {
      update_query_id(ctx, query);
      if (query->needs_reset)
         reset_qbos(ctx, query);
      reset_query_range(ctx, query);
      struct zink_query_start *start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
      VKCTX(CmdWriteTimestamp)(ctx->bs->cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
                               start->vkq[0]->pool->query_pool, start->vkq[0]->query_id);
      ctx->bs->has_work = true;
      zink_batch_usage_set(&query->batch_uses, ctx->bs);
      _mesa_set_add(&ctx->bs->active_queries, query);
      query->needs_update = true;
   } else if (query->active) {
      /* this should be a tc-optimized query end that doesn't split a renderpass */
      if (!query->started_in_rp)
         zink_batch_no_rp(ctx);
      end_query(ctx, query);
   }

   if (unset_null_fs)
      zink_set_null_fs(ctx);

   return true;
}
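/* Reading a result back is a two-step process: the query first syncs its
 * qbos via update_qbo() if needed, then any batch the query was recorded in
 * must be flushed before the qbo contents can be mapped. With wait == false,
 * an unflushed batch means the result simply isn't available yet.
 */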
static bool
zink_get_query_result(struct pipe_context *pctx,
                      struct pipe_query *q,
                      bool wait,
                      union pipe_query_result *result)
{
   struct zink_query *query = (void*)q;
   struct zink_context *ctx = zink_context(pctx);

   if (query->type == PIPE_QUERY_TIMESTAMP_DISJOINT) {
      result->timestamp_disjoint.frequency = zink_screen(pctx->screen)->info.props.limits.timestampPeriod * 1000000.0;
      result->timestamp_disjoint.disjoint = false;
      return true;
   }

   if (query->type == PIPE_QUERY_GPU_FINISHED) {
      struct pipe_screen *screen = pctx->screen;

      result->b = screen->fence_finish(screen, query->base.flushed ? NULL : pctx,
                                       query->fence, wait ? OS_TIMEOUT_INFINITE : 0);
      return result->b;
   }

   if (query->type == ZINK_QUERY_RENDER_PASSES) {
      result->u64 = ctx->hud.render_passes;
      ctx->hud.render_passes = 0;
      return true;
   }

   if (query->needs_update) {
      assert(!ctx->tc || !threaded_query(q)->flushed);
      update_qbo(ctx, query);
   }

   if (zink_batch_usage_is_unflushed(query->batch_uses)) {
      if (!threaded_query(q)->flushed)
         pctx->flush(pctx, NULL, 0);
      if (!wait)
         return false;
   }

   return get_query_result(pctx, q, wait, result);
}

static void
suspend_query(struct zink_context *ctx, struct zink_query *query)
{
   /* if a query isn't active here then we don't need to reactivate it on the next batch */
   if (query->active && !is_time_query(query))
      end_query(ctx, query);
   if (query->needs_update && !ctx->in_rp)
      update_qbo(ctx, query);
}

static void
suspend_queries(struct zink_context *ctx, bool rp_only)
{
   set_foreach(&ctx->bs->active_queries, entry) {
      struct zink_query *query = (void*)entry->key;
      if (query->suspended || (rp_only && !query->started_in_rp))
         continue;
      if (query->active && !is_time_query(query)) {
         /* the fence is going to steal the set off the batch, so we have to copy
          * the active queries onto a list
          */
         list_addtail(&query->active_list, &ctx->suspended_queries);
         query->suspended = true;
         if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED)
            ctx->primitives_generated_suspended = query->needs_rast_discard_workaround;
      }
      suspend_query(ctx, query);
   }
}

void
zink_suspend_queries(struct zink_context *ctx)
{
   suspend_queries(ctx, false);
}

void
zink_resume_queries(struct zink_context *ctx)
{
   struct zink_query *query, *next;
   LIST_FOR_EACH_ENTRY_SAFE(query, next, &ctx->suspended_queries, active_list) {
      list_delinit(&query->active_list);
      query->suspended = false;
      if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED)
         ctx->primitives_generated_suspended = false;
      if (query->needs_update && !ctx->in_rp)
         update_qbo(ctx, query);
      begin_query(ctx, query);
   }
}

void
zink_resume_cs_query(struct zink_context *ctx)
{
   struct zink_query *query, *next;
   LIST_FOR_EACH_ENTRY_SAFE(query, next, &ctx->suspended_queries, active_list) {
      if (query->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && query->index == PIPE_STAT_QUERY_CS_INVOCATIONS) {
         list_delinit(&query->active_list);
         query->suspended = false;
         begin_query(ctx, query);
      }
   }
}

void
zink_query_renderpass_suspend(struct zink_context *ctx)
{
   suspend_queries(ctx, true);
}
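/* Emulated PRIMITIVES_GENERATED readback depends on which stages were active
 * for the draws a start covered (see check_query_results()), so when the
 * GS/XFB state or line-loop mode changes mid-query, all active queries are
 * suspended and resumed to cut a fresh start with updated
 * have_gs/have_xfb/was_line_loop flags.
 */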
void
zink_query_update_gs_states(struct zink_context *ctx)
{
   struct zink_query *query;
   bool suspendall = false;
   bool have_gs = !!ctx->gfx_stages[MESA_SHADER_GEOMETRY];
   bool have_xfb = !!ctx->num_so_targets;

   LIST_FOR_EACH_ENTRY(query, &ctx->primitives_generated_queries, stats_list) {
      struct zink_query_start *last_start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
      assert(query->active);
      if (query->has_draws) {
         if (last_start->have_gs != have_gs ||
             last_start->have_xfb != have_xfb) {
            suspendall = true;
         }
      }
   }

   if (ctx->vertices_query) {
      query = ctx->vertices_query;
      struct zink_query_start *last_start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
      assert(query->active);
      if (last_start->was_line_loop != ctx->was_line_loop) {
         suspendall = true;
      }
   }
   if (suspendall) {
      zink_suspend_queries(ctx);
      zink_resume_queries(ctx);
   }

   LIST_FOR_EACH_ENTRY(query, &ctx->primitives_generated_queries, stats_list) {
      struct zink_query_start *last_start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
      last_start->have_gs = have_gs;
      last_start->have_xfb = have_xfb;
      query->has_draws = true;
   }
   if (ctx->vertices_query) {
      query = ctx->vertices_query;
      struct zink_query_start *last_start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
      last_start->was_line_loop = ctx->was_line_loop;
      query->has_draws = true;
   }
}

static void
zink_set_active_query_state(struct pipe_context *pctx, bool enable)
{
   struct zink_context *ctx = zink_context(pctx);
   /* unordered blits already disable queries */
   if (ctx->unordered_blitting)
      return;
   ctx->queries_disabled = !enable;

   if (ctx->queries_disabled)
      zink_suspend_queries(ctx);
   else if (ctx->in_rp)
      zink_resume_queries(ctx);
}

void
zink_query_sync(struct zink_context *ctx, struct zink_query *query)
{
   if (query->needs_update)
      update_qbo(ctx, query);
}

void
zink_start_conditional_render(struct zink_context *ctx)
{
   if (unlikely(!zink_screen(ctx->base.screen)->info.have_EXT_conditional_rendering) || ctx->render_condition.active)
      return;
   VkConditionalRenderingFlagsEXT begin_flags = 0;
   if (ctx->render_condition.inverted)
      begin_flags = VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT;
   VkConditionalRenderingBeginInfoEXT begin_info = {0};
   begin_info.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT;
   begin_info.buffer = ctx->render_condition.query->predicate->obj->buffer;
   begin_info.flags = begin_flags;
   ctx->render_condition.query->predicate->obj->unordered_read = false;
   VKCTX(CmdBeginConditionalRenderingEXT)(ctx->bs->cmdbuf, &begin_info);
   zink_batch_reference_resource_rw(ctx, ctx->render_condition.query->predicate, false);
   ctx->render_condition.active = true;
}

void
zink_stop_conditional_render(struct zink_context *ctx)
{
   zink_clear_apply_conditionals(ctx);
   if (unlikely(!zink_screen(ctx->base.screen)->info.have_EXT_conditional_rendering) || !ctx->render_condition.active)
      return;
   VKCTX(CmdEndConditionalRenderingEXT)(ctx->bs->cmdbuf);
   ctx->render_condition.active = false;
}
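/* Render conditions use VK_EXT_conditional_rendering, which reads a 32-bit
 * predicate from a buffer at draw time. Where possible the query result is
 * copied into the 8-byte predicate buffer on the GPU; emulated and
 * boolean-style queries need CPU fixup and go through force_cpu_read()
 * instead.
 */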
static void
zink_render_condition(struct pipe_context *pctx,
                      struct pipe_query *pquery,
                      bool condition,
                      enum pipe_render_cond_flag mode)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_query *query = (struct zink_query *)pquery;
   zink_batch_no_rp(ctx);
   VkQueryResultFlagBits flags = 0;

   ctx->bs->has_work = true;
   if (query == NULL) {
      /* force conditional clears if they exist */
      if (ctx->clears_enabled && !ctx->in_rp)
         zink_batch_rp(ctx);
      zink_stop_conditional_render(ctx);
      ctx->render_condition_active = false;
      ctx->render_condition.query = NULL;
      return;
   }

   if (!query->predicate) {
      struct pipe_resource *pres;

      /* need to create a vulkan buffer to copy the data into */
      pres = pipe_buffer_create(pctx->screen, PIPE_BIND_QUERY_BUFFER, PIPE_USAGE_DEFAULT, sizeof(uint64_t));
      if (!pres)
         return;

      query->predicate = zink_resource(pres);
   }

   if (query->predicate_dirty) {
      struct zink_resource *res = query->predicate;

      if (mode == PIPE_RENDER_COND_WAIT || mode == PIPE_RENDER_COND_BY_REGION_WAIT)
         flags |= VK_QUERY_RESULT_WAIT_BIT;

      flags |= VK_QUERY_RESULT_64_BIT;
      int num_results = get_num_starts(query);
      if (num_results) {
         if (!is_emulated_primgen(query) &&
             !is_so_overflow_query(query) &&
             num_results == 1) {
            copy_results_to_buffer(ctx, query, res, 0, num_results, flags);
         } else {
            /* these need special handling */
            force_cpu_read(ctx, pquery, PIPE_QUERY_TYPE_U32, &res->base.b, 0);
         }
      } else {
         uint64_t zero = 0;
         tc_buffer_write(pctx, &res->base.b, 0, sizeof(zero), &zero);
      }
      zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT,
                                                    VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT);
      query->predicate_dirty = false;
   }
   ctx->render_condition.inverted = condition;
   ctx->render_condition_active = true;
   ctx->render_condition.query = query;
   if (ctx->in_rp)
      zink_start_conditional_render(ctx);
}
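/* zink_get_query_result_resource() has three paths: index == -1 requests
 * only availability, answered either directly via vkGetQueryPoolResults or
 * through a staging-buffer copy; a single plain 64-bit result can be copied
 * straight from the qbo (or the pool, for 32-bit); everything else
 * (multiple starts, emulated or boolean queries) falls back to a CPU
 * readback via force_cpu_read().
 */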
static void
zink_get_query_result_resource(struct pipe_context *pctx,
                               struct pipe_query *pquery,
                               enum pipe_query_flags flags,
                               enum pipe_query_value_type result_type,
                               int index,
                               struct pipe_resource *pres,
                               unsigned offset)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_query *query = (struct zink_query *)pquery;
   struct zink_resource *res = zink_resource(pres);
   unsigned result_size = result_type <= PIPE_QUERY_TYPE_U32 ? sizeof(uint32_t) : sizeof(uint64_t);
   VkQueryResultFlagBits size_flags = result_type <= PIPE_QUERY_TYPE_U32 ? 0 : VK_QUERY_RESULT_64_BIT;
   unsigned num_queries = get_num_starts(query);

   /* it's possible that a query may have no data at all: write out zeroes to the buffer and return */
   uint64_t u64[4] = {0};
   unsigned src_offset = result_size * get_num_results(query);
   if (!num_queries) {
      tc_buffer_write(pctx, pres, offset, result_size, (unsigned char*)u64 + src_offset);
      return;
   }

   if (index == -1) {
      /* VK_QUERY_RESULT_WITH_AVAILABILITY_BIT will ALWAYS write some kind of result data
       * in addition to the availability result, which is a problem if we're just trying to get availability data
       *
       * if we know that there's no valid buffer data in the preceding buffer range, then we can just
       * stomp on it with a glorious queued buffer copy instead of forcing a stall to manually write to the
       * buffer
       */

      VkQueryResultFlags flag = is_time_query(query) ? 0 : VK_QUERY_RESULT_PARTIAL_BIT;
      if (zink_batch_usage_check_completion(ctx, query->batch_uses)) {
         struct zink_query_start *start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
         unsigned query_id = start->vkq[0]->query_id;
         VkResult result = VKCTX(GetQueryPoolResults)(screen->dev, start->vkq[0]->pool->query_pool, query_id, 1,
                                                      sizeof(u64), u64, 0, size_flags | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT | flag);
         if (result == VK_SUCCESS) {
            tc_buffer_write(pctx, pres, offset, result_size, (unsigned char*)u64 + src_offset);
            return;
         } else {
            mesa_loge("ZINK: vkGetQueryPoolResults failed (%s)", vk_Result_to_str(result));
         }
      }
      struct pipe_resource *staging = pipe_buffer_create(pctx->screen, 0, PIPE_USAGE_STAGING, src_offset + result_size);
      copy_results_to_buffer(ctx, query, zink_resource(staging), 0, 1, size_flags | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT | flag);
      zink_copy_buffer(ctx, res, zink_resource(staging), offset, result_size * get_num_results(query), result_size);
      pipe_resource_reference(&staging, NULL);
      return;
   }

   /* there is an implicit execution dependency from each such query command to all query commands previously
    * submitted to the same queue. There is one significant exception to this; if the flags parameter of
    * vkCmdCopyQueryPoolResults does not include VK_QUERY_RESULT_WAIT_BIT, execution of vkCmdCopyQueryPoolResults
    * may happen-before the results of vkCmdEndQuery are available.
    *
    * - Chapter 18. Queries
    */
   size_flags |= VK_QUERY_RESULT_WAIT_BIT;
   if (!is_time_query(query) && !is_bool_query(query)) {
      if (num_queries == 1 && !is_emulated_primgen(query) &&
          query->type != PIPE_QUERY_PRIMITIVES_EMITTED &&
          !is_bool_query(query)) {
         if (size_flags == VK_QUERY_RESULT_64_BIT) {
            if (query->needs_update)
               update_qbo(ctx, query);
            /* internal qbo always writes 64bit value so we can just direct copy */
            zink_copy_buffer(ctx, res, zink_resource(query->curr_qbo->buffers[0]), offset,
                             get_buffer_offset(query),
                             result_size);
         } else
            /* have to do a new copy for 32bit */
            copy_results_to_buffer(ctx, query, res, offset, 1, size_flags);
         return;
      }
   }

   /* TODO: use CS to aggregate results */

   /* unfortunately, there's no way to accumulate results from multiple queries on the gpu without either
    * clobbering all but the last result or writing the results sequentially, so we have to manually write the result
    */
   force_cpu_read(ctx, pquery, result_type, pres, offset);
}

uint64_t
zink_get_timestamp(struct pipe_screen *pscreen)
{
   struct zink_screen *screen = zink_screen(pscreen);
   uint64_t timestamp, deviation;
   if (screen->info.have_EXT_calibrated_timestamps) {
      VkCalibratedTimestampInfoEXT cti = {0};
      cti.sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT;
      cti.timeDomain = VK_TIME_DOMAIN_DEVICE_EXT;
      VkResult result = VKSCR(GetCalibratedTimestampsEXT)(screen->dev, 1, &cti, &timestamp, &deviation);
      if (result != VK_SUCCESS) {
         mesa_loge("ZINK: vkGetCalibratedTimestampsEXT failed (%s)", vk_Result_to_str(result));
      }
   } else {
      zink_screen_lock_context(screen);
      struct pipe_context *pctx = &screen->copy_context->base;
      struct pipe_query *pquery = pctx->create_query(pctx, PIPE_QUERY_TIMESTAMP, 0);
      if (!pquery)
         return 0;
      union pipe_query_result result = {0};
      pctx->begin_query(pctx, pquery);
      pctx->end_query(pctx, pquery);
      pctx->get_query_result(pctx, pquery, true, &result);
      pctx->destroy_query(pctx, pquery);
      zink_screen_unlock_context(screen);
      timestamp = result.u64;
   }
   timestamp_to_nanoseconds(screen, &timestamp);
   return timestamp;
}

void
zink_context_query_init(struct pipe_context *pctx)
{
   struct zink_context *ctx = zink_context(pctx);
   list_inithead(&ctx->suspended_queries);
   list_inithead(&ctx->primitives_generated_queries);

   pctx->create_query = zink_create_query;
   pctx->destroy_query = zink_destroy_query;
   pctx->begin_query = zink_begin_query;
   pctx->end_query = zink_end_query;
   pctx->get_query_result = zink_get_query_result;
   pctx->get_query_result_resource = zink_get_query_result_resource;
   pctx->set_active_query_state = zink_set_active_query_state;
   pctx->render_condition = zink_render_condition;
}

int
zink_get_driver_query_group_info(struct pipe_screen *pscreen, unsigned index,
                                 struct pipe_driver_query_group_info *info)
{
   if (!info)
      return 1;

   assert(index == 0);
   info->name = "Zink counters";
   info->max_active_queries = ARRAY_SIZE(zink_specific_queries);
   info->num_queries = ARRAY_SIZE(zink_specific_queries);

   return 1;
}

int
zink_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
                           struct pipe_driver_query_info *info)
{
   if (!info)
      return ARRAY_SIZE(zink_specific_queries);

   assert(index < ARRAY_SIZE(zink_specific_queries));
   *info = zink_specific_queries[index];

   return 1;
}