1*61046927SAndroid Build Coastguard Worker /*
2*61046927SAndroid Build Coastguard Worker * Copyright © 2021 Igalia S.L.
3*61046927SAndroid Build Coastguard Worker * SPDX-License-Identifier: MIT
4*61046927SAndroid Build Coastguard Worker */
5*61046927SAndroid Build Coastguard Worker
6*61046927SAndroid Build Coastguard Worker #include "tu_autotune.h"
7*61046927SAndroid Build Coastguard Worker
8*61046927SAndroid Build Coastguard Worker #include "tu_cmd_buffer.h"
9*61046927SAndroid Build Coastguard Worker #include "tu_cs.h"
10*61046927SAndroid Build Coastguard Worker #include "tu_device.h"
11*61046927SAndroid Build Coastguard Worker #include "tu_image.h"
12*61046927SAndroid Build Coastguard Worker #include "tu_pass.h"
13*61046927SAndroid Build Coastguard Worker
14*61046927SAndroid Build Coastguard Worker /* How does it work?
15*61046927SAndroid Build Coastguard Worker *
16*61046927SAndroid Build Coastguard Worker * - For each renderpass we calculate the number of samples passed
17*61046927SAndroid Build Coastguard Worker * by storing the number before and after in GPU memory.
18*61046927SAndroid Build Coastguard Worker * - To store the values each command buffer holds GPU memory which
19*61046927SAndroid Build Coastguard Worker * expands with more renderpasses being written.
20*61046927SAndroid Build Coastguard Worker * - For each renderpass we create tu_renderpass_result entry which
21*61046927SAndroid Build Coastguard Worker * points to the results in GPU memory.
22*61046927SAndroid Build Coastguard Worker * - Later on tu_renderpass_result would be added to the
23*61046927SAndroid Build Coastguard Worker * tu_renderpass_history entry which aggregate results for a
24*61046927SAndroid Build Coastguard Worker * given renderpass.
25*61046927SAndroid Build Coastguard Worker * - On submission:
 *   - Process results whose fence was signalled.
27*61046927SAndroid Build Coastguard Worker * - Free per-submission data which we now don't need.
28*61046927SAndroid Build Coastguard Worker *
29*61046927SAndroid Build Coastguard Worker * - Create a command stream to write a fence value. This way we would
30*61046927SAndroid Build Coastguard Worker * know when we could safely read the results.
31*61046927SAndroid Build Coastguard Worker * - We cannot rely on the command buffer's lifetime when referencing
32*61046927SAndroid Build Coastguard Worker * its resources since the buffer could be destroyed before we process
33*61046927SAndroid Build Coastguard Worker * the results.
34*61046927SAndroid Build Coastguard Worker * - For each command buffer:
35*61046927SAndroid Build Coastguard Worker * - Reference its GPU memory.
36*61046927SAndroid Build Coastguard Worker * - Move if ONE_TIME_SUBMIT or copy all tu_renderpass_result to the queue.
37*61046927SAndroid Build Coastguard Worker *
 * Since the command buffers could be recorded on different threads,
 * we have to maintain some amount of locking around the history table.
 * However, we change the table only in a single thread at submission
 * time, so in most cases there will be no lock contention.
42*61046927SAndroid Build Coastguard Worker */
43*61046927SAndroid Build Coastguard Worker
/* Forward declaration: free_history() below needs it before its definition
 * is visible.
 */
void
tu_autotune_free_results_locked(struct tu_device *dev,
                                struct list_head *results);

/* When non-zero, every submission logs the size of the history table and
 * each history entry that gets evicted.
 */
#define TU_AUTOTUNE_DEBUG_LOG 0

/* When non-zero, the autotuner drains pending results and dumps all history
 * entries when it is torn down; useful to gather data from traces.
 */
#define TU_AUTOTUNE_LOG_AT_FINISH 0

/* Number of most recent results kept (and averaged) per renderpass. */
#define MAX_HISTORY_RESULTS 5

/* Number of submissions a renderpass history survives without being used. */
#define MAX_HISTORY_LIFETIME 128
57*61046927SAndroid Build Coastguard Worker
58*61046927SAndroid Build Coastguard Worker
59*61046927SAndroid Build Coastguard Worker /**
60*61046927SAndroid Build Coastguard Worker * Tracks results for a given renderpass key
61*61046927SAndroid Build Coastguard Worker */
62*61046927SAndroid Build Coastguard Worker struct tu_renderpass_history {
63*61046927SAndroid Build Coastguard Worker uint64_t key;
64*61046927SAndroid Build Coastguard Worker
65*61046927SAndroid Build Coastguard Worker /* We would delete old history entries */
66*61046927SAndroid Build Coastguard Worker uint32_t last_fence;
67*61046927SAndroid Build Coastguard Worker
68*61046927SAndroid Build Coastguard Worker /**
69*61046927SAndroid Build Coastguard Worker * List of recent fd_renderpass_result's
70*61046927SAndroid Build Coastguard Worker */
71*61046927SAndroid Build Coastguard Worker struct list_head results;
72*61046927SAndroid Build Coastguard Worker uint32_t num_results;
73*61046927SAndroid Build Coastguard Worker
74*61046927SAndroid Build Coastguard Worker uint32_t avg_samples;
75*61046927SAndroid Build Coastguard Worker };
76*61046927SAndroid Build Coastguard Worker
77*61046927SAndroid Build Coastguard Worker /* Holds per-submission cs which writes the fence. */
78*61046927SAndroid Build Coastguard Worker struct tu_submission_data {
79*61046927SAndroid Build Coastguard Worker struct list_head node;
80*61046927SAndroid Build Coastguard Worker uint32_t fence;
81*61046927SAndroid Build Coastguard Worker
82*61046927SAndroid Build Coastguard Worker struct tu_cs fence_cs;
83*61046927SAndroid Build Coastguard Worker };
84*61046927SAndroid Build Coastguard Worker
/* Wrap-aware "a < b" for 32-bit fence counters: the unsigned difference is
 * reinterpreted as signed, so the ordering stays correct after the counter
 * wraps around.
 */
static bool
fence_before(uint32_t a, uint32_t b)
{
   const int32_t distance = (int32_t)(a - b);

   return distance < 0;
}
91*61046927SAndroid Build Coastguard Worker
92*61046927SAndroid Build Coastguard Worker static uint32_t
get_autotune_fence(struct tu_autotune * at)93*61046927SAndroid Build Coastguard Worker get_autotune_fence(struct tu_autotune *at)
94*61046927SAndroid Build Coastguard Worker {
95*61046927SAndroid Build Coastguard Worker return at->device->global_bo_map->autotune_fence;
96*61046927SAndroid Build Coastguard Worker }
97*61046927SAndroid Build Coastguard Worker
98*61046927SAndroid Build Coastguard Worker template <chip CHIP>
99*61046927SAndroid Build Coastguard Worker static void
create_submission_fence(struct tu_device * dev,struct tu_cs * cs,uint32_t fence)100*61046927SAndroid Build Coastguard Worker create_submission_fence(struct tu_device *dev,
101*61046927SAndroid Build Coastguard Worker struct tu_cs *cs,
102*61046927SAndroid Build Coastguard Worker uint32_t fence)
103*61046927SAndroid Build Coastguard Worker {
104*61046927SAndroid Build Coastguard Worker uint64_t dst_iova = dev->global_bo->iova + gb_offset(autotune_fence);
105*61046927SAndroid Build Coastguard Worker if (CHIP >= A7XX) {
106*61046927SAndroid Build Coastguard Worker tu_cs_emit_pkt7(cs, CP_EVENT_WRITE7, 4);
107*61046927SAndroid Build Coastguard Worker tu_cs_emit(cs,
108*61046927SAndroid Build Coastguard Worker CP_EVENT_WRITE7_0(.event = CACHE_FLUSH_TS,
109*61046927SAndroid Build Coastguard Worker .write_src = EV_WRITE_USER_32B,
110*61046927SAndroid Build Coastguard Worker .write_dst = EV_DST_RAM,
111*61046927SAndroid Build Coastguard Worker .write_enabled = true).value);
112*61046927SAndroid Build Coastguard Worker } else {
113*61046927SAndroid Build Coastguard Worker tu_cs_emit_pkt7(cs, CP_EVENT_WRITE, 4);
114*61046927SAndroid Build Coastguard Worker tu_cs_emit(cs, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS));
115*61046927SAndroid Build Coastguard Worker }
116*61046927SAndroid Build Coastguard Worker
117*61046927SAndroid Build Coastguard Worker tu_cs_emit_qw(cs, dst_iova);
118*61046927SAndroid Build Coastguard Worker tu_cs_emit(cs, fence);
119*61046927SAndroid Build Coastguard Worker }
120*61046927SAndroid Build Coastguard Worker
121*61046927SAndroid Build Coastguard Worker static struct tu_submission_data *
create_submission_data(struct tu_device * dev,struct tu_autotune * at,uint32_t fence)122*61046927SAndroid Build Coastguard Worker create_submission_data(struct tu_device *dev, struct tu_autotune *at,
123*61046927SAndroid Build Coastguard Worker uint32_t fence)
124*61046927SAndroid Build Coastguard Worker {
125*61046927SAndroid Build Coastguard Worker struct tu_submission_data *submission_data = NULL;
126*61046927SAndroid Build Coastguard Worker if (!list_is_empty(&at->submission_data_pool)) {
127*61046927SAndroid Build Coastguard Worker submission_data = list_first_entry(&at->submission_data_pool,
128*61046927SAndroid Build Coastguard Worker struct tu_submission_data, node);
129*61046927SAndroid Build Coastguard Worker list_del(&submission_data->node);
130*61046927SAndroid Build Coastguard Worker } else {
131*61046927SAndroid Build Coastguard Worker submission_data = (struct tu_submission_data *) calloc(
132*61046927SAndroid Build Coastguard Worker 1, sizeof(struct tu_submission_data));
133*61046927SAndroid Build Coastguard Worker tu_cs_init(&submission_data->fence_cs, dev, TU_CS_MODE_GROW, 5, "autotune fence cs");
134*61046927SAndroid Build Coastguard Worker }
135*61046927SAndroid Build Coastguard Worker submission_data->fence = fence;
136*61046927SAndroid Build Coastguard Worker
137*61046927SAndroid Build Coastguard Worker struct tu_cs* fence_cs = &submission_data->fence_cs;
138*61046927SAndroid Build Coastguard Worker tu_cs_begin(fence_cs);
139*61046927SAndroid Build Coastguard Worker TU_CALLX(dev, create_submission_fence)(dev, fence_cs, fence);
140*61046927SAndroid Build Coastguard Worker tu_cs_end(fence_cs);
141*61046927SAndroid Build Coastguard Worker
142*61046927SAndroid Build Coastguard Worker list_addtail(&submission_data->node, &at->pending_submission_data);
143*61046927SAndroid Build Coastguard Worker
144*61046927SAndroid Build Coastguard Worker return submission_data;
145*61046927SAndroid Build Coastguard Worker }
146*61046927SAndroid Build Coastguard Worker
147*61046927SAndroid Build Coastguard Worker static void
finish_submission_data(struct tu_autotune * at,struct tu_submission_data * data)148*61046927SAndroid Build Coastguard Worker finish_submission_data(struct tu_autotune *at,
149*61046927SAndroid Build Coastguard Worker struct tu_submission_data *data)
150*61046927SAndroid Build Coastguard Worker {
151*61046927SAndroid Build Coastguard Worker list_del(&data->node);
152*61046927SAndroid Build Coastguard Worker list_addtail(&data->node, &at->submission_data_pool);
153*61046927SAndroid Build Coastguard Worker tu_cs_reset(&data->fence_cs);
154*61046927SAndroid Build Coastguard Worker }
155*61046927SAndroid Build Coastguard Worker
156*61046927SAndroid Build Coastguard Worker static void
free_submission_data(struct tu_submission_data * data)157*61046927SAndroid Build Coastguard Worker free_submission_data(struct tu_submission_data *data)
158*61046927SAndroid Build Coastguard Worker {
159*61046927SAndroid Build Coastguard Worker list_del(&data->node);
160*61046927SAndroid Build Coastguard Worker tu_cs_finish(&data->fence_cs);
161*61046927SAndroid Build Coastguard Worker
162*61046927SAndroid Build Coastguard Worker free(data);
163*61046927SAndroid Build Coastguard Worker }
164*61046927SAndroid Build Coastguard Worker
165*61046927SAndroid Build Coastguard Worker static uint64_t
hash_renderpass_instance(const struct tu_render_pass * pass,const struct tu_framebuffer * framebuffer,const struct tu_cmd_buffer * cmd)166*61046927SAndroid Build Coastguard Worker hash_renderpass_instance(const struct tu_render_pass *pass,
167*61046927SAndroid Build Coastguard Worker const struct tu_framebuffer *framebuffer,
168*61046927SAndroid Build Coastguard Worker const struct tu_cmd_buffer *cmd) {
169*61046927SAndroid Build Coastguard Worker uint32_t data[3 + pass->attachment_count * 5];
170*61046927SAndroid Build Coastguard Worker uint32_t* ptr = data;
171*61046927SAndroid Build Coastguard Worker
172*61046927SAndroid Build Coastguard Worker *ptr++ = framebuffer->width;
173*61046927SAndroid Build Coastguard Worker *ptr++ = framebuffer->height;
174*61046927SAndroid Build Coastguard Worker *ptr++ = framebuffer->layers;
175*61046927SAndroid Build Coastguard Worker
176*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < pass->attachment_count; i++) {
177*61046927SAndroid Build Coastguard Worker *ptr++ = cmd->state.attachments[i]->view.width;
178*61046927SAndroid Build Coastguard Worker *ptr++ = cmd->state.attachments[i]->view.height;
179*61046927SAndroid Build Coastguard Worker *ptr++ = cmd->state.attachments[i]->image->vk.format;
180*61046927SAndroid Build Coastguard Worker *ptr++ = cmd->state.attachments[i]->image->vk.array_layers;
181*61046927SAndroid Build Coastguard Worker *ptr++ = cmd->state.attachments[i]->image->vk.mip_levels;
182*61046927SAndroid Build Coastguard Worker }
183*61046927SAndroid Build Coastguard Worker
184*61046927SAndroid Build Coastguard Worker return XXH64(data, sizeof(data), pass->autotune_hash);
185*61046927SAndroid Build Coastguard Worker }
186*61046927SAndroid Build Coastguard Worker
187*61046927SAndroid Build Coastguard Worker static void
free_result(struct tu_device * dev,struct tu_renderpass_result * result)188*61046927SAndroid Build Coastguard Worker free_result(struct tu_device *dev, struct tu_renderpass_result *result)
189*61046927SAndroid Build Coastguard Worker {
190*61046927SAndroid Build Coastguard Worker tu_suballoc_bo_free(&dev->autotune_suballoc, &result->bo);
191*61046927SAndroid Build Coastguard Worker list_del(&result->node);
192*61046927SAndroid Build Coastguard Worker free(result);
193*61046927SAndroid Build Coastguard Worker }
194*61046927SAndroid Build Coastguard Worker
195*61046927SAndroid Build Coastguard Worker static void
free_history(struct tu_device * dev,struct tu_renderpass_history * history)196*61046927SAndroid Build Coastguard Worker free_history(struct tu_device *dev, struct tu_renderpass_history *history)
197*61046927SAndroid Build Coastguard Worker {
198*61046927SAndroid Build Coastguard Worker tu_autotune_free_results_locked(dev, &history->results);
199*61046927SAndroid Build Coastguard Worker free(history);
200*61046927SAndroid Build Coastguard Worker }
201*61046927SAndroid Build Coastguard Worker
202*61046927SAndroid Build Coastguard Worker static bool
get_history(struct tu_autotune * at,uint64_t rp_key,uint32_t * avg_samples)203*61046927SAndroid Build Coastguard Worker get_history(struct tu_autotune *at, uint64_t rp_key, uint32_t *avg_samples)
204*61046927SAndroid Build Coastguard Worker {
205*61046927SAndroid Build Coastguard Worker bool has_history = false;
206*61046927SAndroid Build Coastguard Worker
207*61046927SAndroid Build Coastguard Worker /* If the lock contantion would be found in the wild -
208*61046927SAndroid Build Coastguard Worker * we could use try_lock here.
209*61046927SAndroid Build Coastguard Worker */
210*61046927SAndroid Build Coastguard Worker u_rwlock_rdlock(&at->ht_lock);
211*61046927SAndroid Build Coastguard Worker struct hash_entry *entry =
212*61046927SAndroid Build Coastguard Worker _mesa_hash_table_search(at->ht, &rp_key);
213*61046927SAndroid Build Coastguard Worker if (entry) {
214*61046927SAndroid Build Coastguard Worker struct tu_renderpass_history *history =
215*61046927SAndroid Build Coastguard Worker (struct tu_renderpass_history *) entry->data;
216*61046927SAndroid Build Coastguard Worker if (history->num_results > 0) {
217*61046927SAndroid Build Coastguard Worker *avg_samples = p_atomic_read(&history->avg_samples);
218*61046927SAndroid Build Coastguard Worker has_history = true;
219*61046927SAndroid Build Coastguard Worker }
220*61046927SAndroid Build Coastguard Worker }
221*61046927SAndroid Build Coastguard Worker u_rwlock_rdunlock(&at->ht_lock);
222*61046927SAndroid Build Coastguard Worker
223*61046927SAndroid Build Coastguard Worker return has_history;
224*61046927SAndroid Build Coastguard Worker }
225*61046927SAndroid Build Coastguard Worker
226*61046927SAndroid Build Coastguard Worker static struct tu_renderpass_result *
create_history_result(struct tu_autotune * at,uint64_t rp_key)227*61046927SAndroid Build Coastguard Worker create_history_result(struct tu_autotune *at, uint64_t rp_key)
228*61046927SAndroid Build Coastguard Worker {
229*61046927SAndroid Build Coastguard Worker struct tu_renderpass_result *result =
230*61046927SAndroid Build Coastguard Worker (struct tu_renderpass_result *) calloc(1, sizeof(*result));
231*61046927SAndroid Build Coastguard Worker result->rp_key = rp_key;
232*61046927SAndroid Build Coastguard Worker
233*61046927SAndroid Build Coastguard Worker return result;
234*61046927SAndroid Build Coastguard Worker }
235*61046927SAndroid Build Coastguard Worker
236*61046927SAndroid Build Coastguard Worker static void
history_add_result(struct tu_device * dev,struct tu_renderpass_history * history,struct tu_renderpass_result * result)237*61046927SAndroid Build Coastguard Worker history_add_result(struct tu_device *dev, struct tu_renderpass_history *history,
238*61046927SAndroid Build Coastguard Worker struct tu_renderpass_result *result)
239*61046927SAndroid Build Coastguard Worker {
240*61046927SAndroid Build Coastguard Worker list_delinit(&result->node);
241*61046927SAndroid Build Coastguard Worker list_add(&result->node, &history->results);
242*61046927SAndroid Build Coastguard Worker
243*61046927SAndroid Build Coastguard Worker if (history->num_results < MAX_HISTORY_RESULTS) {
244*61046927SAndroid Build Coastguard Worker history->num_results++;
245*61046927SAndroid Build Coastguard Worker } else {
246*61046927SAndroid Build Coastguard Worker /* Once above the limit, start popping old results off the
247*61046927SAndroid Build Coastguard Worker * tail of the list:
248*61046927SAndroid Build Coastguard Worker */
249*61046927SAndroid Build Coastguard Worker struct tu_renderpass_result *old_result =
250*61046927SAndroid Build Coastguard Worker list_last_entry(&history->results, struct tu_renderpass_result, node);
251*61046927SAndroid Build Coastguard Worker mtx_lock(&dev->autotune_mutex);
252*61046927SAndroid Build Coastguard Worker free_result(dev, old_result);
253*61046927SAndroid Build Coastguard Worker mtx_unlock(&dev->autotune_mutex);
254*61046927SAndroid Build Coastguard Worker }
255*61046927SAndroid Build Coastguard Worker
256*61046927SAndroid Build Coastguard Worker /* Do calculations here to avoid locking history in tu_autotune_use_bypass */
257*61046927SAndroid Build Coastguard Worker uint32_t total_samples = 0;
258*61046927SAndroid Build Coastguard Worker list_for_each_entry(struct tu_renderpass_result, result,
259*61046927SAndroid Build Coastguard Worker &history->results, node) {
260*61046927SAndroid Build Coastguard Worker total_samples += result->samples_passed;
261*61046927SAndroid Build Coastguard Worker }
262*61046927SAndroid Build Coastguard Worker
263*61046927SAndroid Build Coastguard Worker float avg_samples = (float)total_samples / (float)history->num_results;
264*61046927SAndroid Build Coastguard Worker p_atomic_set(&history->avg_samples, (uint32_t)avg_samples);
265*61046927SAndroid Build Coastguard Worker }
266*61046927SAndroid Build Coastguard Worker
267*61046927SAndroid Build Coastguard Worker static void
process_results(struct tu_autotune * at,uint32_t current_fence)268*61046927SAndroid Build Coastguard Worker process_results(struct tu_autotune *at, uint32_t current_fence)
269*61046927SAndroid Build Coastguard Worker {
270*61046927SAndroid Build Coastguard Worker struct tu_device *dev = at->device;
271*61046927SAndroid Build Coastguard Worker
272*61046927SAndroid Build Coastguard Worker list_for_each_entry_safe(struct tu_renderpass_result, result,
273*61046927SAndroid Build Coastguard Worker &at->pending_results, node) {
274*61046927SAndroid Build Coastguard Worker if (fence_before(current_fence, result->fence))
275*61046927SAndroid Build Coastguard Worker break;
276*61046927SAndroid Build Coastguard Worker
277*61046927SAndroid Build Coastguard Worker struct tu_renderpass_history *history = result->history;
278*61046927SAndroid Build Coastguard Worker result->samples_passed =
279*61046927SAndroid Build Coastguard Worker result->samples->samples_end - result->samples->samples_start;
280*61046927SAndroid Build Coastguard Worker
281*61046927SAndroid Build Coastguard Worker history_add_result(dev, history, result);
282*61046927SAndroid Build Coastguard Worker }
283*61046927SAndroid Build Coastguard Worker
284*61046927SAndroid Build Coastguard Worker list_for_each_entry_safe(struct tu_submission_data, submission_data,
285*61046927SAndroid Build Coastguard Worker &at->pending_submission_data, node) {
286*61046927SAndroid Build Coastguard Worker if (fence_before(current_fence, submission_data->fence))
287*61046927SAndroid Build Coastguard Worker break;
288*61046927SAndroid Build Coastguard Worker
289*61046927SAndroid Build Coastguard Worker finish_submission_data(at, submission_data);
290*61046927SAndroid Build Coastguard Worker }
291*61046927SAndroid Build Coastguard Worker }
292*61046927SAndroid Build Coastguard Worker
293*61046927SAndroid Build Coastguard Worker static void
queue_pending_results(struct tu_autotune * at,struct tu_cmd_buffer * cmdbuf)294*61046927SAndroid Build Coastguard Worker queue_pending_results(struct tu_autotune *at, struct tu_cmd_buffer *cmdbuf)
295*61046927SAndroid Build Coastguard Worker {
296*61046927SAndroid Build Coastguard Worker bool one_time_submit = cmdbuf->usage_flags &
297*61046927SAndroid Build Coastguard Worker VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
298*61046927SAndroid Build Coastguard Worker
299*61046927SAndroid Build Coastguard Worker if (one_time_submit) {
300*61046927SAndroid Build Coastguard Worker /* We can just steal the list since it won't be resubmitted again */
301*61046927SAndroid Build Coastguard Worker list_splicetail(&cmdbuf->renderpass_autotune_results,
302*61046927SAndroid Build Coastguard Worker &at->pending_results);
303*61046927SAndroid Build Coastguard Worker list_inithead(&cmdbuf->renderpass_autotune_results);
304*61046927SAndroid Build Coastguard Worker } else {
305*61046927SAndroid Build Coastguard Worker list_for_each_entry_safe(struct tu_renderpass_result, result,
306*61046927SAndroid Build Coastguard Worker &cmdbuf->renderpass_autotune_results, node) {
307*61046927SAndroid Build Coastguard Worker /* TODO: copying each result isn't nice */
308*61046927SAndroid Build Coastguard Worker struct tu_renderpass_result *copy =
309*61046927SAndroid Build Coastguard Worker (struct tu_renderpass_result *) malloc(sizeof(*result));
310*61046927SAndroid Build Coastguard Worker *copy = *result;
311*61046927SAndroid Build Coastguard Worker tu_bo_get_ref(copy->bo.bo);
312*61046927SAndroid Build Coastguard Worker list_addtail(©->node, &at->pending_results);
313*61046927SAndroid Build Coastguard Worker }
314*61046927SAndroid Build Coastguard Worker }
315*61046927SAndroid Build Coastguard Worker }
316*61046927SAndroid Build Coastguard Worker
317*61046927SAndroid Build Coastguard Worker struct tu_cs *
tu_autotune_on_submit(struct tu_device * dev,struct tu_autotune * at,struct tu_cmd_buffer ** cmd_buffers,uint32_t cmd_buffer_count)318*61046927SAndroid Build Coastguard Worker tu_autotune_on_submit(struct tu_device *dev,
319*61046927SAndroid Build Coastguard Worker struct tu_autotune *at,
320*61046927SAndroid Build Coastguard Worker struct tu_cmd_buffer **cmd_buffers,
321*61046927SAndroid Build Coastguard Worker uint32_t cmd_buffer_count)
322*61046927SAndroid Build Coastguard Worker {
323*61046927SAndroid Build Coastguard Worker /* We are single-threaded here */
324*61046927SAndroid Build Coastguard Worker
325*61046927SAndroid Build Coastguard Worker const uint32_t gpu_fence = get_autotune_fence(at);
326*61046927SAndroid Build Coastguard Worker const uint32_t new_fence = at->fence_counter++;
327*61046927SAndroid Build Coastguard Worker
328*61046927SAndroid Build Coastguard Worker process_results(at, gpu_fence);
329*61046927SAndroid Build Coastguard Worker
330*61046927SAndroid Build Coastguard Worker /* Create history entries here to minimize work and locking being
331*61046927SAndroid Build Coastguard Worker * done on renderpass end.
332*61046927SAndroid Build Coastguard Worker */
333*61046927SAndroid Build Coastguard Worker for (uint32_t i = 0; i < cmd_buffer_count; i++) {
334*61046927SAndroid Build Coastguard Worker struct tu_cmd_buffer *cmdbuf = cmd_buffers[i];
335*61046927SAndroid Build Coastguard Worker list_for_each_entry_safe(struct tu_renderpass_result, result,
336*61046927SAndroid Build Coastguard Worker &cmdbuf->renderpass_autotune_results, node) {
337*61046927SAndroid Build Coastguard Worker struct tu_renderpass_history *history;
338*61046927SAndroid Build Coastguard Worker struct hash_entry *entry =
339*61046927SAndroid Build Coastguard Worker _mesa_hash_table_search(at->ht, &result->rp_key);
340*61046927SAndroid Build Coastguard Worker if (!entry) {
341*61046927SAndroid Build Coastguard Worker history =
342*61046927SAndroid Build Coastguard Worker (struct tu_renderpass_history *) calloc(1, sizeof(*history));
343*61046927SAndroid Build Coastguard Worker history->key = result->rp_key;
344*61046927SAndroid Build Coastguard Worker list_inithead(&history->results);
345*61046927SAndroid Build Coastguard Worker
346*61046927SAndroid Build Coastguard Worker u_rwlock_wrlock(&at->ht_lock);
347*61046927SAndroid Build Coastguard Worker _mesa_hash_table_insert(at->ht, &history->key, history);
348*61046927SAndroid Build Coastguard Worker u_rwlock_wrunlock(&at->ht_lock);
349*61046927SAndroid Build Coastguard Worker } else {
350*61046927SAndroid Build Coastguard Worker history = (struct tu_renderpass_history *) entry->data;
351*61046927SAndroid Build Coastguard Worker }
352*61046927SAndroid Build Coastguard Worker
353*61046927SAndroid Build Coastguard Worker history->last_fence = new_fence;
354*61046927SAndroid Build Coastguard Worker
355*61046927SAndroid Build Coastguard Worker result->fence = new_fence;
356*61046927SAndroid Build Coastguard Worker result->history = history;
357*61046927SAndroid Build Coastguard Worker }
358*61046927SAndroid Build Coastguard Worker }
359*61046927SAndroid Build Coastguard Worker
360*61046927SAndroid Build Coastguard Worker struct tu_submission_data *submission_data =
361*61046927SAndroid Build Coastguard Worker create_submission_data(dev, at, new_fence);
362*61046927SAndroid Build Coastguard Worker
363*61046927SAndroid Build Coastguard Worker for (uint32_t i = 0; i < cmd_buffer_count; i++) {
364*61046927SAndroid Build Coastguard Worker struct tu_cmd_buffer *cmdbuf = cmd_buffers[i];
365*61046927SAndroid Build Coastguard Worker if (list_is_empty(&cmdbuf->renderpass_autotune_results))
366*61046927SAndroid Build Coastguard Worker continue;
367*61046927SAndroid Build Coastguard Worker
368*61046927SAndroid Build Coastguard Worker queue_pending_results(at, cmdbuf);
369*61046927SAndroid Build Coastguard Worker }
370*61046927SAndroid Build Coastguard Worker
371*61046927SAndroid Build Coastguard Worker if (TU_AUTOTUNE_DEBUG_LOG)
372*61046927SAndroid Build Coastguard Worker mesa_logi("Total history entries: %u", at->ht->entries);
373*61046927SAndroid Build Coastguard Worker
374*61046927SAndroid Build Coastguard Worker /* Cleanup old entries from history table. The assumption
375*61046927SAndroid Build Coastguard Worker * here is that application doesn't hold many old unsubmitted
376*61046927SAndroid Build Coastguard Worker * command buffers, otherwise this table may grow big.
377*61046927SAndroid Build Coastguard Worker */
378*61046927SAndroid Build Coastguard Worker hash_table_foreach(at->ht, entry) {
379*61046927SAndroid Build Coastguard Worker struct tu_renderpass_history *history =
380*61046927SAndroid Build Coastguard Worker (struct tu_renderpass_history *) entry->data;
381*61046927SAndroid Build Coastguard Worker if (fence_before(gpu_fence, history->last_fence + MAX_HISTORY_LIFETIME))
382*61046927SAndroid Build Coastguard Worker continue;
383*61046927SAndroid Build Coastguard Worker
384*61046927SAndroid Build Coastguard Worker if (TU_AUTOTUNE_DEBUG_LOG)
385*61046927SAndroid Build Coastguard Worker mesa_logi("Removed old history entry %016" PRIx64 "", history->key);
386*61046927SAndroid Build Coastguard Worker
387*61046927SAndroid Build Coastguard Worker u_rwlock_wrlock(&at->ht_lock);
388*61046927SAndroid Build Coastguard Worker _mesa_hash_table_remove_key(at->ht, &history->key);
389*61046927SAndroid Build Coastguard Worker u_rwlock_wrunlock(&at->ht_lock);
390*61046927SAndroid Build Coastguard Worker
391*61046927SAndroid Build Coastguard Worker mtx_lock(&dev->autotune_mutex);
392*61046927SAndroid Build Coastguard Worker free_history(dev, history);
393*61046927SAndroid Build Coastguard Worker mtx_unlock(&dev->autotune_mutex);
394*61046927SAndroid Build Coastguard Worker }
395*61046927SAndroid Build Coastguard Worker
396*61046927SAndroid Build Coastguard Worker return &submission_data->fence_cs;
397*61046927SAndroid Build Coastguard Worker }
398*61046927SAndroid Build Coastguard Worker
/* Hash-table key comparison: both arguments point to 64-bit renderpass keys. */
static bool
renderpass_key_equals(const void *_a, const void *_b)
{
   const uint64_t lhs = *(const uint64_t *) _a;
   const uint64_t rhs = *(const uint64_t *) _b;

   return lhs == rhs;
}
404*61046927SAndroid Build Coastguard Worker
/* Hash-table hash function: the low 32 bits of the 64-bit renderpass key
 * that `_a` points to.
 */
static uint32_t
renderpass_key_hash(const void *_a)
{
   const uint64_t key = *(const uint64_t *) _a;

   return (uint32_t) (key & 0xffffffff);
}
410*61046927SAndroid Build Coastguard Worker
411*61046927SAndroid Build Coastguard Worker VkResult
tu_autotune_init(struct tu_autotune * at,struct tu_device * dev)412*61046927SAndroid Build Coastguard Worker tu_autotune_init(struct tu_autotune *at, struct tu_device *dev)
413*61046927SAndroid Build Coastguard Worker {
414*61046927SAndroid Build Coastguard Worker at->enabled = true;
415*61046927SAndroid Build Coastguard Worker at->device = dev;
416*61046927SAndroid Build Coastguard Worker at->ht = _mesa_hash_table_create(NULL,
417*61046927SAndroid Build Coastguard Worker renderpass_key_hash,
418*61046927SAndroid Build Coastguard Worker renderpass_key_equals);
419*61046927SAndroid Build Coastguard Worker u_rwlock_init(&at->ht_lock);
420*61046927SAndroid Build Coastguard Worker
421*61046927SAndroid Build Coastguard Worker list_inithead(&at->pending_results);
422*61046927SAndroid Build Coastguard Worker list_inithead(&at->pending_submission_data);
423*61046927SAndroid Build Coastguard Worker list_inithead(&at->submission_data_pool);
424*61046927SAndroid Build Coastguard Worker
425*61046927SAndroid Build Coastguard Worker /* start from 1 because tu6_global::autotune_fence is initialized to 0 */
426*61046927SAndroid Build Coastguard Worker at->fence_counter = 1;
427*61046927SAndroid Build Coastguard Worker
428*61046927SAndroid Build Coastguard Worker return VK_SUCCESS;
429*61046927SAndroid Build Coastguard Worker }
430*61046927SAndroid Build Coastguard Worker
431*61046927SAndroid Build Coastguard Worker void
tu_autotune_fini(struct tu_autotune * at,struct tu_device * dev)432*61046927SAndroid Build Coastguard Worker tu_autotune_fini(struct tu_autotune *at, struct tu_device *dev)
433*61046927SAndroid Build Coastguard Worker {
434*61046927SAndroid Build Coastguard Worker if (TU_AUTOTUNE_LOG_AT_FINISH) {
435*61046927SAndroid Build Coastguard Worker while (!list_is_empty(&at->pending_results)) {
436*61046927SAndroid Build Coastguard Worker const uint32_t gpu_fence = get_autotune_fence(at);
437*61046927SAndroid Build Coastguard Worker process_results(at, gpu_fence);
438*61046927SAndroid Build Coastguard Worker }
439*61046927SAndroid Build Coastguard Worker
440*61046927SAndroid Build Coastguard Worker hash_table_foreach(at->ht, entry) {
441*61046927SAndroid Build Coastguard Worker struct tu_renderpass_history *history =
442*61046927SAndroid Build Coastguard Worker (struct tu_renderpass_history *) entry->data;
443*61046927SAndroid Build Coastguard Worker
444*61046927SAndroid Build Coastguard Worker mesa_logi("%016" PRIx64 " \tavg_passed=%u results=%u",
445*61046927SAndroid Build Coastguard Worker history->key, history->avg_samples, history->num_results);
446*61046927SAndroid Build Coastguard Worker }
447*61046927SAndroid Build Coastguard Worker }
448*61046927SAndroid Build Coastguard Worker
449*61046927SAndroid Build Coastguard Worker tu_autotune_free_results(dev, &at->pending_results);
450*61046927SAndroid Build Coastguard Worker
451*61046927SAndroid Build Coastguard Worker mtx_lock(&dev->autotune_mutex);
452*61046927SAndroid Build Coastguard Worker hash_table_foreach(at->ht, entry) {
453*61046927SAndroid Build Coastguard Worker struct tu_renderpass_history *history =
454*61046927SAndroid Build Coastguard Worker (struct tu_renderpass_history *) entry->data;
455*61046927SAndroid Build Coastguard Worker free_history(dev, history);
456*61046927SAndroid Build Coastguard Worker }
457*61046927SAndroid Build Coastguard Worker mtx_unlock(&dev->autotune_mutex);
458*61046927SAndroid Build Coastguard Worker
459*61046927SAndroid Build Coastguard Worker list_for_each_entry_safe(struct tu_submission_data, submission_data,
460*61046927SAndroid Build Coastguard Worker &at->pending_submission_data, node) {
461*61046927SAndroid Build Coastguard Worker free_submission_data(submission_data);
462*61046927SAndroid Build Coastguard Worker }
463*61046927SAndroid Build Coastguard Worker
464*61046927SAndroid Build Coastguard Worker list_for_each_entry_safe(struct tu_submission_data, submission_data,
465*61046927SAndroid Build Coastguard Worker &at->submission_data_pool, node) {
466*61046927SAndroid Build Coastguard Worker free_submission_data(submission_data);
467*61046927SAndroid Build Coastguard Worker }
468*61046927SAndroid Build Coastguard Worker
469*61046927SAndroid Build Coastguard Worker _mesa_hash_table_destroy(at->ht, NULL);
470*61046927SAndroid Build Coastguard Worker u_rwlock_destroy(&at->ht_lock);
471*61046927SAndroid Build Coastguard Worker }
472*61046927SAndroid Build Coastguard Worker
473*61046927SAndroid Build Coastguard Worker bool
tu_autotune_submit_requires_fence(struct tu_cmd_buffer ** cmd_buffers,uint32_t cmd_buffer_count)474*61046927SAndroid Build Coastguard Worker tu_autotune_submit_requires_fence(struct tu_cmd_buffer **cmd_buffers,
475*61046927SAndroid Build Coastguard Worker uint32_t cmd_buffer_count)
476*61046927SAndroid Build Coastguard Worker {
477*61046927SAndroid Build Coastguard Worker for (uint32_t i = 0; i < cmd_buffer_count; i++) {
478*61046927SAndroid Build Coastguard Worker struct tu_cmd_buffer *cmdbuf = cmd_buffers[i];
479*61046927SAndroid Build Coastguard Worker if (!list_is_empty(&cmdbuf->renderpass_autotune_results))
480*61046927SAndroid Build Coastguard Worker return true;
481*61046927SAndroid Build Coastguard Worker }
482*61046927SAndroid Build Coastguard Worker
483*61046927SAndroid Build Coastguard Worker return false;
484*61046927SAndroid Build Coastguard Worker }
485*61046927SAndroid Build Coastguard Worker
486*61046927SAndroid Build Coastguard Worker void
tu_autotune_free_results_locked(struct tu_device * dev,struct list_head * results)487*61046927SAndroid Build Coastguard Worker tu_autotune_free_results_locked(struct tu_device *dev, struct list_head *results)
488*61046927SAndroid Build Coastguard Worker {
489*61046927SAndroid Build Coastguard Worker list_for_each_entry_safe(struct tu_renderpass_result, result,
490*61046927SAndroid Build Coastguard Worker results, node) {
491*61046927SAndroid Build Coastguard Worker free_result(dev, result);
492*61046927SAndroid Build Coastguard Worker }
493*61046927SAndroid Build Coastguard Worker }
494*61046927SAndroid Build Coastguard Worker
495*61046927SAndroid Build Coastguard Worker void
tu_autotune_free_results(struct tu_device * dev,struct list_head * results)496*61046927SAndroid Build Coastguard Worker tu_autotune_free_results(struct tu_device *dev, struct list_head *results)
497*61046927SAndroid Build Coastguard Worker {
498*61046927SAndroid Build Coastguard Worker mtx_lock(&dev->autotune_mutex);
499*61046927SAndroid Build Coastguard Worker tu_autotune_free_results_locked(dev, results);
500*61046927SAndroid Build Coastguard Worker mtx_unlock(&dev->autotune_mutex);
501*61046927SAndroid Build Coastguard Worker }
502*61046927SAndroid Build Coastguard Worker
503*61046927SAndroid Build Coastguard Worker static bool
fallback_use_bypass(const struct tu_render_pass * pass,const struct tu_framebuffer * framebuffer,const struct tu_cmd_buffer * cmd_buffer)504*61046927SAndroid Build Coastguard Worker fallback_use_bypass(const struct tu_render_pass *pass,
505*61046927SAndroid Build Coastguard Worker const struct tu_framebuffer *framebuffer,
506*61046927SAndroid Build Coastguard Worker const struct tu_cmd_buffer *cmd_buffer)
507*61046927SAndroid Build Coastguard Worker {
508*61046927SAndroid Build Coastguard Worker if (cmd_buffer->state.rp.drawcall_count > 5)
509*61046927SAndroid Build Coastguard Worker return false;
510*61046927SAndroid Build Coastguard Worker
511*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < pass->subpass_count; i++) {
512*61046927SAndroid Build Coastguard Worker if (pass->subpasses[i].samples != VK_SAMPLE_COUNT_1_BIT)
513*61046927SAndroid Build Coastguard Worker return false;
514*61046927SAndroid Build Coastguard Worker }
515*61046927SAndroid Build Coastguard Worker
516*61046927SAndroid Build Coastguard Worker return true;
517*61046927SAndroid Build Coastguard Worker }
518*61046927SAndroid Build Coastguard Worker
519*61046927SAndroid Build Coastguard Worker static uint32_t
get_render_pass_pixel_count(const struct tu_cmd_buffer * cmd)520*61046927SAndroid Build Coastguard Worker get_render_pass_pixel_count(const struct tu_cmd_buffer *cmd)
521*61046927SAndroid Build Coastguard Worker {
522*61046927SAndroid Build Coastguard Worker const VkExtent2D *extent = &cmd->state.render_area.extent;
523*61046927SAndroid Build Coastguard Worker return extent->width * extent->height;
524*61046927SAndroid Build Coastguard Worker }
525*61046927SAndroid Build Coastguard Worker
526*61046927SAndroid Build Coastguard Worker static uint64_t
estimate_drawcall_bandwidth(const struct tu_cmd_buffer * cmd,uint32_t avg_renderpass_sample_count)527*61046927SAndroid Build Coastguard Worker estimate_drawcall_bandwidth(const struct tu_cmd_buffer *cmd,
528*61046927SAndroid Build Coastguard Worker uint32_t avg_renderpass_sample_count)
529*61046927SAndroid Build Coastguard Worker {
530*61046927SAndroid Build Coastguard Worker const struct tu_cmd_state *state = &cmd->state;
531*61046927SAndroid Build Coastguard Worker
532*61046927SAndroid Build Coastguard Worker if (!state->rp.drawcall_count)
533*61046927SAndroid Build Coastguard Worker return 0;
534*61046927SAndroid Build Coastguard Worker
535*61046927SAndroid Build Coastguard Worker /* sample count times drawcall_bandwidth_per_sample */
536*61046927SAndroid Build Coastguard Worker return (uint64_t)avg_renderpass_sample_count *
537*61046927SAndroid Build Coastguard Worker state->rp.drawcall_bandwidth_per_sample_sum / state->rp.drawcall_count;
538*61046927SAndroid Build Coastguard Worker }
539*61046927SAndroid Build Coastguard Worker
540*61046927SAndroid Build Coastguard Worker bool
tu_autotune_use_bypass(struct tu_autotune * at,struct tu_cmd_buffer * cmd_buffer,struct tu_renderpass_result ** autotune_result)541*61046927SAndroid Build Coastguard Worker tu_autotune_use_bypass(struct tu_autotune *at,
542*61046927SAndroid Build Coastguard Worker struct tu_cmd_buffer *cmd_buffer,
543*61046927SAndroid Build Coastguard Worker struct tu_renderpass_result **autotune_result)
544*61046927SAndroid Build Coastguard Worker {
545*61046927SAndroid Build Coastguard Worker const struct tu_render_pass *pass = cmd_buffer->state.pass;
546*61046927SAndroid Build Coastguard Worker const struct tu_framebuffer *framebuffer = cmd_buffer->state.framebuffer;
547*61046927SAndroid Build Coastguard Worker
548*61046927SAndroid Build Coastguard Worker /* If a feedback loop in the subpass caused one of the pipelines used to set
549*61046927SAndroid Build Coastguard Worker * SINGLE_PRIM_MODE(FLUSH_PER_OVERLAP_AND_OVERWRITE) or even
550*61046927SAndroid Build Coastguard Worker * SINGLE_PRIM_MODE(FLUSH), then that should cause significantly increased
551*61046927SAndroid Build Coastguard Worker * sysmem bandwidth (though we haven't quantified it).
552*61046927SAndroid Build Coastguard Worker */
553*61046927SAndroid Build Coastguard Worker if (cmd_buffer->state.rp.sysmem_single_prim_mode)
554*61046927SAndroid Build Coastguard Worker return false;
555*61046927SAndroid Build Coastguard Worker
556*61046927SAndroid Build Coastguard Worker /* If the user is using a fragment density map, then this will cause less
557*61046927SAndroid Build Coastguard Worker * FS invocations with GMEM, which has a hard-to-measure impact on
558*61046927SAndroid Build Coastguard Worker * performance because it depends on how heavy the FS is in addition to how
559*61046927SAndroid Build Coastguard Worker * many invocations there were and the density. Let's assume the user knows
560*61046927SAndroid Build Coastguard Worker * what they're doing when they added the map, because if sysmem is
561*61046927SAndroid Build Coastguard Worker * actually faster then they could've just not used the fragment density
562*61046927SAndroid Build Coastguard Worker * map.
563*61046927SAndroid Build Coastguard Worker */
564*61046927SAndroid Build Coastguard Worker if (pass->has_fdm)
565*61046927SAndroid Build Coastguard Worker return false;
566*61046927SAndroid Build Coastguard Worker
567*61046927SAndroid Build Coastguard Worker /* For VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT buffers
568*61046927SAndroid Build Coastguard Worker * we would have to allocate GPU memory at the submit time and copy
569*61046927SAndroid Build Coastguard Worker * results into it.
570*61046927SAndroid Build Coastguard Worker * Native games ususally don't use it, Zink and DXVK don't use it,
571*61046927SAndroid Build Coastguard Worker * D3D12 doesn't have such concept.
572*61046927SAndroid Build Coastguard Worker */
573*61046927SAndroid Build Coastguard Worker bool simultaneous_use =
574*61046927SAndroid Build Coastguard Worker cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT;
575*61046927SAndroid Build Coastguard Worker
576*61046927SAndroid Build Coastguard Worker if (!at->enabled || simultaneous_use)
577*61046927SAndroid Build Coastguard Worker return fallback_use_bypass(pass, framebuffer, cmd_buffer);
578*61046927SAndroid Build Coastguard Worker
579*61046927SAndroid Build Coastguard Worker /* We use 64bit hash as a key since we don't fear rare hash collision,
580*61046927SAndroid Build Coastguard Worker * the worst that would happen is sysmem being selected when it should
581*61046927SAndroid Build Coastguard Worker * have not, and with 64bit it would be extremely rare.
582*61046927SAndroid Build Coastguard Worker *
583*61046927SAndroid Build Coastguard Worker * Q: Why not make the key from framebuffer + renderpass pointers?
584*61046927SAndroid Build Coastguard Worker * A: At least DXVK creates new framebuffers each frame while keeping
585*61046927SAndroid Build Coastguard Worker * renderpasses the same. Also we want to support replaying a single
586*61046927SAndroid Build Coastguard Worker * frame in a loop for testing.
587*61046927SAndroid Build Coastguard Worker */
588*61046927SAndroid Build Coastguard Worker uint64_t renderpass_key = hash_renderpass_instance(pass, framebuffer, cmd_buffer);
589*61046927SAndroid Build Coastguard Worker
590*61046927SAndroid Build Coastguard Worker *autotune_result = create_history_result(at, renderpass_key);
591*61046927SAndroid Build Coastguard Worker
592*61046927SAndroid Build Coastguard Worker uint32_t avg_samples = 0;
593*61046927SAndroid Build Coastguard Worker if (get_history(at, renderpass_key, &avg_samples)) {
594*61046927SAndroid Build Coastguard Worker const uint32_t pass_pixel_count =
595*61046927SAndroid Build Coastguard Worker get_render_pass_pixel_count(cmd_buffer);
596*61046927SAndroid Build Coastguard Worker uint64_t sysmem_bandwidth =
597*61046927SAndroid Build Coastguard Worker (uint64_t)pass->sysmem_bandwidth_per_pixel * pass_pixel_count;
598*61046927SAndroid Build Coastguard Worker uint64_t gmem_bandwidth =
599*61046927SAndroid Build Coastguard Worker (uint64_t)pass->gmem_bandwidth_per_pixel * pass_pixel_count;
600*61046927SAndroid Build Coastguard Worker
601*61046927SAndroid Build Coastguard Worker const uint64_t total_draw_call_bandwidth =
602*61046927SAndroid Build Coastguard Worker estimate_drawcall_bandwidth(cmd_buffer, avg_samples);
603*61046927SAndroid Build Coastguard Worker
604*61046927SAndroid Build Coastguard Worker /* drawcalls access the memory in sysmem rendering (ignoring CCU) */
605*61046927SAndroid Build Coastguard Worker sysmem_bandwidth += total_draw_call_bandwidth;
606*61046927SAndroid Build Coastguard Worker
607*61046927SAndroid Build Coastguard Worker /* drawcalls access gmem in gmem rendering, but we do not want to ignore
608*61046927SAndroid Build Coastguard Worker * them completely. The state changes between tiles also have an
609*61046927SAndroid Build Coastguard Worker * overhead. The magic numbers of 11 and 10 are randomly chosen.
610*61046927SAndroid Build Coastguard Worker */
611*61046927SAndroid Build Coastguard Worker gmem_bandwidth = (gmem_bandwidth * 11 + total_draw_call_bandwidth) / 10;
612*61046927SAndroid Build Coastguard Worker
613*61046927SAndroid Build Coastguard Worker const bool select_sysmem = sysmem_bandwidth <= gmem_bandwidth;
614*61046927SAndroid Build Coastguard Worker if (TU_AUTOTUNE_DEBUG_LOG) {
615*61046927SAndroid Build Coastguard Worker const VkExtent2D *extent = &cmd_buffer->state.render_area.extent;
616*61046927SAndroid Build Coastguard Worker const float drawcall_bandwidth_per_sample =
617*61046927SAndroid Build Coastguard Worker (float)cmd_buffer->state.rp.drawcall_bandwidth_per_sample_sum /
618*61046927SAndroid Build Coastguard Worker cmd_buffer->state.rp.drawcall_count;
619*61046927SAndroid Build Coastguard Worker
620*61046927SAndroid Build Coastguard Worker mesa_logi("autotune %016" PRIx64 ":%u selecting %s",
621*61046927SAndroid Build Coastguard Worker renderpass_key,
622*61046927SAndroid Build Coastguard Worker cmd_buffer->state.rp.drawcall_count,
623*61046927SAndroid Build Coastguard Worker select_sysmem ? "sysmem" : "gmem");
624*61046927SAndroid Build Coastguard Worker mesa_logi(" avg_samples=%u, draw_bandwidth_per_sample=%.2f, total_draw_call_bandwidth=%" PRIu64,
625*61046927SAndroid Build Coastguard Worker avg_samples,
626*61046927SAndroid Build Coastguard Worker drawcall_bandwidth_per_sample,
627*61046927SAndroid Build Coastguard Worker total_draw_call_bandwidth);
628*61046927SAndroid Build Coastguard Worker mesa_logi(" render_area=%ux%u, sysmem_bandwidth_per_pixel=%u, gmem_bandwidth_per_pixel=%u",
629*61046927SAndroid Build Coastguard Worker extent->width, extent->height,
630*61046927SAndroid Build Coastguard Worker pass->sysmem_bandwidth_per_pixel,
631*61046927SAndroid Build Coastguard Worker pass->gmem_bandwidth_per_pixel);
632*61046927SAndroid Build Coastguard Worker mesa_logi(" sysmem_bandwidth=%" PRIu64 ", gmem_bandwidth=%" PRIu64,
633*61046927SAndroid Build Coastguard Worker sysmem_bandwidth, gmem_bandwidth);
634*61046927SAndroid Build Coastguard Worker }
635*61046927SAndroid Build Coastguard Worker
636*61046927SAndroid Build Coastguard Worker return select_sysmem;
637*61046927SAndroid Build Coastguard Worker }
638*61046927SAndroid Build Coastguard Worker
639*61046927SAndroid Build Coastguard Worker return fallback_use_bypass(pass, framebuffer, cmd_buffer);
640*61046927SAndroid Build Coastguard Worker }
641*61046927SAndroid Build Coastguard Worker
642*61046927SAndroid Build Coastguard Worker template <chip CHIP>
643*61046927SAndroid Build Coastguard Worker void
tu_autotune_begin_renderpass(struct tu_cmd_buffer * cmd,struct tu_cs * cs,struct tu_renderpass_result * autotune_result)644*61046927SAndroid Build Coastguard Worker tu_autotune_begin_renderpass(struct tu_cmd_buffer *cmd,
645*61046927SAndroid Build Coastguard Worker struct tu_cs *cs,
646*61046927SAndroid Build Coastguard Worker struct tu_renderpass_result *autotune_result)
647*61046927SAndroid Build Coastguard Worker {
648*61046927SAndroid Build Coastguard Worker if (!autotune_result)
649*61046927SAndroid Build Coastguard Worker return;
650*61046927SAndroid Build Coastguard Worker
651*61046927SAndroid Build Coastguard Worker struct tu_device *dev = cmd->device;
652*61046927SAndroid Build Coastguard Worker
653*61046927SAndroid Build Coastguard Worker static const uint32_t size = sizeof(struct tu_renderpass_samples);
654*61046927SAndroid Build Coastguard Worker
655*61046927SAndroid Build Coastguard Worker mtx_lock(&dev->autotune_mutex);
656*61046927SAndroid Build Coastguard Worker VkResult ret = tu_suballoc_bo_alloc(&autotune_result->bo, &dev->autotune_suballoc, size, size);
657*61046927SAndroid Build Coastguard Worker mtx_unlock(&dev->autotune_mutex);
658*61046927SAndroid Build Coastguard Worker if (ret != VK_SUCCESS) {
659*61046927SAndroid Build Coastguard Worker autotune_result->bo.iova = 0;
660*61046927SAndroid Build Coastguard Worker return;
661*61046927SAndroid Build Coastguard Worker }
662*61046927SAndroid Build Coastguard Worker
663*61046927SAndroid Build Coastguard Worker uint64_t result_iova = autotune_result->bo.iova;
664*61046927SAndroid Build Coastguard Worker
665*61046927SAndroid Build Coastguard Worker autotune_result->samples =
666*61046927SAndroid Build Coastguard Worker (struct tu_renderpass_samples *) tu_suballoc_bo_map(
667*61046927SAndroid Build Coastguard Worker &autotune_result->bo);
668*61046927SAndroid Build Coastguard Worker
669*61046927SAndroid Build Coastguard Worker tu_cs_emit_regs(cs, A6XX_RB_SAMPLE_COUNT_CONTROL(.copy = true));
670*61046927SAndroid Build Coastguard Worker if (cmd->device->physical_device->info->a7xx.has_event_write_sample_count) {
671*61046927SAndroid Build Coastguard Worker tu_cs_emit_pkt7(cs, CP_EVENT_WRITE7, 3);
672*61046927SAndroid Build Coastguard Worker tu_cs_emit(cs, CP_EVENT_WRITE7_0(.event = ZPASS_DONE,
673*61046927SAndroid Build Coastguard Worker .write_sample_count = true).value);
674*61046927SAndroid Build Coastguard Worker tu_cs_emit_qw(cs, result_iova);
675*61046927SAndroid Build Coastguard Worker
676*61046927SAndroid Build Coastguard Worker /* If the renderpass contains an occlusion query with its own ZPASS_DONE,
677*61046927SAndroid Build Coastguard Worker * we have to provide a fake ZPASS_DONE event here to logically close the
678*61046927SAndroid Build Coastguard Worker * previous one, preventing firmware from misbehaving due to nested events.
679*61046927SAndroid Build Coastguard Worker * This writes into the samples_end field, which will be overwritten in
680*61046927SAndroid Build Coastguard Worker * tu_autotune_end_renderpass.
681*61046927SAndroid Build Coastguard Worker */
682*61046927SAndroid Build Coastguard Worker if (cmd->state.rp.has_zpass_done_sample_count_write_in_rp) {
683*61046927SAndroid Build Coastguard Worker tu_cs_emit_pkt7(cs, CP_EVENT_WRITE7, 3);
684*61046927SAndroid Build Coastguard Worker tu_cs_emit(cs, CP_EVENT_WRITE7_0(.event = ZPASS_DONE,
685*61046927SAndroid Build Coastguard Worker .write_sample_count = true,
686*61046927SAndroid Build Coastguard Worker .sample_count_end_offset = true,
687*61046927SAndroid Build Coastguard Worker .write_accum_sample_count_diff = true).value);
688*61046927SAndroid Build Coastguard Worker tu_cs_emit_qw(cs, result_iova);
689*61046927SAndroid Build Coastguard Worker }
690*61046927SAndroid Build Coastguard Worker } else {
691*61046927SAndroid Build Coastguard Worker tu_cs_emit_regs(cs,
692*61046927SAndroid Build Coastguard Worker A6XX_RB_SAMPLE_COUNT_ADDR(.qword = result_iova));
693*61046927SAndroid Build Coastguard Worker tu_cs_emit_pkt7(cs, CP_EVENT_WRITE, 1);
694*61046927SAndroid Build Coastguard Worker tu_cs_emit(cs, ZPASS_DONE);
695*61046927SAndroid Build Coastguard Worker }
696*61046927SAndroid Build Coastguard Worker }
697*61046927SAndroid Build Coastguard Worker TU_GENX(tu_autotune_begin_renderpass);
698*61046927SAndroid Build Coastguard Worker
699*61046927SAndroid Build Coastguard Worker template <chip CHIP>
tu_autotune_end_renderpass(struct tu_cmd_buffer * cmd,struct tu_cs * cs,struct tu_renderpass_result * autotune_result)700*61046927SAndroid Build Coastguard Worker void tu_autotune_end_renderpass(struct tu_cmd_buffer *cmd,
701*61046927SAndroid Build Coastguard Worker struct tu_cs *cs,
702*61046927SAndroid Build Coastguard Worker struct tu_renderpass_result *autotune_result)
703*61046927SAndroid Build Coastguard Worker {
704*61046927SAndroid Build Coastguard Worker if (!autotune_result)
705*61046927SAndroid Build Coastguard Worker return;
706*61046927SAndroid Build Coastguard Worker
707*61046927SAndroid Build Coastguard Worker if (!autotune_result->bo.iova)
708*61046927SAndroid Build Coastguard Worker return;
709*61046927SAndroid Build Coastguard Worker
710*61046927SAndroid Build Coastguard Worker uint64_t result_iova = autotune_result->bo.iova;
711*61046927SAndroid Build Coastguard Worker
712*61046927SAndroid Build Coastguard Worker tu_cs_emit_regs(cs, A6XX_RB_SAMPLE_COUNT_CONTROL(.copy = true));
713*61046927SAndroid Build Coastguard Worker
714*61046927SAndroid Build Coastguard Worker if (cmd->device->physical_device->info->a7xx.has_event_write_sample_count) {
715*61046927SAndroid Build Coastguard Worker /* If the renderpass contains ZPASS_DONE events we emit a fake ZPASS_DONE
716*61046927SAndroid Build Coastguard Worker * event here, composing a pair of these events that firmware handles without
717*61046927SAndroid Build Coastguard Worker * issue. This first event writes into the samples_end field and the second
718*61046927SAndroid Build Coastguard Worker * event overwrites it. The second event also enables the accumulation flag
719*61046927SAndroid Build Coastguard Worker * even when we don't use that result because the blob always sets it.
720*61046927SAndroid Build Coastguard Worker */
721*61046927SAndroid Build Coastguard Worker if (cmd->state.rp.has_zpass_done_sample_count_write_in_rp) {
722*61046927SAndroid Build Coastguard Worker tu_cs_emit_pkt7(cs, CP_EVENT_WRITE7, 3);
723*61046927SAndroid Build Coastguard Worker tu_cs_emit(cs, CP_EVENT_WRITE7_0(.event = ZPASS_DONE,
724*61046927SAndroid Build Coastguard Worker .write_sample_count = true).value);
725*61046927SAndroid Build Coastguard Worker tu_cs_emit_qw(cs, result_iova + offsetof(struct tu_renderpass_samples, samples_end));
726*61046927SAndroid Build Coastguard Worker }
727*61046927SAndroid Build Coastguard Worker
728*61046927SAndroid Build Coastguard Worker tu_cs_emit_pkt7(cs, CP_EVENT_WRITE7, 3);
729*61046927SAndroid Build Coastguard Worker tu_cs_emit(cs, CP_EVENT_WRITE7_0(.event = ZPASS_DONE,
730*61046927SAndroid Build Coastguard Worker .write_sample_count = true,
731*61046927SAndroid Build Coastguard Worker .sample_count_end_offset = true,
732*61046927SAndroid Build Coastguard Worker .write_accum_sample_count_diff = true).value);
733*61046927SAndroid Build Coastguard Worker tu_cs_emit_qw(cs, result_iova);
734*61046927SAndroid Build Coastguard Worker } else {
735*61046927SAndroid Build Coastguard Worker result_iova += offsetof(struct tu_renderpass_samples, samples_end);
736*61046927SAndroid Build Coastguard Worker
737*61046927SAndroid Build Coastguard Worker tu_cs_emit_regs(cs,
738*61046927SAndroid Build Coastguard Worker A6XX_RB_SAMPLE_COUNT_ADDR(.qword = result_iova));
739*61046927SAndroid Build Coastguard Worker tu_cs_emit_pkt7(cs, CP_EVENT_WRITE, 1);
740*61046927SAndroid Build Coastguard Worker tu_cs_emit(cs, ZPASS_DONE);
741*61046927SAndroid Build Coastguard Worker }
742*61046927SAndroid Build Coastguard Worker }
743*61046927SAndroid Build Coastguard Worker TU_GENX(tu_autotune_end_renderpass);
744