xref: /aosp_15_r20/external/mesa3d/src/freedreno/vulkan/tu_autotune.h (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1*61046927SAndroid Build Coastguard Worker /*
2*61046927SAndroid Build Coastguard Worker  * Copyright © 2021 Igalia S.L.
3*61046927SAndroid Build Coastguard Worker  * SPDX-License-Identifier: MIT
4*61046927SAndroid Build Coastguard Worker  */
5*61046927SAndroid Build Coastguard Worker 
6*61046927SAndroid Build Coastguard Worker #ifndef TU_AUTOTUNE_H
7*61046927SAndroid Build Coastguard Worker #define TU_AUTOTUNE_H
8*61046927SAndroid Build Coastguard Worker 
9*61046927SAndroid Build Coastguard Worker #include "tu_common.h"
10*61046927SAndroid Build Coastguard Worker 
11*61046927SAndroid Build Coastguard Worker #include "util/hash_table.h"
12*61046927SAndroid Build Coastguard Worker #include "util/rwlock.h"
13*61046927SAndroid Build Coastguard Worker 
14*61046927SAndroid Build Coastguard Worker #include "tu_suballoc.h"
15*61046927SAndroid Build Coastguard Worker 
16*61046927SAndroid Build Coastguard Worker struct tu_renderpass_history;
17*61046927SAndroid Build Coastguard Worker 
18*61046927SAndroid Build Coastguard Worker /**
19*61046927SAndroid Build Coastguard Worker  * "autotune" our decisions about bypass vs GMEM rendering, based on historical
20*61046927SAndroid Build Coastguard Worker  * data about a given render target.
21*61046927SAndroid Build Coastguard Worker  *
22*61046927SAndroid Build Coastguard Worker  * In deciding which path to take there are tradeoffs, including some that
23*61046927SAndroid Build Coastguard Worker  * are not reasonably estimateable without having some additional information:
24*61046927SAndroid Build Coastguard Worker  *
25*61046927SAndroid Build Coastguard Worker  *  (1) If you know you are touching every pixel (ie. there is a clear),
26*61046927SAndroid Build Coastguard Worker  *      then the GMEM path will at least not cost more memory bandwidth than
27*61046927SAndroid Build Coastguard Worker  *      sysmem[1]
28*61046927SAndroid Build Coastguard Worker  *
29*61046927SAndroid Build Coastguard Worker  *  (2) If there is no clear, GMEM could potentially cost *more* bandwidth
30*61046927SAndroid Build Coastguard Worker  *      if there is sysmem->GMEM restore pass.
31*61046927SAndroid Build Coastguard Worker  *
32*61046927SAndroid Build Coastguard Worker  *  (3) If you see a high draw count, that is an indication that there will be
33*61046927SAndroid Build Coastguard Worker  *      enough pixels accessed multiple times to benefit from the reduced
34*61046927SAndroid Build Coastguard Worker  *      memory bandwidth that GMEM brings
35*61046927SAndroid Build Coastguard Worker  *
36*61046927SAndroid Build Coastguard Worker  *  (4) But high draw count where there is not much overdraw can actually be
37*61046927SAndroid Build Coastguard Worker  *      faster in bypass mode if it is pushing a lot of state change, due to
38*61046927SAndroid Build Coastguard Worker  *      not having to go thru the state changes per-tile[1]
39*61046927SAndroid Build Coastguard Worker  *
40*61046927SAndroid Build Coastguard Worker  * The approach taken is to measure the samples-passed for the batch to estimate
41*61046927SAndroid Build Coastguard Worker  * the amount of overdraw to detect cases where the number of pixels touched is
42*61046927SAndroid Build Coastguard Worker  * low.
43*61046927SAndroid Build Coastguard Worker  *
44*61046927SAndroid Build Coastguard Worker  * [1] ignoring early-tile-exit optimizations, but any draw that touches all/
45*61046927SAndroid Build Coastguard Worker  *     most of the tiles late in the tile-pass can defeat that
46*61046927SAndroid Build Coastguard Worker  */
47*61046927SAndroid Build Coastguard Worker struct tu_autotune {
48*61046927SAndroid Build Coastguard Worker 
49*61046927SAndroid Build Coastguard Worker    /* We may have to disable autotuner if there are too many
50*61046927SAndroid Build Coastguard Worker     * renderpasses in-flight.
51*61046927SAndroid Build Coastguard Worker     */
52*61046927SAndroid Build Coastguard Worker    bool enabled;
53*61046927SAndroid Build Coastguard Worker 
54*61046927SAndroid Build Coastguard Worker    struct tu_device *device;
55*61046927SAndroid Build Coastguard Worker 
56*61046927SAndroid Build Coastguard Worker    /**
57*61046927SAndroid Build Coastguard Worker     * Cache to map renderpass key to historical information about
58*61046927SAndroid Build Coastguard Worker     * rendering to that particular render target.
59*61046927SAndroid Build Coastguard Worker     */
60*61046927SAndroid Build Coastguard Worker    struct hash_table *ht;
61*61046927SAndroid Build Coastguard Worker    struct u_rwlock ht_lock;
62*61046927SAndroid Build Coastguard Worker 
63*61046927SAndroid Build Coastguard Worker    /**
64*61046927SAndroid Build Coastguard Worker     * List of per-renderpass results that we are waiting for the GPU
65*61046927SAndroid Build Coastguard Worker     * to finish with before reading back the results.
66*61046927SAndroid Build Coastguard Worker     */
67*61046927SAndroid Build Coastguard Worker    struct list_head pending_results;
68*61046927SAndroid Build Coastguard Worker 
69*61046927SAndroid Build Coastguard Worker    /**
70*61046927SAndroid Build Coastguard Worker     * List of per-submission data that we may want to free after we
71*61046927SAndroid Build Coastguard Worker     * processed submission results.
72*61046927SAndroid Build Coastguard Worker     * This could happend after command buffers which were in the submission
73*61046927SAndroid Build Coastguard Worker     * are destroyed.
74*61046927SAndroid Build Coastguard Worker     */
75*61046927SAndroid Build Coastguard Worker    struct list_head pending_submission_data;
76*61046927SAndroid Build Coastguard Worker 
77*61046927SAndroid Build Coastguard Worker    /**
78*61046927SAndroid Build Coastguard Worker     * List of per-submission data that has been finished and can be reused.
79*61046927SAndroid Build Coastguard Worker     */
80*61046927SAndroid Build Coastguard Worker    struct list_head submission_data_pool;
81*61046927SAndroid Build Coastguard Worker 
82*61046927SAndroid Build Coastguard Worker    uint32_t fence_counter;
83*61046927SAndroid Build Coastguard Worker    uint32_t idx_counter;
84*61046927SAndroid Build Coastguard Worker };
85*61046927SAndroid Build Coastguard Worker 
86*61046927SAndroid Build Coastguard Worker /**
87*61046927SAndroid Build Coastguard Worker  * From the cmdstream, the captured samples-passed values are recorded
88*61046927SAndroid Build Coastguard Worker  * at the start and end of the batch.
89*61046927SAndroid Build Coastguard Worker  *
90*61046927SAndroid Build Coastguard Worker  * Note that we do the math on the CPU to avoid a WFI.  But pre-emption
91*61046927SAndroid Build Coastguard Worker  * may force us to revisit that.
92*61046927SAndroid Build Coastguard Worker  */
93*61046927SAndroid Build Coastguard Worker struct PACKED tu_renderpass_samples {
94*61046927SAndroid Build Coastguard Worker    uint64_t samples_start;
95*61046927SAndroid Build Coastguard Worker    /* hw requires the sample start/stop locations to be 128b aligned. */
96*61046927SAndroid Build Coastguard Worker    uint64_t __pad0;
97*61046927SAndroid Build Coastguard Worker    uint64_t samples_end;
98*61046927SAndroid Build Coastguard Worker    uint64_t __pad1;
99*61046927SAndroid Build Coastguard Worker };
100*61046927SAndroid Build Coastguard Worker 
101*61046927SAndroid Build Coastguard Worker /* Necessary when writing sample counts using CP_EVENT_WRITE7::ZPASS_DONE. */
102*61046927SAndroid Build Coastguard Worker static_assert(offsetof(struct tu_renderpass_samples, samples_end) == 16);
103*61046927SAndroid Build Coastguard Worker 
104*61046927SAndroid Build Coastguard Worker /**
105*61046927SAndroid Build Coastguard Worker  * Tracks the results from an individual renderpass. Initially created
106*61046927SAndroid Build Coastguard Worker  * per renderpass, and appended to the tail of at->pending_results. At a later
107*61046927SAndroid Build Coastguard Worker  * time, when the GPU has finished writing the results, we fill samples_passed.
108*61046927SAndroid Build Coastguard Worker  */
109*61046927SAndroid Build Coastguard Worker struct tu_renderpass_result {
110*61046927SAndroid Build Coastguard Worker    /* Points into GPU memory */
111*61046927SAndroid Build Coastguard Worker    struct tu_renderpass_samples* samples;
112*61046927SAndroid Build Coastguard Worker 
113*61046927SAndroid Build Coastguard Worker    struct tu_suballoc_bo bo;
114*61046927SAndroid Build Coastguard Worker 
115*61046927SAndroid Build Coastguard Worker    /*
116*61046927SAndroid Build Coastguard Worker     * Below here, only used internally within autotune
117*61046927SAndroid Build Coastguard Worker     */
118*61046927SAndroid Build Coastguard Worker    uint64_t rp_key;
119*61046927SAndroid Build Coastguard Worker    struct tu_renderpass_history *history;
120*61046927SAndroid Build Coastguard Worker    struct list_head node;
121*61046927SAndroid Build Coastguard Worker    uint32_t fence;
122*61046927SAndroid Build Coastguard Worker    uint64_t samples_passed;
123*61046927SAndroid Build Coastguard Worker };
124*61046927SAndroid Build Coastguard Worker 
125*61046927SAndroid Build Coastguard Worker VkResult tu_autotune_init(struct tu_autotune *at, struct tu_device *dev);
126*61046927SAndroid Build Coastguard Worker void tu_autotune_fini(struct tu_autotune *at, struct tu_device *dev);
127*61046927SAndroid Build Coastguard Worker 
128*61046927SAndroid Build Coastguard Worker bool tu_autotune_use_bypass(struct tu_autotune *at,
129*61046927SAndroid Build Coastguard Worker                             struct tu_cmd_buffer *cmd_buffer,
130*61046927SAndroid Build Coastguard Worker                             struct tu_renderpass_result **autotune_result);
131*61046927SAndroid Build Coastguard Worker void tu_autotune_free_results(struct tu_device *dev, struct list_head *results);
132*61046927SAndroid Build Coastguard Worker 
133*61046927SAndroid Build Coastguard Worker bool tu_autotune_submit_requires_fence(struct tu_cmd_buffer **cmd_buffers,
134*61046927SAndroid Build Coastguard Worker                                        uint32_t cmd_buffer_count);
135*61046927SAndroid Build Coastguard Worker 
136*61046927SAndroid Build Coastguard Worker /**
137*61046927SAndroid Build Coastguard Worker  * A magic 8-ball that tells the gmem code whether we should do bypass mode
138*61046927SAndroid Build Coastguard Worker  * for moar fps.
139*61046927SAndroid Build Coastguard Worker  */
140*61046927SAndroid Build Coastguard Worker struct tu_cs *tu_autotune_on_submit(struct tu_device *dev,
141*61046927SAndroid Build Coastguard Worker                                     struct tu_autotune *at,
142*61046927SAndroid Build Coastguard Worker                                     struct tu_cmd_buffer **cmd_buffers,
143*61046927SAndroid Build Coastguard Worker                                     uint32_t cmd_buffer_count);
144*61046927SAndroid Build Coastguard Worker 
145*61046927SAndroid Build Coastguard Worker struct tu_autotune_results_buffer;
146*61046927SAndroid Build Coastguard Worker 
147*61046927SAndroid Build Coastguard Worker template <chip CHIP>
148*61046927SAndroid Build Coastguard Worker void tu_autotune_begin_renderpass(struct tu_cmd_buffer *cmd,
149*61046927SAndroid Build Coastguard Worker                                   struct tu_cs *cs,
150*61046927SAndroid Build Coastguard Worker                                   struct tu_renderpass_result *autotune_result);
151*61046927SAndroid Build Coastguard Worker 
152*61046927SAndroid Build Coastguard Worker template <chip CHIP>
153*61046927SAndroid Build Coastguard Worker void tu_autotune_end_renderpass(struct tu_cmd_buffer *cmd,
154*61046927SAndroid Build Coastguard Worker                                 struct tu_cs *cs,
155*61046927SAndroid Build Coastguard Worker                                 struct tu_renderpass_result *autotune_result);
156*61046927SAndroid Build Coastguard Worker 
157*61046927SAndroid Build Coastguard Worker #endif /* TU_AUTOTUNE_H */
158