1*61046927SAndroid Build Coastguard Worker /* 2*61046927SAndroid Build Coastguard Worker * Copyright © 2021 Igalia S.L. 3*61046927SAndroid Build Coastguard Worker * SPDX-License-Identifier: MIT 4*61046927SAndroid Build Coastguard Worker */ 5*61046927SAndroid Build Coastguard Worker 6*61046927SAndroid Build Coastguard Worker #ifndef TU_AUTOTUNE_H 7*61046927SAndroid Build Coastguard Worker #define TU_AUTOTUNE_H 8*61046927SAndroid Build Coastguard Worker 9*61046927SAndroid Build Coastguard Worker #include "tu_common.h" 10*61046927SAndroid Build Coastguard Worker 11*61046927SAndroid Build Coastguard Worker #include "util/hash_table.h" 12*61046927SAndroid Build Coastguard Worker #include "util/rwlock.h" 13*61046927SAndroid Build Coastguard Worker 14*61046927SAndroid Build Coastguard Worker #include "tu_suballoc.h" 15*61046927SAndroid Build Coastguard Worker 16*61046927SAndroid Build Coastguard Worker struct tu_renderpass_history; 17*61046927SAndroid Build Coastguard Worker 18*61046927SAndroid Build Coastguard Worker /** 19*61046927SAndroid Build Coastguard Worker * "autotune" our decisions about bypass vs GMEM rendering, based on historical 20*61046927SAndroid Build Coastguard Worker * data about a given render target. 21*61046927SAndroid Build Coastguard Worker * 22*61046927SAndroid Build Coastguard Worker * In deciding which path to take there are tradeoffs, including some that 23*61046927SAndroid Build Coastguard Worker * are not reasonably estimateable without having some additional information: 24*61046927SAndroid Build Coastguard Worker * 25*61046927SAndroid Build Coastguard Worker * (1) If you know you are touching every pixel (ie. there is a clear), 26*61046927SAndroid Build Coastguard Worker * then the GMEM path will at least not cost more memory bandwidth than 27*61046927SAndroid Build Coastguard Worker * sysmem[1] 28*61046927SAndroid Build Coastguard Worker * 29*61046927SAndroid Build Coastguard Worker * (2) If there is no clear, GMEM could potentially cost *more* bandwidth 30*61046927SAndroid Build Coastguard Worker * if there is sysmem->GMEM restore pass. 31*61046927SAndroid Build Coastguard Worker * 32*61046927SAndroid Build Coastguard Worker * (3) If you see a high draw count, that is an indication that there will be 33*61046927SAndroid Build Coastguard Worker * enough pixels accessed multiple times to benefit from the reduced 34*61046927SAndroid Build Coastguard Worker * memory bandwidth that GMEM brings 35*61046927SAndroid Build Coastguard Worker * 36*61046927SAndroid Build Coastguard Worker * (4) But high draw count where there is not much overdraw can actually be 37*61046927SAndroid Build Coastguard Worker * faster in bypass mode if it is pushing a lot of state change, due to 38*61046927SAndroid Build Coastguard Worker * not having to go thru the state changes per-tile[1] 39*61046927SAndroid Build Coastguard Worker * 40*61046927SAndroid Build Coastguard Worker * The approach taken is to measure the samples-passed for the batch to estimate 41*61046927SAndroid Build Coastguard Worker * the amount of overdraw to detect cases where the number of pixels touched is 42*61046927SAndroid Build Coastguard Worker * low. 43*61046927SAndroid Build Coastguard Worker * 44*61046927SAndroid Build Coastguard Worker * [1] ignoring early-tile-exit optimizations, but any draw that touches all/ 45*61046927SAndroid Build Coastguard Worker * most of the tiles late in the tile-pass can defeat that 46*61046927SAndroid Build Coastguard Worker */ 47*61046927SAndroid Build Coastguard Worker struct tu_autotune { 48*61046927SAndroid Build Coastguard Worker 49*61046927SAndroid Build Coastguard Worker /* We may have to disable autotuner if there are too many 50*61046927SAndroid Build Coastguard Worker * renderpasses in-flight. 51*61046927SAndroid Build Coastguard Worker */ 52*61046927SAndroid Build Coastguard Worker bool enabled; 53*61046927SAndroid Build Coastguard Worker 54*61046927SAndroid Build Coastguard Worker struct tu_device *device; 55*61046927SAndroid Build Coastguard Worker 56*61046927SAndroid Build Coastguard Worker /** 57*61046927SAndroid Build Coastguard Worker * Cache to map renderpass key to historical information about 58*61046927SAndroid Build Coastguard Worker * rendering to that particular render target. 59*61046927SAndroid Build Coastguard Worker */ 60*61046927SAndroid Build Coastguard Worker struct hash_table *ht; 61*61046927SAndroid Build Coastguard Worker struct u_rwlock ht_lock; 62*61046927SAndroid Build Coastguard Worker 63*61046927SAndroid Build Coastguard Worker /** 64*61046927SAndroid Build Coastguard Worker * List of per-renderpass results that we are waiting for the GPU 65*61046927SAndroid Build Coastguard Worker * to finish with before reading back the results. 66*61046927SAndroid Build Coastguard Worker */ 67*61046927SAndroid Build Coastguard Worker struct list_head pending_results; 68*61046927SAndroid Build Coastguard Worker 69*61046927SAndroid Build Coastguard Worker /** 70*61046927SAndroid Build Coastguard Worker * List of per-submission data that we may want to free after we 71*61046927SAndroid Build Coastguard Worker * processed submission results. 72*61046927SAndroid Build Coastguard Worker * This could happend after command buffers which were in the submission 73*61046927SAndroid Build Coastguard Worker * are destroyed. 74*61046927SAndroid Build Coastguard Worker */ 75*61046927SAndroid Build Coastguard Worker struct list_head pending_submission_data; 76*61046927SAndroid Build Coastguard Worker 77*61046927SAndroid Build Coastguard Worker /** 78*61046927SAndroid Build Coastguard Worker * List of per-submission data that has been finished and can be reused. 79*61046927SAndroid Build Coastguard Worker */ 80*61046927SAndroid Build Coastguard Worker struct list_head submission_data_pool; 81*61046927SAndroid Build Coastguard Worker 82*61046927SAndroid Build Coastguard Worker uint32_t fence_counter; 83*61046927SAndroid Build Coastguard Worker uint32_t idx_counter; 84*61046927SAndroid Build Coastguard Worker }; 85*61046927SAndroid Build Coastguard Worker 86*61046927SAndroid Build Coastguard Worker /** 87*61046927SAndroid Build Coastguard Worker * From the cmdstream, the captured samples-passed values are recorded 88*61046927SAndroid Build Coastguard Worker * at the start and end of the batch. 89*61046927SAndroid Build Coastguard Worker * 90*61046927SAndroid Build Coastguard Worker * Note that we do the math on the CPU to avoid a WFI. But pre-emption 91*61046927SAndroid Build Coastguard Worker * may force us to revisit that. 92*61046927SAndroid Build Coastguard Worker */ 93*61046927SAndroid Build Coastguard Worker struct PACKED tu_renderpass_samples { 94*61046927SAndroid Build Coastguard Worker uint64_t samples_start; 95*61046927SAndroid Build Coastguard Worker /* hw requires the sample start/stop locations to be 128b aligned. */ 96*61046927SAndroid Build Coastguard Worker uint64_t __pad0; 97*61046927SAndroid Build Coastguard Worker uint64_t samples_end; 98*61046927SAndroid Build Coastguard Worker uint64_t __pad1; 99*61046927SAndroid Build Coastguard Worker }; 100*61046927SAndroid Build Coastguard Worker 101*61046927SAndroid Build Coastguard Worker /* Necessary when writing sample counts using CP_EVENT_WRITE7::ZPASS_DONE. */ 102*61046927SAndroid Build Coastguard Worker static_assert(offsetof(struct tu_renderpass_samples, samples_end) == 16); 103*61046927SAndroid Build Coastguard Worker 104*61046927SAndroid Build Coastguard Worker /** 105*61046927SAndroid Build Coastguard Worker * Tracks the results from an individual renderpass. Initially created 106*61046927SAndroid Build Coastguard Worker * per renderpass, and appended to the tail of at->pending_results. At a later 107*61046927SAndroid Build Coastguard Worker * time, when the GPU has finished writing the results, we fill samples_passed. 108*61046927SAndroid Build Coastguard Worker */ 109*61046927SAndroid Build Coastguard Worker struct tu_renderpass_result { 110*61046927SAndroid Build Coastguard Worker /* Points into GPU memory */ 111*61046927SAndroid Build Coastguard Worker struct tu_renderpass_samples* samples; 112*61046927SAndroid Build Coastguard Worker 113*61046927SAndroid Build Coastguard Worker struct tu_suballoc_bo bo; 114*61046927SAndroid Build Coastguard Worker 115*61046927SAndroid Build Coastguard Worker /* 116*61046927SAndroid Build Coastguard Worker * Below here, only used internally within autotune 117*61046927SAndroid Build Coastguard Worker */ 118*61046927SAndroid Build Coastguard Worker uint64_t rp_key; 119*61046927SAndroid Build Coastguard Worker struct tu_renderpass_history *history; 120*61046927SAndroid Build Coastguard Worker struct list_head node; 121*61046927SAndroid Build Coastguard Worker uint32_t fence; 122*61046927SAndroid Build Coastguard Worker uint64_t samples_passed; 123*61046927SAndroid Build Coastguard Worker }; 124*61046927SAndroid Build Coastguard Worker 125*61046927SAndroid Build Coastguard Worker VkResult tu_autotune_init(struct tu_autotune *at, struct tu_device *dev); 126*61046927SAndroid Build Coastguard Worker void tu_autotune_fini(struct tu_autotune *at, struct tu_device *dev); 127*61046927SAndroid Build Coastguard Worker 128*61046927SAndroid Build Coastguard Worker bool tu_autotune_use_bypass(struct tu_autotune *at, 129*61046927SAndroid Build Coastguard Worker struct tu_cmd_buffer *cmd_buffer, 130*61046927SAndroid Build Coastguard Worker struct tu_renderpass_result **autotune_result); 131*61046927SAndroid Build Coastguard Worker void tu_autotune_free_results(struct tu_device *dev, struct list_head *results); 132*61046927SAndroid Build Coastguard Worker 133*61046927SAndroid Build Coastguard Worker bool tu_autotune_submit_requires_fence(struct tu_cmd_buffer **cmd_buffers, 134*61046927SAndroid Build Coastguard Worker uint32_t cmd_buffer_count); 135*61046927SAndroid Build Coastguard Worker 136*61046927SAndroid Build Coastguard Worker /** 137*61046927SAndroid Build Coastguard Worker * A magic 8-ball that tells the gmem code whether we should do bypass mode 138*61046927SAndroid Build Coastguard Worker * for moar fps. 139*61046927SAndroid Build Coastguard Worker */ 140*61046927SAndroid Build Coastguard Worker struct tu_cs *tu_autotune_on_submit(struct tu_device *dev, 141*61046927SAndroid Build Coastguard Worker struct tu_autotune *at, 142*61046927SAndroid Build Coastguard Worker struct tu_cmd_buffer **cmd_buffers, 143*61046927SAndroid Build Coastguard Worker uint32_t cmd_buffer_count); 144*61046927SAndroid Build Coastguard Worker 145*61046927SAndroid Build Coastguard Worker struct tu_autotune_results_buffer; 146*61046927SAndroid Build Coastguard Worker 147*61046927SAndroid Build Coastguard Worker template <chip CHIP> 148*61046927SAndroid Build Coastguard Worker void tu_autotune_begin_renderpass(struct tu_cmd_buffer *cmd, 149*61046927SAndroid Build Coastguard Worker struct tu_cs *cs, 150*61046927SAndroid Build Coastguard Worker struct tu_renderpass_result *autotune_result); 151*61046927SAndroid Build Coastguard Worker 152*61046927SAndroid Build Coastguard Worker template <chip CHIP> 153*61046927SAndroid Build Coastguard Worker void tu_autotune_end_renderpass(struct tu_cmd_buffer *cmd, 154*61046927SAndroid Build Coastguard Worker struct tu_cs *cs, 155*61046927SAndroid Build Coastguard Worker struct tu_renderpass_result *autotune_result); 156*61046927SAndroid Build Coastguard Worker 157*61046927SAndroid Build Coastguard Worker #endif /* TU_AUTOTUNE_H */ 158