/*
 * Copyright 2022 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "include/gpuwork/gpuWork.h"

#include <linux/bpf.h>
#include <stddef.h>
#include <stdint.h>

#include <bpf_helpers.h>

// One second, in nanoseconds.
#define S_IN_NS (1000000000)
// Gaps between consecutive active periods that are at most this long are
// accumulated into |total_inactive_duration_ns| by the program below; longer
// gaps are treated as 0 (i.e. ignored).
#define SMALL_TIME_GAP_LIMIT_NS (S_IN_NS)

// A map from GpuIdUid (GPU ID and application UID) to |UidTrackingInfo|.
DEFINE_BPF_MAP_GRW(gpu_work_map, HASH, GpuIdUid, UidTrackingInfo, kMaxTrackedGpuIdUids,
                   AID_GRAPHICS);

// A map containing a single entry of |GlobalData|.
DEFINE_BPF_MAP_GRW(gpu_work_global_data, ARRAY, uint32_t, GlobalData, 1, AID_GRAPHICS);

// Defines the structure of the kernel tracepoint:
//
//  /sys/kernel/tracing/events/power/gpu_work_period/
//
// Drivers must define an appropriate gpu_work_period kernel tracepoint (for
// example, using the DECLARE_EVENT_CLASS and DEFINE_EVENT macros) such that the
// arguments/fields match the fields of |GpuWorkPeriodEvent|, excluding the
// initial "common" field. Drivers must invoke the tracepoint (also referred to
// as emitting the event) as described below. Note that the description below
// assumes a single physical GPU and its driver; for devices with multiple GPUs,
// each GPU and its driver should emit events independently, using a different
// value for |gpu_id| per GPU.
//
// |GpuWorkPeriodEvent| defines a non-overlapping, non-zero period of time from
// |start_time_ns| (inclusive) until |end_time_ns| (exclusive) for a given
// |uid|, and includes details of how much work the GPU was performing for |uid|
// during the period. When GPU work for a given |uid| runs on the GPU, the
// driver must track one or more periods that cover the time where the work was
// running, and emit events soon after. The driver should try to emit the event
// for a period at most 1 second after |end_time_ns|, and must emit the event at
// most 2 seconds after |end_time_ns|. A period's duration (|end_time_ns| -
// |start_time_ns|) must be at most 1 second. Periods for different |uids| can
// overlap, but periods for the same |uid| must not overlap. The driver must
// emit events for the same |uid| in strictly increasing order of
// |start_time_ns|, such that it is guaranteed that the tracepoint call for a
// period for |uid| has returned before the tracepoint call for the next period
// for |uid| is made. Note that synchronization may be necessary if the driver
// emits events for the same |uid| from different threads/contexts. Note that
// |end_time_ns| for a period for a |uid| may equal the |start_time_ns| of the
// next period for |uid|. The driver should try to avoid emitting a large number
// of events in a short time period (e.g. 1000 events per second) for a given
// |uid|.
//
// The |total_active_duration_ns| must be set to the approximate total amount of
// time the GPU spent running work for |uid| within the period, without
// "double-counting" parallel GPU work on the same GPU for the same |uid|. Note
// that even if the parallel GPU work was submitted from several different
// processes (i.e. different PIDs) with the same UID, this overlapping work must
// not be double-counted, as it still came from a single |uid|. "GPU work"
// should correspond to the "GPU slices" shown in the AGI (Android GPU
// Inspector) tool, and so should include work such as fragment and non-fragment
// work/shaders running on the shader cores of the GPU. For example, given the
// following for a single |uid|:
// - A period has:
//   - |start_time_ns|: 100,000,000 ns
//   - |end_time_ns|: 800,000,000 ns
// - Some GPU vertex work (A):
//   - started at: 200,000,000 ns
//   - ended at: 400,000,000 ns
// - Some GPU fragment work (B):
//   - started at: 300,000,000 ns
//   - ended at: 500,000,000 ns
// - Some GPU fragment work (C):
//   - started at: 300,000,000 ns
//   - ended at: 400,000,000 ns
// - Some GPU fragment work (D):
//   - started at: 600,000,000 ns
//   - ended at: 700,000,000 ns
//
// The |total_active_duration_ns| would be 400,000,000 ns, because GPU work for
// |uid| was executing:
// - from 200,000,000 ns to 500,000,000 ns, giving a duration of 300,000,000 ns
//   (encompassing GPU work A, B, and C)
// - from 600,000,000 ns to 700,000,000 ns, giving a duration of 100,000,000 ns
//   (GPU work D)
//
// Thus, the |total_active_duration_ns| is the sum of these two
// (non-overlapping) durations. Drivers may not have efficient access to the
// exact start and end times of all GPU work, as shown above, but drivers should
// try to approximate/aggregate the value of |total_active_duration_ns| as
// accurately as possible within the limitations of the hardware, without
// double-counting parallel GPU work for the same |uid|. The
// |total_active_duration_ns| value must be less than or equal to the period
// duration (|end_time_ns| - |start_time_ns|); if the aggregation approach might
// violate this requirement then the driver must clamp
// |total_active_duration_ns| to be at most the period duration.
//
// Protected mode: protected GPU work must not be reported.
// Periods must be emitted, and the |total_active_duration_ns| value set, as if
// the protected GPU work did not occur.
//
// Note that the above description allows for a certain amount of flexibility in
// how the driver tracks periods and emits the events. We list a few examples of
// how drivers might implement the above:
//
// - 1: The driver could track periods for all |uid| values at fixed intervals
//   of 1 second. Thus, every period duration would be exactly 1 second, and
//   periods from different |uid|s that overlap would have the same
//   |start_time_ns| and |end_time_ns| values.
//
// - 2: The driver could track periods with many different durations (up to 1
//   second), as needed in order to cover the GPU work for each |uid|.
//   Overlapping periods for different |uid|s may have very different durations,
//   as well as different |start_time_ns| and |end_time_ns| values.
//
// - 3: The driver could track fine-grained periods with different durations
//   that precisely cover the time where GPU work is running for each |uid|.
//   Thus, |total_active_duration_ns| would always equal the period duration.
//   For example, if a game was running at 60 frames per second, the driver
//   would most likely emit _at least_ 60 events per second (probably more, as
//   there would likely be multiple "chunks" of GPU work per frame, with gaps
//   between each chunk). However, the driver may sometimes need to resort to
//   more coarse-grained periods to avoid emitting thousands of events per
//   second for a |uid|, where |total_active_duration_ns| would then be less
//   than the period duration.
typedef struct {
    // The tracepoint's "common" fields occupy the first 8 bytes; the actual
    // event fields start at offset 8 (see the _Static_assert below).
    uint64_t common;

    // A value that uniquely identifies the GPU within the system.
    uint32_t gpu_id;

    // The UID of the application (i.e. persistent, unique ID of the Android
    // app) that submitted work to the GPU.
    uint32_t uid;

    // The start time of the period in nanoseconds. The clock must be
    // CLOCK_MONOTONIC_RAW, as returned by the ktime_get_raw_ns(void) function.
    uint64_t start_time_ns;

    // The end time of the period in nanoseconds. The clock must be
    // CLOCK_MONOTONIC_RAW, as returned by the ktime_get_raw_ns(void) function.
    uint64_t end_time_ns;

    // The amount of time the GPU was running GPU work for |uid| during the
    // period, in nanoseconds, without double-counting parallel GPU work for the
    // same |uid|. For example, this might include the amount of time the GPU
    // spent performing shader work (vertex work, fragment work, etc.) for
    // |uid|.
    uint64_t total_active_duration_ns;

} GpuWorkPeriodEvent;

_Static_assert(offsetof(GpuWorkPeriodEvent, gpu_id) == 8 &&
                       offsetof(GpuWorkPeriodEvent, uid) == 12 &&
                       offsetof(GpuWorkPeriodEvent, start_time_ns) == 16 &&
                       offsetof(GpuWorkPeriodEvent, end_time_ns) == 24 &&
                       offsetof(GpuWorkPeriodEvent, total_active_duration_ns) == 32,
               "Field offsets of struct GpuWorkPeriodEvent must not be changed because they "
               "must match the tracepoint field offsets found via adb shell cat "
               "/sys/kernel/tracing/events/power/gpu_work_period/format");

// Tracepoint handler: accumulates per-(gpu_id, uid) active/inactive GPU time
// and error counts into |gpu_work_map|, creating entries on demand.
DEFINE_BPF_PROG("tracepoint/power/gpu_work_period", AID_ROOT, AID_GRAPHICS, tp_gpu_work_period)
(GpuWorkPeriodEvent* const period) {
    // Note: In eBPF programs, |__sync_fetch_and_add| is translated to an atomic
    // add.

    // Return 1 to avoid blocking simpleperf from receiving events.
    const int ALLOW = 1;

    // Build the map key from the event's GPU ID and UID. The memset zeroes any
    // padding so the key bytes are fully defined for the hash map.
    GpuIdUid gpu_id_and_uid;
    __builtin_memset(&gpu_id_and_uid, 0, sizeof(gpu_id_and_uid));
    gpu_id_and_uid.gpu_id = period->gpu_id;
    gpu_id_and_uid.uid = period->uid;

    // Get |UidTrackingInfo|.
    UidTrackingInfo* uid_tracking_info = bpf_gpu_work_map_lookup_elem(&gpu_id_and_uid);
    if (!uid_tracking_info) {
        // There was no existing entry, so we add a new one.
        UidTrackingInfo initial_info;
        __builtin_memset(&initial_info, 0, sizeof(initial_info));
        if (0 == bpf_gpu_work_map_update_elem(&gpu_id_and_uid, &initial_info, BPF_NOEXIST)) {
            // We added an entry to the map, so we increment our entry counter in
            // |GlobalData|.
            const uint32_t zero = 0;
            // Get the |GlobalData|.
            GlobalData* global_data = bpf_gpu_work_global_data_lookup_elem(&zero);
            // Getting the global data never fails because it is an |ARRAY| map,
            // but we need to keep the verifier happy.
            if (global_data) {
                __sync_fetch_and_add(&global_data->num_map_entries, 1);
            }
        }
        // |update_elem| copies |initial_info| into the map, so look the entry
        // up again to get a pointer to the map's own storage.
        uid_tracking_info = bpf_gpu_work_map_lookup_elem(&gpu_id_and_uid);
        if (!uid_tracking_info) {
            // This should never happen, unless entries are getting deleted at
            // this moment. If so, we just give up.
            return ALLOW;
        }
    }

    if (
            // The period duration must be non-zero.
            period->start_time_ns >= period->end_time_ns ||
            // The period duration must be at most 1 second.
            (period->end_time_ns - period->start_time_ns) > S_IN_NS) {
        __sync_fetch_and_add(&uid_tracking_info->error_count, 1);
        return ALLOW;
    }

    // If |total_active_duration_ns| is 0 then no GPU work occurred and there is
    // nothing to do. Note that |previous_active_end_time_ns| is not advanced
    // here, so gap calculation below measures the gap between *active* periods.
    if (period->total_active_duration_ns == 0) {
        return ALLOW;
    }

    // Update |uid_tracking_info->total_active_duration_ns|.
    __sync_fetch_and_add(&uid_tracking_info->total_active_duration_ns,
                         period->total_active_duration_ns);

    // |small_gap_time_ns| is the time gap between the current and previous
    // active period, which could be 0. If the gap is more than
    // |SMALL_TIME_GAP_LIMIT_NS| then |small_gap_time_ns| will be set to 0
    // because we want to estimate the small gaps between "continuous" GPU work.
    uint64_t small_gap_time_ns = 0;
    if (uid_tracking_info->previous_active_end_time_ns > period->start_time_ns) {
        // The current period appears to have occurred before the previous
        // active period, which must not happen because per-UID periods must not
        // overlap and must be emitted in strictly increasing order of
        // |start_time_ns|.
        __sync_fetch_and_add(&uid_tracking_info->error_count, 1);
    } else {
        // The current period appears to have been emitted after the previous
        // active period, as expected, so we can calculate the gap between the
        // current and previous active period.
        small_gap_time_ns = period->start_time_ns - uid_tracking_info->previous_active_end_time_ns;

        // Update |previous_active_end_time_ns|.
        uid_tracking_info->previous_active_end_time_ns = period->end_time_ns;

        // We want to estimate the small gaps between "continuous" GPU work; if
        // the gap is more than |SMALL_TIME_GAP_LIMIT_NS| then we don't consider
        // this "continuous" GPU work.
        if (small_gap_time_ns > SMALL_TIME_GAP_LIMIT_NS) {
            small_gap_time_ns = 0;
        }
    }

    uint64_t period_total_inactive_time_ns = 0;
    const uint64_t period_duration_ns = period->end_time_ns - period->start_time_ns;
    // |period->total_active_duration_ns| is the active time within the period duration, so
    // it must not be larger than |period_duration_ns|.
    if (period->total_active_duration_ns > period_duration_ns) {
        __sync_fetch_and_add(&uid_tracking_info->error_count, 1);
    } else {
        period_total_inactive_time_ns = period_duration_ns - period->total_active_duration_ns;
    }

    // Update |uid_tracking_info->total_inactive_duration_ns| by adding the
    // inactive time from this period, plus the small gap between the current
    // and previous active period. Either or both of these values could be 0.
    if (small_gap_time_ns > 0 || period_total_inactive_time_ns > 0) {
        __sync_fetch_and_add(&uid_tracking_info->total_inactive_duration_ns,
                             small_gap_time_ns + period_total_inactive_time_ns);
    }

    return ALLOW;
}

LICENSE("Apache 2.0");