xref: /aosp_15_r20/frameworks/native/services/gpuservice/gpuwork/bpfprogs/gpuWork.c (revision 38e8c45f13ce32b0dcecb25141ffecaf386fa17f)
1 /*
2  * Copyright 2022 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "include/gpuwork/gpuWork.h"
18 
19 #include <linux/bpf.h>
20 #include <stddef.h>
21 #include <stdint.h>
22 
23 #include <bpf_helpers.h>
24 
// Nanoseconds in one second.
#define S_IN_NS (1000000000)
// Gaps between consecutive periods for the same (GPU, UID) that are at most
// this long are counted as "inactive" time between otherwise-continuous GPU
// work; longer gaps are ignored (treated as the work simply having stopped).
#define SMALL_TIME_GAP_LIMIT_NS (S_IN_NS)

// A map from GpuIdUid (GPU ID and application UID) to |UidTrackingInfo|.
DEFINE_BPF_MAP_GRW(gpu_work_map, HASH, GpuIdUid, UidTrackingInfo, kMaxTrackedGpuIdUids,
                   AID_GRAPHICS);

// A map containing a single entry of |GlobalData|, used to track the number
// of entries added to |gpu_work_map|.
DEFINE_BPF_MAP_GRW(gpu_work_global_data, ARRAY, uint32_t, GlobalData, 1, AID_GRAPHICS);
34 
35 // Defines the structure of the kernel tracepoint:
36 //
37 //  /sys/kernel/tracing/events/power/gpu_work_period/
38 //
39 // Drivers must define an appropriate gpu_work_period kernel tracepoint (for
40 // example, using the DECLARE_EVENT_CLASS and DEFINE_EVENT macros) such that the
41 // arguments/fields match the fields of |GpuWorkPeriodEvent|, excluding the
42 // initial "common" field. Drivers must invoke the tracepoint (also referred to
43 // as emitting the event) as described below. Note that the description below
44 // assumes a single physical GPU and its driver; for devices with multiple GPUs,
45 // each GPU and its driver should emit events independently, using a different
46 // value for |gpu_id| per GPU.
47 //
48 // |GpuWorkPeriodEvent| defines a non-overlapping, non-zero period of time from
49 // |start_time_ns| (inclusive) until |end_time_ns| (exclusive) for a given
50 // |uid|, and includes details of how much work the GPU was performing for |uid|
51 // during the period. When GPU work for a given |uid| runs on the GPU, the
52 // driver must track one or more periods that cover the time where the work was
53 // running, and emit events soon after. The driver should try to emit the event
54 // for a period at most 1 second after |end_time_ns|, and must emit the event at
55 // most 2 seconds after |end_time_ns|. A period's duration (|end_time_ns| -
56 // |start_time_ns|) must be at most 1 second. Periods for different |uids| can
57 // overlap, but periods for the same |uid| must not overlap. The driver must
58 // emit events for the same |uid| in strictly increasing order of
59 // |start_time_ns|, such that it is guaranteed that the tracepoint call for a
60 // period for |uid| has returned before the tracepoint call for the next period
61 // for |uid| is made. Note that synchronization may be necessary if the driver
62 // emits events for the same |uid| from different threads/contexts. Note that
63 // |end_time_ns| for a period for a |uid| may equal the |start_time_ns| of the
64 // next period for |uid|. The driver should try to avoid emitting a large number
65 // of events in a short time period (e.g. 1000 events per second) for a given
66 // |uid|.
67 //
68 // The |total_active_duration_ns| must be set to the approximate total amount of
69 // time the GPU spent running work for |uid| within the period, without
70 // "double-counting" parallel GPU work on the same GPU for the same |uid|. Note
71 // that even if the parallel GPU work was submitted from several different
72 // processes (i.e. different PIDs) with the same UID, this overlapping work must
73 // not be double-counted, as it still came from a single |uid|. "GPU work"
74 // should correspond to the "GPU slices" shown in the AGI (Android GPU
75 // Inspector) tool, and so should include work such as fragment and non-fragment
76 // work/shaders running on the shader cores of the GPU. For example, given the
77 // following for a single |uid|:
78 //  - A period has:
79 //    - |start_time_ns|: 100,000,000 ns
80 //    - |end_time_ns|:   800,000,000 ns
81 //  - Some GPU vertex work (A):
82 //    - started at:      200,000,000 ns
83 //    - ended at:        400,000,000 ns
84 //  - Some GPU fragment work (B):
85 //    - started at:      300,000,000 ns
86 //    - ended at:        500,000,000 ns
87 //  - Some GPU fragment work (C):
88 //    - started at:      300,000,000 ns
89 //    - ended at:        400,000,000 ns
90 //  - Some GPU fragment work (D):
91 //    - started at:      600,000,000 ns
92 //    - ended at:        700,000,000 ns
93 //
94 // The |total_active_duration_ns| would be 400,000,000 ns, because GPU work for
95 // |uid| was executing:
96 //  - from 200,000,000 ns to 500,000,000 ns, giving a duration of 300,000,000 ns
97 //    (encompassing GPU work A, B, and C)
98 //  - from 600,000,000 ns to 700,000,000 ns, giving a duration of 100,000,000 ns
99 //    (GPU work D)
100 //
101 // Thus, the |total_active_duration_ns| is the sum of these two
102 // (non-overlapping) durations. Drivers may not have efficient access to the
103 // exact start and end times of all GPU work, as shown above, but drivers should
104 // try to approximate/aggregate the value of |total_active_duration_ns| as
105 // accurately as possible within the limitations of the hardware, without
106 // double-counting parallel GPU work for the same |uid|. The
107 // |total_active_duration_ns| value must be less than or equal to the period
108 // duration (|end_time_ns| - |start_time_ns|); if the aggregation approach might
109 // violate this requirement then the driver must clamp
110 // |total_active_duration_ns| to be at most the period duration.
111 //
112 // Protected mode: protected GPU work must not be reported. Periods must be
113 // emitted, and the |total_active_duration_ns| value set, as if the protected
114 // GPU work did not occur.
115 //
116 // Note that the above description allows for a certain amount of flexibility in
117 // how the driver tracks periods and emits the events. We list a few examples of
118 // how drivers might implement the above:
119 //
120 // - 1: The driver could track periods for all |uid| values at fixed intervals
121 //   of 1 second. Thus, every period duration would be exactly 1 second, and
122 //   periods from different |uid|s that overlap would have the same
123 //   |start_time_ns| and |end_time_ns| values.
124 //
125 // - 2: The driver could track periods with many different durations (up to 1
126 //   second), as needed in order to cover the GPU work for each |uid|.
127 //   Overlapping periods for different |uid|s may have very different durations,
128 //   as well as different |start_time_ns| and |end_time_ns| values.
129 //
130 // - 3: The driver could track fine-grained periods with different durations
131 //   that precisely cover the time where GPU work is running for each |uid|.
132 //   Thus, |total_active_duration_ns| would always equal the period duration.
133 //   For example, if a game was running at 60 frames per second, the driver
134 //   would most likely emit _at least_ 60 events per second (probably more, as
135 //   there would likely be multiple "chunks" of GPU work per frame, with gaps
136 //   between each chunk). However, the driver may sometimes need to resort to
137 //   more coarse-grained periods to avoid emitting thousands of events per
138 //   second for a |uid|, where |total_active_duration_ns| would then be less
139 //   than the period duration.
// In-memory layout of one gpu_work_period tracepoint event, as seen by the
// BPF program. Field offsets must match the kernel's tracepoint format (see
// the _Static_assert below); do not reorder or resize fields.
typedef struct {
    // Placeholder for the tracepoint's initial 8-byte "common" field; the
    // driver-supplied fields start at offset 8.
    uint64_t common;

    // A value that uniquely identifies the GPU within the system.
    uint32_t gpu_id;

    // The UID of the application (i.e. persistent, unique ID of the Android
    // app) that submitted work to the GPU.
    uint32_t uid;

    // The start time of the period in nanoseconds. The clock must be
    // CLOCK_MONOTONIC_RAW, as returned by the ktime_get_raw_ns(void) function.
    uint64_t start_time_ns;

    // The end time of the period in nanoseconds. The clock must be
    // CLOCK_MONOTONIC_RAW, as returned by the ktime_get_raw_ns(void) function.
    uint64_t end_time_ns;

    // The amount of time the GPU was running GPU work for |uid| during the
    // period, in nanoseconds, without double-counting parallel GPU work for the
    // same |uid|. For example, this might include the amount of time the GPU
    // spent performing shader work (vertex work, fragment work, etc.) for
    // |uid|.
    uint64_t total_active_duration_ns;

} GpuWorkPeriodEvent;

_Static_assert(offsetof(GpuWorkPeriodEvent, gpu_id) == 8 &&
                       offsetof(GpuWorkPeriodEvent, uid) == 12 &&
                       offsetof(GpuWorkPeriodEvent, start_time_ns) == 16 &&
                       offsetof(GpuWorkPeriodEvent, end_time_ns) == 24 &&
                       offsetof(GpuWorkPeriodEvent, total_active_duration_ns) == 32,
               "Field offsets of struct GpuWorkPeriodEvent must not be changed because they "
               "must match the tracepoint field offsets found via adb shell cat "
               "/sys/kernel/tracing/events/power/gpu_work_period/format");
176 
// BPF program attached to the power/gpu_work_period tracepoint (contract
// described above). For each emitted period it accumulates per-(GPU ID, UID)
// active and inactive GPU time into |gpu_work_map|, creating map entries on
// demand, and counts malformed events via |error_count|. Always returns 1.
DEFINE_BPF_PROG("tracepoint/power/gpu_work_period", AID_ROOT, AID_GRAPHICS, tp_gpu_work_period)
(GpuWorkPeriodEvent* const period) {
    // Note: In eBPF programs, |__sync_fetch_and_add| is translated to an atomic
    // add.

    // Return 1 to avoid blocking simpleperf from receiving events.
    const int ALLOW = 1;

    // Build the hash-map key (gpu_id, uid). Zero the whole struct first so
    // any padding bytes are deterministic.
    GpuIdUid gpu_id_and_uid;
    __builtin_memset(&gpu_id_and_uid, 0, sizeof(gpu_id_and_uid));
    gpu_id_and_uid.gpu_id = period->gpu_id;
    gpu_id_and_uid.uid = period->uid;

    // Get |UidTrackingInfo|.
    UidTrackingInfo* uid_tracking_info = bpf_gpu_work_map_lookup_elem(&gpu_id_and_uid);
    if (!uid_tracking_info) {
        // There was no existing entry, so we add a new one.
        UidTrackingInfo initial_info;
        __builtin_memset(&initial_info, 0, sizeof(initial_info));
        if (0 == bpf_gpu_work_map_update_elem(&gpu_id_and_uid, &initial_info, BPF_NOEXIST)) {
            // We added an entry to the map, so we increment our entry counter in
            // |GlobalData|.
            const uint32_t zero = 0;
            // Get the |GlobalData|.
            GlobalData* global_data = bpf_gpu_work_global_data_lookup_elem(&zero);
            // Getting the global data never fails because it is an |ARRAY| map,
            // but we need to keep the verifier happy.
            if (global_data) {
                __sync_fetch_and_add(&global_data->num_map_entries, 1);
            }
        }
        // Re-lookup rather than using |initial_info|: the updates below must
        // go through a pointer into the map for the writes to persist.
        uid_tracking_info = bpf_gpu_work_map_lookup_elem(&gpu_id_and_uid);
        if (!uid_tracking_info) {
            // This should never happen, unless entries are getting deleted at
            // this moment. If so, we just give up.
            return ALLOW;
        }
    }

    if (
            // The period duration must be non-zero.
            period->start_time_ns >= period->end_time_ns ||
            // The period duration must be at most 1 second.
            (period->end_time_ns - period->start_time_ns) > S_IN_NS) {
        __sync_fetch_and_add(&uid_tracking_info->error_count, 1);
        return ALLOW;
    }

    // If |total_active_duration_ns| is 0 then no GPU work occurred and there is
    // nothing to do.
    if (period->total_active_duration_ns == 0) {
        return ALLOW;
    }

    // Update |uid_tracking_info->total_active_duration_ns|.
    // NOTE(review): the active duration is accumulated here, before it is
    // validated against the period duration further below; an over-large value
    // is therefore still added, with only |error_count| incremented — confirm
    // this ordering is intentional.
    __sync_fetch_and_add(&uid_tracking_info->total_active_duration_ns,
                         period->total_active_duration_ns);

    // |small_gap_time_ns| is the time gap between the current and previous
    // active period, which could be 0. If the gap is more than
    // |SMALL_TIME_GAP_LIMIT_NS| then |small_gap_time_ns| will be set to 0
    // because we want to estimate the small gaps between "continuous" GPU work.
    uint64_t small_gap_time_ns = 0;
    if (uid_tracking_info->previous_active_end_time_ns > period->start_time_ns) {
        // The current period appears to have occurred before the previous
        // active period, which must not happen because per-UID periods must not
        // overlap and must be emitted in strictly increasing order of
        // |start_time_ns|.
        __sync_fetch_and_add(&uid_tracking_info->error_count, 1);
    } else {
        // The current period appears to have been emitted after the previous
        // active period, as expected, so we can calculate the gap between the
        // current and previous active period.
        small_gap_time_ns = period->start_time_ns - uid_tracking_info->previous_active_end_time_ns;

        // Update |previous_active_end_time_ns|.
        uid_tracking_info->previous_active_end_time_ns = period->end_time_ns;

        // We want to estimate the small gaps between "continuous" GPU work; if
        // the gap is more than |SMALL_TIME_GAP_LIMIT_NS| then we don't consider
        // this "continuous" GPU work.
        if (small_gap_time_ns > SMALL_TIME_GAP_LIMIT_NS) {
            small_gap_time_ns = 0;
        }
    }

    uint64_t period_total_inactive_time_ns = 0;
    const uint64_t period_duration_ns = period->end_time_ns - period->start_time_ns;
    // |period->total_active_duration_ns| is the active time within the period duration, so
    // it must not be larger than |period_duration_ns|.
    if (period->total_active_duration_ns > period_duration_ns) {
        __sync_fetch_and_add(&uid_tracking_info->error_count, 1);
    } else {
        period_total_inactive_time_ns = period_duration_ns - period->total_active_duration_ns;
    }

    // Update |uid_tracking_info->total_inactive_duration_ns| by adding the
    // inactive time from this period, plus the small gap between the current
    // and previous active period. Either or both of these values could be 0.
    if (small_gap_time_ns > 0 || period_total_inactive_time_ns > 0) {
        __sync_fetch_and_add(&uid_tracking_info->total_inactive_duration_ns,
                             small_gap_time_ns + period_total_inactive_time_ns);
    }

    return ALLOW;
}
283 
284 LICENSE("Apache 2.0");
285