xref: /aosp_15_r20/system/bpfprogs/timeInState.c (revision 1783903d64ec063e49f57d40fc71d726200fdde2)
/*
 * timeInState eBPF program
 *
 * Copyright (C) 2018 Google
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 */

#include <android_bpf_defs.h>
#include <bpf_timeinstate.h>
#include <errno.h>

#ifdef ENABLE_LIBBPF
#include <linux/bpf.h>
#include <private/android_filesystem_config.h>
#include <stdbool.h>
#endif  // ENABLE_LIBBPF

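// Accounting results exported to userspace (AID_SYSTEM): total time at each frequency,
// per-UID time-in-state, per-UID concurrent-active-CPU times, and each UID's last
// update timestamp.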
DEFINE_BPF_MAP_GRW(total_time_in_state_map, PERCPU_ARRAY, uint32_t, uint64_t, MAX_FREQS_FOR_TOTAL,
                   AID_SYSTEM)

DEFINE_BPF_MAP_GRW(uid_time_in_state_map, PERCPU_HASH, time_key_t, tis_val_t, 1024, AID_SYSTEM)

DEFINE_BPF_MAP_GRW(uid_concurrent_times_map, PERCPU_HASH, time_key_t, concurrent_val_t, 1024,
                   AID_SYSTEM)
DEFINE_BPF_MAP_GRW(uid_last_update_map, HASH, uint32_t, uint64_t, 1024, AID_SYSTEM)

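// Per-CPU bookkeeping: timestamp of the last sched_switch seen on this CPU and the pid
// that was switched in at that time.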
DEFINE_BPF_MAP_GWO(cpu_last_update_map, PERCPU_ARRAY, uint32_t, uint64_t, 1, AID_SYSTEM)
DEFINE_BPF_MAP_GWO(cpu_last_pid_map, PERCPU_ARRAY, uint32_t, pid_t, 1, AID_SYSTEM)

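// Each CPU's cpufreq policy id, and each policy's current (1-based) frequency index.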
DEFINE_BPF_MAP_GWO(cpu_policy_map, ARRAY, uint32_t, uint32_t, 1024, AID_SYSTEM)
DEFINE_BPF_MAP_GWO(policy_freq_idx_map, ARRAY, uint32_t, uint8_t, 1024, AID_SYSTEM)

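// Maps a (policy, frequency) pair to a 1-based frequency index; 0 means uninitialized
// (see the note in the sched_switch handler below).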
DEFINE_BPF_MAP_GWO(freq_to_idx_map, HASH, freq_idx_key_t, uint8_t, 2048, AID_SYSTEM)

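// Counts of currently non-idle CPUs, system-wide and per cpufreq policy.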
DEFINE_BPF_MAP_GWO(nr_active_map, ARRAY, uint32_t, uint32_t, 1, AID_SYSTEM)
DEFINE_BPF_MAP_GWO(policy_nr_active_map, ARRAY, uint32_t, uint32_t, 1024, AID_SYSTEM)

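// Per-process (TGID) tracking: which pids to track, their task aggregation keys, and
// the resulting aggregated per-task time-in-state.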
DEFINE_BPF_MAP_GWO(pid_tracked_hash_map, HASH, uint32_t, pid_t, MAX_TRACKED_PIDS, AID_SYSTEM)
DEFINE_BPF_MAP_GWO(pid_tracked_map, ARRAY, uint32_t, tracked_pid_t, MAX_TRACKED_PIDS, AID_SYSTEM)
DEFINE_BPF_MAP_GWO(pid_task_aggregation_map, HASH, pid_t, uint16_t, 1024, AID_SYSTEM)
DEFINE_BPF_MAP_GRO(pid_time_in_state_map, PERCPU_HASH, aggregated_task_tis_key_t, tis_val_t, 1024,
                   AID_SYSTEM)

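// The format of the sched/sched_switch event is described in
// adb shell cat /d/tracing/events/sched/sched_switch/format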
struct switch_args {
    unsigned long long ignore;
    char prev_comm[16];
    int prev_pid;
    int prev_prio;
    long long prev_state;
    char next_comm[16];
    int next_pid;
    int next_prio;
};

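// Charge delta nanoseconds of CPU time at frequency index freq_idx to uid, update the
// concurrency buckets for active CPUs system-wide and policy_active CPUs in the same
// policy, and record time as the uid's last update timestamp.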
static inline __always_inline void update_uid(uint32_t uid, uint64_t delta, uint64_t time,
                                              uint8_t freq_idx, uint32_t active,
                                              uint32_t policy_active) {
    time_key_t key = {.uid = uid, .bucket = freq_idx / FREQS_PER_ENTRY};
    tis_val_t* val = bpf_uid_time_in_state_map_lookup_elem(&key);
    if (!val) {
        tis_val_t zero_val = {.ar = {0}};
        bpf_uid_time_in_state_map_update_elem(&key, &zero_val, BPF_NOEXIST);
        val = bpf_uid_time_in_state_map_lookup_elem(&key);
    }
    if (val) val->ar[freq_idx % FREQS_PER_ENTRY] += delta;

    key.bucket = active / CPUS_PER_ENTRY;
    concurrent_val_t* ct = bpf_uid_concurrent_times_map_lookup_elem(&key);
    if (!ct) {
        concurrent_val_t zero_val = {.active = {0}, .policy = {0}};
        bpf_uid_concurrent_times_map_update_elem(&key, &zero_val, BPF_NOEXIST);
        ct = bpf_uid_concurrent_times_map_lookup_elem(&key);
    }
    if (ct) ct->active[active % CPUS_PER_ENTRY] += delta;

    if (policy_active / CPUS_PER_ENTRY != key.bucket) {
        key.bucket = policy_active / CPUS_PER_ENTRY;
        ct = bpf_uid_concurrent_times_map_lookup_elem(&key);
        if (!ct) {
            concurrent_val_t zero_val = {.active = {0}, .policy = {0}};
            bpf_uid_concurrent_times_map_update_elem(&key, &zero_val, BPF_NOEXIST);
            ct = bpf_uid_concurrent_times_map_lookup_elem(&key);
        }
    }
    if (ct) ct->policy[policy_active % CPUS_PER_ENTRY] += delta;
    uint64_t* uid_last_update = bpf_uid_last_update_map_lookup_elem(&uid);
    if (uid_last_update) {
        *uid_last_update = time;
    } else {
        bpf_uid_last_update_map_update_elem(&uid, &time, BPF_NOEXIST);
    }
    return;
}

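// On every context switch, charge the time elapsed since the previous switch on this CPU
// to the outgoing task's UID and adjust the system-wide and per-policy active-CPU counts.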
104 DEFINE_BPF_PROG("tracepoint/sched/sched_switch", AID_ROOT, AID_SYSTEM,
105                 tracepoint_sched_sched_switch)
106 (struct switch_args* args) {
107     const int ALLOW = 1;  // return 1 to avoid blocking simpleperf from receiving events.
108     uint32_t zero = 0;
109     uint64_t* last = bpf_cpu_last_update_map_lookup_elem(&zero);
110     if (!last) return ALLOW;
111     uint64_t old_last = *last;
112     uint64_t time = bpf_ktime_get_ns();
113     *last = time;
114 
115     // With suspend-to-ram, it's possible to see prev_pid==0 twice in a row on the same CPU. Add a
116     // check to ensure prev_pid matches the previous next_pid to avoid incorrectly incrementing our
117     // active CPU counts a second time in this scenario.
118     pid_t *cpu_pidp = bpf_cpu_last_pid_map_lookup_elem(&zero);
119     if (!cpu_pidp) return ALLOW;
120     pid_t cpu_pid = *cpu_pidp;
121     *cpu_pidp = args->next_pid;
122     if (old_last && args->prev_pid != cpu_pid) return ALLOW;
123 
124     uint32_t* active = bpf_nr_active_map_lookup_elem(&zero);
125     if (!active) return ALLOW;
126 
127     uint32_t cpu = bpf_get_smp_processor_id();
128     uint32_t* policyp = bpf_cpu_policy_map_lookup_elem(&cpu);
129     if (!policyp) return ALLOW;
130     uint32_t policy = *policyp;
131 
132     uint32_t* policy_active = bpf_policy_nr_active_map_lookup_elem(&policy);
133     if (!policy_active) return ALLOW;
134 
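    // nactive and policy_nactive are zero-based: with N CPUs active during the elapsed
    // interval, concurrency bucket N - 1 is charged in update_uid().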
    uint32_t nactive = *active - 1;
    uint32_t policy_nactive = *policy_active - 1;

    if (!args->prev_pid || (!old_last && args->next_pid)) {
        __sync_fetch_and_add(active, 1);
        __sync_fetch_and_add(policy_active, 1);
    }

    // Return here in 2 scenarios:
    // 1) prev_pid == 0, so we're exiting idle. No UID stats need updating, and active CPUs can't be
    //    decreasing.
    // 2) old_last == 0, so this is the first time we've seen this CPU. Any delta will be invalid,
    //    and our active CPU counts don't include this CPU yet so we shouldn't decrement them even
    //    if we're going idle.
    if (!args->prev_pid || !old_last) return ALLOW;

    if (!args->next_pid) {
        __sync_fetch_and_add(active, -1);
        __sync_fetch_and_add(policy_active, -1);
    }

    uint8_t* freq_idxp = bpf_policy_freq_idx_map_lookup_elem(&policy);
    if (!freq_idxp || !*freq_idxp) return ALLOW;
    // freq_to_idx_map uses 1 as its minimum index so that *freq_idxp == 0 only when uninitialized
    uint8_t freq_idx = *freq_idxp - 1;

    // The bpf_get_current_uid_gid() helper function returns a u64 value, with the lower 32 bits
    // containing the UID and the upper 32 bits containing the GID. Additionally, in rare cases
    // (usually when something is very wrong with the kernel) the helper can return -EINVAL, in
    // which case we should just return early.
    unsigned long long uid_gid = bpf_get_current_uid_gid();
    if (uid_gid == (unsigned long long)(-EINVAL)) return ALLOW;

    // Mask off the GID in the upper 32 bits, keeping only the UID.
    uint32_t uid = uid_gid & 0xFFFFFFFF;

    uint64_t delta = time - old_last;

    // For UIDs in the SDK sandbox range, we account per-UID times twice, both to the corresponding
    // app uid and to the "virtual" UID AID_SDK_SANDBOX which is reserved for collecting total times
    // across all SDK sandbox UIDs. Special handling for this reserved UID in framework code
    // prevents double counting in systemwide totals.
    if (((uid % AID_USER_OFFSET) >= AID_SDK_SANDBOX_PROCESS_START) &&
        ((uid % AID_USER_OFFSET) <= AID_SDK_SANDBOX_PROCESS_END)) {
        uid -= AID_SDK_SANDBOX_PROCESS_START - AID_APP_START;
        update_uid(uid, delta, time, freq_idx, nactive, policy_nactive);
        update_uid(AID_SDK_SANDBOX, delta, time, freq_idx, nactive, policy_nactive);
    } else {
        update_uid(uid, delta, time, freq_idx, nactive, policy_nactive);
    }

    // Add delta to total.
    const uint32_t total_freq_idx = freq_idx < MAX_FREQS_FOR_TOTAL ? freq_idx :
                                    MAX_FREQS_FOR_TOTAL - 1;
    uint64_t* total = bpf_total_time_in_state_map_lookup_elem(&total_freq_idx);
    if (total) *total += delta;

    const int pid = args->prev_pid;
    const pid_t tgid = bpf_get_current_pid_tgid() >> 32;
    bool is_tgid_tracked = false;

    // eBPF verifier does not currently allow loops.
    // Instruct the C compiler to unroll the loop into a series of steps.
    #pragma unroll
    for (uint32_t index = 0; index < MAX_TRACKED_PIDS; index++) {
        const uint32_t key = index;
        tracked_pid_t* tracked_pid = bpf_pid_tracked_map_lookup_elem(&key);
        if (!tracked_pid) continue;
        if (tracked_pid->state == TRACKED_PID_STATE_UNUSED) {
            // Reached the end of the list
            break;
        }

        if (tracked_pid->state == TRACKED_PID_STATE_ACTIVE && tracked_pid->pid == tgid) {
            is_tgid_tracked = true;
            break;
        }
    }

    if (is_tgid_tracked) {
        // If this process is marked for time-in-state tracking, aggregate the CPU time-in-state
        // with other threads sharing the same TGID and aggregation key.
        uint16_t* aggregation_key = bpf_pid_task_aggregation_map_lookup_elem(&pid);
        aggregated_task_tis_key_t task_key = {
                .tgid = tgid,
                .aggregation_key = aggregation_key ? *aggregation_key : 0,
                .bucket = freq_idx / FREQS_PER_ENTRY};
        tis_val_t* task_val = bpf_pid_time_in_state_map_lookup_elem(&task_key);
        if (!task_val) {
            tis_val_t zero_val = {.ar = {0}};
            bpf_pid_time_in_state_map_update_elem(&task_key, &zero_val, BPF_NOEXIST);
            task_val = bpf_pid_time_in_state_map_lookup_elem(&task_key);
        }
        if (task_val) task_val->ar[freq_idx % FREQS_PER_ENTRY] += delta;
    }
    return ALLOW;
}

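// The format of the power/cpu_frequency event is described in
// adb shell cat /d/tracing/events/power/cpu_frequency/format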
struct cpufreq_args {
    unsigned long long ignore;
    unsigned int state;
    unsigned int cpu_id;
};

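// On a cpufreq transition, record the affected policy's new (1-based) frequency index
// in policy_freq_idx_map.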
239 DEFINE_BPF_PROG("tracepoint/power/cpu_frequency", AID_ROOT, AID_SYSTEM,
240                 tracepoint_power_cpu_frequency)
241 (struct cpufreq_args* args) {
242     const int ALLOW = 1;  // return 1 to avoid blocking simpleperf from receiving events.
243     uint32_t cpu = args->cpu_id;
244     unsigned int new = args->state;
245     uint32_t* policyp = bpf_cpu_policy_map_lookup_elem(&cpu);
246     if (!policyp) return ALLOW;
247     uint32_t policy = *policyp;
248     freq_idx_key_t key = {.policy = policy, .freq = new};
249     uint8_t* idxp = bpf_freq_to_idx_map_lookup_elem(&key);
250     if (!idxp) return ALLOW;
251     uint8_t idx = *idxp;
252     bpf_policy_freq_idx_map_update_elem(&policy, &idx, BPF_ANY);
253     return ALLOW;
254 }
255 
256 // The format of the sched/sched_process_free event is described in
257 // adb shell cat /d/tracing/events/sched/sched_process_free/format
258 struct sched_process_free_args {
259     unsigned long long ignore;
260     char comm[16];
261     pid_t pid;
262     int prio;
263 };
264 
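// On process exit, clear the pid from the tracked-pid list and delete its task
// aggregation entry.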
265 DEFINE_BPF_PROG("tracepoint/sched/sched_process_free", AID_ROOT, AID_SYSTEM,
266                 tracepoint_sched_sched_process_free)
267 (struct sched_process_free_args* args) {
268     const int ALLOW = 1;
269 
270     int pid = args->pid;
271     bool is_last = true;
272 
273     // eBPF verifier does not currently allow loops.
274     // Instruct the C compiler to unroll the loop into a series of steps.
275     #pragma unroll
276     for (uint32_t index = 0; index < MAX_TRACKED_PIDS; index++) {
277         const uint32_t key = MAX_TRACKED_PIDS - index - 1;
278         tracked_pid_t* tracked_pid = bpf_pid_tracked_map_lookup_elem(&key);
279         if (!tracked_pid) continue;
280         if (tracked_pid->pid == pid) {
281             tracked_pid->pid = 0;
282             tracked_pid->state = is_last ? TRACKED_PID_STATE_UNUSED : TRACKED_PID_STATE_EXITED;
283             bpf_pid_tracked_hash_map_delete_elem(&key);
284             break;
285         }
286         if (tracked_pid->state == TRACKED_PID_STATE_ACTIVE) {
287             is_last = false;
288         }
289     }
290 
291     bpf_pid_task_aggregation_map_delete_elem(&pid);
292     return ALLOW;
293 }
294 
295 LICENSE("GPL");
296