/*
 * timeInState eBPF program
 *
 * Copyright (C) 2018 Google
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 */

#include <android_bpf_defs.h>
#include <bpf_timeinstate.h>
#include <errno.h>

#ifdef ENABLE_LIBBPF
#include <linux/bpf.h>
#include <private/android_filesystem_config.h>
#include <stdbool.h>
#endif // ENABLE_LIBBPF

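// System-wide time spent at each frequency index, summed over all UIDs. Indices at or beyond
// MAX_FREQS_FOR_TOTAL are clamped into the last slot by the sched_switch handler below.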
DEFINE_BPF_MAP_GRW(total_time_in_state_map, PERCPU_ARRAY, uint32_t, uint64_t, MAX_FREQS_FOR_TOTAL,
                   AID_SYSTEM)

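// Per-UID time-in-state, keyed by (uid, frequency bucket); each tis_val_t holds FREQS_PER_ENTRY
// counters, one per frequency index within the bucket.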
DEFINE_BPF_MAP_GRW(uid_time_in_state_map, PERCPU_HASH, time_key_t, tis_val_t, 1024, AID_SYSTEM)

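// Per-UID runtime broken down by how many CPUs were active at the time, both system-wide
// (.active) and within the task's cpufreq policy (.policy), plus the timestamp of each UID's
// most recent update.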
DEFINE_BPF_MAP_GRW(uid_concurrent_times_map, PERCPU_HASH, time_key_t, concurrent_val_t, 1024,
                   AID_SYSTEM)
DEFINE_BPF_MAP_GRW(uid_last_update_map, HASH, uint32_t, uint64_t, 1024, AID_SYSTEM)

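// Per-CPU bookkeeping: the timestamp of the last sched_switch seen on each CPU, and the pid that
// switch handed the CPU to (next_pid), used to detect repeated idle-exit events after
// suspend-to-ram.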
DEFINE_BPF_MAP_GWO(cpu_last_update_map, PERCPU_ARRAY, uint32_t, uint64_t, 1, AID_SYSTEM)
DEFINE_BPF_MAP_GWO(cpu_last_pid_map, PERCPU_ARRAY, uint32_t, pid_t, 1, AID_SYSTEM)

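// CPU number -> cpufreq policy id, and policy id -> current frequency index. Frequency indices
// are 1-based so that 0 means the policy's frequency has not been recorded yet.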
DEFINE_BPF_MAP_GWO(cpu_policy_map, ARRAY, uint32_t, uint32_t, 1024, AID_SYSTEM)
DEFINE_BPF_MAP_GWO(policy_freq_idx_map, ARRAY, uint32_t, uint8_t, 1024, AID_SYSTEM)

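// (policy, frequency) -> 1-based frequency index, consulted by the cpu_frequency handler below.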
DEFINE_BPF_MAP_GWO(freq_to_idx_map, HASH, freq_idx_key_t, uint8_t, 2048, AID_SYSTEM)

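// Number of currently active (non-idle) CPUs, system-wide and per cpufreq policy.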
DEFINE_BPF_MAP_GWO(nr_active_map, ARRAY, uint32_t, uint32_t, 1, AID_SYSTEM)
DEFINE_BPF_MAP_GWO(policy_nr_active_map, ARRAY, uint32_t, uint32_t, 1024, AID_SYSTEM)

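// State for per-process time-in-state tracking: the list of tracked processes (TGIDs), the
// per-thread aggregation keys, and the aggregated per-task counters keyed by
// (tgid, aggregation_key, frequency bucket).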
DEFINE_BPF_MAP_GWO(pid_tracked_hash_map, HASH, uint32_t, pid_t, MAX_TRACKED_PIDS, AID_SYSTEM)
DEFINE_BPF_MAP_GWO(pid_tracked_map, ARRAY, uint32_t, tracked_pid_t, MAX_TRACKED_PIDS, AID_SYSTEM)
DEFINE_BPF_MAP_GWO(pid_task_aggregation_map, HASH, pid_t, uint16_t, 1024, AID_SYSTEM)
DEFINE_BPF_MAP_GRO(pid_time_in_state_map, PERCPU_HASH, aggregated_task_tis_key_t, tis_val_t, 1024,
                   AID_SYSTEM)

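// The format of the sched/sched_switch event is described in
// adb shell cat /d/tracing/events/sched/sched_switch/format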
struct switch_args {
    unsigned long long ignore;
    char prev_comm[16];
    int prev_pid;
    int prev_prio;
    long long prev_state;
    char next_comm[16];
    int next_pid;
    int next_prio;
};

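// Charge `delta` ns at frequency index `freq_idx` to `uid`, bump the UID's concurrent-CPU
// counters for `active` / `policy_active` active CPUs, and record `time` as the UID's last
// update timestamp. Counters are split into fixed-size buckets; for example, if FREQS_PER_ENTRY
// were 32 (see bpf_timeinstate.h for the actual value), freq_idx 40 would land in bucket 1,
// slot 8.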
static inline __always_inline void update_uid(uint32_t uid, uint64_t delta, uint64_t time,
                                              uint8_t freq_idx, uint32_t active,
                                              uint32_t policy_active) {
    time_key_t key = {.uid = uid, .bucket = freq_idx / FREQS_PER_ENTRY};
    tis_val_t* val = bpf_uid_time_in_state_map_lookup_elem(&key);
    if (!val) {
        tis_val_t zero_val = {.ar = {0}};
        bpf_uid_time_in_state_map_update_elem(&key, &zero_val, BPF_NOEXIST);
        val = bpf_uid_time_in_state_map_lookup_elem(&key);
    }
    if (val) val->ar[freq_idx % FREQS_PER_ENTRY] += delta;

    key.bucket = active / CPUS_PER_ENTRY;
    concurrent_val_t* ct = bpf_uid_concurrent_times_map_lookup_elem(&key);
    if (!ct) {
        concurrent_val_t zero_val = {.active = {0}, .policy = {0}};
        bpf_uid_concurrent_times_map_update_elem(&key, &zero_val, BPF_NOEXIST);
        ct = bpf_uid_concurrent_times_map_lookup_elem(&key);
    }
    if (ct) ct->active[active % CPUS_PER_ENTRY] += delta;

    if (policy_active / CPUS_PER_ENTRY != key.bucket) {
        key.bucket = policy_active / CPUS_PER_ENTRY;
        ct = bpf_uid_concurrent_times_map_lookup_elem(&key);
        if (!ct) {
            concurrent_val_t zero_val = {.active = {0}, .policy = {0}};
            bpf_uid_concurrent_times_map_update_elem(&key, &zero_val, BPF_NOEXIST);
            ct = bpf_uid_concurrent_times_map_lookup_elem(&key);
        }
    }
    if (ct) ct->policy[policy_active % CPUS_PER_ENTRY] += delta;
    uint64_t* uid_last_update = bpf_uid_last_update_map_lookup_elem(&uid);
    if (uid_last_update) {
        *uid_last_update = time;
    } else {
        bpf_uid_last_update_map_update_elem(&uid, &time, BPF_NOEXIST);
    }
    return;
}

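// On each context switch, charge the time since the previous switch on this CPU to the outgoing
// task's UID at the CPU's current frequency, and keep the active-CPU counters in sync as CPUs
// enter and leave idle.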
DEFINE_BPF_PROG("tracepoint/sched/sched_switch", AID_ROOT, AID_SYSTEM,
                tracepoint_sched_sched_switch)
(struct switch_args* args) {
    const int ALLOW = 1; // return 1 to avoid blocking simpleperf from receiving events.
    uint32_t zero = 0;
    uint64_t* last = bpf_cpu_last_update_map_lookup_elem(&zero);
    if (!last) return ALLOW;
    uint64_t old_last = *last;
    uint64_t time = bpf_ktime_get_ns();
    *last = time;

    // With suspend-to-ram, it's possible to see prev_pid==0 twice in a row on the same CPU. Add a
    // check to ensure prev_pid matches the previous next_pid to avoid incorrectly incrementing our
    // active CPU counts a second time in this scenario.
    pid_t* cpu_pidp = bpf_cpu_last_pid_map_lookup_elem(&zero);
    if (!cpu_pidp) return ALLOW;
    pid_t cpu_pid = *cpu_pidp;
    *cpu_pidp = args->next_pid;
    if (old_last && args->prev_pid != cpu_pid) return ALLOW;

    uint32_t* active = bpf_nr_active_map_lookup_elem(&zero);
    if (!active) return ALLOW;

    uint32_t cpu = bpf_get_smp_processor_id();
    uint32_t* policyp = bpf_cpu_policy_map_lookup_elem(&cpu);
    if (!policyp) return ALLOW;
    uint32_t policy = *policyp;

    uint32_t* policy_active = bpf_policy_nr_active_map_lookup_elem(&policy);
    if (!policy_active) return ALLOW;

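    // The active counts include the CPU handling this switch, while the concurrent-times arrays
    // are indexed from 0, hence the -1.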
    uint32_t nactive = *active - 1;
    uint32_t policy_nactive = *policy_active - 1;

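    // A CPU becomes active when the idle task is switched out (prev_pid == 0), or on the first
    // switch ever seen on this CPU if a real task is being switched in.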
    if (!args->prev_pid || (!old_last && args->next_pid)) {
        __sync_fetch_and_add(active, 1);
        __sync_fetch_and_add(policy_active, 1);
    }

    // Return here in 2 scenarios:
    // 1) prev_pid == 0, so we're exiting idle. No UID stats need updating, and active CPUs can't be
    //    decreasing.
    // 2) old_last == 0, so this is the first time we've seen this CPU. Any delta will be invalid,
    //    and our active CPU counts don't include this CPU yet so we shouldn't decrement them even
    //    if we're going idle.
    if (!args->prev_pid || !old_last) return ALLOW;

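    // The idle task is being switched in, so this CPU is going idle.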
    if (!args->next_pid) {
        __sync_fetch_and_add(active, -1);
        __sync_fetch_and_add(policy_active, -1);
    }

    uint8_t* freq_idxp = bpf_policy_freq_idx_map_lookup_elem(&policy);
    if (!freq_idxp || !*freq_idxp) return ALLOW;
    // freq_to_idx_map uses 1 as its minimum index so that *freq_idxp == 0 only when uninitialized
    uint8_t freq_idx = *freq_idxp - 1;

    // The bpf_get_current_uid_gid() helper function returns a u64 value, with the lower 32 bits
    // containing the UID and the upper 32 bits containing the GID. Additionally, in rare cases
    // (usually when something is very wrong with the kernel), the helper can return -EINVAL, in
    // which case we should just return early.
    unsigned long long uid_gid = bpf_get_current_uid_gid();
    if (uid_gid == (unsigned long long)(-EINVAL)) return ALLOW;

    // Extract the UID from the lower 32 bits of the uid_gid value returned from the kernel.
    uint32_t uid = uid_gid & 0xFFFFFFFF;

    uint64_t delta = time - old_last;

    // For UIDs in the SDK sandbox range, we account per-UID times twice, both to the corresponding
    // app uid and to the "virtual" UID AID_SDK_SANDBOX which is reserved for collecting total times
    // across all SDK sandbox UIDs. Special handling for this reserved UID in framework code
    // prevents double counting in systemwide totals.
    if (((uid % AID_USER_OFFSET) >= AID_SDK_SANDBOX_PROCESS_START) &&
        ((uid % AID_USER_OFFSET) <= AID_SDK_SANDBOX_PROCESS_END)) {
        uid -= AID_SDK_SANDBOX_PROCESS_START - AID_APP_START;
        update_uid(uid, delta, time, freq_idx, nactive, policy_nactive);
        update_uid(AID_SDK_SANDBOX, delta, time, freq_idx, nactive, policy_nactive);
    } else {
        update_uid(uid, delta, time, freq_idx, nactive, policy_nactive);
    }

    // Add delta to total.
    const uint32_t total_freq_idx = freq_idx < MAX_FREQS_FOR_TOTAL ? freq_idx :
                                                                     MAX_FREQS_FOR_TOTAL - 1;
    uint64_t* total = bpf_total_time_in_state_map_lookup_elem(&total_freq_idx);
    if (total) *total += delta;

    const int pid = args->prev_pid;
    const pid_t tgid = bpf_get_current_pid_tgid() >> 32;
    bool is_tgid_tracked = false;

    // eBPF verifier does not currently allow loops.
    // Instruct the C compiler to unroll the loop into a series of steps.
#pragma unroll
    for (uint32_t index = 0; index < MAX_TRACKED_PIDS; index++) {
        const uint32_t key = index;
        tracked_pid_t* tracked_pid = bpf_pid_tracked_map_lookup_elem(&key);
        if (!tracked_pid) continue;
        if (tracked_pid->state == TRACKED_PID_STATE_UNUSED) {
            // Reached the end of the list
            break;
        }

        if (tracked_pid->state == TRACKED_PID_STATE_ACTIVE && tracked_pid->pid == tgid) {
            is_tgid_tracked = true;
            break;
        }
    }

    if (is_tgid_tracked) {
        // If this process is marked for time-in-state tracking, aggregate the CPU time-in-state
        // with other threads sharing the same TGID and aggregation key.
        uint16_t* aggregation_key = bpf_pid_task_aggregation_map_lookup_elem(&pid);
        aggregated_task_tis_key_t task_key = {
                .tgid = tgid,
                .aggregation_key = aggregation_key ? *aggregation_key : 0,
                .bucket = freq_idx / FREQS_PER_ENTRY};
        tis_val_t* task_val = bpf_pid_time_in_state_map_lookup_elem(&task_key);
        if (!task_val) {
            tis_val_t zero_val = {.ar = {0}};
            bpf_pid_time_in_state_map_update_elem(&task_key, &zero_val, BPF_NOEXIST);
            task_val = bpf_pid_time_in_state_map_lookup_elem(&task_key);
        }
        if (task_val) task_val->ar[freq_idx % FREQS_PER_ENTRY] += delta;
    }
    return ALLOW;
}

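// The format of the power/cpu_frequency event is described in
// adb shell cat /d/tracing/events/power/cpu_frequency/format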
struct cpufreq_args {
    unsigned long long ignore;
    unsigned int state;
    unsigned int cpu_id;
};

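// On a CPU frequency change, look up the 1-based index of the new frequency and record it as the
// current index for that CPU's cpufreq policy.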
DEFINE_BPF_PROG("tracepoint/power/cpu_frequency", AID_ROOT, AID_SYSTEM,
                tracepoint_power_cpu_frequency)
(struct cpufreq_args* args) {
    const int ALLOW = 1; // return 1 to avoid blocking simpleperf from receiving events.
    uint32_t cpu = args->cpu_id;
    unsigned int new = args->state;
    uint32_t* policyp = bpf_cpu_policy_map_lookup_elem(&cpu);
    if (!policyp) return ALLOW;
    uint32_t policy = *policyp;
    freq_idx_key_t key = {.policy = policy, .freq = new};
    uint8_t* idxp = bpf_freq_to_idx_map_lookup_elem(&key);
    if (!idxp) return ALLOW;
    uint8_t idx = *idxp;
    bpf_policy_freq_idx_map_update_elem(&policy, &idx, BPF_ANY);
    return ALLOW;
}

// The format of the sched/sched_process_free event is described in
// adb shell cat /d/tracing/events/sched/sched_process_free/format
struct sched_process_free_args {
    unsigned long long ignore;
    char comm[16];
    pid_t pid;
    int prio;
};

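// When a task exits, remove it from the tracked-process list if present and drop its per-task
// aggregation key.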
DEFINE_BPF_PROG("tracepoint/sched/sched_process_free", AID_ROOT, AID_SYSTEM,
                tracepoint_sched_sched_process_free)
(struct sched_process_free_args* args) {
    const int ALLOW = 1;

    int pid = args->pid;
    bool is_last = true;

    // eBPF verifier does not currently allow loops.
    // Instruct the C compiler to unroll the loop into a series of steps.
#pragma unroll
    for (uint32_t index = 0; index < MAX_TRACKED_PIDS; index++) {
        const uint32_t key = MAX_TRACKED_PIDS - index - 1;
        tracked_pid_t* tracked_pid = bpf_pid_tracked_map_lookup_elem(&key);
        if (!tracked_pid) continue;
        if (tracked_pid->pid == pid) {
            tracked_pid->pid = 0;
            tracked_pid->state = is_last ? TRACKED_PID_STATE_UNUSED : TRACKED_PID_STATE_EXITED;
            bpf_pid_tracked_hash_map_delete_elem(&key);
            break;
        }
        if (tracked_pid->state == TRACKED_PID_STATE_ACTIVE) {
            is_last = false;
        }
    }

    bpf_pid_task_aggregation_map_delete_elem(&pid);
    return ALLOW;
}

LICENSE("GPL");