xref: /aosp_15_r20/external/bcc/examples/cpp/pyperf/PyPerfBPFProgram.cc (revision 387f9dfdfa2baef462e92476d413c7bc2470293e)
1*387f9dfdSAndroid Build Coastguard Worker /*
2*387f9dfdSAndroid Build Coastguard Worker  * Copyright (c) Facebook, Inc.
3*387f9dfdSAndroid Build Coastguard Worker  * Licensed under the Apache License, Version 2.0 (the "License")
4*387f9dfdSAndroid Build Coastguard Worker  */
5*387f9dfdSAndroid Build Coastguard Worker 
6*387f9dfdSAndroid Build Coastguard Worker #include <string>
7*387f9dfdSAndroid Build Coastguard Worker 
8*387f9dfdSAndroid Build Coastguard Worker namespace ebpf {
9*387f9dfdSAndroid Build Coastguard Worker namespace pyperf {
10*387f9dfdSAndroid Build Coastguard Worker 
11*387f9dfdSAndroid Build Coastguard Worker extern const std::string PYPERF_BPF_PROGRAM = R"(
12*387f9dfdSAndroid Build Coastguard Worker #include <linux/sched.h>
13*387f9dfdSAndroid Build Coastguard Worker #include <uapi/linux/ptrace.h>
14*387f9dfdSAndroid Build Coastguard Worker 
// Sizing constants shared by the structs below.
// NOTE(review): the two PYTHON_STACK_* values presumably describe how many
// frames one stack-walking program handles and how many times that program
// is chained; the walker itself is not visible here - confirm.
#define PYTHON_STACK_FRAMES_PER_PROG 25
#define PYTHON_STACK_PROG_CNT 3
// Capacity of Event.stack: 3 * 25 = 75 symbol ids.
#define STACK_MAX_LEN (PYTHON_STACK_PROG_CNT * PYTHON_STACK_FRAMES_PER_PROG)
// Byte sizes of the fixed-length string buffers in Symbol.
#define CLASS_NAME_LEN 32
#define FUNCTION_NAME_LEN 64
#define FILE_NAME_LEN 128
// Byte size of Event.comm.
#define TASK_COMM_LEN 16
22*387f9dfdSAndroid Build Coastguard Worker 
// Values stored in Event.stack_status.
// on_event pre-sets the field to STACK_STATUS_COMPLETE before the stack is
// walked.
// NOTE(review): the code that sets ERROR / TRUNCATED is outside this chunk.
enum {
  STACK_STATUS_COMPLETE = 0,
  STACK_STATUS_ERROR = 1,
  STACK_STATUS_TRUNCATED = 2,
};
28*387f9dfdSAndroid Build Coastguard Worker 
// Values stored in Event.gil_state; produced by get_gil_state().
enum {
  // gil_locked_addr or gil_last_holder_addr is not configured for the pid
  GIL_STATE_NO_INFO = 0,
  // reading gil_locked failed, or it held a value other than -1, 0 or 1
  GIL_STATE_ERROR = 1,
  // gil_locked == -1
  GIL_STATE_UNINITIALIZED = 2,
  // gil_locked == 0
  GIL_STATE_NOT_LOCKED = 3,
  // gil_locked == 1 and the last holder is the sampled thread's state
  GIL_STATE_THIS_THREAD = 4,
  // gil_locked == 1 and the last holder is the global current thread state
  GIL_STATE_GLOBAL_CURRENT_THREAD = 5,
  // gil_locked == 1 and the last holder is some other thread state
  GIL_STATE_OTHER_THREAD = 6,
  // gil_locked == 1 but the last-holder pointer is NULL
  GIL_STATE_NULL = 7,
};
39*387f9dfdSAndroid Build Coastguard Worker 
// Values stored in Event.thread_state_match; produced by
// get_thread_state_match(), which compares the thread state found in TLS
// with the global current thread state.
enum {
  // not returned by any code visible in this chunk
  THREAD_STATE_UNKNOWN = 0,
  // both pointers are non-NULL and equal
  THREAD_STATE_MATCH = 1,
  // both pointers are non-NULL and different
  THREAD_STATE_MISMATCH = 2,
  // only the TLS thread state is NULL
  THREAD_STATE_THIS_THREAD_NULL = 3,
  // only the global current thread state is NULL
  THREAD_STATE_GLOBAL_CURRENT_THREAD_NULL = 4,
  // both pointers are NULL
  THREAD_STATE_BOTH_NULL = 5,
};
48*387f9dfdSAndroid Build Coastguard Worker 
// Values stored in Event.pthread_id_match; produced by
// get_pthread_id_match(), which compares the pthread id recorded in the
// PyThreadState with the id of the thread being sampled.
enum {
  // not returned by any code visible in this chunk
  PTHREAD_ID_UNKNOWN = 0,
  // the two ids are equal
  PTHREAD_ID_MATCH = 1,
  // the two ids differ
  PTHREAD_ID_MISMATCH = 2,
  // no PyThreadState was found for the sampled thread
  PTHREAD_ID_THREAD_STATE_NULL = 3,
  // the id recorded in the PyThreadState is 0
  PTHREAD_ID_NULL = 4,
  // the id read from the thread's TLS block is 0
  PTHREAD_ID_ERROR = 5,
};
57*387f9dfdSAndroid Build Coastguard Worker 
// Byte offsets of the interpreter fields this program reads. Each value is
// added to a user-space object address before a bpf_probe_read_user call.
// The field order and types define a binary layout and must not change.
typedef struct {
  // PyObject: pointer to the object's type
  int64_t PyObject_type;
  // type object: pointer to the type name C string
  int64_t PyTypeObject_name;
  // PyThreadState: pointer to the top frame
  int64_t PyThreadState_frame;
  // PyThreadState: id of the pthread recorded in the thread state
  int64_t PyThreadState_thread;
  // PyFrameObject: pointer to the previous frame
  int64_t PyFrameObject_back;
  // PyFrameObject: pointer to the code object
  int64_t PyFrameObject_code;
  // NOTE(review): not read anywhere in this chunk
  int64_t PyFrameObject_lineno;
  // PyFrameObject: first local slot (holds the first argument)
  int64_t PyFrameObject_localsplus;
  // PyCodeObject: pointer to the file name string object
  int64_t PyCodeObject_filename;
  // PyCodeObject: pointer to the function name string object
  int64_t PyCodeObject_name;
  // PyCodeObject: pointer to the tuple of variable names
  int64_t PyCodeObject_varnames;
  // tuple object: first item slot
  int64_t PyTupleObject_item;
  // string object: start of the character data
  int64_t String_data;
  // NOTE(review): not read anywhere in this chunk
  int64_t String_size;
} OffsetConfig;
74*387f9dfdSAndroid Build Coastguard Worker 
// Per-process configuration, stored in the pid_config map and looked up by
// process id in on_event. All addresses are virtual addresses inside the
// profiled process and are read with bpf_probe_read_user.
typedef struct {
  // address of _PyThreadState_Current
  uintptr_t current_state_addr;
  // address of autoTLSkey, the pthreads TLS key holding the thread state
  uintptr_t tls_key_addr;
  // address of gil_locked; 0 disables GIL state reporting
  uintptr_t gil_locked_addr;
  // address of gil_last_holder; 0 disables GIL state reporting
  uintptr_t gil_last_holder_addr;
  // struct field offsets for this process's interpreter
  OffsetConfig offsets;
} PidData;
82*387f9dfdSAndroid Build Coastguard Worker 
83*387f9dfdSAndroid Build Coastguard Worker typedef struct {
84*387f9dfdSAndroid Build Coastguard Worker   char classname[CLASS_NAME_LEN];
85*387f9dfdSAndroid Build Coastguard Worker   char name[FUNCTION_NAME_LEN];
86*387f9dfdSAndroid Build Coastguard Worker   char file[FILE_NAME_LEN];
87*387f9dfdSAndroid Build Coastguard Worker   // NOTE: PyFrameObject also has line number but it is typically just the
88*387f9dfdSAndroid Build Coastguard Worker   // first line of that function and PyCode_Addr2Line needs to be called
89*387f9dfdSAndroid Build Coastguard Worker   // to get the actual line
90*387f9dfdSAndroid Build Coastguard Worker } Symbol;
91*387f9dfdSAndroid Build Coastguard Worker 
// One sample, as submitted through the `events` perf buffer by
// submit_sample. The layout is part of the contract with the consumer of
// that buffer and must not change.
typedef struct {
  // process id (upper half of bpf_get_current_pid_tgid)
  uint32_t pid;
  // thread id (lower half of bpf_get_current_pid_tgid)
  uint32_t tid;
  // task command name from bpf_get_current_comm
  char comm[TASK_COMM_LEN];
  // one of the THREAD_STATE_* values
  uint8_t thread_state_match;
  // one of the GIL_STATE_* values
  uint8_t gil_state;
  // one of the PTHREAD_ID_* values
  uint8_t pthread_id_match;
  // one of the STACK_STATUS_* values
  uint8_t stack_status;
  // Symbol names are not stored here directly; they are added to a separate
  // hashmap of Symbols and only their ids are recorded in `stack`.
  // stack_len is reset to 0 in on_event.
  // NOTE(review): presumably the number of valid entries in `stack`.
  int64_t stack_len;
  int32_t stack[STACK_MAX_LEN];
} Event;
105*387f9dfdSAndroid Build Coastguard Worker 
106*387f9dfdSAndroid Build Coastguard Worker #define _STR_CONCAT(str1, str2) str1##str2
107*387f9dfdSAndroid Build Coastguard Worker #define STR_CONCAT(str1, str2) _STR_CONCAT(str1, str2)
108*387f9dfdSAndroid Build Coastguard Worker #define FAIL_COMPILATION_IF(condition)            \
109*387f9dfdSAndroid Build Coastguard Worker   typedef struct {                                \
110*387f9dfdSAndroid Build Coastguard Worker     char _condition_check[1 - 2 * !!(condition)]; \
111*387f9dfdSAndroid Build Coastguard Worker   } STR_CONCAT(compile_time_condition_check, __COUNTER__);
112*387f9dfdSAndroid Build Coastguard Worker // See comments in get_frame_data
113*387f9dfdSAndroid Build Coastguard Worker FAIL_COMPILATION_IF(sizeof(Symbol) == sizeof(struct bpf_perf_event_value))
114*387f9dfdSAndroid Build Coastguard Worker 
// Working state for one sample. A single instance lives in the per-CPU
// `state_heap` array and is obtained through get_state / GET_STATE.
typedef struct {
  // copy of the sampled process's offsets, taken in on_event
  OffsetConfig offsets;
  // result of bpf_get_smp_processor_id, taken in on_event
  uint64_t cur_cpu;
  // NOTE(review): not touched in this chunk; presumably used when
  // allocating new symbol ids - confirm against get_symbol_id.
  int64_t symbol_counter;
  // user-space address of the next Python frame to read
  void* frame_ptr;
  // reset to 0 in on_event before the stack-walking program is called
  int64_t python_stack_prog_call_cnt;
  // the sample being assembled; this is what gets submitted
  Event event;
} sample_state_t;
123*387f9dfdSAndroid Build Coastguard Worker 
124*387f9dfdSAndroid Build Coastguard Worker BPF_PERCPU_ARRAY(state_heap, sample_state_t, 1);
125*387f9dfdSAndroid Build Coastguard Worker BPF_HASH(symbols, Symbol, int32_t, __SYMBOLS_SIZE__);
126*387f9dfdSAndroid Build Coastguard Worker BPF_HASH(pid_config, pid_t, PidData);
127*387f9dfdSAndroid Build Coastguard Worker BPF_PROG_ARRAY(progs, 1);
128*387f9dfdSAndroid Build Coastguard Worker 
129*387f9dfdSAndroid Build Coastguard Worker BPF_PERF_OUTPUT(events);
130*387f9dfdSAndroid Build Coastguard Worker 
131*387f9dfdSAndroid Build Coastguard Worker static inline __attribute__((__always_inline__)) void* get_thread_state(
132*387f9dfdSAndroid Build Coastguard Worker     void* tls_base,
133*387f9dfdSAndroid Build Coastguard Worker     PidData* pid_data) {
134*387f9dfdSAndroid Build Coastguard Worker   // Python sets the thread_state using pthread_setspecific with the key
135*387f9dfdSAndroid Build Coastguard Worker   // stored in a global variable autoTLSkey.
136*387f9dfdSAndroid Build Coastguard Worker   // We read the value of the key from the global variable and then read
137*387f9dfdSAndroid Build Coastguard Worker   // the value in the thread-local storage. This relies on pthread implementation.
138*387f9dfdSAndroid Build Coastguard Worker   // This is basically the same as running the following in GDB:
139*387f9dfdSAndroid Build Coastguard Worker   //  p *(PyThreadState*)((struct pthread*)pthread_self())->
140*387f9dfdSAndroid Build Coastguard Worker   //    specific_1stblock[autoTLSkey]->data
141*387f9dfdSAndroid Build Coastguard Worker   int key;
142*387f9dfdSAndroid Build Coastguard Worker   bpf_probe_read_user(&key, sizeof(key), (void*)pid_data->tls_key_addr);
143*387f9dfdSAndroid Build Coastguard Worker   // This assumes autoTLSkey < 32, which means that the TLS is stored in
144*387f9dfdSAndroid Build Coastguard Worker   //   pthread->specific_1stblock[autoTLSkey]
145*387f9dfdSAndroid Build Coastguard Worker   // 0x310 is offsetof(struct pthread, specific_1stblock),
146*387f9dfdSAndroid Build Coastguard Worker   // 0x10 is sizeof(pthread_key_data)
147*387f9dfdSAndroid Build Coastguard Worker   // 0x8 is offsetof(struct pthread_key_data, data)
148*387f9dfdSAndroid Build Coastguard Worker   // 'struct pthread' is not in the public API so we have to hardcode
149*387f9dfdSAndroid Build Coastguard Worker   // the offsets here
150*387f9dfdSAndroid Build Coastguard Worker   void* thread_state;
151*387f9dfdSAndroid Build Coastguard Worker   bpf_probe_read_user(
152*387f9dfdSAndroid Build Coastguard Worker       &thread_state,
153*387f9dfdSAndroid Build Coastguard Worker       sizeof(thread_state),
154*387f9dfdSAndroid Build Coastguard Worker       tls_base + 0x310 + key * 0x10 + 0x08);
155*387f9dfdSAndroid Build Coastguard Worker   return thread_state;
156*387f9dfdSAndroid Build Coastguard Worker }
157*387f9dfdSAndroid Build Coastguard Worker 
158*387f9dfdSAndroid Build Coastguard Worker static inline __attribute__((__always_inline__)) int submit_sample(
159*387f9dfdSAndroid Build Coastguard Worker     struct pt_regs* ctx,
160*387f9dfdSAndroid Build Coastguard Worker     sample_state_t* state) {
161*387f9dfdSAndroid Build Coastguard Worker   events.perf_submit(ctx, &state->event, sizeof(Event));
162*387f9dfdSAndroid Build Coastguard Worker   return 0;
163*387f9dfdSAndroid Build Coastguard Worker }
164*387f9dfdSAndroid Build Coastguard Worker 
165*387f9dfdSAndroid Build Coastguard Worker // this function is trivial, but we need to do map lookup in separate function,
166*387f9dfdSAndroid Build Coastguard Worker // because BCC doesn't allow direct map calls (including lookups) from inside
167*387f9dfdSAndroid Build Coastguard Worker // a macro (which we want to do in GET_STATE() macro below)
168*387f9dfdSAndroid Build Coastguard Worker static inline __attribute__((__always_inline__)) sample_state_t* get_state() {
169*387f9dfdSAndroid Build Coastguard Worker   int zero = 0;
170*387f9dfdSAndroid Build Coastguard Worker   return state_heap.lookup(&zero);
171*387f9dfdSAndroid Build Coastguard Worker }
172*387f9dfdSAndroid Build Coastguard Worker 
// GET_STATE() declares a local named `state` pointing at this CPU's
// sample_state_t and makes the enclosing function return 0 when the lookup
// fails. It can only be used where `return 0` is a valid statement.
#define GET_STATE()                     \
  sample_state_t* state = get_state();  \
  if (state == 0) {                     \
    return 0; /* should never happen */ \
  }
178*387f9dfdSAndroid Build Coastguard Worker 
179*387f9dfdSAndroid Build Coastguard Worker static inline __attribute__((__always_inline__)) int get_thread_state_match(
180*387f9dfdSAndroid Build Coastguard Worker     void* this_thread_state,
181*387f9dfdSAndroid Build Coastguard Worker     void* global_thread_state) {
182*387f9dfdSAndroid Build Coastguard Worker   if (this_thread_state == 0 && global_thread_state == 0) {
183*387f9dfdSAndroid Build Coastguard Worker     return THREAD_STATE_BOTH_NULL;
184*387f9dfdSAndroid Build Coastguard Worker   }
185*387f9dfdSAndroid Build Coastguard Worker   if (this_thread_state == 0) {
186*387f9dfdSAndroid Build Coastguard Worker     return THREAD_STATE_THIS_THREAD_NULL;
187*387f9dfdSAndroid Build Coastguard Worker   }
188*387f9dfdSAndroid Build Coastguard Worker   if (global_thread_state == 0) {
189*387f9dfdSAndroid Build Coastguard Worker     return THREAD_STATE_GLOBAL_CURRENT_THREAD_NULL;
190*387f9dfdSAndroid Build Coastguard Worker   }
191*387f9dfdSAndroid Build Coastguard Worker   if (this_thread_state == global_thread_state) {
192*387f9dfdSAndroid Build Coastguard Worker     return THREAD_STATE_MATCH;
193*387f9dfdSAndroid Build Coastguard Worker   } else {
194*387f9dfdSAndroid Build Coastguard Worker     return THREAD_STATE_MISMATCH;
195*387f9dfdSAndroid Build Coastguard Worker   }
196*387f9dfdSAndroid Build Coastguard Worker }
197*387f9dfdSAndroid Build Coastguard Worker 
198*387f9dfdSAndroid Build Coastguard Worker static inline __attribute__((__always_inline__)) int get_gil_state(
199*387f9dfdSAndroid Build Coastguard Worker     void* this_thread_state,
200*387f9dfdSAndroid Build Coastguard Worker     void* global_thread_state,
201*387f9dfdSAndroid Build Coastguard Worker     PidData* pid_data) {
202*387f9dfdSAndroid Build Coastguard Worker   // Get information of GIL state
203*387f9dfdSAndroid Build Coastguard Worker   if (pid_data->gil_locked_addr == 0 || pid_data->gil_last_holder_addr == 0) {
204*387f9dfdSAndroid Build Coastguard Worker     return GIL_STATE_NO_INFO;
205*387f9dfdSAndroid Build Coastguard Worker   }
206*387f9dfdSAndroid Build Coastguard Worker 
207*387f9dfdSAndroid Build Coastguard Worker   int gil_locked = 0;
208*387f9dfdSAndroid Build Coastguard Worker   void* gil_thread_state = 0;
209*387f9dfdSAndroid Build Coastguard Worker   if (bpf_probe_read_user(
210*387f9dfdSAndroid Build Coastguard Worker           &gil_locked, sizeof(gil_locked), (void*)pid_data->gil_locked_addr)) {
211*387f9dfdSAndroid Build Coastguard Worker     return GIL_STATE_ERROR;
212*387f9dfdSAndroid Build Coastguard Worker   }
213*387f9dfdSAndroid Build Coastguard Worker 
214*387f9dfdSAndroid Build Coastguard Worker   switch (gil_locked) {
215*387f9dfdSAndroid Build Coastguard Worker     case -1:
216*387f9dfdSAndroid Build Coastguard Worker       return GIL_STATE_UNINITIALIZED;
217*387f9dfdSAndroid Build Coastguard Worker     case 0:
218*387f9dfdSAndroid Build Coastguard Worker       return GIL_STATE_NOT_LOCKED;
219*387f9dfdSAndroid Build Coastguard Worker     case 1:
220*387f9dfdSAndroid Build Coastguard Worker       // GIL is held by some Thread
221*387f9dfdSAndroid Build Coastguard Worker       bpf_probe_read_user(
222*387f9dfdSAndroid Build Coastguard Worker           &gil_thread_state,
223*387f9dfdSAndroid Build Coastguard Worker           sizeof(void*),
224*387f9dfdSAndroid Build Coastguard Worker           (void*)pid_data->gil_last_holder_addr);
225*387f9dfdSAndroid Build Coastguard Worker       if (gil_thread_state == this_thread_state) {
226*387f9dfdSAndroid Build Coastguard Worker         return GIL_STATE_THIS_THREAD;
227*387f9dfdSAndroid Build Coastguard Worker       } else if (gil_thread_state == global_thread_state) {
228*387f9dfdSAndroid Build Coastguard Worker         return GIL_STATE_GLOBAL_CURRENT_THREAD;
229*387f9dfdSAndroid Build Coastguard Worker       } else if (gil_thread_state == 0) {
230*387f9dfdSAndroid Build Coastguard Worker         return GIL_STATE_NULL;
231*387f9dfdSAndroid Build Coastguard Worker       } else {
232*387f9dfdSAndroid Build Coastguard Worker         return GIL_STATE_OTHER_THREAD;
233*387f9dfdSAndroid Build Coastguard Worker       }
234*387f9dfdSAndroid Build Coastguard Worker     default:
235*387f9dfdSAndroid Build Coastguard Worker       return GIL_STATE_ERROR;
236*387f9dfdSAndroid Build Coastguard Worker   }
237*387f9dfdSAndroid Build Coastguard Worker }
238*387f9dfdSAndroid Build Coastguard Worker 
// get_pthread_id_match checks whether the pthread id recorded in
// `thread_state` is the id of the thread being sampled.
//
//   thread_state  PyThreadState pointer of the sampled thread (may be NULL)
//   tls_base      address of the thread's 'struct pthread' in user space
//   pid_data      supplies the PyThreadState_thread offset
//
// Returns one of the PTHREAD_ID_* values.
static inline __attribute__((__always_inline__)) int
get_pthread_id_match(void* thread_state, void* tls_base, PidData* pid_data) {
  if (!thread_state) {
    return PTHREAD_ID_THREAD_STATE_NULL;
  }

  // id stored in the PyThreadState
  uint64_t recorded_id;
  bpf_probe_read_user(
      &recorded_id,
      sizeof(recorded_id),
      thread_state + pid_data->offsets.PyThreadState_thread);
  if (!recorded_id) {
    return PTHREAD_ID_NULL;
  }

  // id of the running thread; 0x10 = offsetof(struct pthread, header.self)
  uint64_t running_id;
  bpf_probe_read_user(&running_id, sizeof(running_id), tls_base + 0x10);
  if (!running_id) {
    return PTHREAD_ID_ERROR;
  }

  return running_id == recorded_id ? PTHREAD_ID_MATCH : PTHREAD_ID_MISMATCH;
}
267*387f9dfdSAndroid Build Coastguard Worker 
// on_event is the entry point run for every sample. For processes that have
// an entry in pid_config it fills this CPU's Event with process/thread
// identity, thread-state, GIL and pthread-id diagnostics, then hands over to
// the stack-walking program through a tail call. When there is no Python
// thread state, or the tail call fails, the Event is submitted right here
// with an empty stack. Always returns 0.
int on_event(struct pt_regs* ctx) {
  // upper 32 bits are used as the process id, lower 32 bits as the thread id
  uint64_t ids = bpf_get_current_pid_tgid();
  pid_t pid = (pid_t)(ids >> 32);

  // processes that were not registered are not sampled
  PidData* pid_data = pid_config.lookup(&pid);
  if (pid_data == 0) {
    return 0;
  }

  GET_STATE();

  state->python_stack_prog_call_cnt = 0;
  state->cur_cpu = bpf_get_smp_processor_id();
  state->offsets = pid_data->offsets;

  Event* sample = &state->event;
  sample->pid = pid;
  sample->tid = (pid_t)ids;
  bpf_get_current_comm(&sample->comm, sizeof(sample->comm));

  // Global PyThreadState pointer; it should belong to the thread that
  // currently holds the GIL.
  void* global_state = 0;
  bpf_probe_read_user(
      &global_state,
      sizeof(global_state),
      (void*)pid_data->current_state_addr);

  struct task_struct* task = (struct task_struct*)bpf_get_current_task();
#if __x86_64__
// thread_struct->fs was renamed to fsbase in
// https://github.com/torvalds/linux/commit/296f781a4b7801ad9c1c0219f9e87b6c25e196fe
// so depending on kernel version, we need to account for that
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 7, 0)
  void* tls_base = (void*)task->thread.fs;
#else
  void* tls_base = (void*)task->thread.fsbase;
#endif
#elif __aarch64__
  void* tls_base = (void*)task->thread.tp_value;
#else
#error "Unsupported platform"
#endif

  // PyThreadState of the sampled thread, taken from its TLS
  void* tls_state = get_thread_state(tls_base, pid_data);

  // does the TLS PyThreadState agree with the global
  // _PyThreadState_Current?
  sample->thread_state_match =
      get_thread_state_match(tls_state, global_state);

  // who holds the GIL?
  sample->gil_state = get_gil_state(tls_state, global_state, pid_data);

  // was the PyThreadState created by the pthread that is running now?
  sample->pthread_id_match =
      get_pthread_id_match(tls_state, tls_base, pid_data);

  // Start with an empty, complete stack so the event is well-formed even if
  // the stack-walking program below never runs or fails.
  sample->stack_status = STACK_STATUS_COMPLETE;
  sample->stack_len = 0;

  if (tls_state) {
    // pointer to the top frame, read from the PyThreadState
    bpf_probe_read_user(
        &state->frame_ptr,
        sizeof(state->frame_ptr),
        tls_state + pid_data->offsets.PyThreadState_frame);
    // Jump to reading the first set of Python frames. A successful tail
    // call does not return; execution continues past this line only when
    // the call itself fails.
    progs.call(ctx, PYTHON_STACK_PROG_IDX);
  }

  return submit_sample(ctx, state);
}
345*387f9dfdSAndroid Build Coastguard Worker 
// get_names fills `symbol` with the class name (when one can be guessed),
// the file name and the function name of a Python frame.
//
//   cur_frame  user-space address of the PyFrameObject
//   code_ptr   user-space address of that frame's PyCodeObject
//   offsets    struct field offsets for the sampled interpreter
//   symbol     output; fully overwritten (cleared first, then filled)
//   ctx        program context, only needed for the helper that clears
//              `symbol`
//
// Failed user-space reads are not reported; the affected field is simply
// left empty or with whatever the helper produced.
static inline __attribute__((__always_inline__)) void get_names(
    void* cur_frame,
    void* code_ptr,
    OffsetConfig* offsets,
    Symbol* symbol,
    void* ctx) {
  // Figure out if we want to parse the class name, basically by checking
  // the name of the first argument,
  //   ((PyTupleObject*)$frame->f_code->co_varnames)->ob_item[0]
  // If it is 'self', we get the type and its name; if it is 'cls', we just
  // get the name. This is not perfect but there is no better way to figure
  // this out from the code object.
  // symbol->name is used as scratch space for the argument name here; it is
  // overwritten with the real function name at the end.
  void* args_ptr;
  bpf_probe_read_user(
      &args_ptr, sizeof(void*), code_ptr + offsets->PyCodeObject_varnames);
  bpf_probe_read_user(
      &args_ptr, sizeof(void*), args_ptr + offsets->PyTupleObject_item);
  bpf_probe_read_user_str(
      &symbol->name, sizeof(symbol->name), args_ptr + offsets->String_data);

  // Compare the first four bytes as one int to save instructions.
  // cls_str carries its own terminator, so it only matches exactly 'cls'.
  // self_str has no room for one, so the byte after it is checked as well;
  // otherwise any argument that merely starts with 'self' would match.
  char self_str[4] = {'s', 'e', 'l', 'f'};
  char cls_str[4] = {'c', 'l', 's', '\0'};
  bool first_self = *(int32_t*)symbol->name == *(int32_t*)self_str &&
      symbol->name[4] == '\0';
  bool first_cls = *(int32_t*)symbol->name == *(int32_t*)cls_str;

  // We re-use the same Symbol instance across loop iterations, which means
  // we will have left-over data in the struct. Although this won't affect
  // correctness of the result because we have '\0' at end of the strings
  // read, it would affect effectiveness of the deduplication.
  // Helper bpf_perf_prog_read_value clears the buffer on error, so here we
  // (ab)use this behavior to clear the memory. It requires the size of
  // Symbol to be different from struct bpf_perf_event_value, which we check
  // at compilation time using the FAIL_COMPILATION_IF macro.
  bpf_perf_prog_read_value(ctx, symbol, sizeof(Symbol));

  // Read class name from $frame->f_localsplus[0]->ob_type->tp_name.
  if (first_self || first_cls) {
    void* ptr;
    bpf_probe_read_user(
        &ptr, sizeof(void*), cur_frame + offsets->PyFrameObject_localsplus);
    if (first_self) {
      // we are working with an instance, first we need to get its type;
      // for 'cls' the first local already is the type
      bpf_probe_read_user(&ptr, sizeof(void*), ptr + offsets->PyObject_type);
    }
    bpf_probe_read_user(&ptr, sizeof(void*), ptr + offsets->PyTypeObject_name);
    bpf_probe_read_user_str(&symbol->classname, sizeof(symbol->classname), ptr);
  }

  void* pystr_ptr;
  // read PyCodeObject's filename into symbol
  bpf_probe_read_user(
      &pystr_ptr, sizeof(void*), code_ptr + offsets->PyCodeObject_filename);
  bpf_probe_read_user_str(
      &symbol->file, sizeof(symbol->file), pystr_ptr + offsets->String_data);
  // read PyCodeObject's name into symbol
  bpf_probe_read_user(
      &pystr_ptr, sizeof(void*), code_ptr + offsets->PyCodeObject_name);
  bpf_probe_read_user_str(
      &symbol->name, sizeof(symbol->name), pystr_ptr + offsets->String_data);
}
407*387f9dfdSAndroid Build Coastguard Worker 
408*387f9dfdSAndroid Build Coastguard Worker // get_frame_data reads current PyFrameObject filename/name and updates
409*387f9dfdSAndroid Build Coastguard Worker // stack_info->frame_ptr with pointer to next PyFrameObject
410*387f9dfdSAndroid Build Coastguard Worker static inline __attribute__((__always_inline__)) bool get_frame_data(
411*387f9dfdSAndroid Build Coastguard Worker     void** frame_ptr,
412*387f9dfdSAndroid Build Coastguard Worker     OffsetConfig* offsets,
413*387f9dfdSAndroid Build Coastguard Worker     Symbol* symbol,
414*387f9dfdSAndroid Build Coastguard Worker     // ctx is only used to call helper to clear symbol, see documentation below
415*387f9dfdSAndroid Build Coastguard Worker     void* ctx) {
416*387f9dfdSAndroid Build Coastguard Worker   void* cur_frame = *frame_ptr;
417*387f9dfdSAndroid Build Coastguard Worker   if (!cur_frame) {
418*387f9dfdSAndroid Build Coastguard Worker     return false;
419*387f9dfdSAndroid Build Coastguard Worker   }
420*387f9dfdSAndroid Build Coastguard Worker   void* code_ptr;
421*387f9dfdSAndroid Build Coastguard Worker   // read PyCodeObject first, if that fails, then no point reading next frame
422*387f9dfdSAndroid Build Coastguard Worker   bpf_probe_read_user(
423*387f9dfdSAndroid Build Coastguard Worker       &code_ptr, sizeof(void*), cur_frame + offsets->PyFrameObject_code);
424*387f9dfdSAndroid Build Coastguard Worker   if (!code_ptr) {
425*387f9dfdSAndroid Build Coastguard Worker     return false;
426*387f9dfdSAndroid Build Coastguard Worker   }
427*387f9dfdSAndroid Build Coastguard Worker 
428*387f9dfdSAndroid Build Coastguard Worker   get_names(cur_frame, code_ptr, offsets, symbol, ctx);
429*387f9dfdSAndroid Build Coastguard Worker 
430*387f9dfdSAndroid Build Coastguard Worker   // read next PyFrameObject pointer, update in place
431*387f9dfdSAndroid Build Coastguard Worker   bpf_probe_read_user(
432*387f9dfdSAndroid Build Coastguard Worker       frame_ptr, sizeof(void*), cur_frame + offsets->PyFrameObject_back);
433*387f9dfdSAndroid Build Coastguard Worker 
434*387f9dfdSAndroid Build Coastguard Worker   return true;
435*387f9dfdSAndroid Build Coastguard Worker }
436*387f9dfdSAndroid Build Coastguard Worker 
437*387f9dfdSAndroid Build Coastguard Worker // To avoid duplicate ids, every CPU needs to use different ids when inserting
438*387f9dfdSAndroid Build Coastguard Worker // into the hashmap. NUM_CPUS is defined at PyPerf backend side and passed
439*387f9dfdSAndroid Build Coastguard Worker // through CFlag.
440*387f9dfdSAndroid Build Coastguard Worker static inline __attribute__((__always_inline__)) int64_t get_symbol_id(
441*387f9dfdSAndroid Build Coastguard Worker     sample_state_t* state,
442*387f9dfdSAndroid Build Coastguard Worker     Symbol* sym) {
443*387f9dfdSAndroid Build Coastguard Worker   int32_t* symbol_id_ptr = symbols.lookup(sym);
444*387f9dfdSAndroid Build Coastguard Worker   if (symbol_id_ptr) {
445*387f9dfdSAndroid Build Coastguard Worker     return *symbol_id_ptr;
446*387f9dfdSAndroid Build Coastguard Worker   }
447*387f9dfdSAndroid Build Coastguard Worker   // the symbol is new, bump the counter
448*387f9dfdSAndroid Build Coastguard Worker   int32_t symbol_id = state->symbol_counter * NUM_CPUS + state->cur_cpu;
449*387f9dfdSAndroid Build Coastguard Worker   state->symbol_counter++;
450*387f9dfdSAndroid Build Coastguard Worker   symbols.update(sym, &symbol_id);
451*387f9dfdSAndroid Build Coastguard Worker   return symbol_id;
452*387f9dfdSAndroid Build Coastguard Worker }
453*387f9dfdSAndroid Build Coastguard Worker 
454*387f9dfdSAndroid Build Coastguard Worker int read_python_stack(struct pt_regs* ctx) {
455*387f9dfdSAndroid Build Coastguard Worker   GET_STATE();
456*387f9dfdSAndroid Build Coastguard Worker 
457*387f9dfdSAndroid Build Coastguard Worker   state->python_stack_prog_call_cnt++;
458*387f9dfdSAndroid Build Coastguard Worker   Event* sample = &state->event;
459*387f9dfdSAndroid Build Coastguard Worker 
460*387f9dfdSAndroid Build Coastguard Worker   Symbol sym = {};
461*387f9dfdSAndroid Build Coastguard Worker   bool last_res = false;
462*387f9dfdSAndroid Build Coastguard Worker #pragma unroll
463*387f9dfdSAndroid Build Coastguard Worker   for (int i = 0; i < PYTHON_STACK_FRAMES_PER_PROG; i++) {
464*387f9dfdSAndroid Build Coastguard Worker     last_res = get_frame_data(&state->frame_ptr, &state->offsets, &sym, ctx);
465*387f9dfdSAndroid Build Coastguard Worker     if (last_res) {
466*387f9dfdSAndroid Build Coastguard Worker       uint32_t symbol_id = get_symbol_id(state, &sym);
467*387f9dfdSAndroid Build Coastguard Worker       int64_t cur_len = sample->stack_len;
468*387f9dfdSAndroid Build Coastguard Worker       if (cur_len >= 0 && cur_len < STACK_MAX_LEN) {
469*387f9dfdSAndroid Build Coastguard Worker         sample->stack[cur_len] = symbol_id;
470*387f9dfdSAndroid Build Coastguard Worker         sample->stack_len++;
471*387f9dfdSAndroid Build Coastguard Worker       }
472*387f9dfdSAndroid Build Coastguard Worker     }
473*387f9dfdSAndroid Build Coastguard Worker   }
474*387f9dfdSAndroid Build Coastguard Worker 
475*387f9dfdSAndroid Build Coastguard Worker   if (!state->frame_ptr) {
476*387f9dfdSAndroid Build Coastguard Worker     sample->stack_status = STACK_STATUS_COMPLETE;
477*387f9dfdSAndroid Build Coastguard Worker   } else {
478*387f9dfdSAndroid Build Coastguard Worker     if (!last_res) {
479*387f9dfdSAndroid Build Coastguard Worker       sample->stack_status = STACK_STATUS_ERROR;
480*387f9dfdSAndroid Build Coastguard Worker     } else {
481*387f9dfdSAndroid Build Coastguard Worker       sample->stack_status = STACK_STATUS_TRUNCATED;
482*387f9dfdSAndroid Build Coastguard Worker     }
483*387f9dfdSAndroid Build Coastguard Worker   }
484*387f9dfdSAndroid Build Coastguard Worker 
485*387f9dfdSAndroid Build Coastguard Worker   if (sample->stack_status == STACK_STATUS_TRUNCATED &&
486*387f9dfdSAndroid Build Coastguard Worker       state->python_stack_prog_call_cnt < PYTHON_STACK_PROG_CNT) {
487*387f9dfdSAndroid Build Coastguard Worker     // read next batch of frames
488*387f9dfdSAndroid Build Coastguard Worker     progs.call(ctx, PYTHON_STACK_PROG_IDX);
489*387f9dfdSAndroid Build Coastguard Worker   }
490*387f9dfdSAndroid Build Coastguard Worker 
491*387f9dfdSAndroid Build Coastguard Worker   return submit_sample(ctx, state);
492*387f9dfdSAndroid Build Coastguard Worker }
493*387f9dfdSAndroid Build Coastguard Worker )";
494*387f9dfdSAndroid Build Coastguard Worker 
495*387f9dfdSAndroid Build Coastguard Worker }
496*387f9dfdSAndroid Build Coastguard Worker }  // namespace ebpf
497