/*
 * Copyright (c) Facebook, Inc.
 * Licensed under the Apache License, Version 2.0 (the "License")
 */

#include <string>

namespace ebpf {
namespace pyperf {

// Source text of the BPF C program used by PyPerf to sample Python stacks.
//
// The program defines two entry points:
//   - on_event:          looks up the per-pid configuration, records
//                        pid/tid/comm plus thread-state, GIL and pthread-id
//                        diagnostics, reads the top frame pointer and tail
//                        calls into the stack reader.
//   - read_python_stack: walks up to PYTHON_STACK_FRAMES_PER_PROG frames per
//                        invocation, tail calling itself until the stack is
//                        exhausted or PYTHON_STACK_PROG_CNT invocations ran,
//                        then submits the event.
//
// The text references NUM_CPUS, PYTHON_STACK_PROG_IDX and __SYMBOLS_SIZE__
// without defining them. NUM_CPUS is documented below as being passed in as a
// compiler flag by the PyPerf backend; the other two are presumably supplied
// the same way or substituted by the loader -- TODO confirm against the code
// that compiles this string.
extern const std::string PYPERF_BPF_PROGRAM = R"(
#include <linux/sched.h>
#include <uapi/linux/ptrace.h>

#define PYTHON_STACK_FRAMES_PER_PROG 25
#define PYTHON_STACK_PROG_CNT 3
#define STACK_MAX_LEN (PYTHON_STACK_FRAMES_PER_PROG * PYTHON_STACK_PROG_CNT)
#define CLASS_NAME_LEN 32
#define FUNCTION_NAME_LEN 64
#define FILE_NAME_LEN 128
#define TASK_COMM_LEN 16

enum {
  STACK_STATUS_COMPLETE = 0,
  STACK_STATUS_ERROR = 1,
  STACK_STATUS_TRUNCATED = 2,
};

enum {
  GIL_STATE_NO_INFO = 0,
  GIL_STATE_ERROR = 1,
  GIL_STATE_UNINITIALIZED = 2,
  GIL_STATE_NOT_LOCKED = 3,
  GIL_STATE_THIS_THREAD = 4,
  GIL_STATE_GLOBAL_CURRENT_THREAD = 5,
  GIL_STATE_OTHER_THREAD = 6,
  GIL_STATE_NULL = 7,
};

enum {
  THREAD_STATE_UNKNOWN = 0,
  THREAD_STATE_MATCH = 1,
  THREAD_STATE_MISMATCH = 2,
  THREAD_STATE_THIS_THREAD_NULL = 3,
  THREAD_STATE_GLOBAL_CURRENT_THREAD_NULL = 4,
  THREAD_STATE_BOTH_NULL = 5,
};

enum {
  PTHREAD_ID_UNKNOWN = 0,
  PTHREAD_ID_MATCH = 1,
  PTHREAD_ID_MISMATCH = 2,
  PTHREAD_ID_THREAD_STATE_NULL = 3,
  PTHREAD_ID_NULL = 4,
  PTHREAD_ID_ERROR = 5,
};

typedef struct {
  int64_t PyObject_type;
  int64_t PyTypeObject_name;
  int64_t PyThreadState_frame;
  int64_t PyThreadState_thread;
  int64_t PyFrameObject_back;
  int64_t PyFrameObject_code;
  int64_t PyFrameObject_lineno;
  int64_t PyFrameObject_localsplus;
  int64_t PyCodeObject_filename;
  int64_t PyCodeObject_name;
  int64_t PyCodeObject_varnames;
  int64_t PyTupleObject_item;
  int64_t String_data;
  int64_t String_size;
} OffsetConfig;

typedef struct {
  uintptr_t current_state_addr; // virtual address of _PyThreadState_Current
  uintptr_t tls_key_addr; // virtual address of autoTLSkey for pthreads TLS
  uintptr_t gil_locked_addr; // virtual address of gil_locked
  uintptr_t gil_last_holder_addr; // virtual address of gil_last_holder
  OffsetConfig offsets;
} PidData;

typedef struct {
  char classname[CLASS_NAME_LEN];
  char name[FUNCTION_NAME_LEN];
  char file[FILE_NAME_LEN];
  // NOTE: PyFrameObject also has line number but it is typically just the
  // first line of that function and PyCode_Addr2Line needs to be called
  // to get the actual line
} Symbol;

typedef struct {
  uint32_t pid;
  uint32_t tid;
  char comm[TASK_COMM_LEN];
  uint8_t thread_state_match;
  uint8_t gil_state;
  uint8_t pthread_id_match;
  uint8_t stack_status;
  // instead of storing symbol name here directly, we add it to another
  // hashmap with Symbols and only store the ids here
  int64_t stack_len;
  int32_t stack[STACK_MAX_LEN];
} Event;

#define _STR_CONCAT(str1, str2) str1##str2
#define STR_CONCAT(str1, str2) _STR_CONCAT(str1, str2)
#define FAIL_COMPILATION_IF(condition)            \
  typedef struct {                                \
    char _condition_check[1 - 2 * !!(condition)]; \
  } STR_CONCAT(compile_time_condition_check, __COUNTER__);
// See comments in get_names
FAIL_COMPILATION_IF(sizeof(Symbol) == sizeof(struct bpf_perf_event_value))

typedef struct {
  OffsetConfig offsets;
  uint64_t cur_cpu;
  int64_t symbol_counter;
  void* frame_ptr;
  int64_t python_stack_prog_call_cnt;
  Event event;
} sample_state_t;

BPF_PERCPU_ARRAY(state_heap, sample_state_t, 1);
BPF_HASH(symbols, Symbol, int32_t, __SYMBOLS_SIZE__);
BPF_HASH(pid_config, pid_t, PidData);
BPF_PROG_ARRAY(progs, 1);

BPF_PERF_OUTPUT(events);

static inline __attribute__((__always_inline__)) void* get_thread_state(
    void* tls_base,
    PidData* pid_data) {
  // Python sets the thread_state using pthread_setspecific with the key
  // stored in a global variable autoTLSkey.
  // We read the value of the key from the global variable and then read
  // the value in the thread-local storage. This relies on pthread implementation.
  // This is basically the same as running the following in GDB:
  //   p *(PyThreadState*)((struct pthread*)pthread_self())->
  //     specific_1stblock[autoTLSkey]->data
  int key;
  bpf_probe_read_user(&key, sizeof(key), (void*)pid_data->tls_key_addr);
  // This assumes autoTLSkey < 32, which means that the TLS is stored in
  //   pthread->specific_1stblock[autoTLSkey]
  // 0x310 is offsetof(struct pthread, specific_1stblock),
  // 0x10 is sizeof(pthread_key_data)
  // 0x8 is offsetof(struct pthread_key_data, data)
  // 'struct pthread' is not in the public API so we have to hardcode
  // the offsets here
  void* thread_state;
  bpf_probe_read_user(
      &thread_state,
      sizeof(thread_state),
      tls_base + 0x310 + key * 0x10 + 0x08);
  return thread_state;
}

static inline __attribute__((__always_inline__)) int submit_sample(
    struct pt_regs* ctx,
    sample_state_t* state) {
  events.perf_submit(ctx, &state->event, sizeof(Event));
  return 0;
}

// this function is trivial, but we need to do map lookup in separate function,
// because BCC doesn't allow direct map calls (including lookups) from inside
// a macro (which we want to do in GET_STATE() macro below)
static inline __attribute__((__always_inline__)) sample_state_t* get_state() {
  int zero = 0;
  return state_heap.lookup(&zero);
}

#define GET_STATE()                     \
  sample_state_t* state = get_state();  \
  if (!state) {                         \
    return 0; /* should never happen */ \
  }

static inline __attribute__((__always_inline__)) int get_thread_state_match(
    void* this_thread_state,
    void* global_thread_state) {
  if (this_thread_state == 0 && global_thread_state == 0) {
    return THREAD_STATE_BOTH_NULL;
  }
  if (this_thread_state == 0) {
    return THREAD_STATE_THIS_THREAD_NULL;
  }
  if (global_thread_state == 0) {
    return THREAD_STATE_GLOBAL_CURRENT_THREAD_NULL;
  }
  if (this_thread_state == global_thread_state) {
    return THREAD_STATE_MATCH;
  } else {
    return THREAD_STATE_MISMATCH;
  }
}

static inline __attribute__((__always_inline__)) int get_gil_state(
    void* this_thread_state,
    void* global_thread_state,
    PidData* pid_data) {
  // Get information of GIL state
  if (pid_data->gil_locked_addr == 0 || pid_data->gil_last_holder_addr == 0) {
    return GIL_STATE_NO_INFO;
  }

  int gil_locked = 0;
  void* gil_thread_state = 0;
  if (bpf_probe_read_user(
          &gil_locked, sizeof(gil_locked), (void*)pid_data->gil_locked_addr)) {
    return GIL_STATE_ERROR;
  }

  switch (gil_locked) {
    case -1:
      return GIL_STATE_UNINITIALIZED;
    case 0:
      return GIL_STATE_NOT_LOCKED;
    case 1:
      // GIL is held by some Thread
      bpf_probe_read_user(
          &gil_thread_state,
          sizeof(void*),
          (void*)pid_data->gil_last_holder_addr);
      if (gil_thread_state == this_thread_state) {
        return GIL_STATE_THIS_THREAD;
      } else if (gil_thread_state == global_thread_state) {
        return GIL_STATE_GLOBAL_CURRENT_THREAD;
      } else if (gil_thread_state == 0) {
        return GIL_STATE_NULL;
      } else {
        return GIL_STATE_OTHER_THREAD;
      }
    default:
      return GIL_STATE_ERROR;
  }
}

static inline __attribute__((__always_inline__)) int
get_pthread_id_match(void* thread_state, void* tls_base, PidData* pid_data) {
  if (thread_state == 0) {
    return PTHREAD_ID_THREAD_STATE_NULL;
  }

  uint64_t pthread_self, pthread_created;

  bpf_probe_read_user(
      &pthread_created,
      sizeof(pthread_created),
      thread_state + pid_data->offsets.PyThreadState_thread);
  if (pthread_created == 0) {
    return PTHREAD_ID_NULL;
  }

  // 0x10 = offsetof(struct pthread, header.self)
  bpf_probe_read_user(&pthread_self, sizeof(pthread_self), tls_base + 0x10);
  if (pthread_self == 0) {
    return PTHREAD_ID_ERROR;
  }

  if (pthread_self == pthread_created) {
    return PTHREAD_ID_MATCH;
  } else {
    return PTHREAD_ID_MISMATCH;
  }
}

int on_event(struct pt_regs* ctx) {
  uint64_t pid_tgid = bpf_get_current_pid_tgid();
  pid_t pid = (pid_t)(pid_tgid >> 32);
  PidData* pid_data = pid_config.lookup(&pid);
  if (!pid_data) {
    return 0;
  }

  GET_STATE();

  state->offsets = pid_data->offsets;
  state->cur_cpu = bpf_get_smp_processor_id();
  state->python_stack_prog_call_cnt = 0;

  Event* event = &state->event;
  event->pid = pid;
  event->tid = (pid_t)pid_tgid;
  bpf_get_current_comm(&event->comm, sizeof(event->comm));

  // Get pointer of global PyThreadState, which should belong to the thread
  // that currently holds the GIL
  void* global_current_thread = (void*)0;
  bpf_probe_read_user(
      &global_current_thread,
      sizeof(global_current_thread),
      (void*)pid_data->current_state_addr);

  struct task_struct* task = (struct task_struct*)bpf_get_current_task();
#if __x86_64__
// thread_struct->fs was renamed to fsbase in
// https://github.com/torvalds/linux/commit/296f781a4b7801ad9c1c0219f9e87b6c25e196fe
// so depending on kernel version, we need to account for that
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 7, 0)
  void* tls_base = (void*)task->thread.fs;
#else
  void* tls_base = (void*)task->thread.fsbase;
#endif
#elif __aarch64__
  void* tls_base = (void*)task->thread.tp_value;
#else
#error "Unsupported platform"
#endif

  // Read PyThreadState of this Thread from TLS
  void* thread_state = get_thread_state(tls_base, pid_data);

  // Check for matching between TLS PyThreadState and
  // the global _PyThreadState_Current
  event->thread_state_match =
      get_thread_state_match(thread_state, global_current_thread);

  // Read GIL state
  event->gil_state =
      get_gil_state(thread_state, global_current_thread, pid_data);

  // Check for matching between pthread ID created current PyThreadState and
  // pthread of actual current pthread
  event->pthread_id_match =
      get_pthread_id_match(thread_state, tls_base, pid_data);

  // pre-initialize event struct in case any subprogram below fails
  event->stack_status = STACK_STATUS_COMPLETE;
  event->stack_len = 0;

  if (thread_state != 0) {
    // Get pointer to top frame from PyThreadState
    bpf_probe_read_user(
        &state->frame_ptr,
        sizeof(void*),
        thread_state + pid_data->offsets.PyThreadState_frame);
    // jump to reading first set of Python frames
    progs.call(ctx, PYTHON_STACK_PROG_IDX);
    // a successful tail call does not return; we only get here if the call
    // failed, in which case the sample is submitted below with an empty stack
  }

  return submit_sample(ctx, state);
}

static inline __attribute__((__always_inline__)) void get_names(
    void* cur_frame,
    void* code_ptr,
    OffsetConfig* offsets,
    Symbol* symbol,
    void* ctx) {
  // Figure out if we want to parse class name, basically checking the name of
  // the first argument,
  //   ((PyTupleObject*)$frame->f_code->co_varnames)->ob_item[0]
  // If it's 'self', we get the type and its name, if it's cls, we just get
  // the name. This is not perfect but there is no better way to figure this
  // out from the code object.
  void* args_ptr;
  bpf_probe_read_user(
      &args_ptr, sizeof(void*), code_ptr + offsets->PyCodeObject_varnames);
  bpf_probe_read_user(
      &args_ptr, sizeof(void*), args_ptr + offsets->PyTupleObject_item);
  bpf_probe_read_user_str(
      &symbol->name, sizeof(symbol->name), args_ptr + offsets->String_data);

  // compare strings as ints to save instructions
  char self_str[4] = {'s', 'e', 'l', 'f'};
  char cls_str[4] = {'c', 'l', 's', '\0'};
  bool first_self = *(int32_t*)symbol->name == *(int32_t*)self_str;
  bool first_cls = *(int32_t*)symbol->name == *(int32_t*)cls_str;

  // We re-use the same Symbol instance across loop iterations, which means
  // we will have left-over data in the struct. Although this won't affect
  // correctness of the result because we have '\0' at end of the strings read,
  // it would affect effectiveness of the deduplication.
  // Helper bpf_perf_prog_read_value clears the buffer on error, so here we
  // (ab)use this behavior to clear the memory. It requires the size of Symbol
  // to be different from struct bpf_perf_event_value, which we check at
  // compilation time using the FAIL_COMPILATION_IF macro.
  bpf_perf_prog_read_value(ctx, symbol, sizeof(Symbol));

  // Read class name from $frame->f_localsplus[0]->ob_type->tp_name.
  if (first_self || first_cls) {
    void* ptr;
    bpf_probe_read_user(
        &ptr, sizeof(void*), cur_frame + offsets->PyFrameObject_localsplus);
    if (first_self) {
      // we are working with an instance, first we need to get type
      bpf_probe_read_user(&ptr, sizeof(void*), ptr + offsets->PyObject_type);
    }
    bpf_probe_read_user(&ptr, sizeof(void*), ptr + offsets->PyTypeObject_name);
    bpf_probe_read_user_str(&symbol->classname, sizeof(symbol->classname), ptr);
  }

  void* pystr_ptr;
  // read PyCodeObject's filename into symbol
  bpf_probe_read_user(
      &pystr_ptr, sizeof(void*), code_ptr + offsets->PyCodeObject_filename);
  bpf_probe_read_user_str(
      &symbol->file, sizeof(symbol->file), pystr_ptr + offsets->String_data);
  // read PyCodeObject's name into symbol
  bpf_probe_read_user(
      &pystr_ptr, sizeof(void*), code_ptr + offsets->PyCodeObject_name);
  bpf_probe_read_user_str(
      &symbol->name, sizeof(symbol->name), pystr_ptr + offsets->String_data);
}

// get_frame_data reads current PyFrameObject filename/name and updates
// *frame_ptr with pointer to next PyFrameObject
static inline __attribute__((__always_inline__)) bool get_frame_data(
    void** frame_ptr,
    OffsetConfig* offsets,
    Symbol* symbol,
    // ctx is only used to call helper to clear symbol, see get_names
    void* ctx) {
  void* cur_frame = *frame_ptr;
  if (!cur_frame) {
    return false;
  }
  void* code_ptr;
  // read PyCodeObject first, if that fails, then no point reading next frame
  bpf_probe_read_user(
      &code_ptr, sizeof(void*), cur_frame + offsets->PyFrameObject_code);
  if (!code_ptr) {
    return false;
  }

  get_names(cur_frame, code_ptr, offsets, symbol, ctx);

  // read next PyFrameObject pointer, update in place
  bpf_probe_read_user(
      frame_ptr, sizeof(void*), cur_frame + offsets->PyFrameObject_back);

  return true;
}

// To avoid duplicate ids, every CPU needs to use different ids when inserting
// into the hashmap. NUM_CPUS is defined at PyPerf backend side and passed
// through CFlag.
static inline __attribute__((__always_inline__)) int64_t get_symbol_id(
    sample_state_t* state,
    Symbol* sym) {
  int32_t* symbol_id_ptr = symbols.lookup(sym);
  if (symbol_id_ptr) {
    return *symbol_id_ptr;
  }
  // the symbol is new, bump the counter
  int32_t symbol_id = state->symbol_counter * NUM_CPUS + state->cur_cpu;
  state->symbol_counter++;
  symbols.update(sym, &symbol_id);
  return symbol_id;
}

int read_python_stack(struct pt_regs* ctx) {
  GET_STATE();

  state->python_stack_prog_call_cnt++;
  Event* sample = &state->event;

  Symbol sym = {};
  bool last_res = false;
#pragma unroll
  for (int i = 0; i < PYTHON_STACK_FRAMES_PER_PROG; i++) {
    last_res = get_frame_data(&state->frame_ptr, &state->offsets, &sym, ctx);
    if (last_res) {
      uint32_t symbol_id = get_symbol_id(state, &sym);
      int64_t cur_len = sample->stack_len;
      if (cur_len >= 0 && cur_len < STACK_MAX_LEN) {
        sample->stack[cur_len] = symbol_id;
        sample->stack_len++;
      }
    }
  }

  if (!state->frame_ptr) {
    sample->stack_status = STACK_STATUS_COMPLETE;
  } else {
    if (!last_res) {
      sample->stack_status = STACK_STATUS_ERROR;
    } else {
      sample->stack_status = STACK_STATUS_TRUNCATED;
    }
  }

  if (sample->stack_status == STACK_STATUS_TRUNCATED &&
      state->python_stack_prog_call_cnt < PYTHON_STACK_PROG_CNT) {
    // read next batch of frames
    progs.call(ctx, PYTHON_STACK_PROG_IDX);
  }

  return submit_sample(ctx, state);
}
)";

} // namespace pyperf
} // namespace ebpf