xref: /aosp_15_r20/external/bcc/tools/deadlock.c (revision 387f9dfdfa2baef462e92476d413c7bc2470293e)
1*387f9dfdSAndroid Build Coastguard Worker /*
2*387f9dfdSAndroid Build Coastguard Worker  * deadlock.c  Detects potential deadlocks in a running process.
3*387f9dfdSAndroid Build Coastguard Worker  *             For Linux, uses BCC, eBPF. See .py file.
4*387f9dfdSAndroid Build Coastguard Worker  *
5*387f9dfdSAndroid Build Coastguard Worker  * Copyright 2017 Facebook, Inc.
6*387f9dfdSAndroid Build Coastguard Worker  * Licensed under the Apache License, Version 2.0 (the "License")
7*387f9dfdSAndroid Build Coastguard Worker  *
8*387f9dfdSAndroid Build Coastguard Worker  * 1-Feb-2016   Kenny Yu   Created this.
9*387f9dfdSAndroid Build Coastguard Worker  */
10*387f9dfdSAndroid Build Coastguard Worker 
11*387f9dfdSAndroid Build Coastguard Worker #include <linux/sched.h>
12*387f9dfdSAndroid Build Coastguard Worker #include <uapi/linux/ptrace.h>
13*387f9dfdSAndroid Build Coastguard Worker 
14*387f9dfdSAndroid Build Coastguard Worker // Maximum number of mutexes a single thread can hold at once.
15*387f9dfdSAndroid Build Coastguard Worker // If the number is too big, the unrolled loops wil cause the stack
16*387f9dfdSAndroid Build Coastguard Worker // to be too big, and the bpf verifier will fail.
17*387f9dfdSAndroid Build Coastguard Worker #define MAX_HELD_MUTEXES 16
18*387f9dfdSAndroid Build Coastguard Worker 
19*387f9dfdSAndroid Build Coastguard Worker // Info about held mutexes. `mutex` will be 0 if not held.
20*387f9dfdSAndroid Build Coastguard Worker struct held_mutex_t {
21*387f9dfdSAndroid Build Coastguard Worker   u64 mutex;
22*387f9dfdSAndroid Build Coastguard Worker   u64 stack_id;
23*387f9dfdSAndroid Build Coastguard Worker };
24*387f9dfdSAndroid Build Coastguard Worker 
25*387f9dfdSAndroid Build Coastguard Worker // List of mutexes that a thread is holding. Whenever we loop over this array,
26*387f9dfdSAndroid Build Coastguard Worker // we need to force the compiler to unroll the loop, otherwise the bcc verifier
27*387f9dfdSAndroid Build Coastguard Worker // will fail because the loop will create a backwards edge.
28*387f9dfdSAndroid Build Coastguard Worker struct thread_to_held_mutex_leaf_t {
29*387f9dfdSAndroid Build Coastguard Worker   struct held_mutex_t held_mutexes[MAX_HELD_MUTEXES];
30*387f9dfdSAndroid Build Coastguard Worker };
31*387f9dfdSAndroid Build Coastguard Worker 
32*387f9dfdSAndroid Build Coastguard Worker // Map of thread ID -> array of (mutex addresses, stack id)
33*387f9dfdSAndroid Build Coastguard Worker BPF_HASH(thread_to_held_mutexes, u32, struct thread_to_held_mutex_leaf_t, MAX_THREADS);
34*387f9dfdSAndroid Build Coastguard Worker 
35*387f9dfdSAndroid Build Coastguard Worker // Key type for edges. Represents an edge from mutex1 => mutex2.
36*387f9dfdSAndroid Build Coastguard Worker struct edges_key_t {
37*387f9dfdSAndroid Build Coastguard Worker   u64 mutex1;
38*387f9dfdSAndroid Build Coastguard Worker   u64 mutex2;
39*387f9dfdSAndroid Build Coastguard Worker };
40*387f9dfdSAndroid Build Coastguard Worker 
41*387f9dfdSAndroid Build Coastguard Worker // Leaf type for edges. Holds information about where each mutex was acquired.
42*387f9dfdSAndroid Build Coastguard Worker struct edges_leaf_t {
43*387f9dfdSAndroid Build Coastguard Worker   u64 mutex1_stack_id;
44*387f9dfdSAndroid Build Coastguard Worker   u64 mutex2_stack_id;
45*387f9dfdSAndroid Build Coastguard Worker   u32 thread_pid;
46*387f9dfdSAndroid Build Coastguard Worker   char comm[TASK_COMM_LEN];
47*387f9dfdSAndroid Build Coastguard Worker };
48*387f9dfdSAndroid Build Coastguard Worker 
49*387f9dfdSAndroid Build Coastguard Worker // Represents all edges currently in the mutex wait graph.
50*387f9dfdSAndroid Build Coastguard Worker BPF_HASH(edges, struct edges_key_t, struct edges_leaf_t, MAX_EDGES);
51*387f9dfdSAndroid Build Coastguard Worker 
52*387f9dfdSAndroid Build Coastguard Worker // Info about parent thread when a child thread is created.
53*387f9dfdSAndroid Build Coastguard Worker struct thread_created_leaf_t {
54*387f9dfdSAndroid Build Coastguard Worker   u64 stack_id;
55*387f9dfdSAndroid Build Coastguard Worker   u32 parent_pid;
56*387f9dfdSAndroid Build Coastguard Worker   char comm[TASK_COMM_LEN];
57*387f9dfdSAndroid Build Coastguard Worker };
58*387f9dfdSAndroid Build Coastguard Worker 
59*387f9dfdSAndroid Build Coastguard Worker // Map of child thread pid -> info about parent thread.
60*387f9dfdSAndroid Build Coastguard Worker BPF_HASH(thread_to_parent, u32, struct thread_created_leaf_t);
61*387f9dfdSAndroid Build Coastguard Worker 
62*387f9dfdSAndroid Build Coastguard Worker // Stack traces when threads are created and when mutexes are locked/unlocked.
63*387f9dfdSAndroid Build Coastguard Worker BPF_STACK_TRACE(stack_traces, MAX_TRACES);
64*387f9dfdSAndroid Build Coastguard Worker 
65*387f9dfdSAndroid Build Coastguard Worker // The first argument to the user space function we are tracing
66*387f9dfdSAndroid Build Coastguard Worker // is a pointer to the mutex M held by thread T.
67*387f9dfdSAndroid Build Coastguard Worker //
68*387f9dfdSAndroid Build Coastguard Worker // For all mutexes N held by mutexes_held[T]
69*387f9dfdSAndroid Build Coastguard Worker //   add edge N => M (held by T)
70*387f9dfdSAndroid Build Coastguard Worker // mutexes_held[T].add(M)
trace_mutex_acquire(struct pt_regs * ctx,void * mutex_addr)71*387f9dfdSAndroid Build Coastguard Worker int trace_mutex_acquire(struct pt_regs *ctx, void *mutex_addr) {
72*387f9dfdSAndroid Build Coastguard Worker   // Higher 32 bits is process ID, Lower 32 bits is thread ID
73*387f9dfdSAndroid Build Coastguard Worker   u32 pid = bpf_get_current_pid_tgid();
74*387f9dfdSAndroid Build Coastguard Worker   u64 mutex = (u64)mutex_addr;
75*387f9dfdSAndroid Build Coastguard Worker 
76*387f9dfdSAndroid Build Coastguard Worker   struct thread_to_held_mutex_leaf_t empty_leaf = {};
77*387f9dfdSAndroid Build Coastguard Worker   struct thread_to_held_mutex_leaf_t *leaf =
78*387f9dfdSAndroid Build Coastguard Worker       thread_to_held_mutexes.lookup_or_try_init(&pid, &empty_leaf);
79*387f9dfdSAndroid Build Coastguard Worker   if (!leaf) {
80*387f9dfdSAndroid Build Coastguard Worker     bpf_trace_printk(
81*387f9dfdSAndroid Build Coastguard Worker         "could not add thread_to_held_mutex key, thread: %d, mutex: %p\n", pid,
82*387f9dfdSAndroid Build Coastguard Worker         mutex);
83*387f9dfdSAndroid Build Coastguard Worker     return 1; // Could not insert, no more memory
84*387f9dfdSAndroid Build Coastguard Worker   }
85*387f9dfdSAndroid Build Coastguard Worker 
86*387f9dfdSAndroid Build Coastguard Worker   // Recursive mutexes lock the same mutex multiple times. We cannot tell if
87*387f9dfdSAndroid Build Coastguard Worker   // the mutex is recursive after the mutex is already created. To avoid noisy
88*387f9dfdSAndroid Build Coastguard Worker   // reports, disallow self edges. Do one pass to check if we are already
89*387f9dfdSAndroid Build Coastguard Worker   // holding the mutex, and if we are, do nothing.
90*387f9dfdSAndroid Build Coastguard Worker   #pragma unroll
91*387f9dfdSAndroid Build Coastguard Worker   for (int i = 0; i < MAX_HELD_MUTEXES; ++i) {
92*387f9dfdSAndroid Build Coastguard Worker     if (leaf->held_mutexes[i].mutex == mutex) {
93*387f9dfdSAndroid Build Coastguard Worker       return 1; // Disallow self edges
94*387f9dfdSAndroid Build Coastguard Worker     }
95*387f9dfdSAndroid Build Coastguard Worker   }
96*387f9dfdSAndroid Build Coastguard Worker 
97*387f9dfdSAndroid Build Coastguard Worker   u64 stack_id =
98*387f9dfdSAndroid Build Coastguard Worker       stack_traces.get_stackid(ctx, BPF_F_USER_STACK);
99*387f9dfdSAndroid Build Coastguard Worker 
100*387f9dfdSAndroid Build Coastguard Worker   int added_mutex = 0;
101*387f9dfdSAndroid Build Coastguard Worker   #pragma unroll
102*387f9dfdSAndroid Build Coastguard Worker   for (int i = 0; i < MAX_HELD_MUTEXES; ++i) {
103*387f9dfdSAndroid Build Coastguard Worker     // If this is a free slot, see if we can insert.
104*387f9dfdSAndroid Build Coastguard Worker     if (!leaf->held_mutexes[i].mutex) {
105*387f9dfdSAndroid Build Coastguard Worker       if (!added_mutex) {
106*387f9dfdSAndroid Build Coastguard Worker         leaf->held_mutexes[i].mutex = mutex;
107*387f9dfdSAndroid Build Coastguard Worker         leaf->held_mutexes[i].stack_id = stack_id;
108*387f9dfdSAndroid Build Coastguard Worker         added_mutex = 1;
109*387f9dfdSAndroid Build Coastguard Worker       }
110*387f9dfdSAndroid Build Coastguard Worker       continue; // Nothing to do for a free slot
111*387f9dfdSAndroid Build Coastguard Worker     }
112*387f9dfdSAndroid Build Coastguard Worker 
113*387f9dfdSAndroid Build Coastguard Worker     // Add edges from held mutex => current mutex
114*387f9dfdSAndroid Build Coastguard Worker     struct edges_key_t edge_key = {};
115*387f9dfdSAndroid Build Coastguard Worker     edge_key.mutex1 = leaf->held_mutexes[i].mutex;
116*387f9dfdSAndroid Build Coastguard Worker     edge_key.mutex2 = mutex;
117*387f9dfdSAndroid Build Coastguard Worker 
118*387f9dfdSAndroid Build Coastguard Worker     struct edges_leaf_t edge_leaf = {};
119*387f9dfdSAndroid Build Coastguard Worker     edge_leaf.mutex1_stack_id = leaf->held_mutexes[i].stack_id;
120*387f9dfdSAndroid Build Coastguard Worker     edge_leaf.mutex2_stack_id = stack_id;
121*387f9dfdSAndroid Build Coastguard Worker     edge_leaf.thread_pid = pid;
122*387f9dfdSAndroid Build Coastguard Worker     bpf_get_current_comm(&edge_leaf.comm, sizeof(edge_leaf.comm));
123*387f9dfdSAndroid Build Coastguard Worker 
124*387f9dfdSAndroid Build Coastguard Worker     // Returns non-zero on error
125*387f9dfdSAndroid Build Coastguard Worker     int result = edges.update(&edge_key, &edge_leaf);
126*387f9dfdSAndroid Build Coastguard Worker     if (result) {
127*387f9dfdSAndroid Build Coastguard Worker       bpf_trace_printk("could not add edge key %p, %p, error: %d\n",
128*387f9dfdSAndroid Build Coastguard Worker                        edge_key.mutex1, edge_key.mutex2, result);
129*387f9dfdSAndroid Build Coastguard Worker       continue; // Could not insert, no more memory
130*387f9dfdSAndroid Build Coastguard Worker     }
131*387f9dfdSAndroid Build Coastguard Worker   }
132*387f9dfdSAndroid Build Coastguard Worker 
133*387f9dfdSAndroid Build Coastguard Worker   // There were no free slots for this mutex.
134*387f9dfdSAndroid Build Coastguard Worker   if (!added_mutex) {
135*387f9dfdSAndroid Build Coastguard Worker     bpf_trace_printk("could not add mutex %p, added_mutex: %d\n", mutex,
136*387f9dfdSAndroid Build Coastguard Worker                      added_mutex);
137*387f9dfdSAndroid Build Coastguard Worker     return 1;
138*387f9dfdSAndroid Build Coastguard Worker   }
139*387f9dfdSAndroid Build Coastguard Worker   return 0;
140*387f9dfdSAndroid Build Coastguard Worker }
141*387f9dfdSAndroid Build Coastguard Worker 
142*387f9dfdSAndroid Build Coastguard Worker // The first argument to the user space function we are tracing
143*387f9dfdSAndroid Build Coastguard Worker // is a pointer to the mutex M held by thread T.
144*387f9dfdSAndroid Build Coastguard Worker //
145*387f9dfdSAndroid Build Coastguard Worker // mutexes_held[T].remove(M)
trace_mutex_release(struct pt_regs * ctx,void * mutex_addr)146*387f9dfdSAndroid Build Coastguard Worker int trace_mutex_release(struct pt_regs *ctx, void *mutex_addr) {
147*387f9dfdSAndroid Build Coastguard Worker   // Higher 32 bits is process ID, Lower 32 bits is thread ID
148*387f9dfdSAndroid Build Coastguard Worker   u32 pid = bpf_get_current_pid_tgid();
149*387f9dfdSAndroid Build Coastguard Worker   u64 mutex = (u64)mutex_addr;
150*387f9dfdSAndroid Build Coastguard Worker 
151*387f9dfdSAndroid Build Coastguard Worker   struct thread_to_held_mutex_leaf_t *leaf =
152*387f9dfdSAndroid Build Coastguard Worker       thread_to_held_mutexes.lookup(&pid);
153*387f9dfdSAndroid Build Coastguard Worker   if (!leaf) {
154*387f9dfdSAndroid Build Coastguard Worker     // If the leaf does not exist for the pid, then it means we either missed
155*387f9dfdSAndroid Build Coastguard Worker     // the acquire event, or we had no more memory and could not add it.
156*387f9dfdSAndroid Build Coastguard Worker     bpf_trace_printk(
157*387f9dfdSAndroid Build Coastguard Worker         "could not find thread_to_held_mutex, thread: %d, mutex: %p\n", pid,
158*387f9dfdSAndroid Build Coastguard Worker         mutex);
159*387f9dfdSAndroid Build Coastguard Worker     return 1;
160*387f9dfdSAndroid Build Coastguard Worker   }
161*387f9dfdSAndroid Build Coastguard Worker 
162*387f9dfdSAndroid Build Coastguard Worker   // For older kernels without "Bpf: allow access into map value arrays"
163*387f9dfdSAndroid Build Coastguard Worker   // (https://lkml.org/lkml/2016/8/30/287) the bpf verifier will fail with an
164*387f9dfdSAndroid Build Coastguard Worker   // invalid memory access on `leaf->held_mutexes[i]` below. On newer kernels,
165*387f9dfdSAndroid Build Coastguard Worker   // we can avoid making this extra copy in `value` and use `leaf` directly.
166*387f9dfdSAndroid Build Coastguard Worker   struct thread_to_held_mutex_leaf_t value = {};
167*387f9dfdSAndroid Build Coastguard Worker   bpf_probe_read_user(&value, sizeof(struct thread_to_held_mutex_leaf_t), leaf);
168*387f9dfdSAndroid Build Coastguard Worker 
169*387f9dfdSAndroid Build Coastguard Worker   #pragma unroll
170*387f9dfdSAndroid Build Coastguard Worker   for (int i = 0; i < MAX_HELD_MUTEXES; ++i) {
171*387f9dfdSAndroid Build Coastguard Worker     // Find the current mutex (if it exists), and clear it.
172*387f9dfdSAndroid Build Coastguard Worker     // Note: Can't use `leaf->` in this if condition, see comment above.
173*387f9dfdSAndroid Build Coastguard Worker     if (value.held_mutexes[i].mutex == mutex) {
174*387f9dfdSAndroid Build Coastguard Worker       leaf->held_mutexes[i].mutex = 0;
175*387f9dfdSAndroid Build Coastguard Worker       leaf->held_mutexes[i].stack_id = 0;
176*387f9dfdSAndroid Build Coastguard Worker     }
177*387f9dfdSAndroid Build Coastguard Worker   }
178*387f9dfdSAndroid Build Coastguard Worker 
179*387f9dfdSAndroid Build Coastguard Worker   return 0;
180*387f9dfdSAndroid Build Coastguard Worker }
181*387f9dfdSAndroid Build Coastguard Worker 
182*387f9dfdSAndroid Build Coastguard Worker // Trace return from clone() syscall in the child thread (return value > 0).
trace_clone(struct pt_regs * ctx,unsigned long flags,void * child_stack,void * ptid,void * ctid,struct pt_regs * regs)183*387f9dfdSAndroid Build Coastguard Worker int trace_clone(struct pt_regs *ctx, unsigned long flags, void *child_stack,
184*387f9dfdSAndroid Build Coastguard Worker                 void *ptid, void *ctid, struct pt_regs *regs) {
185*387f9dfdSAndroid Build Coastguard Worker   u32 child_pid = PT_REGS_RC(ctx);
186*387f9dfdSAndroid Build Coastguard Worker   if (child_pid <= 0) {
187*387f9dfdSAndroid Build Coastguard Worker     return 1;
188*387f9dfdSAndroid Build Coastguard Worker   }
189*387f9dfdSAndroid Build Coastguard Worker 
190*387f9dfdSAndroid Build Coastguard Worker   struct thread_created_leaf_t thread_created_leaf = {};
191*387f9dfdSAndroid Build Coastguard Worker   thread_created_leaf.parent_pid = bpf_get_current_pid_tgid();
192*387f9dfdSAndroid Build Coastguard Worker   thread_created_leaf.stack_id =
193*387f9dfdSAndroid Build Coastguard Worker       stack_traces.get_stackid(ctx, BPF_F_USER_STACK);
194*387f9dfdSAndroid Build Coastguard Worker   bpf_get_current_comm(&thread_created_leaf.comm,
195*387f9dfdSAndroid Build Coastguard Worker                        sizeof(thread_created_leaf.comm));
196*387f9dfdSAndroid Build Coastguard Worker 
197*387f9dfdSAndroid Build Coastguard Worker   struct thread_created_leaf_t *insert_result =
198*387f9dfdSAndroid Build Coastguard Worker       thread_to_parent.lookup_or_try_init(&child_pid, &thread_created_leaf);
199*387f9dfdSAndroid Build Coastguard Worker   if (!insert_result) {
200*387f9dfdSAndroid Build Coastguard Worker     bpf_trace_printk(
201*387f9dfdSAndroid Build Coastguard Worker         "could not add thread_created_key, child: %d, parent: %d\n", child_pid,
202*387f9dfdSAndroid Build Coastguard Worker         thread_created_leaf.parent_pid);
203*387f9dfdSAndroid Build Coastguard Worker     return 1; // Could not insert, no more memory
204*387f9dfdSAndroid Build Coastguard Worker   }
205*387f9dfdSAndroid Build Coastguard Worker   return 0;
206*387f9dfdSAndroid Build Coastguard Worker }
207