xref: /aosp_15_r20/system/core/debuggerd/handler/debuggerd_fallback.cpp (revision 00c7fec1bb09f3284aad6a6f96d2f63dfc3650ad)
/*
 * Copyright 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <dirent.h>
#include <fcntl.h>
#include <poll.h>
#include <pthread.h>
#include <stddef.h>
#include <sys/ucontext.h>
#include <syscall.h>
#include <unistd.h>

#include <atomic>
#include <memory>
#include <mutex>

#include <android-base/file.h>
#include <android-base/unique_fd.h>
#include <async_safe/log.h>
#include <bionic/reserved_signals.h>
#include <unwindstack/AndroidUnwinder.h>
#include <unwindstack/Memory.h>
#include <unwindstack/Regs.h>

#include "debuggerd/handler.h"
#include "handler/fallback.h"
#include "tombstoned/tombstoned.h"
#include "util.h"

#include "libdebuggerd/backtrace.h"
#include "libdebuggerd/tombstone.h"

using android::base::unique_fd;

extern "C" bool __linker_enable_fallback_allocator();
extern "C" void __linker_disable_fallback_allocator();

// This file implements a fallback path for processes that do not allow the
// normal fork and exec of crash_dump to handle crashes/unwinds.
// The issue is that all of this happens from within a signal handler, which
// can cause problems since this code uses the linker allocator, which is not
// thread safe. To avoid any allocation problems, the code switches the
// current thread to a fallback allocator in the linker that is used only by
// this thread. All of the libunwindstack code allocates using the C++ STL,
// but that should be fine since the code runs in the linker and therefore
// uses the fallback allocator.

// This method can still fail if virtual address space is exhausted in a
// 32 bit process, or if mmap fails because the process has hit the maximum
// number of maps (65535 total) in a 64 bit process.

// Class to handle automatically turning on and off the fallback allocator.
class ScopedUseFallbackAllocator {
 public:
  ScopedUseFallbackAllocator() { Enable(); }

  ~ScopedUseFallbackAllocator() { Disable(); }

  bool Enable() {
    if (!enabled_) {
      enabled_ = __linker_enable_fallback_allocator();
      if (!enabled_) {
        async_safe_format_log(ANDROID_LOG_ERROR, "libc",
                              "Unable to enable fallback allocator, already in use.");
      }
    }
    return enabled_;
  }

  void Disable() {
    if (enabled_) {
      __linker_disable_fallback_allocator();
      enabled_ = false;
    }
  }

  bool enabled() { return enabled_; }

 private:
  bool enabled_ = false;
};

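// Unwind the calling thread using the register state captured in the given
// ucontext and write its formatted backtrace to output_fd.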
static void debuggerd_fallback_trace(int output_fd, ucontext_t* ucontext) {
  std::unique_ptr<unwindstack::Regs> regs;

  ThreadInfo thread;
  thread.pid = getpid();
  thread.tid = gettid();
  thread.thread_name = get_thread_name(gettid());
  thread.registers.reset(
      unwindstack::Regs::CreateFromUcontext(unwindstack::Regs::CurrentArch(), ucontext));

  // Do not use the thread cache here because it will call pthread_key_create
  // which doesn't work in linker code. See b/189803009.
  // Use a normal cached object because the thread is stopped, and there
  // is no chance of data changing between reads.
  auto process_memory = unwindstack::Memory::CreateProcessMemoryCached(getpid());
  // TODO: Create this once and store it in a global?
  unwindstack::AndroidLocalUnwinder unwinder(process_memory);
  dump_backtrace_thread(output_fd, &unwinder, thread);
}

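// Forward a sibling thread's dump from src_fd (the read end of the pipe given
// to that thread) to dst_fd (the tombstoned output fd). The sibling first
// writes back its tid as a handshake; everything after that is copied through
// until the write end is closed. Returns false if nothing arrives within a
// second or the handshake tid does not match expected_tid.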
static bool forward_output(int src_fd, int dst_fd, pid_t expected_tid) {
  // Make sure the thread actually got the signal.
  struct pollfd pfd = {
    .fd = src_fd, .events = POLLIN,
  };

  // Wait for up to a second for output to start flowing.
  if (poll(&pfd, 1, 1000) != 1) {
    return false;
  }

  pid_t tid;
  if (TEMP_FAILURE_RETRY(read(src_fd, &tid, sizeof(tid))) != sizeof(tid)) {
    async_safe_format_log(ANDROID_LOG_ERROR, "libc", "failed to read tid");
    return false;
  }

  if (tid != expected_tid) {
    async_safe_format_log(ANDROID_LOG_ERROR, "libc", "received tid %d, expected %d", tid,
                          expected_tid);
    return false;
  }

  while (true) {
    char buf[512];
    ssize_t rc = TEMP_FAILURE_RETRY(read(src_fd, buf, sizeof(buf)));
    if (rc == 0) {
      return true;
    } else if (rc < 0) {
      return false;
    }

    if (!android::base::WriteFully(dst_fd, buf, rc)) {
      // We failed to write to tombstoned, but there's not much we can do.
      // Keep reading from src_fd to keep things going.
      continue;
    }
  }
}

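// A (tid, fd) pair is packed into a single uint64_t so that ownership of a
// sibling's output fd can be published and reclaimed through one
// std::atomic<uint64_t> via compare-and-exchange, avoiding any locking
// between the coordinating thread and the thread being dumped.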
struct __attribute__((__packed__)) packed_thread_output {
  int32_t tid;
  int32_t fd;
};

static uint64_t pack_thread_fd(pid_t tid, int fd) {
  packed_thread_output packed = {.tid = tid, .fd = fd};
  uint64_t result;
  static_assert(sizeof(packed) == sizeof(result));
  memcpy(&result, &packed, sizeof(packed));
  return result;
}

static std::pair<pid_t, int> unpack_thread_fd(uint64_t value) {
  packed_thread_output result;
  memcpy(&result, &value, sizeof(value));
  return std::make_pair(result.tid, result.fd);
}

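// Handles BIONIC_SIGNAL_DEBUGGER backtrace requests. The handler runs in two
// roles: if the signal carries kDebuggerdFallbackSivalPtrRequestDump, this
// thread is a sibling that was asked to dump itself to the fd published in
// trace_output; otherwise this thread received the original request and
// coordinates the dump: it connects to tombstoned, dumps its own stack, then
// signals each sibling in turn and forwards that sibling's output.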
static void trace_handler(siginfo_t* info, ucontext_t* ucontext) {
  ScopedUseFallbackAllocator allocator;
  if (!allocator.enabled()) {
    return;
  }

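  // Packed (tid, fd) of the sibling currently being asked to dump, or
  // (-1, -1) when no dump is in flight.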
  static std::atomic<uint64_t> trace_output(pack_thread_fd(-1, -1));

  if (info->si_value.sival_ptr == kDebuggerdFallbackSivalPtrRequestDump) {
    // Asked to dump by the original signal recipient.
    uint64_t val = trace_output.load();
    auto [tid, fd] = unpack_thread_fd(val);
    if (tid != gettid()) {
      // We received some other thread's info request?
      async_safe_format_log(ANDROID_LOG_ERROR, "libc",
                            "thread %d received output fd for thread %d?", gettid(), tid);
      return;
    }

    if (!trace_output.compare_exchange_strong(val, pack_thread_fd(-1, -1))) {
      // Presumably, the timeout in forward_output expired, and the main thread moved on.
      // If this happened, the main thread closed our fd for us, so just return.
      async_safe_format_log(ANDROID_LOG_ERROR, "libc", "cmpxchg for thread %d failed", gettid());
      return;
    }

    // Write our tid to the output fd to let the main thread know that we're working.
    if (TEMP_FAILURE_RETRY(write(fd, &tid, sizeof(tid))) == sizeof(tid)) {
      debuggerd_fallback_trace(fd, ucontext);
    } else {
      async_safe_format_log(ANDROID_LOG_ERROR, "libc", "failed to write to output fd");
    }

    // Stop using the fallback allocator before the close. This will prevent
    // a race condition where the thread backtracing all of the threads tries
    // to re-acquire the fallback allocator.
    allocator.Disable();

    close(fd);
    return;
  }

  // Only allow one thread to perform a trace at a time.
  static std::mutex trace_mutex;
  if (!trace_mutex.try_lock()) {
    async_safe_format_log(ANDROID_LOG_INFO, "libc", "trace lock failed");
    return;
  }

  std::lock_guard<std::mutex> scoped_lock(trace_mutex, std::adopt_lock);

  // Fetch output fd from tombstoned.
  unique_fd tombstone_socket, output_fd;
  if (!tombstoned_connect(getpid(), &tombstone_socket, &output_fd, nullptr,
                          kDebuggerdNativeBacktrace)) {
    async_safe_format_log(ANDROID_LOG_ERROR, "libc",
                          "missing crash_dump_fallback() in selinux policy?");
    return;
  }

  dump_backtrace_header(output_fd.get());

  // Dump our own stack.
  debuggerd_fallback_trace(output_fd.get(), ucontext);

  // Send a signal to all of our siblings, asking them to dump their stack.
  pid_t current_tid = gettid();
  if (!iterate_tids(current_tid, [&allocator, &output_fd, &current_tid](pid_t tid) {
        if (current_tid == tid) {
          return;
        }

        if (!allocator.enabled()) {
          return;
        }

        // Use a pipe, to be able to detect situations where the thread gracefully exits before
        // receiving our signal.
        unique_fd pipe_read, pipe_write;
        if (!Pipe(&pipe_read, &pipe_write)) {
          async_safe_format_log(ANDROID_LOG_ERROR, "libc", "failed to create pipe: %s",
                                strerror(errno));
          return;
        }

        uint64_t expected = pack_thread_fd(-1, -1);
        int sent_fd = pipe_write.release();
        if (!trace_output.compare_exchange_strong(expected, pack_thread_fd(tid, sent_fd))) {
          auto [tid, fd] = unpack_thread_fd(expected);
          async_safe_format_log(ANDROID_LOG_ERROR, "libc",
                                "thread %d is already outputting to fd %d?", tid, fd);
          close(sent_fd);
          return;
        }

        // Disable our use of the fallback allocator while the target thread
        // is getting the backtrace.
        allocator.Disable();

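        // Queue the dump request directly to the target thread with
        // rt_tgsigqueueinfo so the sival_ptr marking this as a dump request
        // is delivered along with the signal.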
        siginfo_t siginfo = {};
        siginfo.si_code = SI_QUEUE;
        siginfo.si_value.sival_ptr = kDebuggerdFallbackSivalPtrRequestDump;
        siginfo.si_pid = getpid();
        siginfo.si_uid = getuid();

        if (syscall(__NR_rt_tgsigqueueinfo, getpid(), tid, BIONIC_SIGNAL_DEBUGGER, &siginfo) == 0) {
          if (!forward_output(pipe_read.get(), output_fd.get(), tid)) {
            async_safe_format_log(ANDROID_LOG_ERROR, "libc",
                                  "timeout expired while waiting for thread %d to dump", tid);
          }
        } else {
          async_safe_format_log(ANDROID_LOG_ERROR, "libc", "failed to send trace signal to %d: %s",
                                tid, strerror(errno));
        }

        // The thread should be finished now, so try and re-enable the fallback allocator.
        if (!allocator.Enable()) {
          return;
        }

        // Regardless of whether the poll succeeds, check to see if the thread took fd ownership.
        uint64_t post_wait = trace_output.exchange(pack_thread_fd(-1, -1));
        if (post_wait != pack_thread_fd(-1, -1)) {
          auto [tid, fd] = unpack_thread_fd(post_wait);
          if (fd != -1) {
            async_safe_format_log(ANDROID_LOG_ERROR, "libc", "closing fd %d for thread %d", fd, tid);
            close(fd);
          }
        }
      })) {
    async_safe_format_log(ANDROID_LOG_ERROR, "libc", "failed to open /proc/%d/task: %s",
                          current_tid, strerror(errno));
  }

  if (allocator.enabled()) {
    dump_backtrace_footer(output_fd.get());
  }

  tombstoned_notify_completion(tombstone_socket.get());
}

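// Handles a fatal signal by engraving a tombstone for the crashing thread.
// Tombstones can be requested without a real crash, so this may run more than
// once in the life of a process.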
static void crash_handler(siginfo_t* info, ucontext_t* ucontext, void* abort_message) {
  // Only allow one thread to handle a crash at a time (this can happen multiple times without
  // exit, since tombstones can be requested without a real crash happening.)
  static std::recursive_mutex crash_mutex;
  static int lock_count;

  crash_mutex.lock();
  if (lock_count++ > 0) {
    async_safe_format_log(ANDROID_LOG_ERROR, "libc", "recursed signal handler call, aborting");
    signal(SIGABRT, SIG_DFL);
    raise(SIGABRT);
    sigset_t sigset;
    sigemptyset(&sigset);
    sigaddset(&sigset, SIGABRT);
    sigprocmask(SIG_UNBLOCK, &sigset, nullptr);

    // Just in case...
    async_safe_format_log(ANDROID_LOG_ERROR, "libc", "abort didn't exit, exiting");
    _exit(1);
  }

  unique_fd tombstone_socket, output_fd, proto_fd;
  bool tombstoned_connected = tombstoned_connect(getpid(), &tombstone_socket, &output_fd, &proto_fd,
                                                 kDebuggerdTombstoneProto);
  {
    ScopedUseFallbackAllocator allocator;
    if (allocator.enabled()) {
      engrave_tombstone_ucontext(output_fd.get(), proto_fd.get(),
                                 reinterpret_cast<uintptr_t>(abort_message), info, ucontext);
    }
  }
  if (tombstoned_connected) {
    tombstoned_notify_completion(tombstone_socket.get());
  }

  --lock_count;
  crash_mutex.unlock();
}

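// Entry point installed as the fallback signal handler. A BIONIC_SIGNAL_DEBUGGER
// signal with a non-null sival_ptr is a backtrace request and goes to
// trace_handler; everything else is treated as a crash and goes to crash_handler.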
extern "C" void debuggerd_fallback_handler(siginfo_t* info, ucontext_t* ucontext,
                                           void* abort_message) {
  if (info->si_signo == BIONIC_SIGNAL_DEBUGGER && info->si_value.sival_ptr != nullptr) {
    return trace_handler(info, ucontext);
  } else {
    return crash_handler(info, ucontext, abort_message);
  }
}