xref: /aosp_15_r20/bionic/libc/bionic/pthread_create.cpp (revision 8d67ca893c1523eb926b9080dbe4e2ffd2a27ba1)
1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *  * Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  *  * Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in
12  *    the documentation and/or other materials provided with the
13  *    distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
18  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
19  * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
22  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
23  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
25  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <pthread.h>
30 
31 #include <errno.h>
32 #include <string.h>
33 #include <sys/auxv.h>
34 #include <sys/mman.h>
35 #include <sys/prctl.h>
36 #include <sys/random.h>
37 #include <unistd.h>
38 
39 #include "pthread_internal.h"
40 
41 #include <async_safe/log.h>
42 
43 #include "platform/bionic/macros.h"
44 #include "platform/bionic/mte.h"
45 #include "platform/bionic/page.h"
46 #include "private/ErrnoRestorer.h"
47 #include "private/ScopedRWLock.h"
48 #include "private/bionic_constants.h"
49 #include "private/bionic_defs.h"
50 #include "private/bionic_globals.h"
51 #include "private/bionic_ssp.h"
52 #include "private/bionic_systrace.h"
53 #include "private/bionic_tls.h"
54 
55 // x86 uses segment descriptors rather than a direct pointer to TLS.
56 #if defined(__i386__)
57 #include <asm/ldt.h>
58 void __init_user_desc(struct user_desc*, bool, void*);
59 #endif
60 
61 __attribute__((no_stack_protector))
__init_tcb_stack_guard(bionic_tcb * tcb)62 void __init_tcb_stack_guard(bionic_tcb* tcb) {
63   // GCC looks in the TLS for the stack guard on x86, so copy it there from our global.
64   tcb->tls_slot(TLS_SLOT_STACK_GUARD) = reinterpret_cast<void*>(__stack_chk_guard);
65 }
66 
__init_bionic_tls_ptrs(bionic_tcb * tcb,bionic_tls * tls)67 void __init_bionic_tls_ptrs(bionic_tcb* tcb, bionic_tls* tls) {
68   tcb->thread()->bionic_tcb = tcb;
69   tcb->thread()->bionic_tls = tls;
70   tcb->tls_slot(TLS_SLOT_BIONIC_TLS) = tls;
71 }
72 
73 // Allocate a temporary bionic_tls that the dynamic linker's main thread can
74 // use while it's loading the initial set of ELF modules.
__allocate_temp_bionic_tls()75 bionic_tls* __allocate_temp_bionic_tls() {
76   size_t allocation_size = __BIONIC_ALIGN(sizeof(bionic_tls), page_size());
77   void* allocation = mmap(nullptr, allocation_size,
78                           PROT_READ | PROT_WRITE,
79                           MAP_PRIVATE | MAP_ANONYMOUS,
80                           -1, 0);
81   if (allocation == MAP_FAILED) {
82     async_safe_fatal("failed to allocate bionic_tls: %m");
83   }
84   return static_cast<bionic_tls*>(allocation);
85 }
86 
__free_temp_bionic_tls(bionic_tls * tls)87 void __free_temp_bionic_tls(bionic_tls* tls) {
88   munmap(tls, __BIONIC_ALIGN(sizeof(bionic_tls), page_size()));
89 }
90 
__init_alternate_signal_stack(pthread_internal_t * thread)91 static void __init_alternate_signal_stack(pthread_internal_t* thread) {
92   // Create and set an alternate signal stack.
93   int prot = PROT_READ | PROT_WRITE;
94 #ifdef __aarch64__
95   if (atomic_load(&__libc_memtag_stack)) {
96     prot |= PROT_MTE;
97   }
98 #endif
99   void* stack_base = mmap(nullptr, SIGNAL_STACK_SIZE, prot, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
100   if (stack_base != MAP_FAILED) {
101     // Create a guard to catch stack overflows in signal handlers.
102     if (mprotect(stack_base, PTHREAD_GUARD_SIZE, PROT_NONE) == -1) {
103       munmap(stack_base, SIGNAL_STACK_SIZE);
104       return;
105     }
106     stack_t ss;
107     ss.ss_sp = reinterpret_cast<uint8_t*>(stack_base) + PTHREAD_GUARD_SIZE;
108     ss.ss_size = SIGNAL_STACK_SIZE - PTHREAD_GUARD_SIZE;
109     ss.ss_flags = 0;
110     sigaltstack(&ss, nullptr);
111     thread->alternate_signal_stack = stack_base;
112 
113     // We can only use const static allocated string for mapped region name, as Android kernel
114     // uses the string pointer directly when dumping /proc/pid/maps.
115     prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, ss.ss_sp, ss.ss_size, "thread signal stack");
116   }
117 }
118 
__init_shadow_call_stack(pthread_internal_t * thread __unused)119 static void __init_shadow_call_stack(pthread_internal_t* thread __unused) {
120 #if defined(__aarch64__) || defined(__riscv)
121   // Allocate the shadow call stack and its guard region.
122   char* scs_guard_region = reinterpret_cast<char*>(
123       mmap(nullptr, SCS_GUARD_REGION_SIZE, PROT_NONE, MAP_PRIVATE | MAP_ANON, -1, 0));
124   if (scs_guard_region == MAP_FAILED) {
125     async_safe_fatal("failed to allocate shadow stack: %m");
126   }
127   thread->shadow_call_stack_guard_region = scs_guard_region;
128 
129   // Align the address to SCS_SIZE so that we only need to store the lower log2(SCS_SIZE) bits
130   // in jmp_buf. See the SCS commentary in pthread_internal.h for more detail.
131   char* scs_aligned_guard_region =
132       reinterpret_cast<char*>(align_up(reinterpret_cast<uintptr_t>(scs_guard_region), SCS_SIZE));
133 
134   // We need to ensure that [scs_offset,scs_offset+SCS_SIZE) is in the guard region and that there
135   // is at least one unmapped page after the shadow call stack (to catch stack overflows). We can't
136   // use arc4random_uniform in init because /dev/urandom might not have been created yet.
137   size_t scs_offset =
138       (getpid() == 1) ? 0 : (arc4random_uniform(SCS_GUARD_REGION_SIZE / SCS_SIZE - 1) * SCS_SIZE);
139 
140   // Make the stack read-write, and store its address in the register we're using as the shadow
141   // stack pointer. This is deliberately the only place where the address is stored.
142   char* scs = scs_aligned_guard_region + scs_offset;
143   if (mprotect(scs, SCS_SIZE, PROT_READ | PROT_WRITE) == -1) {
144     async_safe_fatal("shadow stack read-write mprotect(%p, %d) failed: %m", scs, SCS_SIZE);
145   }
146 #if defined(__aarch64__)
147   __asm__ __volatile__("mov x18, %0" ::"r"(scs));
148 #elif defined(__riscv)
149   __asm__ __volatile__("mv x3, %0" ::"r"(scs));
150 #endif
151 #endif
152 }
153 
__init_additional_stacks(pthread_internal_t * thread)154 void __init_additional_stacks(pthread_internal_t* thread) {
155   __init_alternate_signal_stack(thread);
156   __init_shadow_call_stack(thread);
157 }
158 
__init_thread(pthread_internal_t * thread)159 int __init_thread(pthread_internal_t* thread) {
160   thread->cleanup_stack = nullptr;
161 
162   ThreadJoinState state = THREAD_NOT_JOINED;
163   if (__predict_false((thread->attr.flags & PTHREAD_ATTR_FLAG_DETACHED) != 0)) {
164     state = THREAD_DETACHED;
165   }
166   atomic_store_explicit(&thread->join_state, state, memory_order_relaxed);
167 
168   // Set the scheduling policy/priority of the thread if necessary.
169   bool need_set = true;
170   int policy;
171   sched_param param;
172   if ((thread->attr.flags & PTHREAD_ATTR_FLAG_INHERIT) != 0) {
173     // Unless the parent has SCHED_RESET_ON_FORK set, we've already inherited from the parent.
174     policy = sched_getscheduler(0);
175     need_set = ((policy & SCHED_RESET_ON_FORK) != 0);
176     if (need_set) {
177       if (policy == -1) {
178         async_safe_format_log(ANDROID_LOG_WARN, "libc",
179                               "pthread_create sched_getscheduler failed: %m");
180         return errno;
181       }
182       if (sched_getparam(0, &param) == -1) {
183         async_safe_format_log(ANDROID_LOG_WARN, "libc", "pthread_create sched_getparam failed: %m");
184         return errno;
185       }
186     }
187   } else {
188     policy = thread->attr.sched_policy;
189     param.sched_priority = thread->attr.sched_priority;
190   }
191   // Backwards compatibility: before P, Android didn't have pthread_attr_setinheritsched,
192   // and our behavior was neither of the POSIX behaviors.
193   if ((thread->attr.flags & (PTHREAD_ATTR_FLAG_INHERIT|PTHREAD_ATTR_FLAG_EXPLICIT)) == 0) {
194     need_set = (thread->attr.sched_policy != SCHED_NORMAL);
195   }
196   if (need_set) {
197     if (sched_setscheduler(thread->tid, policy, &param) == -1) {
198       async_safe_format_log(ANDROID_LOG_WARN, "libc",
199                             "pthread_create sched_setscheduler(%d, {%d}) call failed: %m", policy,
200                             param.sched_priority);
201 #if defined(__LP64__)
202       // For backwards compatibility reasons, we only report failures on 64-bit devices.
203       return errno;
204 #endif
205     }
206   }
207 
208   return 0;
209 }
210 
211 // Allocate a thread's primary mapping. This mapping includes static TLS and
212 // optionally a stack. Static TLS includes ELF TLS segments and the bionic_tls
213 // struct.
214 //
215 // The stack_guard_size must be a multiple of the page_size().
__allocate_thread_mapping(size_t stack_size,size_t stack_guard_size)216 ThreadMapping __allocate_thread_mapping(size_t stack_size, size_t stack_guard_size) {
217   const StaticTlsLayout& layout = __libc_shared_globals()->static_tls_layout;
218 
219   // Allocate in order: stack guard, stack, static TLS, guard page.
220   size_t mmap_size;
221   if (__builtin_add_overflow(stack_size, stack_guard_size, &mmap_size)) return {};
222   if (__builtin_add_overflow(mmap_size, layout.size(), &mmap_size)) return {};
223   if (__builtin_add_overflow(mmap_size, PTHREAD_GUARD_SIZE, &mmap_size)) return {};
224 
225   // Align the result to a page size.
226   const size_t unaligned_size = mmap_size;
227   mmap_size = __BIONIC_ALIGN(mmap_size, page_size());
228   if (mmap_size < unaligned_size) return {};
229 
230   // Create a new private anonymous map. Make the entire mapping PROT_NONE, then carve out a
231   // read+write area in the middle.
232   const int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
233   char* const space = static_cast<char*>(mmap(nullptr, mmap_size, PROT_NONE, flags, -1, 0));
234   if (space == MAP_FAILED) {
235     async_safe_format_log(ANDROID_LOG_WARN, "libc",
236                           "pthread_create failed: couldn't allocate %zu-bytes mapped space: %m",
237                           mmap_size);
238     return {};
239   }
240   const size_t writable_size = mmap_size - stack_guard_size - PTHREAD_GUARD_SIZE;
241   int prot = PROT_READ | PROT_WRITE;
242   const char* prot_str = "R+W";
243 #ifdef __aarch64__
244   if (atomic_load(&__libc_memtag_stack)) {
245     prot |= PROT_MTE;
246     prot_str = "R+W+MTE";
247   }
248 #endif
249   if (mprotect(space + stack_guard_size, writable_size, prot) != 0) {
250     async_safe_format_log(
251         ANDROID_LOG_WARN, "libc",
252         "pthread_create failed: couldn't mprotect %s %zu-byte thread mapping region: %m", prot_str,
253         writable_size);
254     munmap(space, mmap_size);
255     return {};
256   }
257 
258   ThreadMapping result = {};
259   result.mmap_base = space;
260   result.mmap_size = mmap_size;
261   result.mmap_base_unguarded = space + stack_guard_size;
262   result.mmap_size_unguarded = mmap_size - stack_guard_size - PTHREAD_GUARD_SIZE;
263   result.static_tls = space + mmap_size - PTHREAD_GUARD_SIZE - layout.size();
264   result.stack_base = space;
265   result.stack_top = result.static_tls;
266   return result;
267 }
268 
__allocate_thread(pthread_attr_t * attr,bionic_tcb ** tcbp,void ** child_stack)269 static int __allocate_thread(pthread_attr_t* attr, bionic_tcb** tcbp, void** child_stack) {
270   ThreadMapping mapping;
271   char* stack_top;
272   bool stack_clean = false;
273 
274   if (attr->stack_base == nullptr) {
275     // The caller didn't provide a stack, so allocate one.
276 
277     // Make sure the guard size is a multiple of page_size().
278     const size_t unaligned_guard_size = attr->guard_size;
279     attr->guard_size = __BIONIC_ALIGN(attr->guard_size, page_size());
280     if (attr->guard_size < unaligned_guard_size) return EAGAIN;
281 
282     mapping = __allocate_thread_mapping(attr->stack_size, attr->guard_size);
283     if (mapping.mmap_base == nullptr) return EAGAIN;
284 
285     stack_top = mapping.stack_top;
286     attr->stack_base = mapping.stack_base;
287     stack_clean = true;
288   } else {
289     mapping = __allocate_thread_mapping(0, PTHREAD_GUARD_SIZE);
290     if (mapping.mmap_base == nullptr) return EAGAIN;
291 
292     stack_top = static_cast<char*>(attr->stack_base) + attr->stack_size;
293   }
294 
295   // Carve out space from the stack for the thread's pthread_internal_t. This
296   // memory isn't counted in pthread_attr_getstacksize.
297 
298   // To safely access the pthread_internal_t and thread stack, we need to find a 16-byte aligned boundary.
299   stack_top = align_down(stack_top - sizeof(pthread_internal_t), 16);
300 
301   pthread_internal_t* thread = reinterpret_cast<pthread_internal_t*>(stack_top);
302   if (!stack_clean) {
303     // If thread was not allocated by mmap(), it may not have been cleared to zero.
304     // So assume the worst and zero it.
305     memset(thread, 0, sizeof(pthread_internal_t));
306   }
307 
308   // Locate static TLS structures within the mapped region.
309   const StaticTlsLayout& layout = __libc_shared_globals()->static_tls_layout;
310   auto tcb = reinterpret_cast<bionic_tcb*>(mapping.static_tls + layout.offset_bionic_tcb());
311   auto tls = reinterpret_cast<bionic_tls*>(mapping.static_tls + layout.offset_bionic_tls());
312 
313   // Initialize TLS memory.
314   __init_static_tls(mapping.static_tls);
315   __init_tcb(tcb, thread);
316   __init_tcb_dtv(tcb);
317   __init_tcb_stack_guard(tcb);
318   __init_bionic_tls_ptrs(tcb, tls);
319 
320   attr->stack_size = stack_top - static_cast<char*>(attr->stack_base);
321   thread->attr = *attr;
322   thread->mmap_base = mapping.mmap_base;
323   thread->mmap_size = mapping.mmap_size;
324   thread->mmap_base_unguarded = mapping.mmap_base_unguarded;
325   thread->mmap_size_unguarded = mapping.mmap_size_unguarded;
326   thread->stack_top = reinterpret_cast<uintptr_t>(stack_top);
327 
328   *tcbp = tcb;
329   *child_stack = stack_top;
330   return 0;
331 }
332 
__set_stack_and_tls_vma_name(bool is_main_thread)333 void __set_stack_and_tls_vma_name(bool is_main_thread) {
334   // Name the thread's stack-and-tls area to help with debugging. This mapped area also includes
335   // static TLS data, which is typically a few pages (e.g. bionic_tls).
336   pthread_internal_t* thread = __get_thread();
337   const char* name;
338   if (is_main_thread) {
339     name = "stack_and_tls:main";
340   } else {
341     // The kernel doesn't copy the name string, but this variable will last at least as long as the
342     // mapped area. The mapped area's VMAs are unmapped with a single call to munmap.
343     auto& name_buffer = thread->vma_name_buffer;
344     static_assert(arraysize(name_buffer) >= arraysize("stack_and_tls:") + 11 + 1);
345     async_safe_format_buffer(name_buffer, arraysize(name_buffer), "stack_and_tls:%d", thread->tid);
346     name = name_buffer;
347   }
348   prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, thread->mmap_base_unguarded, thread->mmap_size_unguarded,
349         name);
350 }
351 
352 extern "C" int __rt_sigprocmask(int, const sigset64_t*, sigset64_t*, size_t);
353 
// Entry point executed by every thread that pthread_create clones.
//
// `arg` is the new thread's pthread_internal_t*, which pthread_create passes to clone(). The
// function waits on the startup handshake lock, names the stack/TLS mapping, sets up the
// additional stacks, installs the signal mask stored in start_mask, and then runs the thread's
// start_routine, handing its result to pthread_exit.
354 __attribute__((no_sanitize("hwaddress", "memtag")))
355 #if defined(__aarch64__)
356 // This function doesn't return, but it does appear in stack traces. Avoid using return PAC in this
357 // function because we may end up resetting IA, which may confuse unwinders due to mismatching keys.
358 __attribute__((target("branch-protection=bti")))
359 #endif
360 static int
__pthread_start(void * arg)361 __pthread_start(void* arg) {
362   pthread_internal_t* thread = reinterpret_cast<pthread_internal_t*>(arg);
363 #if defined(__aarch64__)
// pthread_create sets should_allocate_stack_mte_ringbuffer before cloning; when it is set, the
// ring buffer allocated here is published through the TLS_SLOT_STACK_MTE slot.
364   if (thread->should_allocate_stack_mte_ringbuffer) {
365     thread->bionic_tcb->tls_slot(TLS_SLOT_STACK_MTE) = __allocate_stack_mte_ringbuffer(0, thread);
366   }
367 #endif
368   __hwasan_thread_enter();
369 
370   // Wait for our creating thread to release us. This lets it have time to
371   // notify gdb about this thread before we start doing anything.
372   // This also provides the memory barrier needed to ensure that all memory
373   // accesses previously made by the creating thread are visible to us.
// pthread_create takes this lock before clone() and unlocks it once it has finished with the new
// thread. This function never unlocks it again.
374   thread->startup_handshake_lock.lock();
375 
376   __set_stack_and_tls_vma_name(false);
377   __init_additional_stacks(thread);
// pthread_create blocks all signals around clone() and saves its previous mask in start_mask;
// install that saved mask for this thread.
378   __rt_sigprocmask(SIG_SETMASK, &thread->start_mask, nullptr, sizeof(thread->start_mask));
379 #if defined(__aarch64__)
380   // Chrome's sandbox prevents this prctl, so only reset IA if the target SDK level is high enough.
381   // Furthermore, processes loaded from vendor partitions may have their own sandboxes that would
382   // reject the prctl. Because no devices launched with PAC enabled before API level 31, we can
383   // avoid issues on upgrading devices by checking for PAC support before issuing the prctl.
384   static const bool pac_supported = getauxval(AT_HWCAP) & HWCAP_PACA;
385   if (pac_supported && android_get_application_target_sdk_version() >= 31) {
386     prctl(PR_PAC_RESET_KEYS, PR_PAC_APIAKEY, 0, 0, 0);
387   }
388 #endif
389 
// start_routine may have been swapped for __do_nothing by pthread_create if thread setup failed.
390   void* result = thread->start_routine(thread->start_routine_arg);
391   pthread_exit(result);
392 
// NOTE(review): presumably never reached, since pthread_exit is not expected to return; the
// statement is needed for the int return type.
393   return 0;
394 }
395 
// Placeholder start routine that ignores its argument and returns nullptr.
//
// pthread_create substitutes it for the user's start routine when a thread has already been
// created but will not run user code, so the regular thread teardown frees the thread's
// resources.
static void* __do_nothing(void* /* unused */) {
  return nullptr;
}
402 
// Reader/writer lock around thread creation. pthread_create holds it as a reader from just before
// it records whether the new thread needs a stack MTE ring buffer until it returns.
// NOTE(review): no writer is visible in this file; per the comment in pthread_create, the lock
// avoids racing with __pthread_internal_remap_stack_with_mte — confirm at that call site.
403 pthread_rwlock_t g_thread_creation_lock = PTHREAD_RWLOCK_INITIALIZER;
404 
405 __BIONIC_WEAK_FOR_NATIVE_BRIDGE
pthread_create(pthread_t * thread_out,pthread_attr_t const * attr,void * (* start_routine)(void *),void * arg)406 int pthread_create(pthread_t* thread_out, pthread_attr_t const* attr,
407                    void* (*start_routine)(void*), void* arg) {
408   ErrnoRestorer errno_restorer;
409 
410   pthread_attr_t thread_attr;
411   ScopedTrace trace("pthread_create");
412   if (attr == nullptr) {
413     pthread_attr_init(&thread_attr);
414   } else {
415     thread_attr = *attr;
416     attr = nullptr; // Prevent misuse below.
417   }
418 
419   bionic_tcb* tcb = nullptr;
420   void* child_stack = nullptr;
421   int result = __allocate_thread(&thread_attr, &tcb, &child_stack);
422   if (result != 0) {
423     return result;
424   }
425 
426   pthread_internal_t* thread = tcb->thread();
427 
428   // Create a lock for the thread to wait on once it starts so we can keep
429   // it from doing anything until after we notify the debugger about it
430   //
431   // This also provides the memory barrier we need to ensure that all
432   // memory accesses previously performed by this thread are visible to
433   // the new thread.
434   thread->startup_handshake_lock.init(false);
435   thread->startup_handshake_lock.lock();
436 
437   thread->start_routine = start_routine;
438   thread->start_routine_arg = arg;
439 
440   thread->set_cached_pid(getpid());
441 
442   int flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM |
443       CLONE_SETTLS | CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID;
444   void* tls = &tcb->tls_slot(0);
445 #if defined(__i386__)
446   // On x86 (but not x86-64), CLONE_SETTLS takes a pointer to a struct user_desc rather than
447   // a pointer to the TLS itself.
448   user_desc tls_descriptor;
449   __init_user_desc(&tls_descriptor, false, tls);
450   tls = &tls_descriptor;
451 #endif
452 
453   ScopedReadLock locker(&g_thread_creation_lock);
454 
455 // This has to be done under g_thread_creation_lock or g_thread_list_lock to avoid racing with
456 // __pthread_internal_remap_stack_with_mte.
457 #ifdef __aarch64__
458   thread->should_allocate_stack_mte_ringbuffer = __libc_memtag_stack_abi;
459 #else
460   thread->should_allocate_stack_mte_ringbuffer = false;
461 #endif
462 
463   sigset64_t block_all_mask;
464   sigfillset64(&block_all_mask);
465   __rt_sigprocmask(SIG_SETMASK, &block_all_mask, &thread->start_mask, sizeof(thread->start_mask));
466   int rc = clone(__pthread_start, child_stack, flags, thread, &(thread->tid), tls, &(thread->tid));
467   __rt_sigprocmask(SIG_SETMASK, &thread->start_mask, nullptr, sizeof(thread->start_mask));
468   if (rc == -1) {
469     int clone_errno = errno;
470     // We don't have to unlock the mutex at all because clone(2) failed so there's no child waiting to
471     // be unblocked, but we're about to unmap the memory the mutex is stored in, so this serves as a
472     // reminder that you can't rewrite this function to use a ScopedPthreadMutexLocker.
473     thread->startup_handshake_lock.unlock();
474     if (thread->mmap_size != 0) {
475       munmap(thread->mmap_base, thread->mmap_size);
476     }
477     async_safe_format_log(ANDROID_LOG_WARN, "libc", "pthread_create failed: clone failed: %m");
478     return clone_errno;
479   }
480 
481   int init_errno = __init_thread(thread);
482   if (init_errno != 0) {
483     // Mark the thread detached and replace its start_routine with a no-op.
484     // Letting the thread run is the easiest way to clean up its resources.
485     atomic_store(&thread->join_state, THREAD_DETACHED);
486     __pthread_internal_add(thread);
487     thread->start_routine = __do_nothing;
488     thread->startup_handshake_lock.unlock();
489     return init_errno;
490   }
491 
492   // Publish the pthread_t and unlock the mutex to let the new thread start running.
493   *thread_out = __pthread_internal_add(thread);
494   thread->startup_handshake_lock.unlock();
495 
496   return 0;
497 }
498