xref: /aosp_15_r20/external/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.h (revision 7c3d14c8b49c529e04be81a3ce6f5cc23712e4c6)
//===-- sanitizer_symbolizer.h ----------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Symbolizer is used by sanitizers to map instruction address to a location in
// source code at run-time. Symbolizer either uses __sanitizer_symbolize_*
// defined in the program, or (if they are missing) tries to find and
// launch "llvm-symbolizer" commandline tool in a separate process and
// communicate with it.
//
// Generally we should try to avoid calling system library functions during
// symbolization (and use their replacements from sanitizer_libc.h instead).
//===----------------------------------------------------------------------===//
19*7c3d14c8STreehugger Robot #ifndef SANITIZER_SYMBOLIZER_H
20*7c3d14c8STreehugger Robot #define SANITIZER_SYMBOLIZER_H
21*7c3d14c8STreehugger Robot 
22*7c3d14c8STreehugger Robot #include "sanitizer_common.h"
23*7c3d14c8STreehugger Robot #include "sanitizer_mutex.h"
24*7c3d14c8STreehugger Robot 
25*7c3d14c8STreehugger Robot namespace __sanitizer {
26*7c3d14c8STreehugger Robot 
27*7c3d14c8STreehugger Robot struct AddressInfo {
28*7c3d14c8STreehugger Robot   // Owns all the string members. Storage for them is
29*7c3d14c8STreehugger Robot   // (de)allocated using sanitizer internal allocator.
30*7c3d14c8STreehugger Robot   uptr address;
31*7c3d14c8STreehugger Robot 
32*7c3d14c8STreehugger Robot   char *module;
33*7c3d14c8STreehugger Robot   uptr module_offset;
34*7c3d14c8STreehugger Robot 
35*7c3d14c8STreehugger Robot   static const uptr kUnknown = ~(uptr)0;
36*7c3d14c8STreehugger Robot   char *function;
37*7c3d14c8STreehugger Robot   uptr function_offset;
38*7c3d14c8STreehugger Robot 
39*7c3d14c8STreehugger Robot   char *file;
40*7c3d14c8STreehugger Robot   int line;
41*7c3d14c8STreehugger Robot   int column;
42*7c3d14c8STreehugger Robot 
43*7c3d14c8STreehugger Robot   AddressInfo();
44*7c3d14c8STreehugger Robot   // Deletes all strings and resets all fields.
45*7c3d14c8STreehugger Robot   void Clear();
46*7c3d14c8STreehugger Robot   void FillModuleInfo(const char *mod_name, uptr mod_offset);
47*7c3d14c8STreehugger Robot };
48*7c3d14c8STreehugger Robot 
49*7c3d14c8STreehugger Robot // Linked list of symbolized frames (each frame is described by AddressInfo).
50*7c3d14c8STreehugger Robot struct SymbolizedStack {
51*7c3d14c8STreehugger Robot   SymbolizedStack *next;
52*7c3d14c8STreehugger Robot   AddressInfo info;
53*7c3d14c8STreehugger Robot   static SymbolizedStack *New(uptr addr);
54*7c3d14c8STreehugger Robot   // Deletes current, and all subsequent frames in the linked list.
55*7c3d14c8STreehugger Robot   // The object cannot be accessed after the call to this function.
56*7c3d14c8STreehugger Robot   void ClearAll();
57*7c3d14c8STreehugger Robot 
58*7c3d14c8STreehugger Robot  private:
59*7c3d14c8STreehugger Robot   SymbolizedStack();
60*7c3d14c8STreehugger Robot };
61*7c3d14c8STreehugger Robot 
62*7c3d14c8STreehugger Robot // For now, DataInfo is used to describe global variable.
63*7c3d14c8STreehugger Robot struct DataInfo {
64*7c3d14c8STreehugger Robot   // Owns all the string members. Storage for them is
65*7c3d14c8STreehugger Robot   // (de)allocated using sanitizer internal allocator.
66*7c3d14c8STreehugger Robot   char *module;
67*7c3d14c8STreehugger Robot   uptr module_offset;
68*7c3d14c8STreehugger Robot   char *file;
69*7c3d14c8STreehugger Robot   uptr line;
70*7c3d14c8STreehugger Robot   char *name;
71*7c3d14c8STreehugger Robot   uptr start;
72*7c3d14c8STreehugger Robot   uptr size;
73*7c3d14c8STreehugger Robot 
74*7c3d14c8STreehugger Robot   DataInfo();
75*7c3d14c8STreehugger Robot   void Clear();
76*7c3d14c8STreehugger Robot };
77*7c3d14c8STreehugger Robot 
78*7c3d14c8STreehugger Robot class SymbolizerTool;
79*7c3d14c8STreehugger Robot 
80*7c3d14c8STreehugger Robot class Symbolizer final {
81*7c3d14c8STreehugger Robot  public:
82*7c3d14c8STreehugger Robot   /// Initialize and return platform-specific implementation of symbolizer
83*7c3d14c8STreehugger Robot   /// (if it wasn't already initialized).
84*7c3d14c8STreehugger Robot   static Symbolizer *GetOrInit();
85*7c3d14c8STreehugger Robot   static void LateInitialize();
86*7c3d14c8STreehugger Robot   // Returns a list of symbolized frames for a given address (containing
87*7c3d14c8STreehugger Robot   // all inlined functions, if necessary).
88*7c3d14c8STreehugger Robot   SymbolizedStack *SymbolizePC(uptr address);
89*7c3d14c8STreehugger Robot   bool SymbolizeData(uptr address, DataInfo *info);
90*7c3d14c8STreehugger Robot 
91*7c3d14c8STreehugger Robot   // The module names Symbolizer returns are stable and unique for every given
92*7c3d14c8STreehugger Robot   // module.  It is safe to store and compare them as pointers.
93*7c3d14c8STreehugger Robot   bool GetModuleNameAndOffsetForPC(uptr pc, const char **module_name,
94*7c3d14c8STreehugger Robot                                    uptr *module_address);
GetModuleNameForPc(uptr pc)95*7c3d14c8STreehugger Robot   const char *GetModuleNameForPc(uptr pc) {
96*7c3d14c8STreehugger Robot     const char *module_name = nullptr;
97*7c3d14c8STreehugger Robot     uptr unused;
98*7c3d14c8STreehugger Robot     if (GetModuleNameAndOffsetForPC(pc, &module_name, &unused))
99*7c3d14c8STreehugger Robot       return module_name;
100*7c3d14c8STreehugger Robot     return nullptr;
101*7c3d14c8STreehugger Robot   }
102*7c3d14c8STreehugger Robot 
103*7c3d14c8STreehugger Robot   // Release internal caches (if any).
104*7c3d14c8STreehugger Robot   void Flush();
105*7c3d14c8STreehugger Robot   // Attempts to demangle the provided C++ mangled name.
106*7c3d14c8STreehugger Robot   const char *Demangle(const char *name);
107*7c3d14c8STreehugger Robot   void PrepareForSandboxing();
108*7c3d14c8STreehugger Robot 
109*7c3d14c8STreehugger Robot   // Allow user to install hooks that would be called before/after Symbolizer
110*7c3d14c8STreehugger Robot   // does the actual file/line info fetching. Specific sanitizers may need this
111*7c3d14c8STreehugger Robot   // to distinguish system library calls made in user code from calls made
112*7c3d14c8STreehugger Robot   // during in-process symbolization.
113*7c3d14c8STreehugger Robot   typedef void (*StartSymbolizationHook)();
114*7c3d14c8STreehugger Robot   typedef void (*EndSymbolizationHook)();
115*7c3d14c8STreehugger Robot   // May be called at most once.
116*7c3d14c8STreehugger Robot   void AddHooks(StartSymbolizationHook start_hook,
117*7c3d14c8STreehugger Robot                 EndSymbolizationHook end_hook);
118*7c3d14c8STreehugger Robot 
119*7c3d14c8STreehugger Robot   const LoadedModule *FindModuleForAddress(uptr address);
120*7c3d14c8STreehugger Robot 
121*7c3d14c8STreehugger Robot  private:
122*7c3d14c8STreehugger Robot   // GetModuleNameAndOffsetForPC has to return a string to the caller.
123*7c3d14c8STreehugger Robot   // Since the corresponding module might get unloaded later, we should create
124*7c3d14c8STreehugger Robot   // our owned copies of the strings that we can safely return.
125*7c3d14c8STreehugger Robot   // ModuleNameOwner does not provide any synchronization, thus calls to
126*7c3d14c8STreehugger Robot   // its method should be protected by |mu_|.
127*7c3d14c8STreehugger Robot   class ModuleNameOwner {
128*7c3d14c8STreehugger Robot    public:
ModuleNameOwner(BlockingMutex * synchronized_by)129*7c3d14c8STreehugger Robot     explicit ModuleNameOwner(BlockingMutex *synchronized_by)
130*7c3d14c8STreehugger Robot         : storage_(kInitialCapacity), last_match_(nullptr),
131*7c3d14c8STreehugger Robot           mu_(synchronized_by) {}
132*7c3d14c8STreehugger Robot     const char *GetOwnedCopy(const char *str);
133*7c3d14c8STreehugger Robot 
134*7c3d14c8STreehugger Robot    private:
135*7c3d14c8STreehugger Robot     static const uptr kInitialCapacity = 1000;
136*7c3d14c8STreehugger Robot     InternalMmapVector<const char*> storage_;
137*7c3d14c8STreehugger Robot     const char *last_match_;
138*7c3d14c8STreehugger Robot 
139*7c3d14c8STreehugger Robot     BlockingMutex *mu_;
140*7c3d14c8STreehugger Robot   } module_names_;
141*7c3d14c8STreehugger Robot 
142*7c3d14c8STreehugger Robot   /// Platform-specific function for creating a Symbolizer object.
143*7c3d14c8STreehugger Robot   static Symbolizer *PlatformInit();
144*7c3d14c8STreehugger Robot 
145*7c3d14c8STreehugger Robot   bool FindModuleNameAndOffsetForAddress(uptr address, const char **module_name,
146*7c3d14c8STreehugger Robot                                          uptr *module_offset);
147*7c3d14c8STreehugger Robot   ListOfModules modules_;
148*7c3d14c8STreehugger Robot   // If stale, need to reload the modules before looking up addresses.
149*7c3d14c8STreehugger Robot   bool modules_fresh_;
150*7c3d14c8STreehugger Robot 
151*7c3d14c8STreehugger Robot   // Platform-specific default demangler, must not return nullptr.
152*7c3d14c8STreehugger Robot   const char *PlatformDemangle(const char *name);
153*7c3d14c8STreehugger Robot   void PlatformPrepareForSandboxing();
154*7c3d14c8STreehugger Robot 
155*7c3d14c8STreehugger Robot   static Symbolizer *symbolizer_;
156*7c3d14c8STreehugger Robot   static StaticSpinMutex init_mu_;
157*7c3d14c8STreehugger Robot 
158*7c3d14c8STreehugger Robot   // Mutex locked from public methods of |Symbolizer|, so that the internals
159*7c3d14c8STreehugger Robot   // (including individual symbolizer tools and platform-specific methods) are
160*7c3d14c8STreehugger Robot   // always synchronized.
161*7c3d14c8STreehugger Robot   BlockingMutex mu_;
162*7c3d14c8STreehugger Robot 
163*7c3d14c8STreehugger Robot   IntrusiveList<SymbolizerTool> tools_;
164*7c3d14c8STreehugger Robot 
165*7c3d14c8STreehugger Robot   explicit Symbolizer(IntrusiveList<SymbolizerTool> tools);
166*7c3d14c8STreehugger Robot 
167*7c3d14c8STreehugger Robot   static LowLevelAllocator symbolizer_allocator_;
168*7c3d14c8STreehugger Robot 
169*7c3d14c8STreehugger Robot   StartSymbolizationHook start_hook_;
170*7c3d14c8STreehugger Robot   EndSymbolizationHook end_hook_;
171*7c3d14c8STreehugger Robot   class SymbolizerScope {
172*7c3d14c8STreehugger Robot    public:
173*7c3d14c8STreehugger Robot     explicit SymbolizerScope(const Symbolizer *sym);
174*7c3d14c8STreehugger Robot     ~SymbolizerScope();
175*7c3d14c8STreehugger Robot    private:
176*7c3d14c8STreehugger Robot     const Symbolizer *sym_;
177*7c3d14c8STreehugger Robot   };
178*7c3d14c8STreehugger Robot };
179*7c3d14c8STreehugger Robot 
180*7c3d14c8STreehugger Robot }  // namespace __sanitizer
181*7c3d14c8STreehugger Robot 
182*7c3d14c8STreehugger Robot #endif  // SANITIZER_SYMBOLIZER_H
183