xref: /aosp_15_r20/external/perfetto/src/kallsyms/kernel_symbol_map.cc (revision 6dbdd20afdafa5e3ca9b8809fa73465d530080dc)
1*6dbdd20aSAndroid Build Coastguard Worker /*
2*6dbdd20aSAndroid Build Coastguard Worker  * Copyright (C) 2019 The Android Open Source Project
3*6dbdd20aSAndroid Build Coastguard Worker  *
4*6dbdd20aSAndroid Build Coastguard Worker  * Licensed under the Apache License, Version 2.0 (the "License");
5*6dbdd20aSAndroid Build Coastguard Worker  * you may not use this file except in compliance with the License.
6*6dbdd20aSAndroid Build Coastguard Worker  * You may obtain a copy of the License at
7*6dbdd20aSAndroid Build Coastguard Worker  *
8*6dbdd20aSAndroid Build Coastguard Worker  *      http://www.apache.org/licenses/LICENSE-2.0
9*6dbdd20aSAndroid Build Coastguard Worker  *
10*6dbdd20aSAndroid Build Coastguard Worker  * Unless required by applicable law or agreed to in writing, software
11*6dbdd20aSAndroid Build Coastguard Worker  * distributed under the License is distributed on an "AS IS" BASIS,
12*6dbdd20aSAndroid Build Coastguard Worker  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*6dbdd20aSAndroid Build Coastguard Worker  * See the License for the specific language governing permissions and
14*6dbdd20aSAndroid Build Coastguard Worker  * limitations under the License.
15*6dbdd20aSAndroid Build Coastguard Worker  */
16*6dbdd20aSAndroid Build Coastguard Worker 
17*6dbdd20aSAndroid Build Coastguard Worker #include "src/kallsyms/kernel_symbol_map.h"
18*6dbdd20aSAndroid Build Coastguard Worker 
19*6dbdd20aSAndroid Build Coastguard Worker #include "perfetto/base/build_config.h"
20*6dbdd20aSAndroid Build Coastguard Worker #include "perfetto/base/logging.h"
21*6dbdd20aSAndroid Build Coastguard Worker #include "perfetto/ext/base/file_utils.h"
22*6dbdd20aSAndroid Build Coastguard Worker #include "perfetto/ext/base/metatrace.h"
23*6dbdd20aSAndroid Build Coastguard Worker #include "perfetto/ext/base/paged_memory.h"
24*6dbdd20aSAndroid Build Coastguard Worker #include "perfetto/ext/base/scoped_file.h"
25*6dbdd20aSAndroid Build Coastguard Worker #include "perfetto/ext/base/string_view.h"
26*6dbdd20aSAndroid Build Coastguard Worker #include "perfetto/ext/base/utils.h"
27*6dbdd20aSAndroid Build Coastguard Worker #include "perfetto/protozero/proto_utils.h"
28*6dbdd20aSAndroid Build Coastguard Worker 
29*6dbdd20aSAndroid Build Coastguard Worker #include <stdio.h>
30*6dbdd20aSAndroid Build Coastguard Worker 
31*6dbdd20aSAndroid Build Coastguard Worker #include <algorithm>
32*6dbdd20aSAndroid Build Coastguard Worker #include <cinttypes>
33*6dbdd20aSAndroid Build Coastguard Worker #include <functional>
34*6dbdd20aSAndroid Build Coastguard Worker #include <map>
35*6dbdd20aSAndroid Build Coastguard Worker #include <unordered_map>
36*6dbdd20aSAndroid Build Coastguard Worker #include <utility>
37*6dbdd20aSAndroid Build Coastguard Worker 
38*6dbdd20aSAndroid Build Coastguard Worker namespace perfetto {
39*6dbdd20aSAndroid Build Coastguard Worker 
40*6dbdd20aSAndroid Build Coastguard Worker // On a Pixel 3 this gives an avg. lookup time of 600 ns and a memory usage
41*6dbdd20aSAndroid Build Coastguard Worker // of 1.1 MB for 65k symbols. See go/kallsyms-parser-bench.
42*6dbdd20aSAndroid Build Coastguard Worker size_t KernelSymbolMap::kSymIndexSampling = 16;
43*6dbdd20aSAndroid Build Coastguard Worker size_t KernelSymbolMap::kTokenIndexSampling = 4;
44*6dbdd20aSAndroid Build Coastguard Worker 
45*6dbdd20aSAndroid Build Coastguard Worker namespace {
46*6dbdd20aSAndroid Build Coastguard Worker 
47*6dbdd20aSAndroid Build Coastguard Worker using TokenId = KernelSymbolMap::TokenTable::TokenId;
// Maximum number of symbol-name bytes retained per kallsyms line. The parser
// stops accumulating the name once this many bytes have been stored.
// TODO(rsavitski): the ToT kernel can supposedly contain symbols >255 bytes in
// length (in particular due to Rust). Consider bumping this.
// https://github.com/torvalds/linux/commit/73bbb94
constexpr size_t kSymNameMaxLen{128};
constexpr size_t kSymMaxSizeBytes{size_t{1024} * 1024};
53*6dbdd20aSAndroid Build Coastguard Worker 
54*6dbdd20aSAndroid Build Coastguard Worker // Reads a kallsyms file in blocks of 4 pages each and decode its lines using
55*6dbdd20aSAndroid Build Coastguard Worker // a simple FSM. Calls the passed lambda for each valid symbol.
56*6dbdd20aSAndroid Build Coastguard Worker // It skips undefined symbols and other useless stuff.
57*6dbdd20aSAndroid Build Coastguard Worker template <typename Lambda /* void(uint64_t, char, base::StringView) */>
ForEachSym(const std::string & kallsyms_path,Lambda fn)58*6dbdd20aSAndroid Build Coastguard Worker void ForEachSym(const std::string& kallsyms_path, Lambda fn) {
59*6dbdd20aSAndroid Build Coastguard Worker   base::ScopedFile fd = base::OpenFile(kallsyms_path.c_str(), O_RDONLY);
60*6dbdd20aSAndroid Build Coastguard Worker   if (!fd) {
61*6dbdd20aSAndroid Build Coastguard Worker     PERFETTO_PLOG("Cannot open %s", kallsyms_path.c_str());
62*6dbdd20aSAndroid Build Coastguard Worker     return;
63*6dbdd20aSAndroid Build Coastguard Worker   }
64*6dbdd20aSAndroid Build Coastguard Worker 
65*6dbdd20aSAndroid Build Coastguard Worker   // /proc/kallsyms looks as follows:
66*6dbdd20aSAndroid Build Coastguard Worker   // 0000000000026a80 A bpf_trace_sds
67*6dbdd20aSAndroid Build Coastguard Worker   //
68*6dbdd20aSAndroid Build Coastguard Worker   // ffffffffc03a6000 T cpufreq_gov_powersave_init<TAB> [cpufreq_powersave]
69*6dbdd20aSAndroid Build Coastguard Worker   // ffffffffc035d000 T cpufreq_gov_userspace_init<TAB> [cpufreq_userspace]
70*6dbdd20aSAndroid Build Coastguard Worker   //
71*6dbdd20aSAndroid Build Coastguard Worker   // We parse it with a state machine that has four states, one for each column.
72*6dbdd20aSAndroid Build Coastguard Worker   // We don't care about the part in the square brackets and ignore everything
73*6dbdd20aSAndroid Build Coastguard Worker   // after the symbol name.
74*6dbdd20aSAndroid Build Coastguard Worker 
75*6dbdd20aSAndroid Build Coastguard Worker   static constexpr size_t kBufSize = 16 * 1024;
76*6dbdd20aSAndroid Build Coastguard Worker   base::PagedMemory buffer = base::PagedMemory::Allocate(kBufSize);
77*6dbdd20aSAndroid Build Coastguard Worker   enum { kSymAddr, kSymType, kSymName, kEatRestOfLine } state = kSymAddr;
78*6dbdd20aSAndroid Build Coastguard Worker   uint64_t sym_addr = 0;
79*6dbdd20aSAndroid Build Coastguard Worker   char sym_type = '\0';
80*6dbdd20aSAndroid Build Coastguard Worker   char sym_name[kSymNameMaxLen + 1];
81*6dbdd20aSAndroid Build Coastguard Worker   size_t sym_name_len = 0;
82*6dbdd20aSAndroid Build Coastguard Worker   for (;;) {
83*6dbdd20aSAndroid Build Coastguard Worker     char* buf = static_cast<char*>(buffer.Get());
84*6dbdd20aSAndroid Build Coastguard Worker     auto rsize = base::Read(*fd, buf, kBufSize);
85*6dbdd20aSAndroid Build Coastguard Worker     if (rsize < 0) {
86*6dbdd20aSAndroid Build Coastguard Worker       PERFETTO_PLOG("read(%s) failed", kallsyms_path.c_str());
87*6dbdd20aSAndroid Build Coastguard Worker       return;
88*6dbdd20aSAndroid Build Coastguard Worker     }
89*6dbdd20aSAndroid Build Coastguard Worker     if (rsize == 0)
90*6dbdd20aSAndroid Build Coastguard Worker       return;  // EOF
91*6dbdd20aSAndroid Build Coastguard Worker     for (size_t i = 0; i < static_cast<size_t>(rsize); i++) {
92*6dbdd20aSAndroid Build Coastguard Worker       char c = buf[i];
93*6dbdd20aSAndroid Build Coastguard Worker       const bool is_space = c == ' ' || c == '\t';
94*6dbdd20aSAndroid Build Coastguard Worker       switch (state) {
95*6dbdd20aSAndroid Build Coastguard Worker         case kSymAddr:
96*6dbdd20aSAndroid Build Coastguard Worker           if (c >= '0' && c <= '9') {
97*6dbdd20aSAndroid Build Coastguard Worker             sym_addr = (sym_addr << 4) | static_cast<uint8_t>(c - '0');
98*6dbdd20aSAndroid Build Coastguard Worker           } else if (c >= 'a' && c <= 'f') {
99*6dbdd20aSAndroid Build Coastguard Worker             sym_addr = (sym_addr << 4) | static_cast<uint8_t>(c - 'a' + 10);
100*6dbdd20aSAndroid Build Coastguard Worker           } else if (is_space) {
101*6dbdd20aSAndroid Build Coastguard Worker             state = kSymType;
102*6dbdd20aSAndroid Build Coastguard Worker           } else if (c == '\0') {
103*6dbdd20aSAndroid Build Coastguard Worker             return;
104*6dbdd20aSAndroid Build Coastguard Worker           } else {
105*6dbdd20aSAndroid Build Coastguard Worker             PERFETTO_ELOG("kallsyms parser error: chr 0x%x @ off=%zu", c, i);
106*6dbdd20aSAndroid Build Coastguard Worker             return;
107*6dbdd20aSAndroid Build Coastguard Worker           }
108*6dbdd20aSAndroid Build Coastguard Worker           break;
109*6dbdd20aSAndroid Build Coastguard Worker 
110*6dbdd20aSAndroid Build Coastguard Worker         case kSymType:
111*6dbdd20aSAndroid Build Coastguard Worker           if (is_space)
112*6dbdd20aSAndroid Build Coastguard Worker             break;  // Eat leading spaces.
113*6dbdd20aSAndroid Build Coastguard Worker           sym_type = c;
114*6dbdd20aSAndroid Build Coastguard Worker           state = kSymName;
115*6dbdd20aSAndroid Build Coastguard Worker           sym_name_len = 0;
116*6dbdd20aSAndroid Build Coastguard Worker           break;
117*6dbdd20aSAndroid Build Coastguard Worker 
118*6dbdd20aSAndroid Build Coastguard Worker         case kSymName:
119*6dbdd20aSAndroid Build Coastguard Worker           if (is_space && sym_name_len == 0)
120*6dbdd20aSAndroid Build Coastguard Worker             break;  // Eat leading spaces.
121*6dbdd20aSAndroid Build Coastguard Worker           if (c && c != '\n' && !is_space && sym_name_len < kSymNameMaxLen) {
122*6dbdd20aSAndroid Build Coastguard Worker             sym_name[sym_name_len++] = c;
123*6dbdd20aSAndroid Build Coastguard Worker             break;
124*6dbdd20aSAndroid Build Coastguard Worker           }
125*6dbdd20aSAndroid Build Coastguard Worker           fn(sym_addr, sym_type, base::StringView(sym_name, sym_name_len));
126*6dbdd20aSAndroid Build Coastguard Worker           sym_addr = 0;
127*6dbdd20aSAndroid Build Coastguard Worker           sym_type = '\0';
128*6dbdd20aSAndroid Build Coastguard Worker           state = c == '\n' ? kSymAddr : kEatRestOfLine;
129*6dbdd20aSAndroid Build Coastguard Worker           break;
130*6dbdd20aSAndroid Build Coastguard Worker 
131*6dbdd20aSAndroid Build Coastguard Worker         case kEatRestOfLine:
132*6dbdd20aSAndroid Build Coastguard Worker           if (c == '\n')
133*6dbdd20aSAndroid Build Coastguard Worker             state = kSymAddr;
134*6dbdd20aSAndroid Build Coastguard Worker           break;
135*6dbdd20aSAndroid Build Coastguard Worker       }  // switch(state)
136*6dbdd20aSAndroid Build Coastguard Worker     }    // for (char in buf)
137*6dbdd20aSAndroid Build Coastguard Worker   }      // for (read chunk)
138*6dbdd20aSAndroid Build Coastguard Worker }
139*6dbdd20aSAndroid Build Coastguard Worker 
140*6dbdd20aSAndroid Build Coastguard Worker // Splits a symbol name into tokens using '_' as a separator, calling the passed
141*6dbdd20aSAndroid Build Coastguard Worker // lambda for each token. It splits tokens in a way that allows the original
142*6dbdd20aSAndroid Build Coastguard Worker // string to be rebuilt as-is by re-joining using a '_' between each token.
143*6dbdd20aSAndroid Build Coastguard Worker // For instance:
144*6dbdd20aSAndroid Build Coastguard Worker // _fo_a_b      ->  ["", fo, a, b]
145*6dbdd20aSAndroid Build Coastguard Worker // __fo_a_b     ->  [_, fo, a, b]
146*6dbdd20aSAndroid Build Coastguard Worker // __fo_a_b_    ->  [_, fo, a, b, ""]
147*6dbdd20aSAndroid Build Coastguard Worker // __fo_a_b____ ->  [_, fo, a, b, ___]
148*6dbdd20aSAndroid Build Coastguard Worker template <typename Lambda /* void(base::StringView) */>
Tokenize(const base::StringView name,Lambda fn)149*6dbdd20aSAndroid Build Coastguard Worker void Tokenize(const base::StringView name, Lambda fn) {
150*6dbdd20aSAndroid Build Coastguard Worker   size_t tok_start = 0;
151*6dbdd20aSAndroid Build Coastguard Worker   bool tok_is_sep = !name.empty() && name.at(tok_start) == '_';
152*6dbdd20aSAndroid Build Coastguard Worker   for (size_t i = 0; i < name.size(); i++) {
153*6dbdd20aSAndroid Build Coastguard Worker     char c = name.at(i);
154*6dbdd20aSAndroid Build Coastguard Worker     // Scan until either the end of string or the next character (which is a
155*6dbdd20aSAndroid Build Coastguard Worker     // '_' in nominal cases, or anything != '_' for tokens made by 1+ '_').
156*6dbdd20aSAndroid Build Coastguard Worker     if (!tok_is_sep && c == '_') {
157*6dbdd20aSAndroid Build Coastguard Worker       fn(name.substr(tok_start, i - tok_start));
158*6dbdd20aSAndroid Build Coastguard Worker       tok_start = i + 1;
159*6dbdd20aSAndroid Build Coastguard Worker       if (tok_start < name.size()) {
160*6dbdd20aSAndroid Build Coastguard Worker         tok_is_sep = name.at(tok_start) == '_';
161*6dbdd20aSAndroid Build Coastguard Worker       }
162*6dbdd20aSAndroid Build Coastguard Worker     } else if (tok_is_sep && c != '_') {
163*6dbdd20aSAndroid Build Coastguard Worker       fn(name.substr(tok_start, i - tok_start - 1));
164*6dbdd20aSAndroid Build Coastguard Worker       tok_start = i;
165*6dbdd20aSAndroid Build Coastguard Worker       tok_is_sep = false;
166*6dbdd20aSAndroid Build Coastguard Worker     }
167*6dbdd20aSAndroid Build Coastguard Worker   }
168*6dbdd20aSAndroid Build Coastguard Worker   fn(name.substr(tok_start));  // last token
169*6dbdd20aSAndroid Build Coastguard Worker }
170*6dbdd20aSAndroid Build Coastguard Worker 
171*6dbdd20aSAndroid Build Coastguard Worker }  // namespace
172*6dbdd20aSAndroid Build Coastguard Worker 
TokenTable()173*6dbdd20aSAndroid Build Coastguard Worker KernelSymbolMap::TokenTable::TokenTable() {
174*6dbdd20aSAndroid Build Coastguard Worker   // Insert a null token as id 0. We can't just add "" because the empty string
175*6dbdd20aSAndroid Build Coastguard Worker   // is special-cased and doesn't insert an actual token. So we push a string of
176*6dbdd20aSAndroid Build Coastguard Worker   // size one that contains only the null character instead.
177*6dbdd20aSAndroid Build Coastguard Worker   char null_tok = 0;
178*6dbdd20aSAndroid Build Coastguard Worker   Add(std::string(&null_tok, 1));
179*6dbdd20aSAndroid Build Coastguard Worker }
180*6dbdd20aSAndroid Build Coastguard Worker 
181*6dbdd20aSAndroid Build Coastguard Worker KernelSymbolMap::TokenTable::~TokenTable() = default;
182*6dbdd20aSAndroid Build Coastguard Worker 
183*6dbdd20aSAndroid Build Coastguard Worker // Adds a new token to the db. Does not dedupe identical token (with the
184*6dbdd20aSAndroid Build Coastguard Worker // exception of the empty string). The caller has to deal with that.
185*6dbdd20aSAndroid Build Coastguard Worker // Supports only ASCII characters in the range [1, 127].
186*6dbdd20aSAndroid Build Coastguard Worker // The last character of the token will have the MSB set.
Add(const std::string & token)187*6dbdd20aSAndroid Build Coastguard Worker TokenId KernelSymbolMap::TokenTable::Add(const std::string& token) {
188*6dbdd20aSAndroid Build Coastguard Worker   const size_t token_size = token.size();
189*6dbdd20aSAndroid Build Coastguard Worker   if (token_size == 0)
190*6dbdd20aSAndroid Build Coastguard Worker     return 0;
191*6dbdd20aSAndroid Build Coastguard Worker   TokenId id = num_tokens_++;
192*6dbdd20aSAndroid Build Coastguard Worker 
193*6dbdd20aSAndroid Build Coastguard Worker   const size_t buf_size_before_insertion = buf_.size();
194*6dbdd20aSAndroid Build Coastguard Worker   if (id % kTokenIndexSampling == 0)
195*6dbdd20aSAndroid Build Coastguard Worker     index_.emplace_back(buf_size_before_insertion);
196*6dbdd20aSAndroid Build Coastguard Worker 
197*6dbdd20aSAndroid Build Coastguard Worker   const size_t prev_size = buf_.size();
198*6dbdd20aSAndroid Build Coastguard Worker   buf_.resize(prev_size + token_size);
199*6dbdd20aSAndroid Build Coastguard Worker   char* tok_wptr = &buf_[prev_size];
200*6dbdd20aSAndroid Build Coastguard Worker   for (size_t i = 0; i < token_size - 1; i++) {
201*6dbdd20aSAndroid Build Coastguard Worker     PERFETTO_DCHECK((token.at(i) & 0x80) == 0);  // |token| must be ASCII only.
202*6dbdd20aSAndroid Build Coastguard Worker     *(tok_wptr++) = token.at(i) & 0x7f;
203*6dbdd20aSAndroid Build Coastguard Worker   }
204*6dbdd20aSAndroid Build Coastguard Worker   *(tok_wptr++) = static_cast<char>(token.at(token_size - 1) | 0x80);
205*6dbdd20aSAndroid Build Coastguard Worker   PERFETTO_DCHECK(tok_wptr == buf_.data() + buf_.size());
206*6dbdd20aSAndroid Build Coastguard Worker   return id;
207*6dbdd20aSAndroid Build Coastguard Worker }
208*6dbdd20aSAndroid Build Coastguard Worker 
209*6dbdd20aSAndroid Build Coastguard Worker // NOTE: the caller need to mask the returned chars with 0x7f. The last char of
210*6dbdd20aSAndroid Build Coastguard Worker // the StringView will have its MSB set (it's used as a EOF char internally).
Lookup(TokenId id)211*6dbdd20aSAndroid Build Coastguard Worker base::StringView KernelSymbolMap::TokenTable::Lookup(TokenId id) {
212*6dbdd20aSAndroid Build Coastguard Worker   if (id == 0)
213*6dbdd20aSAndroid Build Coastguard Worker     return base::StringView();
214*6dbdd20aSAndroid Build Coastguard Worker   if (id > num_tokens_)
215*6dbdd20aSAndroid Build Coastguard Worker     return base::StringView("<error>");
216*6dbdd20aSAndroid Build Coastguard Worker   // We don't know precisely where the id-th token starts in the buffer. We
217*6dbdd20aSAndroid Build Coastguard Worker   // store only one position every kTokenIndexSampling. From there, the token
218*6dbdd20aSAndroid Build Coastguard Worker   // can be found with a linear scan of at most kTokenIndexSampling steps.
219*6dbdd20aSAndroid Build Coastguard Worker   size_t index_off = id / kTokenIndexSampling;
220*6dbdd20aSAndroid Build Coastguard Worker   PERFETTO_DCHECK(index_off < index_.size());
221*6dbdd20aSAndroid Build Coastguard Worker   TokenId cur_id = static_cast<TokenId>(index_off * kTokenIndexSampling);
222*6dbdd20aSAndroid Build Coastguard Worker   uint32_t begin = index_[index_off];
223*6dbdd20aSAndroid Build Coastguard Worker   PERFETTO_DCHECK(begin == 0 || buf_[begin - 1] & 0x80);
224*6dbdd20aSAndroid Build Coastguard Worker   const size_t buf_size = buf_.size();
225*6dbdd20aSAndroid Build Coastguard Worker   for (uint32_t off = begin; off < buf_size; ++off) {
226*6dbdd20aSAndroid Build Coastguard Worker     // Advance |off| until the end of the token (which has the MSB set).
227*6dbdd20aSAndroid Build Coastguard Worker     if ((buf_[off] & 0x80) == 0)
228*6dbdd20aSAndroid Build Coastguard Worker       continue;
229*6dbdd20aSAndroid Build Coastguard Worker     if (cur_id == id)
230*6dbdd20aSAndroid Build Coastguard Worker       return base::StringView(&buf_[begin], off - begin + 1);
231*6dbdd20aSAndroid Build Coastguard Worker     ++cur_id;
232*6dbdd20aSAndroid Build Coastguard Worker     begin = off + 1;
233*6dbdd20aSAndroid Build Coastguard Worker   }
234*6dbdd20aSAndroid Build Coastguard Worker   return base::StringView();
235*6dbdd20aSAndroid Build Coastguard Worker }
236*6dbdd20aSAndroid Build Coastguard Worker 
Parse(const std::string & kallsyms_path)237*6dbdd20aSAndroid Build Coastguard Worker size_t KernelSymbolMap::Parse(const std::string& kallsyms_path) {
238*6dbdd20aSAndroid Build Coastguard Worker   PERFETTO_METATRACE_SCOPED(TAG_PRODUCER, KALLSYMS_PARSE);
239*6dbdd20aSAndroid Build Coastguard Worker   using SymAddr = uint64_t;
240*6dbdd20aSAndroid Build Coastguard Worker 
241*6dbdd20aSAndroid Build Coastguard Worker   struct TokenInfo {
242*6dbdd20aSAndroid Build Coastguard Worker     uint32_t count = 0;
243*6dbdd20aSAndroid Build Coastguard Worker     TokenId id = 0;
244*6dbdd20aSAndroid Build Coastguard Worker   };
245*6dbdd20aSAndroid Build Coastguard Worker 
246*6dbdd20aSAndroid Build Coastguard Worker   // Note if changing the container: the code below doesn't rely on stable
247*6dbdd20aSAndroid Build Coastguard Worker   // iterators, but relies on stable pointers.
248*6dbdd20aSAndroid Build Coastguard Worker   using TokenMap = std::unordered_map<std::string, TokenInfo>;
249*6dbdd20aSAndroid Build Coastguard Worker   using TokenMapPtr = TokenMap::value_type*;
250*6dbdd20aSAndroid Build Coastguard Worker   TokenMap tokens;
251*6dbdd20aSAndroid Build Coastguard Worker 
252*6dbdd20aSAndroid Build Coastguard Worker   // Keep the (ordered) list of tokens for each symbol.
253*6dbdd20aSAndroid Build Coastguard Worker   struct SymAddrAndTokenPtr {
254*6dbdd20aSAndroid Build Coastguard Worker     SymAddr addr;
255*6dbdd20aSAndroid Build Coastguard Worker     TokenMapPtr token_map_entry;
256*6dbdd20aSAndroid Build Coastguard Worker 
257*6dbdd20aSAndroid Build Coastguard Worker     bool operator<(const SymAddrAndTokenPtr& other) const {
258*6dbdd20aSAndroid Build Coastguard Worker       return addr < other.addr;
259*6dbdd20aSAndroid Build Coastguard Worker     }
260*6dbdd20aSAndroid Build Coastguard Worker   };
261*6dbdd20aSAndroid Build Coastguard Worker   std::vector<SymAddrAndTokenPtr> symbol_tokens;
262*6dbdd20aSAndroid Build Coastguard Worker 
263*6dbdd20aSAndroid Build Coastguard Worker   // Based on `cat /proc/kallsyms | egrep "\b[tT]\b" | wc -l`.
264*6dbdd20aSAndroid Build Coastguard Worker   symbol_tokens.reserve(128 * 1024);
265*6dbdd20aSAndroid Build Coastguard Worker 
266*6dbdd20aSAndroid Build Coastguard Worker   ForEachSym(kallsyms_path, [&](SymAddr addr, char type,
267*6dbdd20aSAndroid Build Coastguard Worker                                 base::StringView name) {
268*6dbdd20aSAndroid Build Coastguard Worker     // Special cases:
269*6dbdd20aSAndroid Build Coastguard Worker     //
270*6dbdd20aSAndroid Build Coastguard Worker     // Skip arm mapping symbols such as $x, $x.123, $d, $d.123. They exist to
271*6dbdd20aSAndroid Build Coastguard Worker     // delineate interleaved data and text for certain tools, and do not
272*6dbdd20aSAndroid Build Coastguard Worker     // identify real functions. Should be fine to ignore on non-arm platforms
273*6dbdd20aSAndroid Build Coastguard Worker     // since '$' isn't a valid C identifier and therefore unlikely to mark a
274*6dbdd20aSAndroid Build Coastguard Worker     // real function.
275*6dbdd20aSAndroid Build Coastguard Worker     //
276*6dbdd20aSAndroid Build Coastguard Worker     // Strip .cfi/.cfi_jt suffixes if the kernel is built with clang control
277*6dbdd20aSAndroid Build Coastguard Worker     // flow integrity checks (where for "my_func" there will be a
278*6dbdd20aSAndroid Build Coastguard Worker     // "my_func.cfi_jt"). These can account for a third of the total symbols
279*6dbdd20aSAndroid Build Coastguard Worker     // after the above filters, and tracing users want to see the unadorned
280*6dbdd20aSAndroid Build Coastguard Worker     // name anyway. Normally we'd record the full string here and remove the
281*6dbdd20aSAndroid Build Coastguard Worker     // suffix during trace ingestion, but it makes a nontrivial impact on the
282*6dbdd20aSAndroid Build Coastguard Worker     // size of the in-memory token table since we tokenize only on underscore
283*6dbdd20aSAndroid Build Coastguard Worker     // boundaries.
284*6dbdd20aSAndroid Build Coastguard Worker     if (addr == 0 || (type != 't' && type != 'T') || name.at(0) == '$') {
285*6dbdd20aSAndroid Build Coastguard Worker       return;
286*6dbdd20aSAndroid Build Coastguard Worker     }
287*6dbdd20aSAndroid Build Coastguard Worker     const base::StringView cfi = ".cfi";
288*6dbdd20aSAndroid Build Coastguard Worker     const base::StringView cfi_jt = ".cfi_jt";
289*6dbdd20aSAndroid Build Coastguard Worker     if (name.EndsWith(cfi)) {
290*6dbdd20aSAndroid Build Coastguard Worker       name = name.substr(0, name.size() - cfi.size());
291*6dbdd20aSAndroid Build Coastguard Worker     } else if (name.EndsWith(cfi_jt)) {
292*6dbdd20aSAndroid Build Coastguard Worker       name = name.substr(0, name.size() - cfi_jt.size());
293*6dbdd20aSAndroid Build Coastguard Worker     }
294*6dbdd20aSAndroid Build Coastguard Worker 
295*6dbdd20aSAndroid Build Coastguard Worker     // Split each symbol name in tokens, using '_' as a separator (so that
296*6dbdd20aSAndroid Build Coastguard Worker     // "foo_bar" -> ["foo", "bar"]). For each token hash:
297*6dbdd20aSAndroid Build Coastguard Worker     // 1. Keep track of the frequency of each token.
298*6dbdd20aSAndroid Build Coastguard Worker     // 2. Keep track of the list of token hashes for each symbol.
299*6dbdd20aSAndroid Build Coastguard Worker     Tokenize(name, [&tokens, &symbol_tokens, addr](base::StringView token) {
300*6dbdd20aSAndroid Build Coastguard Worker       auto it_and_ins = tokens.emplace(token.ToStdString(), TokenInfo{});
301*6dbdd20aSAndroid Build Coastguard Worker       it_and_ins.first->second.count++;
302*6dbdd20aSAndroid Build Coastguard Worker       symbol_tokens.emplace_back(SymAddrAndTokenPtr{addr, &*it_and_ins.first});
303*6dbdd20aSAndroid Build Coastguard Worker     });
304*6dbdd20aSAndroid Build Coastguard Worker   });
305*6dbdd20aSAndroid Build Coastguard Worker 
306*6dbdd20aSAndroid Build Coastguard Worker   symbol_tokens.shrink_to_fit();
307*6dbdd20aSAndroid Build Coastguard Worker 
308*6dbdd20aSAndroid Build Coastguard Worker   // For each symbol address, T entries are inserted into |symbol_tokens|, one
309*6dbdd20aSAndroid Build Coastguard Worker   // for each token. These symbols are added in arbitrary address (as seen in
310*6dbdd20aSAndroid Build Coastguard Worker   // /proc/kallsyms). Here we want to sort symbols by addresses, but at the same
311*6dbdd20aSAndroid Build Coastguard Worker   // time preserve the order of tokens within each address.
312*6dbdd20aSAndroid Build Coastguard Worker   // For instance, if kallsyms has: {0x41: connect_socket, 0x42: write_file}:
313*6dbdd20aSAndroid Build Coastguard Worker   // Before sort: [(0x42, write), (0x42, file), (0x41, connect), (0x41, socket)]
314*6dbdd20aSAndroid Build Coastguard Worker   // After sort: [(0x41, connect), (0x41, socket), (0x42, write), (0x42, file)]
315*6dbdd20aSAndroid Build Coastguard Worker   std::stable_sort(symbol_tokens.begin(), symbol_tokens.end());
316*6dbdd20aSAndroid Build Coastguard Worker 
317*6dbdd20aSAndroid Build Coastguard Worker   // At this point we have broken down each symbol into a set of token hashes.
318*6dbdd20aSAndroid Build Coastguard Worker   // Now generate the token ids, putting high freq tokens first, so they use
319*6dbdd20aSAndroid Build Coastguard Worker   // only one byte to varint encode.
320*6dbdd20aSAndroid Build Coastguard Worker 
321*6dbdd20aSAndroid Build Coastguard Worker   // This block limits the lifetime of |tokens_by_freq|.
322*6dbdd20aSAndroid Build Coastguard Worker   {
323*6dbdd20aSAndroid Build Coastguard Worker     std::vector<TokenMapPtr> tokens_by_freq;
324*6dbdd20aSAndroid Build Coastguard Worker     tokens_by_freq.resize(tokens.size());
325*6dbdd20aSAndroid Build Coastguard Worker     size_t tok_idx = 0;
326*6dbdd20aSAndroid Build Coastguard Worker     for (auto& kv : tokens)
327*6dbdd20aSAndroid Build Coastguard Worker       tokens_by_freq[tok_idx++] = &kv;
328*6dbdd20aSAndroid Build Coastguard Worker 
329*6dbdd20aSAndroid Build Coastguard Worker     auto comparer = [](TokenMapPtr a, TokenMapPtr b) {
330*6dbdd20aSAndroid Build Coastguard Worker       PERFETTO_DCHECK(a && b);
331*6dbdd20aSAndroid Build Coastguard Worker       return b->second.count < a->second.count;
332*6dbdd20aSAndroid Build Coastguard Worker     };
333*6dbdd20aSAndroid Build Coastguard Worker     std::sort(tokens_by_freq.begin(), tokens_by_freq.end(), comparer);
334*6dbdd20aSAndroid Build Coastguard Worker     for (TokenMapPtr tinfo : tokens_by_freq) {
335*6dbdd20aSAndroid Build Coastguard Worker       tinfo->second.id = tokens_.Add(tinfo->first);
336*6dbdd20aSAndroid Build Coastguard Worker     }
337*6dbdd20aSAndroid Build Coastguard Worker   }
338*6dbdd20aSAndroid Build Coastguard Worker   tokens_.shrink_to_fit();
339*6dbdd20aSAndroid Build Coastguard Worker 
340*6dbdd20aSAndroid Build Coastguard Worker   buf_.resize(2 * 1024 * 1024);  // Based on real-word observations.
341*6dbdd20aSAndroid Build Coastguard Worker   base_addr_ = symbol_tokens.empty() ? 0 : symbol_tokens.begin()->addr;
342*6dbdd20aSAndroid Build Coastguard Worker   SymAddr prev_sym_addr = base_addr_;
343*6dbdd20aSAndroid Build Coastguard Worker   uint8_t* wptr = buf_.data();
344*6dbdd20aSAndroid Build Coastguard Worker 
345*6dbdd20aSAndroid Build Coastguard Worker   for (auto it = symbol_tokens.begin(); it != symbol_tokens.end();) {
346*6dbdd20aSAndroid Build Coastguard Worker     const SymAddr sym_addr = it->addr;
347*6dbdd20aSAndroid Build Coastguard Worker 
348*6dbdd20aSAndroid Build Coastguard Worker     // Find the iterator to the first token of the next symbol (or the end).
349*6dbdd20aSAndroid Build Coastguard Worker     auto sym_start = it;
350*6dbdd20aSAndroid Build Coastguard Worker     auto sym_end = it;
351*6dbdd20aSAndroid Build Coastguard Worker     while (sym_end != symbol_tokens.end() && sym_end->addr == sym_addr)
352*6dbdd20aSAndroid Build Coastguard Worker       ++sym_end;
353*6dbdd20aSAndroid Build Coastguard Worker 
354*6dbdd20aSAndroid Build Coastguard Worker     // The range [sym_start, sym_end) has all the tokens for the current symbol.
355*6dbdd20aSAndroid Build Coastguard Worker     uint32_t size_before = static_cast<uint32_t>(wptr - buf_.data());
356*6dbdd20aSAndroid Build Coastguard Worker 
357*6dbdd20aSAndroid Build Coastguard Worker     // Make sure there is enough headroom to write the symbol.
358*6dbdd20aSAndroid Build Coastguard Worker     if (buf_.size() - size_before < 1024) {
359*6dbdd20aSAndroid Build Coastguard Worker       buf_.resize(buf_.size() + 32768);
360*6dbdd20aSAndroid Build Coastguard Worker       wptr = buf_.data() + size_before;
361*6dbdd20aSAndroid Build Coastguard Worker     }
362*6dbdd20aSAndroid Build Coastguard Worker 
363*6dbdd20aSAndroid Build Coastguard Worker     uint32_t sym_rel_addr = static_cast<uint32_t>(sym_addr - base_addr_);
364*6dbdd20aSAndroid Build Coastguard Worker     const size_t sym_num = num_syms_++;
365*6dbdd20aSAndroid Build Coastguard Worker     if (sym_num % kSymIndexSampling == 0)
366*6dbdd20aSAndroid Build Coastguard Worker       index_.emplace_back(std::make_pair(sym_rel_addr, size_before));
367*6dbdd20aSAndroid Build Coastguard Worker     PERFETTO_DCHECK(sym_addr >= prev_sym_addr);
368*6dbdd20aSAndroid Build Coastguard Worker     uint32_t delta = static_cast<uint32_t>(sym_addr - prev_sym_addr);
369*6dbdd20aSAndroid Build Coastguard Worker     wptr = protozero::proto_utils::WriteVarInt(delta, wptr);
370*6dbdd20aSAndroid Build Coastguard Worker     // Append all the token ids.
371*6dbdd20aSAndroid Build Coastguard Worker     for (it = sym_start; it != sym_end;) {
372*6dbdd20aSAndroid Build Coastguard Worker       PERFETTO_DCHECK(it->addr == sym_addr);
373*6dbdd20aSAndroid Build Coastguard Worker       TokenMapPtr const token_map_entry = it->token_map_entry;
374*6dbdd20aSAndroid Build Coastguard Worker       const TokenInfo& token_info = token_map_entry->second;
375*6dbdd20aSAndroid Build Coastguard Worker       TokenId token_id = token_info.id << 1;
376*6dbdd20aSAndroid Build Coastguard Worker       ++it;
377*6dbdd20aSAndroid Build Coastguard Worker       token_id |= (it == sym_end) ? 1 : 0;  // Last one has LSB set to 1.
378*6dbdd20aSAndroid Build Coastguard Worker       wptr = protozero::proto_utils::WriteVarInt(token_id, wptr);
379*6dbdd20aSAndroid Build Coastguard Worker     }
380*6dbdd20aSAndroid Build Coastguard Worker     prev_sym_addr = sym_addr;
381*6dbdd20aSAndroid Build Coastguard Worker   }  // for (symbols)
382*6dbdd20aSAndroid Build Coastguard Worker 
383*6dbdd20aSAndroid Build Coastguard Worker   buf_.resize(static_cast<size_t>(wptr - buf_.data()));
384*6dbdd20aSAndroid Build Coastguard Worker   buf_.shrink_to_fit();
385*6dbdd20aSAndroid Build Coastguard Worker   base::MaybeReleaseAllocatorMemToOS();  // For Scudo, b/170217718.
386*6dbdd20aSAndroid Build Coastguard Worker 
387*6dbdd20aSAndroid Build Coastguard Worker   if (num_syms_ == 0) {
388*6dbdd20aSAndroid Build Coastguard Worker     PERFETTO_ELOG(
389*6dbdd20aSAndroid Build Coastguard Worker         "Failed to parse kallsyms. Kernel functions will not be symbolized. On "
390*6dbdd20aSAndroid Build Coastguard Worker         "Linux this requires either running traced_probes as root or manually "
391*6dbdd20aSAndroid Build Coastguard Worker         "lowering /proc/sys/kernel/kptr_restrict");
392*6dbdd20aSAndroid Build Coastguard Worker   } else {
393*6dbdd20aSAndroid Build Coastguard Worker     PERFETTO_DLOG(
394*6dbdd20aSAndroid Build Coastguard Worker         "Loaded %zu kalllsyms entries. Mem usage: %zu B (addresses) + %zu B "
395*6dbdd20aSAndroid Build Coastguard Worker         "(tokens), total: %zu B",
396*6dbdd20aSAndroid Build Coastguard Worker         num_syms_, addr_bytes(), tokens_.size_bytes(), size_bytes());
397*6dbdd20aSAndroid Build Coastguard Worker   }
398*6dbdd20aSAndroid Build Coastguard Worker 
399*6dbdd20aSAndroid Build Coastguard Worker   return num_syms_;
400*6dbdd20aSAndroid Build Coastguard Worker }
401*6dbdd20aSAndroid Build Coastguard Worker 
Lookup(uint64_t sym_addr)402*6dbdd20aSAndroid Build Coastguard Worker std::string KernelSymbolMap::Lookup(uint64_t sym_addr) {
403*6dbdd20aSAndroid Build Coastguard Worker   if (index_.empty() || sym_addr < base_addr_)
404*6dbdd20aSAndroid Build Coastguard Worker     return "";
405*6dbdd20aSAndroid Build Coastguard Worker 
406*6dbdd20aSAndroid Build Coastguard Worker   // First find the highest symbol address <= sym_addr.
407*6dbdd20aSAndroid Build Coastguard Worker   // Start with a binary search using the sparse index.
408*6dbdd20aSAndroid Build Coastguard Worker 
409*6dbdd20aSAndroid Build Coastguard Worker   const uint32_t sym_rel_addr = static_cast<uint32_t>(sym_addr - base_addr_);
410*6dbdd20aSAndroid Build Coastguard Worker   auto it = std::upper_bound(index_.cbegin(), index_.cend(),
411*6dbdd20aSAndroid Build Coastguard Worker                              std::make_pair(sym_rel_addr, 0u));
412*6dbdd20aSAndroid Build Coastguard Worker   if (it != index_.cbegin())
413*6dbdd20aSAndroid Build Coastguard Worker     --it;
414*6dbdd20aSAndroid Build Coastguard Worker 
415*6dbdd20aSAndroid Build Coastguard Worker   // Then continue with a linear scan (of at most kSymIndexSampling steps).
416*6dbdd20aSAndroid Build Coastguard Worker   uint32_t addr = it->first;
417*6dbdd20aSAndroid Build Coastguard Worker   uint32_t off = it->second;
418*6dbdd20aSAndroid Build Coastguard Worker   const uint8_t* rdptr = &buf_[off];
419*6dbdd20aSAndroid Build Coastguard Worker   const uint8_t* const buf_end = buf_.data() + buf_.size();
420*6dbdd20aSAndroid Build Coastguard Worker   bool parsing_addr = true;
421*6dbdd20aSAndroid Build Coastguard Worker   const uint8_t* next_rdptr = nullptr;
422*6dbdd20aSAndroid Build Coastguard Worker   uint64_t sym_start_addr = 0;
423*6dbdd20aSAndroid Build Coastguard Worker   for (bool is_first_addr = true;; is_first_addr = false) {
424*6dbdd20aSAndroid Build Coastguard Worker     uint64_t v = 0;
425*6dbdd20aSAndroid Build Coastguard Worker     const auto* prev_rdptr = rdptr;
426*6dbdd20aSAndroid Build Coastguard Worker     rdptr = protozero::proto_utils::ParseVarInt(rdptr, buf_end, &v);
427*6dbdd20aSAndroid Build Coastguard Worker     if (rdptr == prev_rdptr)
428*6dbdd20aSAndroid Build Coastguard Worker       break;
429*6dbdd20aSAndroid Build Coastguard Worker     if (parsing_addr) {
430*6dbdd20aSAndroid Build Coastguard Worker       addr += is_first_addr ? 0 : static_cast<uint32_t>(v);
431*6dbdd20aSAndroid Build Coastguard Worker       parsing_addr = false;
432*6dbdd20aSAndroid Build Coastguard Worker       if (addr > sym_rel_addr)
433*6dbdd20aSAndroid Build Coastguard Worker         break;
434*6dbdd20aSAndroid Build Coastguard Worker       next_rdptr = rdptr;
435*6dbdd20aSAndroid Build Coastguard Worker       sym_start_addr = addr;
436*6dbdd20aSAndroid Build Coastguard Worker     } else {
437*6dbdd20aSAndroid Build Coastguard Worker       // This is a token. Wait for the EOF maker.
438*6dbdd20aSAndroid Build Coastguard Worker       parsing_addr = (v & 1) == 1;
439*6dbdd20aSAndroid Build Coastguard Worker     }
440*6dbdd20aSAndroid Build Coastguard Worker   }
441*6dbdd20aSAndroid Build Coastguard Worker 
442*6dbdd20aSAndroid Build Coastguard Worker   if (!next_rdptr)
443*6dbdd20aSAndroid Build Coastguard Worker     return "";
444*6dbdd20aSAndroid Build Coastguard Worker 
445*6dbdd20aSAndroid Build Coastguard Worker   PERFETTO_DCHECK(sym_rel_addr >= sym_start_addr);
446*6dbdd20aSAndroid Build Coastguard Worker 
447*6dbdd20aSAndroid Build Coastguard Worker   // If this address is too far from the start of the symbol, this is likely
448*6dbdd20aSAndroid Build Coastguard Worker   // a pointer to something else (e.g. some vmalloc struct) and we just picked
449*6dbdd20aSAndroid Build Coastguard Worker   // the very last symbol for a loader region.
450*6dbdd20aSAndroid Build Coastguard Worker   if (sym_rel_addr - sym_start_addr > kSymMaxSizeBytes)
451*6dbdd20aSAndroid Build Coastguard Worker     return "";
452*6dbdd20aSAndroid Build Coastguard Worker 
453*6dbdd20aSAndroid Build Coastguard Worker   // The address has been found. Now rejoin the tokens to form the symbol name.
454*6dbdd20aSAndroid Build Coastguard Worker 
455*6dbdd20aSAndroid Build Coastguard Worker   rdptr = next_rdptr;
456*6dbdd20aSAndroid Build Coastguard Worker   std::string sym_name;
457*6dbdd20aSAndroid Build Coastguard Worker   sym_name.reserve(kSymNameMaxLen);
458*6dbdd20aSAndroid Build Coastguard Worker   for (bool eof = false, is_first_token = true; !eof; is_first_token = false) {
459*6dbdd20aSAndroid Build Coastguard Worker     uint64_t v = 0;
460*6dbdd20aSAndroid Build Coastguard Worker     const auto* old = rdptr;
461*6dbdd20aSAndroid Build Coastguard Worker     rdptr = protozero::proto_utils::ParseVarInt(rdptr, buf_end, &v);
462*6dbdd20aSAndroid Build Coastguard Worker     if (rdptr == old)
463*6dbdd20aSAndroid Build Coastguard Worker       break;
464*6dbdd20aSAndroid Build Coastguard Worker     eof = v & 1;
465*6dbdd20aSAndroid Build Coastguard Worker     base::StringView token = tokens_.Lookup(static_cast<TokenId>(v >> 1));
466*6dbdd20aSAndroid Build Coastguard Worker     if (!is_first_token)
467*6dbdd20aSAndroid Build Coastguard Worker       sym_name.push_back('_');
468*6dbdd20aSAndroid Build Coastguard Worker     for (size_t i = 0; i < token.size(); i++)
469*6dbdd20aSAndroid Build Coastguard Worker       sym_name.push_back(token.at(i) & 0x7f);
470*6dbdd20aSAndroid Build Coastguard Worker   }
471*6dbdd20aSAndroid Build Coastguard Worker   return sym_name;
472*6dbdd20aSAndroid Build Coastguard Worker }
473*6dbdd20aSAndroid Build Coastguard Worker 
474*6dbdd20aSAndroid Build Coastguard Worker }  // namespace perfetto
475