1*6dbdd20aSAndroid Build Coastguard Worker /* 2*6dbdd20aSAndroid Build Coastguard Worker * Copyright (C) 2019 The Android Open Source Project 3*6dbdd20aSAndroid Build Coastguard Worker * 4*6dbdd20aSAndroid Build Coastguard Worker * Licensed under the Apache License, Version 2.0 (the "License"); 5*6dbdd20aSAndroid Build Coastguard Worker * you may not use this file except in compliance with the License. 6*6dbdd20aSAndroid Build Coastguard Worker * You may obtain a copy of the License at 7*6dbdd20aSAndroid Build Coastguard Worker * 8*6dbdd20aSAndroid Build Coastguard Worker * http://www.apache.org/licenses/LICENSE-2.0 9*6dbdd20aSAndroid Build Coastguard Worker * 10*6dbdd20aSAndroid Build Coastguard Worker * Unless required by applicable law or agreed to in writing, software 11*6dbdd20aSAndroid Build Coastguard Worker * distributed under the License is distributed on an "AS IS" BASIS, 12*6dbdd20aSAndroid Build Coastguard Worker * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13*6dbdd20aSAndroid Build Coastguard Worker * See the License for the specific language governing permissions and 14*6dbdd20aSAndroid Build Coastguard Worker * limitations under the License. 15*6dbdd20aSAndroid Build Coastguard Worker */ 16*6dbdd20aSAndroid Build Coastguard Worker 17*6dbdd20aSAndroid Build Coastguard Worker #ifndef SRC_KALLSYMS_KERNEL_SYMBOL_MAP_H_ 18*6dbdd20aSAndroid Build Coastguard Worker #define SRC_KALLSYMS_KERNEL_SYMBOL_MAP_H_ 19*6dbdd20aSAndroid Build Coastguard Worker 20*6dbdd20aSAndroid Build Coastguard Worker #include <stdint.h> 21*6dbdd20aSAndroid Build Coastguard Worker 22*6dbdd20aSAndroid Build Coastguard Worker #include <array> 23*6dbdd20aSAndroid Build Coastguard Worker #include <forward_list> 24*6dbdd20aSAndroid Build Coastguard Worker #include <string> 25*6dbdd20aSAndroid Build Coastguard Worker #include <vector> 26*6dbdd20aSAndroid Build Coastguard Worker 27*6dbdd20aSAndroid Build Coastguard Worker namespace perfetto { 28*6dbdd20aSAndroid Build Coastguard Worker 29*6dbdd20aSAndroid Build Coastguard Worker namespace base { 30*6dbdd20aSAndroid Build Coastguard Worker class StringView; 31*6dbdd20aSAndroid Build Coastguard Worker } 32*6dbdd20aSAndroid Build Coastguard Worker 33*6dbdd20aSAndroid Build Coastguard Worker // A parser and memory-efficient container for /proc/kallsyms. 34*6dbdd20aSAndroid Build Coastguard Worker // It can store a full kernel symbol table in ~1.2MB of memory and perform fast 35*6dbdd20aSAndroid Build Coastguard Worker // lookups using logarithmic binary searches + bounded linear scans. 36*6dbdd20aSAndroid Build Coastguard Worker // 37*6dbdd20aSAndroid Build Coastguard Worker // /proc/kallsyms is a ~10 MB text file that contains the map of kernel symbols, 38*6dbdd20aSAndroid Build Coastguard Worker // as follows: 39*6dbdd20aSAndroid Build Coastguard Worker // ffffff8f77682f8c t el0_sync_invalid 40*6dbdd20aSAndroid Build Coastguard Worker // ffffff8f77683060 t el0_irq_invalid 41*6dbdd20aSAndroid Build Coastguard Worker // ... 42*6dbdd20aSAndroid Build Coastguard Worker // In a typipcal Android kernel, it consists of 213K lines. Out of these, only 43*6dbdd20aSAndroid Build Coastguard Worker // 116K are interesting for the sake of symbolizing kernel functions, the rest 44*6dbdd20aSAndroid Build Coastguard Worker // are .rodata (variables), weak or other useless symbols. 45*6dbdd20aSAndroid Build Coastguard Worker // Still, even keeping around 116K pointers would require 116K * 8 ~= 1 MB of 46*6dbdd20aSAndroid Build Coastguard Worker // memory, without accounting for any strings for the symbols names. 47*6dbdd20aSAndroid Build Coastguard Worker // The SUM(str.len) for the 116K symbol names adds up to 2.7 MB (without 48*6dbdd20aSAndroid Build Coastguard Worker // counting their addresses). 49*6dbdd20aSAndroid Build Coastguard Worker // However consider the following: 50*6dbdd20aSAndroid Build Coastguard Worker // - Symbol addresses are mostly contiguous. Modulo the initial KASLR loading 51*6dbdd20aSAndroid Build Coastguard Worker // address, most symbols are few hundreds bytes apart from each other. 52*6dbdd20aSAndroid Build Coastguard Worker // - Symbol names are made of tokens that are quite frequent (token: the result 53*6dbdd20aSAndroid Build Coastguard Worker // of name.split('_')). If we tokenize the 2.7 MB of strings, the resulting 54*6dbdd20aSAndroid Build Coastguard Worker // SUM(distinct_token.len) goes down 2.7MB -> 146 KB. This is because tokens 55*6dbdd20aSAndroid Build Coastguard Worker // like "get", "set" or "event" show up thousands of times. 56*6dbdd20aSAndroid Build Coastguard Worker // - Symbol names are ASCII strings using only 7 out of 8 bits. 57*6dbdd20aSAndroid Build Coastguard Worker // 58*6dbdd20aSAndroid Build Coastguard Worker // In the light of this, the in-memory architecture of this data structure is 59*6dbdd20aSAndroid Build Coastguard Worker // as follows: 60*6dbdd20aSAndroid Build Coastguard Worker // We keep two tables around: (1) a token table and (2) a symbol table. Both 61*6dbdd20aSAndroid Build Coastguard Worker // table are a flat byte vector with some sparse lookaside index to make lookups 62*6dbdd20aSAndroid Build Coastguard Worker // faster and avoid full linear scans. 63*6dbdd20aSAndroid Build Coastguard Worker // 64*6dbdd20aSAndroid Build Coastguard Worker // Token table 65*6dbdd20aSAndroid Build Coastguard Worker // ----------- 66*6dbdd20aSAndroid Build Coastguard Worker // The token table is a flat char buffer. Tokens are variable size (>0). Each 67*6dbdd20aSAndroid Build Coastguard Worker // token is identified by its ordinality, so token id 3 is the 3rd token in 68*6dbdd20aSAndroid Build Coastguard Worker // the table. All tokens are concatenated together. 69*6dbdd20aSAndroid Build Coastguard Worker // Given the ASCII encoding, the MSB is used as a terminator. So instead of 70*6dbdd20aSAndroid Build Coastguard Worker // wasting an extra NUL byte for each string, the last char of each token has 71*6dbdd20aSAndroid Build Coastguard Worker // the MSB set. 72*6dbdd20aSAndroid Build Coastguard Worker // Furthermore, a lookaside index stores the offset of tokens (i.e. Token N 73*6dbdd20aSAndroid Build Coastguard Worker // starts at offset O in the buffer) to allow fast lookups. In order to avoid 74*6dbdd20aSAndroid Build Coastguard Worker // wasting too much memory, the index is sparse and track the offsets of only 75*6dbdd20aSAndroid Build Coastguard Worker // one every kTokenIndexSamplinig tokens. 76*6dbdd20aSAndroid Build Coastguard Worker // When looking up a token ID N, the table seeks at the offset of the closest 77*6dbdd20aSAndroid Build Coastguard Worker // token <= N, and then scans linearly the next (at most kTokenIndexSamplinig) 78*6dbdd20aSAndroid Build Coastguard Worker // tokens, counting the MSBs found, until the right token id is found. 79*6dbdd20aSAndroid Build Coastguard Worker // buf: set*get*kernel*load*fpsimd*return*wrapper*el0*skip*sync*neon*bit*aes 80*6dbdd20aSAndroid Build Coastguard Worker // ^ ^ ^ 81*6dbdd20aSAndroid Build Coastguard Worker // | | | 82*6dbdd20aSAndroid Build Coastguard Worker // index: 0@0 4@15 8@21 83*6dbdd20aSAndroid Build Coastguard Worker 84*6dbdd20aSAndroid Build Coastguard Worker // Symbol table 85*6dbdd20aSAndroid Build Coastguard Worker // ------------ 86*6dbdd20aSAndroid Build Coastguard Worker // The symbol table is a flat char buffer that stores for each symbol: its 87*6dbdd20aSAndroid Build Coastguard Worker // address + the list of token indexes in the token table. The main caveats are 88*6dbdd20aSAndroid Build Coastguard Worker // that: 89*6dbdd20aSAndroid Build Coastguard Worker // - Symbol addresses are delta encoded (delta from prev symbol's addr). 90*6dbdd20aSAndroid Build Coastguard Worker // - Both delta addresses and token indexes are var-int encoded. 91*6dbdd20aSAndroid Build Coastguard Worker // - The LSB of token indexes is used as EOF marker (i.e. the next varint is 92*6dbdd20aSAndroid Build Coastguard Worker // the delta-addr for the next symbol). This time the LSB is used because of 93*6dbdd20aSAndroid Build Coastguard Worker // the varint encoding. 94*6dbdd20aSAndroid Build Coastguard Worker // At parsing time symbols are ordered by address and tokens are sorted by 95*6dbdd20aSAndroid Build Coastguard Worker // frequency, so that the top used 64 tokens can be represented with 1 byte. 96*6dbdd20aSAndroid Build Coastguard Worker // (Rationale for 64: 1 byte = 8 bits. The MSB bit of each byte is used for the 97*6dbdd20aSAndroid Build Coastguard Worker // varint encoding, the LSB bit of each number is used as end-of-tokens marker. 98*6dbdd20aSAndroid Build Coastguard Worker // There are 6 bits left -> 64 indexes can be represented using one byte). 99*6dbdd20aSAndroid Build Coastguard Worker // In summary the symbol table looks as follows: 100*6dbdd20aSAndroid Build Coastguard Worker // 101*6dbdd20aSAndroid Build Coastguard Worker // Base address: 0xbeef0000 102*6dbdd20aSAndroid Build Coastguard Worker // Symbol buffer: 103*6dbdd20aSAndroid Build Coastguard Worker // 0 1|0 4|0 6|1 // 0xbeef0000: 1,4,6 -> get_fpsimd_wrapper 104*6dbdd20aSAndroid Build Coastguard Worker // 8 7|0 3|1 // 0xbeef0008: 7,3 -> el0_load 105*6dbdd20aSAndroid Build Coastguard Worker // ... 106*6dbdd20aSAndroid Build Coastguard Worker // Like in the case of the token table, a lookaside index keeps track of the 107*6dbdd20aSAndroid Build Coastguard Worker // offset of one every kSymIndexSamplinig addresses. 108*6dbdd20aSAndroid Build Coastguard Worker // The Lookup(ADDR) function operates as follows: 109*6dbdd20aSAndroid Build Coastguard Worker // 1. Performs a logarithmic binary search in the symbols index, finding the 110*6dbdd20aSAndroid Build Coastguard Worker // offset of the closest addres <= ADDR. 111*6dbdd20aSAndroid Build Coastguard Worker // 2. Skip over at most kSymIndexSamplinig until the symbol is found. 112*6dbdd20aSAndroid Build Coastguard Worker // 3. For each token index, lookup the corresponding token string and 113*6dbdd20aSAndroid Build Coastguard Worker // concatenate them to build the symbol name. 114*6dbdd20aSAndroid Build Coastguard Worker 115*6dbdd20aSAndroid Build Coastguard Worker class KernelSymbolMap { 116*6dbdd20aSAndroid Build Coastguard Worker public: 117*6dbdd20aSAndroid Build Coastguard Worker // The two constants below are changeable only for the benchmark use. 118*6dbdd20aSAndroid Build Coastguard Worker // Trades off size of the root |index_| vs worst-case linear scans size. 119*6dbdd20aSAndroid Build Coastguard Worker // A higher number makes the index more sparse. 120*6dbdd20aSAndroid Build Coastguard Worker static size_t kSymIndexSampling; 121*6dbdd20aSAndroid Build Coastguard Worker 122*6dbdd20aSAndroid Build Coastguard Worker // Trades off size of the TokenTable |index_| vs worst-case linear scans size. 123*6dbdd20aSAndroid Build Coastguard Worker static size_t kTokenIndexSampling; 124*6dbdd20aSAndroid Build Coastguard Worker 125*6dbdd20aSAndroid Build Coastguard Worker // Parses a kallsyms file. Returns the number of valid symbols decoded. 126*6dbdd20aSAndroid Build Coastguard Worker size_t Parse(const std::string& kallsyms_path); 127*6dbdd20aSAndroid Build Coastguard Worker 128*6dbdd20aSAndroid Build Coastguard Worker // Looks up the closest symbol (i.e. the one with the highest address <= 129*6dbdd20aSAndroid Build Coastguard Worker // |addr|) from its absolute 64-bit address. 130*6dbdd20aSAndroid Build Coastguard Worker // Returns an empty string if the symbol is not found (which can happen only 131*6dbdd20aSAndroid Build Coastguard Worker // if the passed |addr| is < min(addr)). 132*6dbdd20aSAndroid Build Coastguard Worker std::string Lookup(uint64_t addr); 133*6dbdd20aSAndroid Build Coastguard Worker 134*6dbdd20aSAndroid Build Coastguard Worker // Returns the numberr of valid symbols decoded. num_syms()135*6dbdd20aSAndroid Build Coastguard Worker size_t num_syms() const { return num_syms_; } 136*6dbdd20aSAndroid Build Coastguard Worker 137*6dbdd20aSAndroid Build Coastguard Worker // Returns the size in bytes used by the adddress table (without counting 138*6dbdd20aSAndroid Build Coastguard Worker // the tokens). addr_bytes()139*6dbdd20aSAndroid Build Coastguard Worker size_t addr_bytes() const { return buf_.size() + index_.size() * 8; } 140*6dbdd20aSAndroid Build Coastguard Worker 141*6dbdd20aSAndroid Build Coastguard Worker // Returns the total memory usage in bytes. size_bytes()142*6dbdd20aSAndroid Build Coastguard Worker size_t size_bytes() const { return addr_bytes() + tokens_.size_bytes(); } 143*6dbdd20aSAndroid Build Coastguard Worker 144*6dbdd20aSAndroid Build Coastguard Worker // Token table. 145*6dbdd20aSAndroid Build Coastguard Worker class TokenTable { 146*6dbdd20aSAndroid Build Coastguard Worker public: 147*6dbdd20aSAndroid Build Coastguard Worker using TokenId = uint32_t; 148*6dbdd20aSAndroid Build Coastguard Worker TokenTable(); 149*6dbdd20aSAndroid Build Coastguard Worker ~TokenTable(); 150*6dbdd20aSAndroid Build Coastguard Worker TokenId Add(const std::string&); 151*6dbdd20aSAndroid Build Coastguard Worker base::StringView Lookup(TokenId); size_bytes()152*6dbdd20aSAndroid Build Coastguard Worker size_t size_bytes() const { return buf_.size() + index_.size() * 4; } 153*6dbdd20aSAndroid Build Coastguard Worker shrink_to_fit()154*6dbdd20aSAndroid Build Coastguard Worker void shrink_to_fit() { 155*6dbdd20aSAndroid Build Coastguard Worker buf_.shrink_to_fit(); 156*6dbdd20aSAndroid Build Coastguard Worker index_.shrink_to_fit(); 157*6dbdd20aSAndroid Build Coastguard Worker } 158*6dbdd20aSAndroid Build Coastguard Worker 159*6dbdd20aSAndroid Build Coastguard Worker private: 160*6dbdd20aSAndroid Build Coastguard Worker TokenId num_tokens_ = 0; 161*6dbdd20aSAndroid Build Coastguard Worker 162*6dbdd20aSAndroid Build Coastguard Worker std::vector<char> buf_; // Token buffer. 163*6dbdd20aSAndroid Build Coastguard Worker 164*6dbdd20aSAndroid Build Coastguard Worker // The value i-th in the vector contains the offset (within |buf_|) of the 165*6dbdd20aSAndroid Build Coastguard Worker // (i * kTokenIndexSamplinig)-th token. 166*6dbdd20aSAndroid Build Coastguard Worker std::vector<uint32_t> index_; 167*6dbdd20aSAndroid Build Coastguard Worker }; 168*6dbdd20aSAndroid Build Coastguard Worker 169*6dbdd20aSAndroid Build Coastguard Worker private: 170*6dbdd20aSAndroid Build Coastguard Worker TokenTable tokens_; // Token table. 171*6dbdd20aSAndroid Build Coastguard Worker 172*6dbdd20aSAndroid Build Coastguard Worker uint64_t base_addr_ = 0; // Address of the first symbol (after sorting). 173*6dbdd20aSAndroid Build Coastguard Worker size_t num_syms_ = 0; // Number of valid symbols stored. 174*6dbdd20aSAndroid Build Coastguard Worker std::vector<uint8_t> buf_; // Symbol buffer. 175*6dbdd20aSAndroid Build Coastguard Worker 176*6dbdd20aSAndroid Build Coastguard Worker // The key is (address - base_addr_), the value is the byte offset in |buf_| 177*6dbdd20aSAndroid Build Coastguard Worker // where the symbol entry starts (i.e. the start of the varint that tells the 178*6dbdd20aSAndroid Build Coastguard Worker // delta from the previous symbol). 179*6dbdd20aSAndroid Build Coastguard Worker std::vector<std::pair<uint32_t /*rel_addr*/, uint32_t /*offset*/>> index_; 180*6dbdd20aSAndroid Build Coastguard Worker }; 181*6dbdd20aSAndroid Build Coastguard Worker 182*6dbdd20aSAndroid Build Coastguard Worker } // namespace perfetto 183*6dbdd20aSAndroid Build Coastguard Worker 184*6dbdd20aSAndroid Build Coastguard Worker #endif // SRC_KALLSYMS_KERNEL_SYMBOL_MAP_H_ 185