xref: /aosp_15_r20/external/perfetto/src/profiling/perf/regs_parsing.cc (revision 6dbdd20afdafa5e3ca9b8809fa73465d530080dc)
1 /*
2  * Copyright (C) 2019 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/profiling/perf/regs_parsing.h"
18 
19 #include <linux/perf_event.h>
20 #include <stdint.h>
21 #include <unistd.h>
22 
23 #include <cinttypes>
24 #include <memory>
25 
26 #include <unwindstack/Elf.h>
27 #include <unwindstack/MachineArm.h>
28 #include <unwindstack/MachineArm64.h>
29 #include <unwindstack/MachineRiscv64.h>
30 #include <unwindstack/Regs.h>
31 #include <unwindstack/RegsArm.h>
32 #include <unwindstack/RegsArm64.h>
33 #include <unwindstack/RegsRiscv64.h>
34 #include <unwindstack/RegsX86.h>
35 #include <unwindstack/RegsX86_64.h>
36 #include <unwindstack/UserArm.h>
37 #include <unwindstack/UserArm64.h>
38 #include <unwindstack/UserRiscv64.h>
39 #include <unwindstack/UserX86.h>
40 #include <unwindstack/UserX86_64.h>
41 
42 // kernel uapi headers
43 #include <uapi/asm-arm/asm/perf_regs.h>
44 #undef PERF_REG_EXTENDED_MASK
45 #include <uapi/asm-x86/asm/perf_regs.h>
46 #undef PERF_REG_EXTENDED_MASK
47 #define perf_event_arm_regs perf_event_arm64_regs
48 #include <uapi/asm-arm64/asm/perf_regs.h>
49 #undef PERF_REG_EXTENDED_MASK
50 #undef perf_event_arm_regs
51 #include <uapi/asm-riscv/asm/perf_regs.h>
52 #undef PERF_REG_EXTENDED_MASK
53 
54 namespace perfetto {
55 namespace profiling {
56 
57 namespace {
58 
// Compile-time capable maximum of two sizes. When both arguments are equal,
// that common value is returned.
constexpr size_t constexpr_max(size_t x, size_t y) {
  if (y < x)
    return x;
  return y;
}
62 
// Copies sizeof(T) bytes starting at |ptr| into |*value_out| (byte-wise, via
// memcpy) and returns the address immediately after the bytes consumed, so
// calls can be chained to walk a buffer.
template <typename T>
const char* ReadValue(T* value_out, const char* ptr) {
  constexpr size_t kValueBytes = sizeof(T);
  memcpy(value_out, ptr, kValueBytes);
  return ptr + kValueBytes;
}
68 
69 // Supported configurations:
70 // * 32 bit daemon, 32 bit userspace
71 // * 64 bit daemon, mixed bitness userspace
72 // Therefore give the kernel the mask corresponding to our build architecture.
73 // Register parsing handles the mixed userspace ABI cases.
74 // For simplicity, we ask for as many registers as we can, even if not all of
75 // them will be used during unwinding.
76 // TODO(rsavitski): cleanly detect 32 bit traced_perf builds being side-loaded
77 // onto a system with 64 bit userspace processes.
PerfUserRegsMask(unwindstack::ArchEnum arch)78 uint64_t PerfUserRegsMask(unwindstack::ArchEnum arch) {
79   switch (static_cast<uint8_t>(arch)) {  // cast to please -Wswitch-enum
80     case unwindstack::ARCH_ARM64:
81       return (1ULL << PERF_REG_ARM64_MAX) - 1;
82     case unwindstack::ARCH_ARM:
83       return (1ULL << PERF_REG_ARM_MAX) - 1;
84     // perf on x86_64 doesn't allow sampling ds/es/fs/gs registers. See
85     // arch/x86/kernel/perf_regs.c in the kernel.
86     case unwindstack::ARCH_X86_64:
87       return (((1ULL << PERF_REG_X86_64_MAX) - 1) & ~(1ULL << PERF_REG_X86_DS) &
88               ~(1ULL << PERF_REG_X86_ES) & ~(1ULL << PERF_REG_X86_FS) &
89               ~(1ULL << PERF_REG_X86_GS));
90     // Note: excluding these segment registers might not be necessary on x86,
91     // but they won't be used anyway (so follow x64).
92     case unwindstack::ARCH_X86:
93       return ((1ULL << PERF_REG_X86_32_MAX) - 1) & ~(1ULL << PERF_REG_X86_DS) &
94              ~(1ULL << PERF_REG_X86_ES) & ~(1ULL << PERF_REG_X86_FS) &
95              ~(1ULL << PERF_REG_X86_GS);
96     case unwindstack::ARCH_RISCV64:
97       return (1ULL << PERF_REG_RISCV_MAX) - 1;
98     default:
99       PERFETTO_FATAL("Unsupported architecture");
100   }
101 }
102 
103 // Adjusts the given architecture enum based on the ABI (as recorded in the perf
104 // sample). Note: we do not support 64 bit samples on a 32 bit daemon build, so
105 // this only converts from 64 bit to 32 bit architectures.
106 // TODO(rsavitski): on riscv64, are 32 bit userspace processes posible?
ArchForAbi(unwindstack::ArchEnum arch,uint64_t abi)107 unwindstack::ArchEnum ArchForAbi(unwindstack::ArchEnum arch, uint64_t abi) {
108   if (arch == unwindstack::ARCH_ARM64 && abi == PERF_SAMPLE_REGS_ABI_32) {
109     return unwindstack::ARCH_ARM;
110   }
111   if (arch == unwindstack::ARCH_X86_64 && abi == PERF_SAMPLE_REGS_ABI_32) {
112     return unwindstack::ARCH_X86;
113   }
114   return arch;
115 }
116 
117 // Register values as an array, indexed using the kernel uapi perf_events.h enum
118 // values. Unsampled values will be left as zeroes.
119 struct RawRegisterData {
120   static constexpr uint64_t kMaxSize =
121       constexpr_max(constexpr_max(PERF_REG_ARM_MAX, PERF_REG_ARM64_MAX),
122                     constexpr_max(PERF_REG_X86_64_MAX, PERF_REG_RISCV_MAX));
123   uint64_t regs[kMaxSize] = {};
124 };
125 
126 // First converts the |RawRegisterData| array to libunwindstack's "user"
127 // register structs (which match the ptrace/coredump format, also available at
128 // <sys/user.h>), then constructs the relevant unwindstack::Regs subclass out
129 // of the latter.
ToLibUnwindstackRegs(const RawRegisterData & raw_regs,unwindstack::ArchEnum arch)130 std::unique_ptr<unwindstack::Regs> ToLibUnwindstackRegs(
131     const RawRegisterData& raw_regs,
132     unwindstack::ArchEnum arch) {
133   if (arch == unwindstack::ARCH_ARM64) {
134     static_assert(static_cast<int>(unwindstack::ARM64_REG_R0) ==
135                           static_cast<int>(PERF_REG_ARM64_X0) &&
136                       static_cast<int>(unwindstack::ARM64_REG_R0) == 0,
137                   "register layout mismatch");
138     static_assert(static_cast<int>(unwindstack::ARM64_REG_R30) ==
139                       static_cast<int>(PERF_REG_ARM64_LR),
140                   "register layout mismatch");
141     // Both the perf_event register order and the "user" format are derived from
142     // "struct pt_regs", so we can directly memcpy the first 31 regs (up to and
143     // including LR).
144     unwindstack::arm64_user_regs arm64_user_regs = {};
145     memcpy(&arm64_user_regs.regs[0], &raw_regs.regs[0],
146            sizeof(uint64_t) * (PERF_REG_ARM64_LR + 1));
147     arm64_user_regs.sp = raw_regs.regs[PERF_REG_ARM64_SP];
148     arm64_user_regs.pc = raw_regs.regs[PERF_REG_ARM64_PC];
149     return std::unique_ptr<unwindstack::Regs>(
150         unwindstack::RegsArm64::Read(&arm64_user_regs));
151   }
152 
153   if (arch == unwindstack::ARCH_ARM) {
154     static_assert(static_cast<int>(unwindstack::ARM_REG_R0) ==
155                           static_cast<int>(PERF_REG_ARM_R0) &&
156                       static_cast<int>(unwindstack::ARM_REG_R0) == 0,
157                   "register layout mismatch");
158     static_assert(static_cast<int>(unwindstack::ARM_REG_LAST) ==
159                       static_cast<int>(PERF_REG_ARM_MAX),
160                   "register layout mismatch");
161     // As with arm64, the layouts match, but we need to downcast to u32.
162     unwindstack::arm_user_regs arm_user_regs = {};
163     for (size_t i = 0; i < unwindstack::ARM_REG_LAST; i++) {
164       arm_user_regs.regs[i] = static_cast<uint32_t>(raw_regs.regs[i]);
165     }
166     return std::unique_ptr<unwindstack::Regs>(
167         unwindstack::RegsArm::Read(&arm_user_regs));
168   }
169 
170   if (arch == unwindstack::ARCH_X86_64) {
171     // We've sampled more registers than what libunwindstack will use. Don't
172     // copy over cs/ss/flags.
173     unwindstack::x86_64_user_regs x86_64_user_regs = {};
174     x86_64_user_regs.rax = raw_regs.regs[PERF_REG_X86_AX];
175     x86_64_user_regs.rbx = raw_regs.regs[PERF_REG_X86_BX];
176     x86_64_user_regs.rcx = raw_regs.regs[PERF_REG_X86_CX];
177     x86_64_user_regs.rdx = raw_regs.regs[PERF_REG_X86_DX];
178     x86_64_user_regs.r8 = raw_regs.regs[PERF_REG_X86_R8];
179     x86_64_user_regs.r9 = raw_regs.regs[PERF_REG_X86_R9];
180     x86_64_user_regs.r10 = raw_regs.regs[PERF_REG_X86_R10];
181     x86_64_user_regs.r11 = raw_regs.regs[PERF_REG_X86_R11];
182     x86_64_user_regs.r12 = raw_regs.regs[PERF_REG_X86_R12];
183     x86_64_user_regs.r13 = raw_regs.regs[PERF_REG_X86_R13];
184     x86_64_user_regs.r14 = raw_regs.regs[PERF_REG_X86_R14];
185     x86_64_user_regs.r15 = raw_regs.regs[PERF_REG_X86_R15];
186     x86_64_user_regs.rdi = raw_regs.regs[PERF_REG_X86_DI];
187     x86_64_user_regs.rsi = raw_regs.regs[PERF_REG_X86_SI];
188     x86_64_user_regs.rbp = raw_regs.regs[PERF_REG_X86_BP];
189     x86_64_user_regs.rsp = raw_regs.regs[PERF_REG_X86_SP];
190     x86_64_user_regs.rip = raw_regs.regs[PERF_REG_X86_IP];
191     return std::unique_ptr<unwindstack::Regs>(
192         unwindstack::RegsX86_64::Read(&x86_64_user_regs));
193   }
194 
195   if (arch == unwindstack::ARCH_X86) {
196     // We've sampled more registers than what libunwindstack will use. Don't
197     // copy over cs/ss/flags.
198     unwindstack::x86_user_regs x86_user_regs = {};
199     x86_user_regs.eax = static_cast<uint32_t>(raw_regs.regs[PERF_REG_X86_AX]);
200     x86_user_regs.ebx = static_cast<uint32_t>(raw_regs.regs[PERF_REG_X86_BX]);
201     x86_user_regs.ecx = static_cast<uint32_t>(raw_regs.regs[PERF_REG_X86_CX]);
202     x86_user_regs.edx = static_cast<uint32_t>(raw_regs.regs[PERF_REG_X86_DX]);
203     x86_user_regs.ebp = static_cast<uint32_t>(raw_regs.regs[PERF_REG_X86_BP]);
204     x86_user_regs.edi = static_cast<uint32_t>(raw_regs.regs[PERF_REG_X86_DI]);
205     x86_user_regs.esi = static_cast<uint32_t>(raw_regs.regs[PERF_REG_X86_SI]);
206     x86_user_regs.esp = static_cast<uint32_t>(raw_regs.regs[PERF_REG_X86_SP]);
207     x86_user_regs.eip = static_cast<uint32_t>(raw_regs.regs[PERF_REG_X86_IP]);
208     return std::unique_ptr<unwindstack::Regs>(
209         unwindstack::RegsX86::Read(&x86_user_regs));
210   }
211 
212   if (arch == unwindstack::ARCH_RISCV64) {
213     static_assert(static_cast<int>(unwindstack::RISCV64_REG_PC) ==
214                           static_cast<int>(PERF_REG_RISCV_PC) &&
215                       static_cast<int>(unwindstack::RISCV64_REG_PC) == 0,
216                   "register layout mismatch");
217     static_assert(static_cast<int>(unwindstack::RISCV64_REG_REAL_COUNT) ==
218                       static_cast<int>(PERF_REG_RISCV_MAX),
219                   "register layout mismatch");
220     // Register layout matches, pass the raw data to the Read call.
221     return std::unique_ptr<unwindstack::Regs>(
222         unwindstack::RegsRiscv64::Read(&raw_regs.regs[0]));
223   }
224 
225   PERFETTO_FATAL("Unsupported architecture");
226 }
227 
228 }  // namespace
229 
PerfUserRegsMaskForArch(unwindstack::ArchEnum arch)230 uint64_t PerfUserRegsMaskForArch(unwindstack::ArchEnum arch) {
231   return PerfUserRegsMask(arch);
232 }
233 
234 // Assumes that the sampling was configured with
235 // |PerfUserRegsMaskForArch(unwindstack::Regs::CurrentArch())|.
ReadPerfUserRegsData(const char ** data)236 std::unique_ptr<unwindstack::Regs> ReadPerfUserRegsData(const char** data) {
237   unwindstack::ArchEnum requested_arch = unwindstack::Regs::CurrentArch();
238 
239   // Layout, assuming a sparse bitmask requesting r1 and r15:
240   // userspace thread: [u64 abi] [u64 r1] [u64 r15]
241   // kernel thread:    [u64 abi]
242   const char* parse_pos = *data;
243   uint64_t sampled_abi;
244   parse_pos = ReadValue(&sampled_abi, parse_pos);
245 
246   // ABI_NONE means there were no registers, as we've sampled a kernel thread,
247   // which doesn't have userspace registers.
248   if (sampled_abi == PERF_SAMPLE_REGS_ABI_NONE) {
249     *data = parse_pos;  // adjust caller's parsing position
250     return nullptr;
251   }
252 
253   // Unpack the densely-packed register values into |RawRegisterData|, which has
254   // a value for every register (unsampled registers will be left at zero).
255   RawRegisterData raw_regs{};
256   uint64_t regs_mask = PerfUserRegsMaskForArch(requested_arch);
257   for (size_t i = 0; regs_mask && (i < RawRegisterData::kMaxSize); i++) {
258     if (regs_mask & (1ULL << i)) {
259       parse_pos = ReadValue(&raw_regs.regs[i], parse_pos);
260     }
261   }
262 
263   // Special case: we've requested arm64 registers from a 64 bit kernel, but
264   // ended up sampling a 32 bit arm userspace process. The 32 bit execution
265   // state of the target process was saved by the exception entry in an
266   // ISA-specific way. The userspace R0-R14 end up saved as arm64 W0-W14, but
267   // the program counter (R15 on arm32) is still in PERF_REG_ARM64_PC (the 33rd
268   // register). So we can take the kernel-dumped 64 bit register state, reassign
269   // the PC into the R15 slot, and treat the resulting RawRegisterData as an
270   // arm32 register bank. See "Fundamentals of ARMv8-A" (ARM DOC
271   // 100878_0100_en), page 28.
272   // x86-64 doesn't need any such fixups.
273   if (requested_arch == unwindstack::ARCH_ARM64 &&
274       sampled_abi == PERF_SAMPLE_REGS_ABI_32) {
275     raw_regs.regs[PERF_REG_ARM_PC] = raw_regs.regs[PERF_REG_ARM64_PC];
276   }
277 
278   *data = parse_pos;  // adjust caller's parsing position
279 
280   unwindstack::ArchEnum sampled_arch = ArchForAbi(requested_arch, sampled_abi);
281   return ToLibUnwindstackRegs(raw_regs, sampled_arch);
282 }
283 
284 }  // namespace profiling
285 }  // namespace perfetto
286