xref: /aosp_15_r20/external/google-breakpad/src/processor/disassembler_objdump.cc (revision 9712c20fc9bbfbac4935993a2ca0b3958c5adad2)
1*9712c20fSFrederick Mayle // Copyright (c) 2022, Google LLC
2*9712c20fSFrederick Mayle //
3*9712c20fSFrederick Mayle // Redistribution and use in source and binary forms, with or without
4*9712c20fSFrederick Mayle // modification, are permitted provided that the following conditions are
5*9712c20fSFrederick Mayle // met:
6*9712c20fSFrederick Mayle //
7*9712c20fSFrederick Mayle //     * Redistributions of source code must retain the above copyright
8*9712c20fSFrederick Mayle // notice, this list of conditions and the following disclaimer.
9*9712c20fSFrederick Mayle //     * Redistributions in binary form must reproduce the above
10*9712c20fSFrederick Mayle // copyright notice, this list of conditions and the following disclaimer
11*9712c20fSFrederick Mayle // in the documentation and/or other materials provided with the
12*9712c20fSFrederick Mayle // distribution.
13*9712c20fSFrederick Mayle //     * Neither the name of Google LLC nor the names of its
14*9712c20fSFrederick Mayle // contributors may be used to endorse or promote products derived from
15*9712c20fSFrederick Mayle // this software without specific prior written permission.
16*9712c20fSFrederick Mayle //
17*9712c20fSFrederick Mayle // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18*9712c20fSFrederick Mayle // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19*9712c20fSFrederick Mayle // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20*9712c20fSFrederick Mayle // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21*9712c20fSFrederick Mayle // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22*9712c20fSFrederick Mayle // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23*9712c20fSFrederick Mayle // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24*9712c20fSFrederick Mayle // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25*9712c20fSFrederick Mayle // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26*9712c20fSFrederick Mayle // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27*9712c20fSFrederick Mayle // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28*9712c20fSFrederick Mayle 
// disassembler_objdump.cc: Disassembler that invokes objdump for disassembly.
30*9712c20fSFrederick Mayle //
31*9712c20fSFrederick Mayle // Author: Mark Brand
32*9712c20fSFrederick Mayle 
33*9712c20fSFrederick Mayle #ifdef HAVE_CONFIG_H
34*9712c20fSFrederick Mayle #include <config.h>  // Must come first
35*9712c20fSFrederick Mayle #endif
36*9712c20fSFrederick Mayle 
37*9712c20fSFrederick Mayle #include "processor/disassembler_objdump.h"
38*9712c20fSFrederick Mayle 
39*9712c20fSFrederick Mayle #include <unistd.h>
40*9712c20fSFrederick Mayle #include <sys/wait.h>
41*9712c20fSFrederick Mayle 
42*9712c20fSFrederick Mayle #include <array>
43*9712c20fSFrederick Mayle #include <fstream>
44*9712c20fSFrederick Mayle #include <iostream>
45*9712c20fSFrederick Mayle #include <iterator>
46*9712c20fSFrederick Mayle #include <regex>
47*9712c20fSFrederick Mayle #include <sstream>
48*9712c20fSFrederick Mayle #include <vector>
49*9712c20fSFrederick Mayle 
50*9712c20fSFrederick Mayle #include "common/linux/eintr_wrapper.h"
51*9712c20fSFrederick Mayle #include "common/linux/scoped_pipe.h"
52*9712c20fSFrederick Mayle #include "common/linux/scoped_tmpfile.h"
53*9712c20fSFrederick Mayle #include "processor/logging.h"
54*9712c20fSFrederick Mayle 
55*9712c20fSFrederick Mayle namespace google_breakpad {
56*9712c20fSFrederick Mayle namespace {
57*9712c20fSFrederick Mayle 
// Upper bound, in bytes, on the length of a single x86 instruction. It sizes
// the buffer of bytes read at the instruction address before disassembly.
constexpr size_t kMaxX86InstructionLength = 15;
59*9712c20fSFrederick Mayle 
// Returns true if `token` is one of the instruction prefixes "lock", "rep",
// "repz" or "repnz", and false for every other token. The comparison is
// case-sensitive. TokenizeInstruction skips these tokens while looking for
// the operation.
bool IsInstructionPrefix(const string& token) {
  static const char* const kPrefixes[] = {"lock", "rep", "repz", "repnz"};
  for (const char* prefix : kPrefixes) {
    if (token == prefix) {
      return true;
    }
  }
  return false;
}
67*9712c20fSFrederick Mayle 
// Returns true if `token` is part of an Intel-syntax operand size annotation
// (for example the "DWORD" and "PTR" in "DWORD PTR [esi+0x10]"), and false
// for every other token. The comparison is case-sensitive.
//
// TokenizeInstruction skips these tokens so that the operand it records is
// the address expression itself. Besides the general purpose sizes, the wider
// keywords that objdump prints for far pointers, x87 and vector memory
// operands are recognised as well; otherwise the keyword itself would be
// taken as the operand (e.g. "XMMWORD" instead of "[rax]").
bool IsOperandSize(const string& token) {
  static const char* const kSizeKeywords[] = {
      "BYTE",  "WORD",  "DWORD", "QWORD",   "PTR",
      "FWORD", "TBYTE", "OWORD", "XMMWORD", "YMMWORD", "ZMMWORD",
  };
  for (const char* keyword : kSizeKeywords) {
    if (token == keyword) {
      return true;
    }
  }
  return false;
}
75*9712c20fSFrederick Mayle 
GetSegmentAddressX86(const DumpContext & context,string segment_name,uint64_t & address)76*9712c20fSFrederick Mayle bool GetSegmentAddressX86(const DumpContext& context, string segment_name,
77*9712c20fSFrederick Mayle                           uint64_t& address) {
78*9712c20fSFrederick Mayle   if (segment_name == "ds") {
79*9712c20fSFrederick Mayle     address = context.GetContextX86()->ds;
80*9712c20fSFrederick Mayle   } else if (segment_name == "es") {
81*9712c20fSFrederick Mayle     address = context.GetContextX86()->es;
82*9712c20fSFrederick Mayle   } else if (segment_name == "fs") {
83*9712c20fSFrederick Mayle     address = context.GetContextX86()->fs;
84*9712c20fSFrederick Mayle   } else if (segment_name == "gs") {
85*9712c20fSFrederick Mayle     address = context.GetContextX86()->gs;
86*9712c20fSFrederick Mayle   } else {
87*9712c20fSFrederick Mayle     BPLOG(ERROR) << "Unsupported segment register: " << segment_name;
88*9712c20fSFrederick Mayle     return false;
89*9712c20fSFrederick Mayle   }
90*9712c20fSFrederick Mayle 
91*9712c20fSFrederick Mayle   return true;
92*9712c20fSFrederick Mayle }
93*9712c20fSFrederick Mayle 
GetSegmentAddressAMD64(const DumpContext & context,string segment_name,uint64_t & address)94*9712c20fSFrederick Mayle bool GetSegmentAddressAMD64(const DumpContext& context, string segment_name,
95*9712c20fSFrederick Mayle                             uint64_t& address) {
96*9712c20fSFrederick Mayle   if (segment_name == "ds") {
97*9712c20fSFrederick Mayle     address = 0;
98*9712c20fSFrederick Mayle   } else if (segment_name == "es") {
99*9712c20fSFrederick Mayle     address = 0;
100*9712c20fSFrederick Mayle   } else {
101*9712c20fSFrederick Mayle     BPLOG(ERROR) << "Unsupported segment register: " << segment_name;
102*9712c20fSFrederick Mayle     return false;
103*9712c20fSFrederick Mayle   }
104*9712c20fSFrederick Mayle 
105*9712c20fSFrederick Mayle   return true;
106*9712c20fSFrederick Mayle }
107*9712c20fSFrederick Mayle 
GetSegmentAddress(const DumpContext & context,string segment_name,uint64_t & address)108*9712c20fSFrederick Mayle bool GetSegmentAddress(const DumpContext& context, string segment_name,
109*9712c20fSFrederick Mayle                        uint64_t& address) {
110*9712c20fSFrederick Mayle   if (context.GetContextCPU() == MD_CONTEXT_X86) {
111*9712c20fSFrederick Mayle     return GetSegmentAddressX86(context, segment_name, address);
112*9712c20fSFrederick Mayle   } else if (context.GetContextCPU() == MD_CONTEXT_AMD64) {
113*9712c20fSFrederick Mayle     return GetSegmentAddressAMD64(context, segment_name, address);
114*9712c20fSFrederick Mayle   } else {
115*9712c20fSFrederick Mayle     BPLOG(ERROR) << "Unsupported architecture for GetSegmentAddress\n";
116*9712c20fSFrederick Mayle     return false;
117*9712c20fSFrederick Mayle   }
118*9712c20fSFrederick Mayle }
119*9712c20fSFrederick Mayle 
GetRegisterValueX86(const DumpContext & context,string register_name,uint64_t & value)120*9712c20fSFrederick Mayle bool GetRegisterValueX86(const DumpContext& context, string register_name,
121*9712c20fSFrederick Mayle                          uint64_t& value) {
122*9712c20fSFrederick Mayle   if (register_name == "eax") {
123*9712c20fSFrederick Mayle     value = context.GetContextX86()->eax;
124*9712c20fSFrederick Mayle   } else if (register_name == "ebx") {
125*9712c20fSFrederick Mayle     value = context.GetContextX86()->ebx;
126*9712c20fSFrederick Mayle   } else if (register_name == "ecx") {
127*9712c20fSFrederick Mayle     value = context.GetContextX86()->ecx;
128*9712c20fSFrederick Mayle   } else if (register_name == "edx") {
129*9712c20fSFrederick Mayle     value = context.GetContextX86()->edx;
130*9712c20fSFrederick Mayle   } else if (register_name == "edi") {
131*9712c20fSFrederick Mayle     value = context.GetContextX86()->edi;
132*9712c20fSFrederick Mayle   } else if (register_name == "esi") {
133*9712c20fSFrederick Mayle     value = context.GetContextX86()->esi;
134*9712c20fSFrederick Mayle   } else if (register_name == "ebp") {
135*9712c20fSFrederick Mayle     value = context.GetContextX86()->ebp;
136*9712c20fSFrederick Mayle   } else if (register_name == "esp") {
137*9712c20fSFrederick Mayle     value = context.GetContextX86()->esp;
138*9712c20fSFrederick Mayle   } else if (register_name == "eip") {
139*9712c20fSFrederick Mayle     value = context.GetContextX86()->eip;
140*9712c20fSFrederick Mayle   } else {
141*9712c20fSFrederick Mayle     BPLOG(ERROR) << "Unsupported register: " << register_name;
142*9712c20fSFrederick Mayle     return false;
143*9712c20fSFrederick Mayle   }
144*9712c20fSFrederick Mayle 
145*9712c20fSFrederick Mayle   return true;
146*9712c20fSFrederick Mayle }
147*9712c20fSFrederick Mayle 
GetRegisterValueAMD64(const DumpContext & context,string register_name,uint64_t & value)148*9712c20fSFrederick Mayle bool GetRegisterValueAMD64(const DumpContext& context, string register_name,
149*9712c20fSFrederick Mayle                            uint64_t& value) {
150*9712c20fSFrederick Mayle   if (register_name == "rax") {
151*9712c20fSFrederick Mayle     value = context.GetContextAMD64()->rax;
152*9712c20fSFrederick Mayle   } else if (register_name == "rbx") {
153*9712c20fSFrederick Mayle     value = context.GetContextAMD64()->rbx;
154*9712c20fSFrederick Mayle   } else if (register_name == "rcx") {
155*9712c20fSFrederick Mayle     value = context.GetContextAMD64()->rcx;
156*9712c20fSFrederick Mayle   } else if (register_name == "rdx") {
157*9712c20fSFrederick Mayle     value = context.GetContextAMD64()->rdx;
158*9712c20fSFrederick Mayle   } else if (register_name == "rdi") {
159*9712c20fSFrederick Mayle     value = context.GetContextAMD64()->rdi;
160*9712c20fSFrederick Mayle   } else if (register_name == "rsi") {
161*9712c20fSFrederick Mayle     value = context.GetContextAMD64()->rsi;
162*9712c20fSFrederick Mayle   } else if (register_name == "rbp") {
163*9712c20fSFrederick Mayle     value = context.GetContextAMD64()->rbp;
164*9712c20fSFrederick Mayle   } else if (register_name == "rsp") {
165*9712c20fSFrederick Mayle     value = context.GetContextAMD64()->rsp;
166*9712c20fSFrederick Mayle   } else if (register_name == "r8") {
167*9712c20fSFrederick Mayle     value = context.GetContextAMD64()->r8;
168*9712c20fSFrederick Mayle   } else if (register_name == "r9") {
169*9712c20fSFrederick Mayle     value = context.GetContextAMD64()->r9;
170*9712c20fSFrederick Mayle   } else if (register_name == "r10") {
171*9712c20fSFrederick Mayle     value = context.GetContextAMD64()->r10;
172*9712c20fSFrederick Mayle   } else if (register_name == "r11") {
173*9712c20fSFrederick Mayle     value = context.GetContextAMD64()->r11;
174*9712c20fSFrederick Mayle   } else if (register_name == "r12") {
175*9712c20fSFrederick Mayle     value = context.GetContextAMD64()->r12;
176*9712c20fSFrederick Mayle   } else if (register_name == "r13") {
177*9712c20fSFrederick Mayle     value = context.GetContextAMD64()->r13;
178*9712c20fSFrederick Mayle   } else if (register_name == "r14") {
179*9712c20fSFrederick Mayle     value = context.GetContextAMD64()->r14;
180*9712c20fSFrederick Mayle   } else if (register_name == "r15") {
181*9712c20fSFrederick Mayle     value = context.GetContextAMD64()->r15;
182*9712c20fSFrederick Mayle   } else if (register_name == "rip") {
183*9712c20fSFrederick Mayle     value = context.GetContextAMD64()->rip;
184*9712c20fSFrederick Mayle   } else {
185*9712c20fSFrederick Mayle     BPLOG(ERROR) << "Unsupported register: " << register_name;
186*9712c20fSFrederick Mayle     return false;
187*9712c20fSFrederick Mayle   }
188*9712c20fSFrederick Mayle 
189*9712c20fSFrederick Mayle   return true;
190*9712c20fSFrederick Mayle }
191*9712c20fSFrederick Mayle 
192*9712c20fSFrederick Mayle // Lookup the value of `register_name` in `context`, store it into `value` on
193*9712c20fSFrederick Mayle // success.
194*9712c20fSFrederick Mayle // Support for non-full-size registers not implemented, since we're only using
195*9712c20fSFrederick Mayle // this to evaluate address expressions.
GetRegisterValue(const DumpContext & context,string register_name,uint64_t & value)196*9712c20fSFrederick Mayle bool GetRegisterValue(const DumpContext& context, string register_name,
197*9712c20fSFrederick Mayle                       uint64_t& value) {
198*9712c20fSFrederick Mayle   if (context.GetContextCPU() == MD_CONTEXT_X86) {
199*9712c20fSFrederick Mayle     return GetRegisterValueX86(context, register_name, value);
200*9712c20fSFrederick Mayle   } else if (context.GetContextCPU() == MD_CONTEXT_AMD64) {
201*9712c20fSFrederick Mayle     return GetRegisterValueAMD64(context, register_name, value);
202*9712c20fSFrederick Mayle   } else {
203*9712c20fSFrederick Mayle     BPLOG(ERROR) << "Unsupported architecture for GetRegisterValue\n";
204*9712c20fSFrederick Mayle     return false;
205*9712c20fSFrederick Mayle   }
206*9712c20fSFrederick Mayle }
207*9712c20fSFrederick Mayle }  // namespace
208*9712c20fSFrederick Mayle 
209*9712c20fSFrederick Mayle // static
DisassembleInstruction(uint32_t cpu,const uint8_t * raw_bytes,unsigned int raw_bytes_len,string & instruction)210*9712c20fSFrederick Mayle bool DisassemblerObjdump::DisassembleInstruction(uint32_t cpu,
211*9712c20fSFrederick Mayle                                                  const uint8_t* raw_bytes,
212*9712c20fSFrederick Mayle                                                  unsigned int raw_bytes_len,
213*9712c20fSFrederick Mayle                                                  string& instruction) {
214*9712c20fSFrederick Mayle   // Always initialize outputs
215*9712c20fSFrederick Mayle   instruction = "";
216*9712c20fSFrederick Mayle 
217*9712c20fSFrederick Mayle   if (!raw_bytes || raw_bytes_len == 0) {
218*9712c20fSFrederick Mayle     // There's no need to perform any operation in this case, as there's
219*9712c20fSFrederick Mayle     // clearly no instruction there.
220*9712c20fSFrederick Mayle     return false;
221*9712c20fSFrederick Mayle   }
222*9712c20fSFrederick Mayle 
223*9712c20fSFrederick Mayle   string architecture;
224*9712c20fSFrederick Mayle   if (cpu == MD_CONTEXT_X86) {
225*9712c20fSFrederick Mayle     architecture = "i386";
226*9712c20fSFrederick Mayle   } else if (cpu == MD_CONTEXT_AMD64) {
227*9712c20fSFrederick Mayle     architecture = "i386:x86-64";
228*9712c20fSFrederick Mayle   } else {
229*9712c20fSFrederick Mayle     BPLOG(ERROR) << "Unsupported architecture.";
230*9712c20fSFrederick Mayle     return false;
231*9712c20fSFrederick Mayle   }
232*9712c20fSFrederick Mayle 
233*9712c20fSFrederick Mayle   // Create a temporary file for the raw instruction bytes to pass to
234*9712c20fSFrederick Mayle   // objdump, and write the bytes to the input file.
235*9712c20fSFrederick Mayle   ScopedTmpFile raw_bytes_file;
236*9712c20fSFrederick Mayle   if (!raw_bytes_file.InitData(raw_bytes, raw_bytes_len)) {
237*9712c20fSFrederick Mayle     BPLOG(ERROR) << "Failed creating temporary file.";
238*9712c20fSFrederick Mayle     return false;
239*9712c20fSFrederick Mayle   }
240*9712c20fSFrederick Mayle 
241*9712c20fSFrederick Mayle   // Create a pipe to use to read the disassembly back from objdump.
242*9712c20fSFrederick Mayle   ScopedPipe disassembly_pipe;
243*9712c20fSFrederick Mayle   if (!disassembly_pipe.Init()) {
244*9712c20fSFrederick Mayle     BPLOG(ERROR) << "Failed creating pipe for output.";
245*9712c20fSFrederick Mayle     return false;
246*9712c20fSFrederick Mayle   }
247*9712c20fSFrederick Mayle 
248*9712c20fSFrederick Mayle   pid_t child_pid = fork();
249*9712c20fSFrederick Mayle   if (child_pid < 0) {
250*9712c20fSFrederick Mayle     BPLOG(ERROR) << "Fork failed.";
251*9712c20fSFrederick Mayle     return false;
252*9712c20fSFrederick Mayle   }
253*9712c20fSFrederick Mayle 
254*9712c20fSFrederick Mayle   if (child_pid == 0) {
255*9712c20fSFrederick Mayle     // In the child process, set up the input and output file descriptors.
256*9712c20fSFrederick Mayle     if (dup2(raw_bytes_file.GetFd(), STDIN_FILENO) < 0 ||
257*9712c20fSFrederick Mayle         disassembly_pipe.Dup2WriteFd(STDOUT_FILENO) < 0 ||
258*9712c20fSFrederick Mayle         disassembly_pipe.Dup2WriteFd(STDERR_FILENO) < 0) {
259*9712c20fSFrederick Mayle       BPLOG(ERROR) << "Failed dup'ing file descriptors.";
260*9712c20fSFrederick Mayle       exit(-1);
261*9712c20fSFrederick Mayle     }
262*9712c20fSFrederick Mayle 
263*9712c20fSFrederick Mayle     // We need to close the read end of the pipe in the child process so that
264*9712c20fSFrederick Mayle     // when the parent closes it, the pipe is disconnected.
265*9712c20fSFrederick Mayle     disassembly_pipe.CloseReadFd();
266*9712c20fSFrederick Mayle 
267*9712c20fSFrederick Mayle     // We use "/proc/self/fd/0" here to allow objdump to parse an unnamed file,
268*9712c20fSFrederick Mayle     // since objdump does not have a mode to read from stdin. This cannot be
269*9712c20fSFrederick Mayle     // used with a pipe, since objdump requires that the input is a standard
270*9712c20fSFrederick Mayle     // file.
271*9712c20fSFrederick Mayle     execlp("objdump", "objdump", "-D", "--no-show-raw-insn", "-b", "binary",
272*9712c20fSFrederick Mayle            "-M", "intel", "-m", architecture.c_str(), "/proc/self/fd/0",
273*9712c20fSFrederick Mayle            nullptr);
274*9712c20fSFrederick Mayle 
275*9712c20fSFrederick Mayle     BPLOG(ERROR) << "Failed to exec objdump.";
276*9712c20fSFrederick Mayle     exit(-1);
277*9712c20fSFrederick Mayle   } else {
278*9712c20fSFrederick Mayle     // In the parent process, parse the objdump output.
279*9712c20fSFrederick Mayle 
280*9712c20fSFrederick Mayle     // Match the instruction line, from:
281*9712c20fSFrederick Mayle     //    0:        lock cmpxchg DWORD PTR [esi+0x10],eax
282*9712c20fSFrederick Mayle     // extract the string "lock cmpxchg DWORD PTR [esi+0x10],eax"
283*9712c20fSFrederick Mayle     std::regex instruction_regex(
284*9712c20fSFrederick Mayle         "^\\s+[0-9a-f]+:\\s+"  // "   0:"
285*9712c20fSFrederick Mayle         "((?:\\s*\\S*)+)$");   // "lock cmpxchg..."
286*9712c20fSFrederick Mayle 
287*9712c20fSFrederick Mayle     std::string line;
288*9712c20fSFrederick Mayle     std::smatch match;
289*9712c20fSFrederick Mayle     while (disassembly_pipe.ReadLine(line)) {
290*9712c20fSFrederick Mayle       if (std::regex_match(line, match, instruction_regex)) {
291*9712c20fSFrederick Mayle         instruction = match[1].str();
292*9712c20fSFrederick Mayle         break;
293*9712c20fSFrederick Mayle       }
294*9712c20fSFrederick Mayle     }
295*9712c20fSFrederick Mayle 
296*9712c20fSFrederick Mayle     // Close the read pipe so that objdump will exit (in case we broke out of
297*9712c20fSFrederick Mayle     // the loop above before reading all of the output).
298*9712c20fSFrederick Mayle     disassembly_pipe.CloseReadFd();
299*9712c20fSFrederick Mayle 
300*9712c20fSFrederick Mayle     // Now wait for objdump to exit.
301*9712c20fSFrederick Mayle     int status = 0;
302*9712c20fSFrederick Mayle     HANDLE_EINTR(waitpid(child_pid, &status, 0));
303*9712c20fSFrederick Mayle 
304*9712c20fSFrederick Mayle     if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
305*9712c20fSFrederick Mayle       BPLOG(ERROR) << "objdump didn't run successfully.";
306*9712c20fSFrederick Mayle       return false;
307*9712c20fSFrederick Mayle     }
308*9712c20fSFrederick Mayle 
309*9712c20fSFrederick Mayle     if (instruction == "") {
310*9712c20fSFrederick Mayle       BPLOG(ERROR) << "Failed to find instruction in objdump output.";
311*9712c20fSFrederick Mayle       return false;
312*9712c20fSFrederick Mayle     }
313*9712c20fSFrederick Mayle   }
314*9712c20fSFrederick Mayle 
315*9712c20fSFrederick Mayle   return true;
316*9712c20fSFrederick Mayle }
317*9712c20fSFrederick Mayle 
318*9712c20fSFrederick Mayle // static
TokenizeInstruction(const string & instruction,string & operation,string & dest,string & src)319*9712c20fSFrederick Mayle bool DisassemblerObjdump::TokenizeInstruction(const string& instruction,
320*9712c20fSFrederick Mayle                                               string& operation, string& dest,
321*9712c20fSFrederick Mayle                                               string& src) {
322*9712c20fSFrederick Mayle   // Always initialize outputs.
323*9712c20fSFrederick Mayle   operation = "";
324*9712c20fSFrederick Mayle   dest = "";
325*9712c20fSFrederick Mayle   src = "";
326*9712c20fSFrederick Mayle 
327*9712c20fSFrederick Mayle   // Split the instruction into tokens by either whitespace or comma.
328*9712c20fSFrederick Mayle   std::regex token_regex("((?:[^\\s,]+)|,)(?:\\s)*");
329*9712c20fSFrederick Mayle   std::sregex_iterator tokens_begin(instruction.begin(), instruction.end(),
330*9712c20fSFrederick Mayle                                     token_regex);
331*9712c20fSFrederick Mayle 
332*9712c20fSFrederick Mayle   bool found_comma = false;
333*9712c20fSFrederick Mayle   for (auto tokens_iter = tokens_begin; tokens_iter != std::sregex_iterator();
334*9712c20fSFrederick Mayle        ++tokens_iter) {
335*9712c20fSFrederick Mayle     auto token = (*tokens_iter)[1].str();
336*9712c20fSFrederick Mayle     if (operation.size() == 0) {
337*9712c20fSFrederick Mayle       if (IsInstructionPrefix(token))
338*9712c20fSFrederick Mayle         continue;
339*9712c20fSFrederick Mayle       operation = token;
340*9712c20fSFrederick Mayle     } else if (dest.size() == 0) {
341*9712c20fSFrederick Mayle       if (IsOperandSize(token))
342*9712c20fSFrederick Mayle         continue;
343*9712c20fSFrederick Mayle       dest = token;
344*9712c20fSFrederick Mayle     } else if (!found_comma) {
345*9712c20fSFrederick Mayle       if (token == ",") {
346*9712c20fSFrederick Mayle         found_comma = true;
347*9712c20fSFrederick Mayle       } else {
348*9712c20fSFrederick Mayle         BPLOG(ERROR) << "Failed to parse operands from objdump output, expected"
349*9712c20fSFrederick Mayle                         " comma but found \""
350*9712c20fSFrederick Mayle                      << token << "\"";
351*9712c20fSFrederick Mayle         return false;
352*9712c20fSFrederick Mayle       }
353*9712c20fSFrederick Mayle     } else if (src.size() == 0) {
354*9712c20fSFrederick Mayle       if (IsOperandSize(token))
355*9712c20fSFrederick Mayle         continue;
356*9712c20fSFrederick Mayle       src = token;
357*9712c20fSFrederick Mayle     } else {
358*9712c20fSFrederick Mayle       if (token == ",") {
359*9712c20fSFrederick Mayle         BPLOG(ERROR) << "Failed to parse operands from objdump output, found "
360*9712c20fSFrederick Mayle                         "unexpected comma after last operand.";
361*9712c20fSFrederick Mayle         return false;
362*9712c20fSFrederick Mayle       } else {
363*9712c20fSFrederick Mayle         // We just ignore other junk after the last operand unless it's a
364*9712c20fSFrederick Mayle         // comma, which would indicate we're probably still in the middle
365*9712c20fSFrederick Mayle         // of the operands and something has gone wrong
366*9712c20fSFrederick Mayle       }
367*9712c20fSFrederick Mayle     }
368*9712c20fSFrederick Mayle   }
369*9712c20fSFrederick Mayle 
370*9712c20fSFrederick Mayle   if (found_comma && src.size() == 0) {
371*9712c20fSFrederick Mayle     BPLOG(ERROR) << "Failed to parse operands from objdump output, found comma "
372*9712c20fSFrederick Mayle                     "but no src operand.";
373*9712c20fSFrederick Mayle     return false;
374*9712c20fSFrederick Mayle   }
375*9712c20fSFrederick Mayle 
376*9712c20fSFrederick Mayle   return true;
377*9712c20fSFrederick Mayle }
378*9712c20fSFrederick Mayle 
379*9712c20fSFrederick Mayle // static
CalculateAddress(const DumpContext & context,const string & expression,uint64_t & address)380*9712c20fSFrederick Mayle bool DisassemblerObjdump::CalculateAddress(const DumpContext& context,
381*9712c20fSFrederick Mayle                                            const string& expression,
382*9712c20fSFrederick Mayle                                            uint64_t& address) {
383*9712c20fSFrederick Mayle   address = 0;
384*9712c20fSFrederick Mayle 
385*9712c20fSFrederick Mayle   // Extract the components of the expression.
386*9712c20fSFrederick Mayle   // fs:[esi+edi*4+0x80] -> ["fs", "esi", "edi", "4", "-", "0x80"]
387*9712c20fSFrederick Mayle   std::regex expression_regex(
388*9712c20fSFrederick Mayle       "^(?:(\\ws):)?"                // "fs:"
389*9712c20fSFrederick Mayle       "\\[(\\w+)"                    // "[esi"
390*9712c20fSFrederick Mayle       "(?:\\+(\\w+)(?:\\*(\\d+)))?"  // "+edi*4"
391*9712c20fSFrederick Mayle       "(?:([\\+-])(0x[0-9a-f]+))?"   // "-0x80"
392*9712c20fSFrederick Mayle       "\\]$");                       // "]"
393*9712c20fSFrederick Mayle 
394*9712c20fSFrederick Mayle   std::smatch match;
395*9712c20fSFrederick Mayle   if (!std::regex_match(expression, match, expression_regex) ||
396*9712c20fSFrederick Mayle       match.size() != 7) {
397*9712c20fSFrederick Mayle     return false;
398*9712c20fSFrederick Mayle   }
399*9712c20fSFrederick Mayle 
400*9712c20fSFrederick Mayle   string segment_name = match[1].str();
401*9712c20fSFrederick Mayle   string register_name = match[2].str();
402*9712c20fSFrederick Mayle   string index_name = match[3].str();
403*9712c20fSFrederick Mayle   string index_stride = match[4].str();
404*9712c20fSFrederick Mayle   string offset_sign = match[5].str();
405*9712c20fSFrederick Mayle   string offset = match[6].str();
406*9712c20fSFrederick Mayle 
407*9712c20fSFrederick Mayle   uint64_t segment_address = 0;
408*9712c20fSFrederick Mayle   uint64_t register_value = 0;
409*9712c20fSFrederick Mayle   uint64_t index_value = 0;
410*9712c20fSFrederick Mayle   uint64_t index_stride_value = 1;
411*9712c20fSFrederick Mayle   uint64_t offset_value = 0;
412*9712c20fSFrederick Mayle 
413*9712c20fSFrederick Mayle   if (segment_name.size() &&
414*9712c20fSFrederick Mayle       !GetSegmentAddress(context, segment_name, segment_address)) {
415*9712c20fSFrederick Mayle     return false;
416*9712c20fSFrederick Mayle   }
417*9712c20fSFrederick Mayle 
418*9712c20fSFrederick Mayle   if (!GetRegisterValue(context, register_name, register_value)) {
419*9712c20fSFrederick Mayle     return false;
420*9712c20fSFrederick Mayle   }
421*9712c20fSFrederick Mayle 
422*9712c20fSFrederick Mayle   if (index_name.size() &&
423*9712c20fSFrederick Mayle       !GetRegisterValue(context, index_name, index_value)) {
424*9712c20fSFrederick Mayle     return false;
425*9712c20fSFrederick Mayle   }
426*9712c20fSFrederick Mayle 
427*9712c20fSFrederick Mayle   if (index_stride.size()) {
428*9712c20fSFrederick Mayle     index_stride_value = strtoull(index_stride.c_str(), nullptr, 0);
429*9712c20fSFrederick Mayle   }
430*9712c20fSFrederick Mayle 
431*9712c20fSFrederick Mayle   if (offset.size()) {
432*9712c20fSFrederick Mayle     offset_value = strtoull(offset.c_str(), nullptr, 0);
433*9712c20fSFrederick Mayle   }
434*9712c20fSFrederick Mayle 
435*9712c20fSFrederick Mayle   address =
436*9712c20fSFrederick Mayle       segment_address + register_value + (index_value * index_stride_value);
437*9712c20fSFrederick Mayle   if (offset_sign == "+") {
438*9712c20fSFrederick Mayle     address += offset_value;
439*9712c20fSFrederick Mayle   } else if (offset_sign == "-") {
440*9712c20fSFrederick Mayle     address -= offset_value;
441*9712c20fSFrederick Mayle   }
442*9712c20fSFrederick Mayle 
443*9712c20fSFrederick Mayle   return true;
444*9712c20fSFrederick Mayle }
445*9712c20fSFrederick Mayle 
DisassemblerObjdump(const uint32_t cpu,const MemoryRegion * memory_region,uint64_t address)446*9712c20fSFrederick Mayle DisassemblerObjdump::DisassemblerObjdump(const uint32_t cpu,
447*9712c20fSFrederick Mayle                                          const MemoryRegion* memory_region,
448*9712c20fSFrederick Mayle                                          uint64_t address) {
449*9712c20fSFrederick Mayle   if (address < memory_region->GetBase() ||
450*9712c20fSFrederick Mayle       memory_region->GetBase() + memory_region->GetSize() <= address) {
451*9712c20fSFrederick Mayle     return;
452*9712c20fSFrederick Mayle   }
453*9712c20fSFrederick Mayle 
454*9712c20fSFrederick Mayle   uint8_t ip_bytes[kMaxX86InstructionLength] = {0};
455*9712c20fSFrederick Mayle   size_t ip_bytes_length;
456*9712c20fSFrederick Mayle   for (ip_bytes_length = 0; ip_bytes_length < kMaxX86InstructionLength;
457*9712c20fSFrederick Mayle        ++ip_bytes_length) {
458*9712c20fSFrederick Mayle     // We have to read byte-by-byte here, since we still want to try and
459*9712c20fSFrederick Mayle     // disassemble an instruction even if we don't have enough bytes.
460*9712c20fSFrederick Mayle     if (!memory_region->GetMemoryAtAddress(address + ip_bytes_length,
461*9712c20fSFrederick Mayle                                            &ip_bytes[ip_bytes_length])) {
462*9712c20fSFrederick Mayle       break;
463*9712c20fSFrederick Mayle     }
464*9712c20fSFrederick Mayle   }
465*9712c20fSFrederick Mayle 
466*9712c20fSFrederick Mayle   string instruction;
467*9712c20fSFrederick Mayle   if (!DisassembleInstruction(cpu, ip_bytes, kMaxX86InstructionLength,
468*9712c20fSFrederick Mayle                               instruction)) {
469*9712c20fSFrederick Mayle     return;
470*9712c20fSFrederick Mayle   }
471*9712c20fSFrederick Mayle 
472*9712c20fSFrederick Mayle   if (!TokenizeInstruction(instruction, operation_, dest_, src_)) {
473*9712c20fSFrederick Mayle     return;
474*9712c20fSFrederick Mayle   }
475*9712c20fSFrederick Mayle }
476*9712c20fSFrederick Mayle 
CalculateSrcAddress(const DumpContext & context,uint64_t & address)477*9712c20fSFrederick Mayle bool DisassemblerObjdump::CalculateSrcAddress(const DumpContext& context,
478*9712c20fSFrederick Mayle                                               uint64_t& address) {
479*9712c20fSFrederick Mayle   return CalculateAddress(context, src_, address);
480*9712c20fSFrederick Mayle }
481*9712c20fSFrederick Mayle 
CalculateDestAddress(const DumpContext & context,uint64_t & address)482*9712c20fSFrederick Mayle bool DisassemblerObjdump::CalculateDestAddress(const DumpContext& context,
483*9712c20fSFrederick Mayle                                                uint64_t& address) {
484*9712c20fSFrederick Mayle   return CalculateAddress(context, dest_, address);
485*9712c20fSFrederick Mayle }
486*9712c20fSFrederick Mayle 
487*9712c20fSFrederick Mayle }  // namespace google_breakpad