xref: /aosp_15_r20/external/google-breakpad/src/processor/disassembler_objdump.h (revision 9712c20fc9bbfbac4935993a2ca0b3958c5adad2)
1 // Copyright (c) 2022, Google LLC
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are
5 // met:
6 //
7 //     * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 //     * Redistributions in binary form must reproduce the above
10 // copyright notice, this list of conditions and the following disclaimer
11 // in the documentation and/or other materials provided with the
12 // distribution.
13 //     * Neither the name of Google LLC nor the names of its
14 // contributors may be used to endorse or promote products derived from
15 // this software without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 
29 // disassembler_objdump.h: Disassembler that invokes objdump for disassembly.
30 //
31 // Author: Mark Brand
32 
33 #ifndef GOOGLE_BREAKPAD_PROCESSOR_DISASSEMBLER_OBJDUMP_H_
34 #define GOOGLE_BREAKPAD_PROCESSOR_DISASSEMBLER_OBJDUMP_H_
35 
36 #include <string>
37 
38 #include "common/using_std_string.h"
39 #include "google_breakpad/common/breakpad_types.h"
40 #include "google_breakpad/processor/dump_context.h"
41 #include "google_breakpad/processor/memory_region.h"
42 
43 namespace google_breakpad {
44 
45 // Uses objdump to disassemble a single instruction.
46 //
47 // Currently supports disassembly for x86 and x86_64 on linux hosts only; on
48 // unsupported platform or for unsupported architectures disassembly will fail.
49 //
50 // If disassembly is successful, then this allows extracting the instruction
51 // opcode, source and destination operands, and computing the source and
52 // destination addresses for instructions that operate on memory.
53 //
54 // Example:
55 //   DisassemblerObjdump disassembler(context->GetContextCPU(), memory_region,
56 //                                    instruction_ptr);
57 //   if (disassembler.IsValid()) {
58 //     uint64_t src_address = 0;
59 //     std::cerr << disassembler.operation() << " " << disassembler.src()
60 //               << ", " << disassembler.dest() << std::endl;
61 //     if (disassembler.CalculateSrcAddress(*context, src_address)) {
62 //       std::cerr << "[src_address = " << std::hex << src_address << "]\n";
63 //     }
64 //   }
65 class DisassemblerObjdump {
66  public:
67   // Construct an ObjdumpDisassembler for the provided `cpu` type, where this is
68   // one of MD_CONTEXT_X86 or MD_CONTEXT_AMD64. Provided that `address` is
69   // within `memory_region`, and the memory referenced is a valid instruction,
70   // this will then be initialized with the disassembly for that instruction.
71   DisassemblerObjdump(uint32_t cpu,
72                       const MemoryRegion* memory_region,
73                       uint64_t address);
74   ~DisassemblerObjdump() = default;
75 
76   // If the source operand of the instruction is a memory operand, compute the
77   // address referred to by the operand, and store this in `address`. On success
78   // returns true, otherwise (if computation fails, or if the source operand is
79   // not a memory operand) returns false and sets `address` to 0.
80   bool CalculateSrcAddress(const DumpContext& context, uint64_t& address);
81 
82   // If the destination operand of the instruction is a memory operand, compute
83   // the address referred to by the operand, and store this in `address`. On
84   // success returns true, otherwise (if computation fails, or if the source
85   // operand is not a memory operand) returns false and sets `address` to 0.
86   bool CalculateDestAddress(const DumpContext& context, uint64_t& address);
87 
88   // If the instruction was disassembled successfully, this will be true.
IsValid()89   bool IsValid() const { return operation_.size() != 0; }
90 
91   // Returns the operation part of the disassembly, without any prefixes:
92   //   "pop" eax
93   //   lock "xchg" eax, edx
operation()94   const string& operation() const { return operation_; }
95 
96   // Returns the destination operand of the disassembly, without memory operand
97   // size prefixes:
98   //   mov DWORD PTR "[rax + 16]", edx
dest()99   const string& dest() const { return dest_; }
100 
101   // Returns the source operand of the disassembly, without memory operand
102   // size prefixes:
103   //   mov rax, QWORD PTR "[rdx]"
src()104   const string& src() const { return src_; }
105 
106  private:
107   friend class DisassemblerObjdumpForTest;
108 
109   // Writes out the provided `raw_bytes` to a temporary file, and executes objdump
110   // to disassemble according to `cpu`, which must be either MD_CONTEXT_X86 or
111   // MD_CONTEXT_AMD64. Once objdump has completed, parses out the instruction
112   // string from the first instruction in the output and stores it in
113   // `instruction`.
114   static bool DisassembleInstruction(uint32_t cpu, const uint8_t* raw_bytes,
115                                      unsigned int raw_bytes_len,
116                                      string& instruction);
117 
118   // Splits an `instruction` into three parts, the "main" `operation` and
119   // the `dest` and `src` operands.
120   // Example:
121   //   instruction = "lock cmpxchg QWORD PTR [rdi], rsi"
122   //   operation = "cmpxchg", dest = "[rdi]", src = "rsi"
123   static bool TokenizeInstruction(const string& instruction, string& operation,
124                                   string& dest, string& src);
125 
126   // Compute the address referenced by `expression` in `context`.
127   // Supports memory operands in the form
128   //   (segment:)[base_reg(+index_reg*index_stride)(+-offset)]
129   // Returns false if evaluation fails, or if the operand is not a supported
130   // memory operand.
131   static bool CalculateAddress(const DumpContext& context,
132                                const string& expression,
133                                uint64_t& address);
134 
135   // The parsed components of the disassembly for the instruction.
136   string operation_ = "";
137   string dest_ = "";
138   string src_ = "";
139 };
140 }  // namespace google_breakpad
141 
142 #endif  // GOOGLE_BREAKPAD_PROCESSOR_DISASSEMBLER_OBJDUMP_H_