// Copyright (c) 2022, Google LLC
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google LLC nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// disassembler_objdump.h: Disassembler that invokes objdump for disassembly.
30 // 31 // Author: Mark Brand 32 33 #ifndef GOOGLE_BREAKPAD_PROCESSOR_DISASSEMBLER_OBJDUMP_H_ 34 #define GOOGLE_BREAKPAD_PROCESSOR_DISASSEMBLER_OBJDUMP_H_ 35 36 #include <string> 37 38 #include "common/using_std_string.h" 39 #include "google_breakpad/common/breakpad_types.h" 40 #include "google_breakpad/processor/dump_context.h" 41 #include "google_breakpad/processor/memory_region.h" 42 43 namespace google_breakpad { 44 45 // Uses objdump to disassemble a single instruction. 46 // 47 // Currently supports disassembly for x86 and x86_64 on linux hosts only; on 48 // unsupported platform or for unsupported architectures disassembly will fail. 49 // 50 // If disassembly is successful, then this allows extracting the instruction 51 // opcode, source and destination operands, and computing the source and 52 // destination addresses for instructions that operate on memory. 53 // 54 // Example: 55 // DisassemblerObjdump disassembler(context->GetContextCPU(), memory_region, 56 // instruction_ptr); 57 // if (disassembler.IsValid()) { 58 // uint64_t src_address = 0; 59 // std::cerr << disassembler.operation() << " " << disassembler.src() 60 // << ", " << disassembler.dest() << std::endl; 61 // if (disassembler.CalculateSrcAddress(*context, src_address)) { 62 // std::cerr << "[src_address = " << std::hex << src_address << "]\n"; 63 // } 64 // } 65 class DisassemblerObjdump { 66 public: 67 // Construct an ObjdumpDisassembler for the provided `cpu` type, where this is 68 // one of MD_CONTEXT_X86 or MD_CONTEXT_AMD64. Provided that `address` is 69 // within `memory_region`, and the memory referenced is a valid instruction, 70 // this will then be initialized with the disassembly for that instruction. 
71 DisassemblerObjdump(uint32_t cpu, 72 const MemoryRegion* memory_region, 73 uint64_t address); 74 ~DisassemblerObjdump() = default; 75 76 // If the source operand of the instruction is a memory operand, compute the 77 // address referred to by the operand, and store this in `address`. On success 78 // returns true, otherwise (if computation fails, or if the source operand is 79 // not a memory operand) returns false and sets `address` to 0. 80 bool CalculateSrcAddress(const DumpContext& context, uint64_t& address); 81 82 // If the destination operand of the instruction is a memory operand, compute 83 // the address referred to by the operand, and store this in `address`. On 84 // success returns true, otherwise (if computation fails, or if the source 85 // operand is not a memory operand) returns false and sets `address` to 0. 86 bool CalculateDestAddress(const DumpContext& context, uint64_t& address); 87 88 // If the instruction was disassembled successfully, this will be true. IsValid()89 bool IsValid() const { return operation_.size() != 0; } 90 91 // Returns the operation part of the disassembly, without any prefixes: 92 // "pop" eax 93 // lock "xchg" eax, edx operation()94 const string& operation() const { return operation_; } 95 96 // Returns the destination operand of the disassembly, without memory operand 97 // size prefixes: 98 // mov DWORD PTR "[rax + 16]", edx dest()99 const string& dest() const { return dest_; } 100 101 // Returns the source operand of the disassembly, without memory operand 102 // size prefixes: 103 // mov rax, QWORD PTR "[rdx]" src()104 const string& src() const { return src_; } 105 106 private: 107 friend class DisassemblerObjdumpForTest; 108 109 // Writes out the provided `raw_bytes` to a temporary file, and executes objdump 110 // to disassemble according to `cpu`, which must be either MD_CONTEXT_X86 or 111 // MD_CONTEXT_AMD64. 
Once objdump has completed, parses out the instruction 112 // string from the first instruction in the output and stores it in 113 // `instruction`. 114 static bool DisassembleInstruction(uint32_t cpu, const uint8_t* raw_bytes, 115 unsigned int raw_bytes_len, 116 string& instruction); 117 118 // Splits an `instruction` into three parts, the "main" `operation` and 119 // the `dest` and `src` operands. 120 // Example: 121 // instruction = "lock cmpxchg QWORD PTR [rdi], rsi" 122 // operation = "cmpxchg", dest = "[rdi]", src = "rsi" 123 static bool TokenizeInstruction(const string& instruction, string& operation, 124 string& dest, string& src); 125 126 // Compute the address referenced by `expression` in `context`. 127 // Supports memory operands in the form 128 // (segment:)[base_reg(+index_reg*index_stride)(+-offset)] 129 // Returns false if evaluation fails, or if the operand is not a supported 130 // memory operand. 131 static bool CalculateAddress(const DumpContext& context, 132 const string& expression, 133 uint64_t& address); 134 135 // The parsed components of the disassembly for the instruction. 136 string operation_ = ""; 137 string dest_ = ""; 138 string src_ = ""; 139 }; 140 } // namespace google_breakpad 141 142 #endif // GOOGLE_BREAKPAD_PROCESSOR_DISASSEMBLER_OBJDUMP_H_