// Copyright 2010 Google LLC
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google LLC nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// disassembler_x86.h: Basic x86 bytecode disassembler
//
// Provides a simple disassembler which wraps libdisasm. This allows simple
// tests to be run against bytecode to test for various properties.
33*9712c20fSFrederick Mayle // 34*9712c20fSFrederick Mayle // Author: Cris Neckar 35*9712c20fSFrederick Mayle 36*9712c20fSFrederick Mayle #ifndef GOOGLE_BREAKPAD_PROCESSOR_DISASSEMBLER_X86_H_ 37*9712c20fSFrederick Mayle #define GOOGLE_BREAKPAD_PROCESSOR_DISASSEMBLER_X86_H_ 38*9712c20fSFrederick Mayle 39*9712c20fSFrederick Mayle #include <stddef.h> 40*9712c20fSFrederick Mayle #include <sys/types.h> 41*9712c20fSFrederick Mayle 42*9712c20fSFrederick Mayle #include "google_breakpad/common/breakpad_types.h" 43*9712c20fSFrederick Mayle 44*9712c20fSFrederick Mayle namespace libdis { 45*9712c20fSFrederick Mayle #include "third_party/libdisasm/libdis.h" 46*9712c20fSFrederick Mayle } 47*9712c20fSFrederick Mayle 48*9712c20fSFrederick Mayle namespace google_breakpad { 49*9712c20fSFrederick Mayle 50*9712c20fSFrederick Mayle enum { 51*9712c20fSFrederick Mayle DISX86_NONE = 0x0, 52*9712c20fSFrederick Mayle DISX86_BAD_BRANCH_TARGET = 0x1, 53*9712c20fSFrederick Mayle DISX86_BAD_ARGUMENT_PASSED = 0x2, 54*9712c20fSFrederick Mayle DISX86_BAD_WRITE = 0x4, 55*9712c20fSFrederick Mayle DISX86_BAD_BLOCK_WRITE = 0x8, 56*9712c20fSFrederick Mayle DISX86_BAD_READ = 0x10, 57*9712c20fSFrederick Mayle DISX86_BAD_BLOCK_READ = 0x20, 58*9712c20fSFrederick Mayle DISX86_BAD_COMPARISON = 0x40 59*9712c20fSFrederick Mayle }; 60*9712c20fSFrederick Mayle 61*9712c20fSFrederick Mayle class DisassemblerX86 { 62*9712c20fSFrederick Mayle public: 63*9712c20fSFrederick Mayle // TODO(cdn): Modify this class to take a MemoryRegion instead of just 64*9712c20fSFrederick Mayle // a raw buffer. This will make it easier to use this on arbitrary 65*9712c20fSFrederick Mayle // minidumps without first copying out the code segment. 
66*9712c20fSFrederick Mayle DisassemblerX86(const uint8_t* bytecode, uint32_t, uint32_t); 67*9712c20fSFrederick Mayle ~DisassemblerX86(); 68*9712c20fSFrederick Mayle 69*9712c20fSFrederick Mayle // This walks to the next instruction in the memory region and 70*9712c20fSFrederick Mayle // sets flags based on the type of instruction and previous state 71*9712c20fSFrederick Mayle // including any registers marked as bad through setBadRead() 72*9712c20fSFrederick Mayle // or setBadWrite(). This method can be called in a loop to 73*9712c20fSFrederick Mayle // disassemble until the end of a region. 74*9712c20fSFrederick Mayle uint32_t NextInstruction(); 75*9712c20fSFrederick Mayle 76*9712c20fSFrederick Mayle // Indicates whether the current disassembled instruction was valid. currentInstructionValid()77*9712c20fSFrederick Mayle bool currentInstructionValid() { return instr_valid_; } 78*9712c20fSFrederick Mayle 79*9712c20fSFrederick Mayle // Returns the current instruction as defined in libdis.h, 80*9712c20fSFrederick Mayle // or NULL if the current instruction is not valid. currentInstruction()81*9712c20fSFrederick Mayle const libdis::x86_insn_t* currentInstruction() { 82*9712c20fSFrederick Mayle return instr_valid_ ? ¤t_instr_ : NULL; 83*9712c20fSFrederick Mayle } 84*9712c20fSFrederick Mayle 85*9712c20fSFrederick Mayle // Returns the type of the current instruction as defined in libdis.h. currentInstructionGroup()86*9712c20fSFrederick Mayle libdis::x86_insn_group currentInstructionGroup() { 87*9712c20fSFrederick Mayle return current_instr_.group; 88*9712c20fSFrederick Mayle } 89*9712c20fSFrederick Mayle 90*9712c20fSFrederick Mayle // Indicates whether a return instruction has been encountered. endOfBlock()91*9712c20fSFrederick Mayle bool endOfBlock() { return end_of_block_; } 92*9712c20fSFrederick Mayle 93*9712c20fSFrederick Mayle // The flags set so far for the disassembly. 
flags()94*9712c20fSFrederick Mayle uint16_t flags() { return flags_; } 95*9712c20fSFrederick Mayle 96*9712c20fSFrederick Mayle // This sets an indicator that the register used to determine 97*9712c20fSFrederick Mayle // src or dest for the current instruction is tainted. These can 98*9712c20fSFrederick Mayle // be used after examining the current instruction to indicate, 99*9712c20fSFrederick Mayle // for example that a bad read or write occurred and the pointer 100*9712c20fSFrederick Mayle // stored in the register is currently invalid. 101*9712c20fSFrederick Mayle bool setBadRead(); 102*9712c20fSFrederick Mayle bool setBadWrite(); 103*9712c20fSFrederick Mayle 104*9712c20fSFrederick Mayle protected: 105*9712c20fSFrederick Mayle const uint8_t* bytecode_; 106*9712c20fSFrederick Mayle uint32_t size_; 107*9712c20fSFrederick Mayle uint32_t virtual_address_; 108*9712c20fSFrederick Mayle uint32_t current_byte_offset_; 109*9712c20fSFrederick Mayle uint32_t current_inst_offset_; 110*9712c20fSFrederick Mayle 111*9712c20fSFrederick Mayle bool instr_valid_; 112*9712c20fSFrederick Mayle libdis::x86_insn_t current_instr_; 113*9712c20fSFrederick Mayle 114*9712c20fSFrederick Mayle // TODO(cdn): Maybe also track an expression's index register. 115*9712c20fSFrederick Mayle // ex: mov eax, [ebx + ecx]; ebx is base, ecx is index. 116*9712c20fSFrederick Mayle bool register_valid_; 117*9712c20fSFrederick Mayle libdis::x86_reg_t bad_register_; 118*9712c20fSFrederick Mayle 119*9712c20fSFrederick Mayle bool pushed_bad_value_; 120*9712c20fSFrederick Mayle bool end_of_block_; 121*9712c20fSFrederick Mayle 122*9712c20fSFrederick Mayle uint16_t flags_; 123*9712c20fSFrederick Mayle }; 124*9712c20fSFrederick Mayle 125*9712c20fSFrederick Mayle } // namespace google_breakpad 126*9712c20fSFrederick Mayle 127*9712c20fSFrederick Mayle #endif // GOOGLE_BREAKPAD_PROCESSOR_DISASSEMBLER_X86_H_ 128