(* Capstone Disassembly Engine
 * By Nguyen Anh Quynh <[email protected]>, 2013-2014 *)

open Arm
open Arm64
open Mips
open Ppc
open X86
open Sparc
open Systemz
open Xcore
open M680x
open Printf	(* debug *)

(* Hardware architectures.
 * NOTE(review): these constructors are passed directly to the C stubs
 * declared below, so their order presumably has to match the C side;
 * do not reorder without checking the stubs. *)
type arch =
  | CS_ARCH_ARM
  | CS_ARCH_ARM64
  | CS_ARCH_MIPS
  | CS_ARCH_X86
  | CS_ARCH_PPC
  | CS_ARCH_SPARC
  | CS_ARCH_SYSZ
  | CS_ARCH_XCORE
  | CS_ARCH_M68K
  | CS_ARCH_TMS320C64X
  | CS_ARCH_M680X

(* Hardware modes *)
type mode =
  |	CS_MODE_LITTLE_ENDIAN	(* little-endian mode (default mode) *)
  |	CS_MODE_ARM	(* ARM mode *)
  |	CS_MODE_16	(* 16-bit mode (for X86) *)
  |	CS_MODE_32	(* 32-bit mode (for X86) *)
  |	CS_MODE_64	(* 64-bit mode (for X86, PPC) *)
  |	CS_MODE_THUMB	(* ARM's Thumb mode, including Thumb-2 *)
  |	CS_MODE_MCLASS	(* ARM's MClass mode *)
  |	CS_MODE_V8	(* ARMv8 A32 encodings for ARM *)
  |	CS_MODE_MICRO	(* MicroMips mode (MIPS architecture) *)
  |	CS_MODE_MIPS3	(* Mips3 mode (MIPS architecture) *)
  |	CS_MODE_MIPS32R6	(* Mips32-R6 mode (MIPS architecture) *)
  |	CS_MODE_MIPS2	(* Mips2 mode (MIPS architecture) *)
  |	CS_MODE_V9	(* SparcV9 mode (Sparc architecture) *)
  |	CS_MODE_BIG_ENDIAN	(* big-endian mode *)
  |	CS_MODE_MIPS32	(* Mips32 mode (for Mips) *)
  |	CS_MODE_MIPS64	(* Mips64 mode (for Mips) *)
  |	CS_MODE_QPX	(* Quad Processing eXtensions mode (PowerPC) *)
  |	CS_MODE_M680X_6301	(* M680X Hitachi 6301,6303 mode *)
  |	CS_MODE_M680X_6309	(* M680X Hitachi 6309 mode *)
  |	CS_MODE_M680X_6800	(* M680X Motorola 6800,6802 mode *)
  |	CS_MODE_M680X_6801	(* M680X Motorola 6801,6803 mode *)
  |	CS_MODE_M680X_6805	(* M680X Motorola 6805 mode *)
  |	CS_MODE_M680X_6808	(* M680X Motorola 6808 mode *)
  |	CS_MODE_M680X_6809	(* M680X Motorola 6809 mode *)
  |	CS_MODE_M680X_6811	(* M680X Motorola/Freescale 68HC11 mode *)
  |	CS_MODE_M680X_CPU12	(* M680X Motorola/Freescale/NXP CPU12 mode *)
  |	CS_MODE_M680X_HCS08	(* M680X Freescale HCS08 mode *)



(* Runtime options for the disassembly engine *)
type opt_type =
  |	CS_OPT_SYNTAX		(* Assembly output syntax *)
  |	CS_OPT_DETAIL		(* Break down instruction structure into details *)
  |	CS_OPT_MODE		(* Change engine's mode at run-time *)
  |	CS_OPT_MEM		(* User-defined dynamic memory related functions *)
  |	CS_OPT_SKIPDATA		(* Skip data when disassembling. Then engine is in SKIPDATA mode. *)
  |	CS_OPT_SKIPDATA_SETUP 	(* Setup user-defined function for SKIPDATA option *)


(* Common instruction operand access types - to be consistent across all architectures. *)
(* It is possible to combine access types, for example: CS_AC_READ | CS_AC_WRITE *)
let _CS_AC_INVALID = 0;;	(* Uninitialized/invalid access type. *)
let _CS_AC_READ    = 1 lsl 0;; (* Operand read from memory or register. *)
let _CS_AC_WRITE   = 1 lsl 1;; (* Operand write to memory or register. *)

(* Runtime option value (associated with option type above) *)
let _CS_OPT_OFF = 0L;; (* Turn OFF an option - default option of CS_OPT_DETAIL, CS_OPT_SKIPDATA. *)
let _CS_OPT_ON = 3L;;  (* Turn ON an option (CS_OPT_DETAIL, CS_OPT_SKIPDATA). *)
let _CS_OPT_SYNTAX_DEFAULT = 0L;; (* Default asm syntax (CS_OPT_SYNTAX). *)
let _CS_OPT_SYNTAX_INTEL = 1L;; (* X86 Intel asm syntax - default on X86 (CS_OPT_SYNTAX). *)
let _CS_OPT_SYNTAX_ATT = 2L;; (* X86 ATT asm syntax (CS_OPT_SYNTAX). *)
let _CS_OPT_SYNTAX_NOREGNAME = 3L;; (* Prints register name with only number (CS_OPT_SYNTAX) *)

(* Common instruction operand types - to be consistent across all architectures. *)
let _CS_OP_INVALID = 0;;  (* uninitialized/invalid operand. *)
let _CS_OP_REG     = 1;;  (* Register operand. *)
let _CS_OP_IMM     = 2;;  (* Immediate operand. *)
let _CS_OP_MEM     = 3;;  (* Memory operand. *)
let _CS_OP_FP      = 4;;  (* Floating-Point operand. *)

(* Common instruction groups - to be consistent across all architectures. *)
let _CS_GRP_INVALID = 0;;  (* uninitialized/invalid group. *)
let _CS_GRP_JUMP    = 1;;  (* all jump instructions (conditional+direct+indirect jumps) *)
let _CS_GRP_CALL    = 2;;  (* all call instructions *)
let _CS_GRP_RET     = 3;;  (* all return instructions *)
let _CS_GRP_INT     = 4;;  (* all interrupt instructions (int+syscall) *)
let _CS_GRP_IRET    = 5;;  (* all interrupt return instructions *)
let _CS_GRP_PRIVILEGE = 6;;  (* all privileged instructions *)

(* Architecture-specific instruction details; each payload type comes from
 * the per-architecture module opened at the top of this file. *)
type cs_arch =
	| CS_INFO_ARM of cs_arm
	| CS_INFO_ARM64 of cs_arm64
	| CS_INFO_MIPS of cs_mips
	| CS_INFO_X86 of cs_x86
	| CS_INFO_PPC of cs_ppc
	| CS_INFO_SPARC of cs_sparc
	| CS_INFO_SYSZ of cs_sysz
	| CS_INFO_XCORE of cs_xcore
	| CS_INFO_M680X of cs_m680x


(* Engine handle: the raw value returned by the C stub plus the
 * architecture it was opened for. *)
type csh = {
	h: Int64.t;
	a: arch;
}

(* One disassembled instruction as produced by the C stubs. *)
type cs_insn0 = {
	id: int;
	address: int;
	size: int;
	bytes: int array;
	mnemonic: string;
	op_str: string;
	regs_read: int array;
	regs_write: int array;
	groups: int array;
	arch: cs_arch;
}

(* Bindings to the C primitives named in the string literals. *)
external _cs_open: arch -> mode list -> Int64.t option = "ocaml_open"
external cs_disasm_quick: arch -> mode list -> string -> Int64.t -> Int64.t -> cs_insn0 list = "ocaml_cs_disasm"
external _cs_disasm_internal: arch -> Int64.t -> string -> Int64.t -> Int64.t -> cs_insn0 list = "ocaml_cs_disasm_internal"
external _cs_reg_name: Int64.t -> int -> string = "ocaml_register_name"
external _cs_insn_name: Int64.t -> int -> string = "ocaml_instruction_name"
external _cs_group_name: Int64.t -> int -> string = "ocaml_group_name"
external cs_version: unit -> int = "ocaml_version"
external _cs_option: Int64.t -> opt_type -> Int64.t -> int = "ocaml_option"
external _cs_close: Int64.t -> int = "ocaml_close"


(* Open an engine handle for [_arch] with the mode list [_mode].
 * When the C stub returns None the result still is a csh record, with
 * its h field set to 0L; callers that need to detect failure must check
 * for that value, since no exception is raised here. *)
let cs_open _arch _mode: csh =
	match _cs_open _arch _mode with
	| None -> { h = 0L; a = _arch }
	| Some v -> { h = v; a = _arch };;

(* Close the handle; returns the int produced by the C stub. *)
let cs_close handle =
	_cs_close handle.h;;

(* Set runtime option [opt] to [value] (one of the _CS_OPT_* constants
 * above); returns the int produced by the C stub. *)
let cs_option handle opt value =
	_cs_option handle.h opt value;;

(* Disassemble [code] through an already opened handle.
 * NOTE(review): [address] and [count] are forwarded unchanged to the C
 * stub; presumably the start address and the maximum instruction count -
 * confirm against the stub. *)
let cs_disasm handle code address count =
	_cs_disasm_internal handle.a handle.h code address count;;

(* Name lookups for a register, instruction or group id. *)
let cs_reg_name handle id =
	_cs_reg_name handle.h id;;

let cs_insn_name handle id =
	_cs_insn_name handle.h id;;

let cs_group_name handle id =
	_cs_group_name handle.h id;;

(* Object wrapper around a cs_insn0 record [a] together with the handle
 * [c] it was produced from. The accessor methods return the record fields
 * unchanged; the *_name methods look names up through the handle. *)
class cs_insn c a =
	let csh = c in
	let (id, address, size, bytes, mnemonic, op_str, regs_read,
        regs_write, groups, arch) =
		(a.id, a.address, a.size, a.bytes, a.mnemonic, a.op_str,
        a.regs_read, a.regs_write, a.groups, a.arch) in
	object
		method id = id;
		method address = address;
		method size = size;
		method bytes = bytes;
		method mnemonic = mnemonic;
		method op_str = op_str;
		method regs_read = regs_read;
		method regs_write = regs_write;
		method groups = groups;
		method arch = arch;
		method reg_name id = _cs_reg_name csh.h id;
		method insn_name id = _cs_insn_name csh.h id;
		method group_name id = _cs_group_name csh.h id;
	end;;

(* The three predicates below take a cs_insn0 record. The [handle]
 * argument is not used; it is kept so existing callers keep working.
 * Membership is tested with structural equality (List.mem). *)

(* True when [group_id] is listed in the groups of [insn]. *)
let cs_insn_group handle insn group_id =
	List.mem group_id (Array.to_list insn.groups);;

(* True when [reg_id] is listed in the registers read by [insn]. *)
let cs_reg_read handle insn reg_id =
	List.mem reg_id (Array.to_list insn.regs_read);;

(* True when [reg_id] is listed in the registers written by [insn]. *)
let cs_reg_write handle insn reg_id =
	List.mem reg_id (Array.to_list insn.regs_write);;


(* Object wrapper that opens a handle for architecture [a] and mode list
 * [m] at construction time.
 * NOTE(review): nothing in this class passes the handle to cs_close. *)
class cs a m =
	let mode = m and arch = a in
	let handle = cs_open arch mode in
	object
		(* Disassemble [code] and wrap every resulting record in a
		 * cs_insn object bound to this handle. *)
		method disasm code offset count =
			let insns = (_cs_disasm_internal arch handle.h code offset count) in
			List.map (fun x -> new cs_insn handle x) insns;

	end;;