xref: /aosp_15_r20/external/capstone/bindings/ocaml/capstone.ml (revision 9a0e4156d50a75a99ec4f1653a0e9602a5d45c18)
1*9a0e4156SSadaf Ebrahimi(* Capstone Disassembly Engine
2*9a0e4156SSadaf Ebrahimi * By Nguyen Anh Quynh <[email protected]>, 2013-2014 *)
3*9a0e4156SSadaf Ebrahimi
4*9a0e4156SSadaf Ebrahimiopen Arm
5*9a0e4156SSadaf Ebrahimiopen Arm64
6*9a0e4156SSadaf Ebrahimiopen Mips
7*9a0e4156SSadaf Ebrahimiopen Ppc
8*9a0e4156SSadaf Ebrahimiopen X86
9*9a0e4156SSadaf Ebrahimiopen Sparc
10*9a0e4156SSadaf Ebrahimiopen Systemz
11*9a0e4156SSadaf Ebrahimiopen Xcore
12*9a0e4156SSadaf Ebrahimiopen M680x
13*9a0e4156SSadaf Ebrahimiopen Printf	(* debug *)
14*9a0e4156SSadaf Ebrahimi
(** Hardware architectures.

    A value of this type selects the instruction set when an engine is
    opened ([cs_open], [cs_disasm_quick]) and is stored in the [a] field of
    [csh].

    NOTE(review): values of this type are handed directly to the C stubs
    ([ocaml_open], [ocaml_cs_disasm], ...), so the constructor order
    presumably has to match the mapping used on the C side - confirm before
    reordering or inserting constructors. *)
type arch =
  | CS_ARCH_ARM
  | CS_ARCH_ARM64
  | CS_ARCH_MIPS
  | CS_ARCH_X86
  | CS_ARCH_PPC
  | CS_ARCH_SPARC
  | CS_ARCH_SYSZ
  | CS_ARCH_XCORE
  | CS_ARCH_M68K
  | CS_ARCH_TMS320C64X
  | CS_ARCH_M680X
28*9a0e4156SSadaf Ebrahimi
(** Hardware modes.

    [cs_open] and [cs_disasm_quick] take a [mode list], so several modes can
    be requested together for one engine.

    NOTE(review): like [arch], these constructors are passed to the C stubs;
    the declaration order presumably must stay in sync with the stub-side
    mapping - confirm before reordering. *)
type mode =
  |	CS_MODE_LITTLE_ENDIAN	(* little-endian mode (default mode) *)
  |	CS_MODE_ARM			(* ARM mode *)
  |	CS_MODE_16			(* 16-bit mode (for X86) *)
  |	CS_MODE_32			(* 32-bit mode (for X86) *)
  |	CS_MODE_64			(* 64-bit mode (for X86, PPC) *)
  |	CS_MODE_THUMB		(* ARM's Thumb mode, including Thumb-2 *)
  |	CS_MODE_MCLASS		(* ARM's MClass mode *)
  |	CS_MODE_V8    		(* ARMv8 A32 encodings for ARM *)
  |	CS_MODE_MICRO		(* MicroMips mode (MIPS architecture) *)
  |	CS_MODE_MIPS3		(* Mips3 mode (MIPS architecture) *)
  |	CS_MODE_MIPS32R6	(* Mips32-R6 mode (MIPS architecture) *)
  |	CS_MODE_MIPS2	    (* Mips2 mode (MIPS architecture) *)
  |	CS_MODE_V9			(* SparcV9 mode (Sparc architecture) *)
  |	CS_MODE_BIG_ENDIAN	(* big-endian mode *)
  |	CS_MODE_MIPS32		(* Mips32 mode (for Mips) *)
  |	CS_MODE_MIPS64		(* Mips64 mode (for Mips) *)
  |	CS_MODE_QPX         (* Quad Processing eXtensions mode (PowerPC) *)
  |	CS_MODE_M680X_6301	(* M680X Hitachi 6301,6303 mode *)
  |	CS_MODE_M680X_6309	(* M680X Hitachi 6309 mode *)
  |	CS_MODE_M680X_6800	(* M680X Motorola 6800,6802 mode *)
  |	CS_MODE_M680X_6801	(* M680X Motorola 6801,6803 mode *)
  |	CS_MODE_M680X_6805	(* M680X Motorola 6805 mode *)
  |	CS_MODE_M680X_6808	(* M680X Motorola 6808 mode *)
  |	CS_MODE_M680X_6809	(* M680X Motorola 6809 mode *)
  |	CS_MODE_M680X_6811	(* M680X Motorola/Freescale 68HC11 mode *)
  |	CS_MODE_M680X_CPU12	(* M680X Motorola/Freescale/NXP CPU12 mode *)
  |	CS_MODE_M680X_HCS08	(* M680X Freescale HCS08 mode *)
58*9a0e4156SSadaf Ebrahimi
59*9a0e4156SSadaf Ebrahimi
60*9a0e4156SSadaf Ebrahimi
(** Runtime option kinds for the disassembly engine.

    An option kind is passed to [cs_option] together with an [Int64.t] value
    (see the [_CS_OPT_*] constants defined below in this file). *)
type opt_type =
  |	CS_OPT_SYNTAX		(* Assembly output syntax *)
  |	CS_OPT_DETAIL		(* Break down instruction structure into details *)
  |	CS_OPT_MODE		(* Change engine's mode at run-time *)
  |	CS_OPT_MEM		(* User-defined dynamic memory related functions *)
  |	CS_OPT_SKIPDATA		(* Skip data when disassembling. Then engine is in SKIPDATA mode. *)
  |	CS_OPT_SKIPDATA_SETUP 	(* Setup user-defined function for SKIPDATA option *)
69*9a0e4156SSadaf Ebrahimi
70*9a0e4156SSadaf Ebrahimi
(* Operand access flags, shared by every architecture.  Each flag occupies
   its own bit, so flags can be combined, for example
   [_CS_AC_READ lor _CS_AC_WRITE]. *)

(** Uninitialized/invalid access type (no bit set). *)
let _CS_AC_INVALID = 0b00

(** Operand is read from memory or a register (bit 0). *)
let _CS_AC_READ = 0b01

(** Operand is written to memory or a register (bit 1). *)
let _CS_AC_WRITE = 0b10;;
76*9a0e4156SSadaf Ebrahimi
(* Values that accompany an [opt_type] when calling [cs_option]. *)

(** Turn OFF an option - the default for CS_OPT_DETAIL and CS_OPT_SKIPDATA. *)
let _CS_OPT_OFF = 0L

(** Turn ON an option (CS_OPT_DETAIL, CS_OPT_SKIPDATA). *)
let _CS_OPT_ON = 3L

(** Default asm syntax (CS_OPT_SYNTAX). *)
let _CS_OPT_SYNTAX_DEFAULT = 0L

(** X86 Intel asm syntax - the default on X86 (CS_OPT_SYNTAX). *)
let _CS_OPT_SYNTAX_INTEL = 1L

(** X86 ATT asm syntax (CS_OPT_SYNTAX). *)
let _CS_OPT_SYNTAX_ATT = 2L

(** Print register names as bare numbers (CS_OPT_SYNTAX). *)
let _CS_OPT_SYNTAX_NOREGNAME = 3L;;
84*9a0e4156SSadaf Ebrahimi
(* Operand kinds, numbered identically for every architecture. *)

(** Uninitialized/invalid operand. *)
let _CS_OP_INVALID = 0

(** Register operand. *)
let _CS_OP_REG = 1

(** Immediate operand. *)
let _CS_OP_IMM = 2

(** Memory operand. *)
let _CS_OP_MEM = 3

(** Floating-point operand. *)
let _CS_OP_FP = 4;;
91*9a0e4156SSadaf Ebrahimi
(* Instruction groups, numbered identically for every architecture. *)

(** Uninitialized/invalid group. *)
let _CS_GRP_INVALID = 0

(** All jump instructions (conditional + direct + indirect jumps). *)
let _CS_GRP_JUMP = 1

(** All call instructions. *)
let _CS_GRP_CALL = 2

(** All return instructions. *)
let _CS_GRP_RET = 3

(** All interrupt instructions (int + syscall). *)
let _CS_GRP_INT = 4

(** All interrupt-return instructions. *)
let _CS_GRP_IRET = 5

(** All privileged instructions. *)
let _CS_GRP_PRIVILEGE = 6;;
100*9a0e4156SSadaf Ebrahimi
(** Architecture-specific instruction information, stored in the [arch]
    field of [cs_insn0].

    Each constructor wraps the detail record exported by the matching
    architecture module opened at the top of this file ([Arm], [Arm64],
    [Mips], ...).  There is no constructor for [CS_ARCH_M68K] or
    [CS_ARCH_TMS320C64X].

    NOTE(review): values of this type are produced by the C stubs, so the
    constructor order presumably must match the tags used there - confirm
    before reordering. *)
type cs_arch =
	| CS_INFO_ARM of cs_arm
	| CS_INFO_ARM64 of cs_arm64
	| CS_INFO_MIPS of cs_mips
	| CS_INFO_X86 of cs_x86
	| CS_INFO_PPC of cs_ppc
	| CS_INFO_SPARC of cs_sparc
	| CS_INFO_SYSZ of cs_sysz
	| CS_INFO_XCORE of cs_xcore
	| CS_INFO_M680X of cs_m680x
111*9a0e4156SSadaf Ebrahimi
112*9a0e4156SSadaf Ebrahimi
(** Engine handle as seen from OCaml: the raw handle returned by the C stub
    paired with the architecture it was opened for. *)
type csh = {
	h: Int64.t;	(* raw handle from [_cs_open]; [cs_open] stores 0L here when the stub returned [None] *)
	a: arch;	(* architecture given to [cs_open]; forwarded to [_cs_disasm_internal] by [cs_disasm] *)
}
117*9a0e4156SSadaf Ebrahimi
(** One disassembled instruction, as returned by the disassembly stubs
    ([cs_disasm_quick], [_cs_disasm_internal]).

    NOTE(review): records of this type are built on the C side, so the field
    order presumably must match the stub's layout - confirm before changing. *)
type cs_insn0 = {
	id: int;	(* instruction id - presumably the value accepted by [cs_insn_name]; confirm *)
	address: int;	(* address of the instruction *)
	size: int;	(* size of the instruction - presumably in bytes; confirm *)
	bytes: int array;	(* raw encoding - presumably one array element per byte; confirm *)
	mnemonic: string;	(* mnemonic text *)
	op_str: string;	(* operand text *)
	regs_read: int array;	(* register ids searched by [cs_reg_read] *)
	regs_write: int array;	(* register ids searched by [cs_reg_write] *)
	groups: int array;	(* group ids searched by [cs_insn_group] *)
	arch: cs_arch;	(* architecture-specific details *)
}
130*9a0e4156SSadaf Ebrahimi
(* C stub bindings.  Names starting with an underscore take the raw
   [Int64.t] handle; the [cs_*] wrappers defined further down accept a [csh]
   record and forward its [h] field. *)

(* Open an engine for an architecture and a list of modes.  Yields [None]
   when no handle is produced (NOTE(review): presumably on any open
   failure - confirm against the C stub). *)
external _cs_open: arch -> mode list -> Int64.t option = "ocaml_open"
(* One-shot disassembly that takes the architecture and modes directly instead
   of a handle.  NOTE(review): the remaining arguments presumably mirror
   [cs_disasm]'s [code], [address] and [count] - confirm. *)
external cs_disasm_quick: arch -> mode list -> string -> Int64.t -> Int64.t -> cs_insn0 list = "ocaml_cs_disasm"
(* Disassembly on an already opened raw handle; [cs_disasm] passes
   architecture, raw handle, code, address and count in that order. *)
external _cs_disasm_internal: arch -> Int64.t -> string -> Int64.t -> Int64.t -> cs_insn0 list = "ocaml_cs_disasm_internal"
(* Name lookups by numeric id for registers, instructions and groups. *)
external _cs_reg_name: Int64.t -> int -> string = "ocaml_register_name"
external _cs_insn_name: Int64.t -> int -> string = "ocaml_instruction_name"
external _cs_group_name: Int64.t -> int -> string = "ocaml_group_name"
(* Library version as a single int (NOTE(review): encoding not visible
   here - see the C stub). *)
external cs_version: unit -> int = "ocaml_version"
(* Set a runtime option on a raw handle; the int result comes from the stub
   (NOTE(review): presumably an error code - confirm). *)
external _cs_option: Int64.t -> opt_type -> Int64.t -> int = "ocaml_option"
(* Close a raw handle; the int result comes from the stub (NOTE(review):
   presumably an error code - confirm). *)
external _cs_close: Int64.t -> int = "ocaml_close"
140*9a0e4156SSadaf Ebrahimi
141*9a0e4156SSadaf Ebrahimi
(** [cs_open _arch _mode] opens an engine through the [_cs_open] stub and
    wraps the outcome in a [csh] record that also remembers [_arch].

    No exception is raised when the stub yields [None]: the returned record
    then carries the raw handle [0L]. *)
let cs_open _arch _mode : csh =
  let raw =
    match _cs_open _arch _mode with
    | Some v -> v
    | None -> 0L
  in
  { h = raw; a = _arch };;
149*9a0e4156SSadaf Ebrahimi
(** [cs_close handle] closes the engine behind [handle] by passing its raw
    handle to the [_cs_close] stub, and returns the stub's int result. *)
let cs_close handle =
  let raw = handle.h in
  _cs_close raw;;
153*9a0e4156SSadaf Ebrahimi
(** [cs_option handle opt value] sets runtime option [opt] to [value] on the
    engine behind [handle] and returns the int result of the [_cs_option]
    stub. *)
let cs_option handle opt value =
  let raw = handle.h in
  _cs_option raw opt value;;
157*9a0e4156SSadaf Ebrahimi
(** [cs_disasm handle code address count] disassembles [code] with the engine
    behind [handle].  The architecture and raw handle stored in [handle] are
    forwarded, together with [code], [address] and [count], to
    [_cs_disasm_internal]; its instruction list is returned unchanged. *)
let cs_disasm handle code address count =
  let { h = raw; a = target } = handle in
  _cs_disasm_internal target raw code address count;;
161*9a0e4156SSadaf Ebrahimi
(** [cs_reg_name handle id] is the string the [_cs_reg_name] stub returns for
    register [id] on the engine behind [handle]. *)
let cs_reg_name handle id =
  let raw = handle.h in
  _cs_reg_name raw id;;
165*9a0e4156SSadaf Ebrahimi
(** [cs_insn_name handle id] is the string the [_cs_insn_name] stub returns
    for instruction [id] on the engine behind [handle]. *)
let cs_insn_name handle id =
  let raw = handle.h in
  _cs_insn_name raw id;;
169*9a0e4156SSadaf Ebrahimi
(** [cs_group_name handle id] is the string the [_cs_group_name] stub returns
    for group [id] on the engine behind [handle]. *)
let cs_group_name handle id =
  let raw = handle.h in
  _cs_group_name raw id;;
173*9a0e4156SSadaf Ebrahimi
(** Object view of one disassembled instruction.

    [c] is the engine handle the instruction came from and [a] is the raw
    instruction record.  The first ten methods return the corresponding
    fields of [a]; the [*_name] methods look an id up through the C stubs
    using the raw handle stored in [c]. *)
class cs_insn c a =
  object
    (* Plain accessors for the fields of the wrapped record. *)
    method id = a.id
    method address = a.address
    method size = a.size
    method bytes = a.bytes
    method mnemonic = a.mnemonic
    method op_str = a.op_str
    method regs_read = a.regs_read
    method regs_write = a.regs_write
    method groups = a.groups
    method arch = a.arch

    (* Name lookups; [id] is the method's own argument, not this
       instruction's id. *)
    method reg_name id = _cs_reg_name c.h id
    method insn_name id = _cs_insn_name c.h id
    method group_name id = _cs_group_name c.h id
  end;;
195*9a0e4156SSadaf Ebrahimi
(** [cs_insn_group handle insn group_id] is [true] when [group_id] occurs in
    [insn.groups], and [false] otherwise (including when the array is empty).

    [handle] is accepted for signature compatibility but is not used.

    The ids are compared with [=] rather than the physical-equality operator
    [==]; the result for ints is the same, but [=] is the operator meant for
    comparing values.  [Array.exists] scans the array directly and stops at
    the first match, so no intermediate list is built. *)
let cs_insn_group handle insn group_id =
  Array.exists (fun (g : int) -> g = group_id) insn.groups;;
198*9a0e4156SSadaf Ebrahimi
(** [cs_reg_read handle insn reg_id] is [true] when [reg_id] occurs in
    [insn.regs_read], and [false] otherwise (including when the array is
    empty).

    [handle] is accepted for signature compatibility but is not used.

    The ids are compared with [=] rather than the physical-equality operator
    [==]; the result for ints is the same, but [=] is the operator meant for
    comparing values.  [Array.exists] scans the array directly and stops at
    the first match, so no intermediate list is built. *)
let cs_reg_read handle insn reg_id =
  Array.exists (fun (r : int) -> r = reg_id) insn.regs_read;;
201*9a0e4156SSadaf Ebrahimi
(** [cs_reg_write handle insn reg_id] is [true] when [reg_id] occurs in
    [insn.regs_write], and [false] otherwise (including when the array is
    empty).

    [handle] is accepted for signature compatibility but is not used.

    The ids are compared with [=] rather than the physical-equality operator
    [==]; the result for ints is the same, but [=] is the operator meant for
    comparing values.  [Array.exists] scans the array directly and stops at
    the first match, so no intermediate list is built. *)
let cs_reg_write handle insn reg_id =
  Array.exists (fun (r : int) -> r = reg_id) insn.regs_write;;
204*9a0e4156SSadaf Ebrahimi
205*9a0e4156SSadaf Ebrahimi
(** Object wrapper around an engine.

    Creating an instance opens an engine for architecture [a] and mode list
    [m] through [cs_open].  The class has no method that closes the handle.

    [disasm code offset count] runs [_cs_disasm_internal] on the instance's
    handle and wraps every returned record in a [cs_insn] object that shares
    that handle. *)
class cs a m =
  let handle = cs_open a m in
  object
    method disasm code offset count =
      let wrap raw = new cs_insn handle raw in
      List.map wrap (_cs_disasm_internal a handle.h code offset count)
  end;;
215