# Capstone Disassembler Engine
# By Dang Hoang Vu, 2013
#
# Generates the per-architecture "*_const" source files of the Java, Python
# and OCaml bindings from the C headers found under INCL_DIR.
#
# Usage: const_generator.py <java|python|ocaml|all>
# Paths are relative, so run it from the directory that contains the
# ./java, ./python and ./ocaml output trees.
from __future__ import print_function

import re
import sys

INCL_DIR = '../include/capstone/'

include = [
    'arm.h', 'arm64.h', 'm68k.h', 'mips.h', 'x86.h', 'ppc.h', 'sparc.h',
    'systemz.h', 'xcore.h', 'tms320c64x.h', 'm680x.h', 'evm.h', 'mos65xx.h',
]

# One entry per binding language.  Besides the formatting strings, each entry
# maps a header name to the prefix used for the generated file name; a header
# that is absent from an entry is skipped (with a warning) for that language.
template = {
    'java': {
        'header': "// For Capstone Engine. AUTO-GENERATED FILE, DO NOT EDIT\npackage capstone;\n\npublic class %s_const {\n",
        'footer': "}",
        'line_format': '\tpublic static final int %s = %s;\n',
        'out_file': './java/capstone/%s_const.java',
        # prefixes for constant filenames of all archs - case sensitive
        'arm.h': 'Arm',
        'arm64.h': 'Arm64',
        'm68k.h': 'M68k',
        'mips.h': 'Mips',
        'x86.h': 'X86',
        'ppc.h': 'Ppc',
        'sparc.h': 'Sparc',
        'systemz.h': 'Sysz',
        'xcore.h': 'Xcore',
        'tms320c64x.h': 'TMS320C64x',
        'm680x.h': 'M680x',
        'evm.h': 'Evm',
        'comment_open': '\t//',
        'comment_close': '',
    },
    'python': {
        'header': "# For Capstone Engine. AUTO-GENERATED FILE, DO NOT EDIT [%s_const.py]\n",
        'footer': "",
        'line_format': '%s = %s\n',
        'out_file': './python/capstone/%s_const.py',
        # prefixes for constant filenames of all archs - case sensitive
        'arm.h': 'arm',
        'arm64.h': 'arm64',
        'm68k.h': 'm68k',
        'mips.h': 'mips',
        'x86.h': 'x86',
        'ppc.h': 'ppc',
        'sparc.h': 'sparc',
        'systemz.h': 'sysz',
        'xcore.h': 'xcore',
        'tms320c64x.h': 'tms320c64x',
        'm680x.h': 'm680x',
        'evm.h': 'evm',
        'mos65xx.h': 'mos65xx',
        'comment_open': '#',
        'comment_close': '',
    },
    'ocaml': {
        'header': "(* For Capstone Engine. AUTO-GENERATED FILE, DO NOT EDIT [%s_const.ml] *)\n",
        'footer': "",
        'line_format': 'let _%s = %s;;\n',
        'out_file': './ocaml/%s_const.ml',
        # prefixes for constant filenames of all archs - case sensitive
        'arm.h': 'arm',
        'arm64.h': 'arm64',
        'mips.h': 'mips',
        'm68k.h': 'm68k',
        'x86.h': 'x86',
        'ppc.h': 'ppc',
        'sparc.h': 'sparc',
        'systemz.h': 'sysz',
        'xcore.h': 'xcore',
        'tms320c64x.h': 'tms320c64x',
        'm680x.h': 'm680x',
        'evm.h': 'evm',
        'comment_open': '(*',
        'comment_close': ' *)',
    },
}

# markup for comments to be added to autogen files
MARKUP = '//>'

# Compiled once; raw strings avoid the invalid "\s" escape of a plain literal.
_WHITESPACE = re.compile(r'\s+')
_ULL_SHIFT = re.compile(r'\((\d+)ULL << (\d+)\)')


def _convert_header(lines, prefix, templ, lang):
    """Translate the lines of one C header into binding source fragments.

    Only identifiers starting with ``prefix.upper()`` are converted.  Enum
    members without an explicit value get a running counter that restarts
    after every explicit integer value.

    Args:
        lines: iterable of text lines read from the header.
        prefix: file-name prefix of the architecture (e.g. ``'arm'``).
        templ: the ``template`` entry of the target language.
        lang: target language name; ``'ocaml'`` enables operator rewriting.

    Returns:
        A list of strings which, concatenated, form the body of the
        generated file (without the template header and footer).
    """
    tag = prefix.upper()
    chunks = []
    count = 0
    for line in lines:
        line = line.strip()

        if line.startswith(MARKUP):  # markup for comments
            chunks.append("\n%s%s%s\n" % (templ['comment_open'],
                                          line.replace(MARKUP, ''),
                                          templ['comment_close']))
            continue

        if line == '' or line.startswith('//'):
            continue

        if line.startswith('#define '):
            # cut off "#define " and split into at most name + expression
            parts = _WHITESPACE.split(line[8:], 1)
            if len(parts) != 2:
                continue
            if '(' in parts[0] or ')' in parts[0]:  # function-like macro
                continue
            # rewrite as "NAME = value" so the enum parsing below handles it
            line = ' '.join((parts[0], '=', parts[1]))

        if not line.startswith(tag):
            continue

        for item in line.strip().split(','):
            item = item.strip()
            if not item or item.startswith('//'):
                continue
            # hacky: remove type cast (uint64_t)
            item = item.replace('(uint64_t)', '')
            item = _ULL_SHIFT.sub(r'\1 << \2', item)  # (1ULL << 1) to 1 << 1
            fields = _WHITESPACE.split(item)

            if not fields[0].startswith(tag):
                continue
            if len(fields) > 1 and fields[1] not in ('//', '///<', '='):
                print("Error: Unable to convert %s" % fields)
                continue
            if len(fields) > 1 and fields[1] == '=':
                rhs = ''.join(fields[2:])
            else:
                rhs = str(count)
                count += 1

            try:
                count = int(rhs) + 1
                if count == 1:
                    # a value of 0 starts a new enum: separate it visually
                    chunks.append("\n")
            except ValueError:
                # rhs is an expression, not a plain decimal integer
                if lang == 'ocaml':
                    # ocaml uses lsl for '<<', lor for '|'
                    rhs = rhs.replace('<<', ' lsl ')
                    rhs = rhs.replace('|', ' lor ')
                    # ocaml variable has _ as prefix; the slice keeps an
                    # empty right-hand side from raising IndexError
                    if rhs[:1].isalpha():
                        rhs = '_' + rhs

            chunks.append(templ['line_format'] % (fields[0].strip(), rhs))
    return chunks


def gen(lang):
    """Generate the constant files of every known header for one binding.

    Args:
        lang: a key of ``template`` ('java', 'python' or 'ocaml').

    Raises:
        KeyError: if ``lang`` is not a key of ``template``.
        IOError/OSError: if a header cannot be read or an output file
            cannot be written.
    """
    print('Generating bindings for', lang)
    templ = template[lang]
    for target in include:
        if target not in templ:
            print("Warning: No binding found for %s" % target)
            continue
        prefix = templ[target]

        # Read the header before touching the output so that a missing
        # header does not leave a truncated generated file behind.
        with open(INCL_DIR + target) as header:
            lines = header.readlines()

        text = ''.join([templ['header'] % prefix]
                       + _convert_header(lines, prefix, templ, lang)
                       + [templ['footer']])

        # open as binary prevents windows newlines
        with open(templ['out_file'] % prefix, 'wb') as outfile:
            outfile.write(text.encode("utf-8"))


def main():
    """Run the generator for the binding named in ``sys.argv[1]``.

    Raises:
        RuntimeError: if the requested binding is neither 'all' nor a key
            of ``template``.  Any other failure (e.g. an unreadable header)
            propagates unchanged instead of being reported as an
            unsupported binding.
    """
    lang = sys.argv[1]
    if lang == 'all':
        for key in template:
            gen(key)
    elif lang in template:
        gen(lang)
    else:
        raise RuntimeError("Unsupported binding %s" % lang)


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage:", sys.argv[0], " <bindings: java|python|ocaml|all>")
        sys.exit(1)
    main()