#!/usr/bin/python
# Test tool to disassemble MC files. By Nguyen Anh Quynh, 2017
import array
import binascii
import os.path
import re
import sys

from capstone import *


# Matches one complete '0x'-prefixed hexadecimal literal.
_HEX_LITERAL = re.compile(r'0x[0-9a-fA-F]+')

# Architecture names accepted in the header line of an input file.
_ARCHS = {
    "CS_ARCH_ARM": CS_ARCH_ARM,
    "CS_ARCH_ARM64": CS_ARCH_ARM64,
    "CS_ARCH_MIPS": CS_ARCH_MIPS,
    "CS_ARCH_PPC": CS_ARCH_PPC,
    "CS_ARCH_SPARC": CS_ARCH_SPARC,
    "CS_ARCH_SYSZ": CS_ARCH_SYSZ,
    "CS_ARCH_X86": CS_ARCH_X86,
    "CS_ARCH_XCORE": CS_ARCH_XCORE,
    "CS_ARCH_M68K": CS_ARCH_M68K,
}

# Mode strings accepted in the header line (looked up after all spaces have
# been removed from the mode field).
_MODES = {
    "CS_MODE_16": CS_MODE_16,
    "CS_MODE_32": CS_MODE_32,
    "CS_MODE_64": CS_MODE_64,
    "CS_MODE_MIPS32": CS_MODE_MIPS32,
    "CS_MODE_MIPS64": CS_MODE_MIPS64,
    "0": CS_MODE_ARM,
    "CS_MODE_ARM": CS_MODE_ARM,
    "CS_MODE_THUMB": CS_MODE_THUMB,
    "CS_MODE_ARM+CS_MODE_V8": CS_MODE_ARM + CS_MODE_V8,
    "CS_MODE_THUMB+CS_MODE_V8": CS_MODE_THUMB + CS_MODE_V8,
    "CS_MODE_THUMB+CS_MODE_MCLASS": CS_MODE_THUMB + CS_MODE_MCLASS,
    "CS_MODE_LITTLE_ENDIAN": CS_MODE_LITTLE_ENDIAN,
    "CS_MODE_BIG_ENDIAN": CS_MODE_BIG_ENDIAN,
    "CS_MODE_64+CS_MODE_LITTLE_ENDIAN": CS_MODE_64 + CS_MODE_LITTLE_ENDIAN,
    "CS_MODE_64+CS_MODE_BIG_ENDIAN": CS_MODE_64 + CS_MODE_BIG_ENDIAN,
    "CS_MODE_MIPS32+CS_MODE_MICRO": CS_MODE_MIPS32 + CS_MODE_MICRO,
    "CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN":
        CS_MODE_MIPS32 + CS_MODE_MICRO + CS_MODE_BIG_ENDIAN,
    "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN+CS_MODE_MICRO":
        CS_MODE_MIPS32 + CS_MODE_MICRO + CS_MODE_BIG_ENDIAN,
    "CS_MODE_BIG_ENDIAN+CS_MODE_V9": CS_MODE_BIG_ENDIAN + CS_MODE_V9,
    "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN": CS_MODE_MIPS32 + CS_MODE_BIG_ENDIAN,
    "CS_MODE_MIPS32+CS_MODE_LITTLE_ENDIAN":
        CS_MODE_MIPS32 + CS_MODE_LITTLE_ENDIAN,
    "CS_MODE_MIPS64+CS_MODE_LITTLE_ENDIAN":
        CS_MODE_MIPS64 + CS_MODE_LITTLE_ENDIAN,
    "CS_MODE_MIPS64+CS_MODE_BIG_ENDIAN": CS_MODE_MIPS64 + CS_MODE_BIG_ENDIAN,
}

# Option names that may appear as the third header field.
# NOTE(review): test_file() parses the option field but never looks it up
# here; the syntax is chosen from the arch / file name instead.
_OPTIONS = {
    "CS_OPT_SYNTAX_ATT": CS_OPT_SYNTAX_ATT,
    "CS_OPT_SYNTAX_NOREGNAME": CS_OPT_SYNTAX_NOREGNAME,
}

# (arch, mode) -> list of command-line flags.
# NOTE(review): not consulted anywhere in this script; presumably the
# llvm-mc flags matching each Capstone configuration -- confirm before use.
_MC_MODES = {
    ("CS_ARCH_X86", "CS_MODE_32"): ['-triple=i386'],
    ("CS_ARCH_X86", "CS_MODE_64"): ['-triple=x86_64'],
    ("CS_ARCH_ARM", "CS_MODE_ARM"): ['-triple=armv7'],
    ("CS_ARCH_ARM", "CS_MODE_THUMB"): ['-triple=thumbv7'],
    ("CS_ARCH_ARM", "CS_MODE_ARM+CS_MODE_V8"): ['-triple=armv8'],
    ("CS_ARCH_ARM", "CS_MODE_THUMB+CS_MODE_V8"): ['-triple=thumbv8'],
    ("CS_ARCH_ARM", "CS_MODE_THUMB+CS_MODE_MCLASS"): ['-triple=thumbv7m'],
    ("CS_ARCH_ARM64", "0"): ['-triple=aarch64'],
    ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN"): ['-triple=mips'],
    ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_MICRO"):
        ['-triple=mipsel', '-mattr=+micromips'],
    ("CS_ARCH_MIPS", "CS_MODE_MIPS64"): ['-triple=mips64el'],
    ("CS_ARCH_MIPS", "CS_MODE_MIPS32"): ['-triple=mipsel'],
    ("CS_ARCH_MIPS", "CS_MODE_MIPS64+CS_MODE_BIG_ENDIAN"): ['-triple=mips64'],
    ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN"):
        ['-triple=mips', '-mattr=+micromips'],
    ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN+CS_MODE_MICRO"):
        ['-triple=mips', '-mattr=+micromips'],
    ("CS_ARCH_PPC", "CS_MODE_BIG_ENDIAN"): ['-triple=powerpc64'],
    ('CS_ARCH_SPARC', 'CS_MODE_BIG_ENDIAN'): ['-triple=sparc'],
    ('CS_ARCH_SPARC', 'CS_MODE_BIG_ENDIAN+CS_MODE_V9'): ['-triple=sparcv9'],
    ('CS_ARCH_SYSZ', '0'): ['-triple=s390x', '-mcpu=z196'],
}

# MIPS register alias -> numeric register name.
_MIPS_REG_ALIASES = (
    ('$at', '$1'),
    ('$v0', '$2'), ('$v1', '$3'),
    ('$a0', '$4'), ('$a1', '$5'), ('$a2', '$6'), ('$a3', '$7'),
    ('$t0', '$8'), ('$t1', '$9'), ('$t2', '$10'), ('$t3', '$11'),
    ('$t4', '$12'), ('$t5', '$13'), ('$t6', '$14'), ('$t7', '$15'),
    ('$t8', '$24'), ('$t9', '$25'),
    ('$s0', '$16'), ('$s1', '$17'), ('$s2', '$18'), ('$s3', '$19'),
    ('$s4', '$20'), ('$s5', '$21'), ('$s6', '$22'), ('$s7', '$23'),
    ('$k0', '$26'), ('$k1', '$27'),
)


# convert all hex numbers to decimal numbers in a text
def normalize_hex(a):
    """Return `a` with every '0x...' hexadecimal literal rewritten in decimal.

    Each literal is converted exactly where it occurs, so a short literal
    such as '0x1' can no longer corrupt a longer one that shares its prefix
    ('0x10').  A bare '0x' with no hex digits after it is left untouched.
    """
    return _HEX_LITERAL.sub(lambda m: str(int(m.group(0), 16)), a)


def _normalize_asm(text, arch):
    """Return `text` with hex literals in decimal and all spaces removed.

    When `arch` is 'CS_ARCH_MIPS', register alias names are additionally
    replaced by their numeric names.

    NOTE(review): the original script computed this form for every input
    line but never printed or compared it, so test_file() no longer calls
    it; the helper is kept for a future comparison step.
    """
    result = normalize_hex(text).replace(' ', '')
    if arch == 'CS_ARCH_MIPS':
        for alias, numeric in _MIPS_REG_ALIASES:
            result = result.replace(alias, numeric)
    return result


def test_file(fname):
    """Disassemble every encoding listed in the MC test file `fname`.

    The first line of the file must have the form '# <arch>, <mode>,
    <option>'.  Every later line that does not start with '#' is read as
    '<hex bytes> = <expected asm>'; its bytes are disassembled and
    '<hex> = <disassembly>' is printed (or 'FAILED to disassemble' when
    Capstone yields no instruction).

    Problems with the header are reported on stdout as 'ERROR: ...' and
    make the function return early.  Malformed hex in a data line still
    raises the exception produced by binascii.unhexlify().
    """
    print("Test %s" % fname)
    with open(fname) as f:
        lines = f.readlines()

    # An empty file has no header line either.
    if not lines or not lines[0].startswith('# '):
        print("ERROR: decoding information is missing")
        return

    # skip '# ' at the front, then split line to get out hexcode
    # Note: option can be '', or 'None'
    fields = lines[0][2:].split(', ')
    if len(fields) != 3:
        print("ERROR: decoding information is malformed")
        return
    arch, mode, option = fields
    mode = mode.replace(' ', '')
    option = option.strip()  # parsed for completeness; currently unused

    if arch not in _ARCHS or mode not in _MODES:
        print("ERROR: unsupported arch/mode: %s, %s" % (arch, mode))
        return

    md = Cs(_ARCHS[arch], _MODES[mode])

    if arch in ('CS_ARCH_ARM', 'CS_ARCH_PPC'):
        md.syntax = CS_OPT_SYNTAX_NOREGNAME

    if fname.endswith('3DNow.s.cs'):
        md.syntax = CS_OPT_SYNTAX_ATT

    for line in lines[1:]:
        # ignore all the input lines having # in front.
        if line.startswith('#'):
            continue

        # '0x0f,0x0e = femms' -> '0f0e'
        code = line.partition(' = ')[0]
        hex_code = code.replace('0x', '').replace(',', '').strip()
        if not hex_code:
            # Blank line: nothing to disassemble.
            continue

        # unhexlify works on both Python 2 and 3, unlike str.decode('hex').
        hex_data = binascii.unhexlify(hex_code)

        # Only the first decoded instruction is reported.
        insn = next(iter(md.disasm(hex_data, 0)), None)
        if insn is None:
            cs_output = 'FAILED to disassemble'
        elif insn.op_str != '':
            cs_output = "%s %s" % (insn.mnemonic, insn.op_str)
        else:
            cs_output = insn.mnemonic

        print("\t%s = %s" % (hex_code, cs_output))


def main():
    """Test the file named on the command line, or each file named on stdin."""
    if len(sys.argv) == 1:
        for fname in sys.stdin.readlines():
            fname = fname.strip()
            if fname:  # skip blank lines in the file list
                test_file(fname)
    else:
        #print("Usage: ./test_mc.py <input-file.s.cs>")
        test_file(sys.argv[1])


if __name__ == '__main__':
    main()