xref: /aosp_15_r20/external/capstone/suite/disasm_mc.py (revision 9a0e4156d50a75a99ec4f1653a0e9602a5d45c18)
#!/usr/bin/python
# Test tool to disassemble MC files. By Nguyen Anh Quynh, 2017
3*9a0e4156SSadaf Ebrahimiimport array, os.path, sys
4*9a0e4156SSadaf Ebrahimifrom capstone import *
5*9a0e4156SSadaf Ebrahimi
6*9a0e4156SSadaf Ebrahimi
# convert all hex numbers to decimal numbers in a text
def normalize_hex(a):
    """Return *a* with every ``0x``-prefixed hex literal rewritten in decimal.

    Each literal is converted independently in a single left-to-right pass,
    so a short literal that is a textual prefix of a longer one (``0x1`` and
    ``0x10``) cannot corrupt it. A bare ``0x`` that is not followed by any
    hex digit is left unchanged.

    Args:
        a: the text to normalize.

    Returns:
        A new string with the hex literals replaced by decimal numbers.
    """
    import re  # local import keeps this function self-contained

    return re.sub(
        r'0x([0-9a-fA-F]+)',
        lambda match: str(int(match.group(1), 16)),
        a
    )
22*9a0e4156SSadaf Ebrahimi
23*9a0e4156SSadaf Ebrahimi
def test_file(fname):
    """Disassemble every encoding listed in the MC test file *fname*.

    The first line must have the form ``# <arch>, <mode>, <option>``; the
    arch and mode fields select the Capstone engine configuration. Every
    later line that is neither blank nor starts with ``#`` is expected to
    look like ``<hex bytes> = <asm>``: the hex bytes are disassembled at
    address 0 and the first decoded instruction is printed next to them.

    A missing, malformed or unsupported header is reported with an
    ``ERROR:`` message and the file is skipped. Nothing is returned.

    Args:
        fname: path of the ``.s.cs`` file to process.
    """
    import binascii  # local import; unhexlify works on Python 2 and 3

    print("Test %s" % fname)
    with open(fname) as f:
        lines = f.readlines()

    # An empty file has no header line either.
    if not lines or not lines[0].startswith('# '):
        print("ERROR: decoding information is missing")
        return

    # skip '# ' at the front, then split the header into its three fields.
    # Note: option can be '', or 'None'; it is only checked for presence.
    fields = lines[0][2:].split(', ')
    if len(fields) != 3:
        print("ERROR: malformed decoding information: %s" % lines[0].strip())
        return
    arch = fields[0]
    mode = fields[1].replace(' ', '')

    archs = {
        "CS_ARCH_ARM": CS_ARCH_ARM,
        "CS_ARCH_ARM64": CS_ARCH_ARM64,
        "CS_ARCH_MIPS": CS_ARCH_MIPS,
        "CS_ARCH_PPC": CS_ARCH_PPC,
        "CS_ARCH_SPARC": CS_ARCH_SPARC,
        "CS_ARCH_SYSZ": CS_ARCH_SYSZ,
        "CS_ARCH_X86": CS_ARCH_X86,
        "CS_ARCH_XCORE": CS_ARCH_XCORE,
        "CS_ARCH_M68K": CS_ARCH_M68K,
    }

    modes = {
        "CS_MODE_16": CS_MODE_16,
        "CS_MODE_32": CS_MODE_32,
        "CS_MODE_64": CS_MODE_64,
        "CS_MODE_MIPS32": CS_MODE_MIPS32,
        "CS_MODE_MIPS64": CS_MODE_MIPS64,
        "0": CS_MODE_ARM,
        "CS_MODE_ARM": CS_MODE_ARM,
        "CS_MODE_THUMB": CS_MODE_THUMB,
        "CS_MODE_ARM+CS_MODE_V8": CS_MODE_ARM+CS_MODE_V8,
        "CS_MODE_THUMB+CS_MODE_V8": CS_MODE_THUMB+CS_MODE_V8,
        "CS_MODE_THUMB+CS_MODE_MCLASS": CS_MODE_THUMB+CS_MODE_MCLASS,
        "CS_MODE_LITTLE_ENDIAN": CS_MODE_LITTLE_ENDIAN,
        "CS_MODE_BIG_ENDIAN": CS_MODE_BIG_ENDIAN,
        "CS_MODE_64+CS_MODE_LITTLE_ENDIAN": CS_MODE_64+CS_MODE_LITTLE_ENDIAN,
        "CS_MODE_64+CS_MODE_BIG_ENDIAN": CS_MODE_64+CS_MODE_BIG_ENDIAN,
        "CS_MODE_MIPS32+CS_MODE_MICRO": CS_MODE_MIPS32+CS_MODE_MICRO,
        "CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN": CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN,
        "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN+CS_MODE_MICRO": CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN,
        "CS_MODE_BIG_ENDIAN+CS_MODE_V9": CS_MODE_BIG_ENDIAN + CS_MODE_V9,
        "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN": CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN,
        "CS_MODE_MIPS32+CS_MODE_LITTLE_ENDIAN": CS_MODE_MIPS32+CS_MODE_LITTLE_ENDIAN,
        "CS_MODE_MIPS64+CS_MODE_LITTLE_ENDIAN": CS_MODE_MIPS64+CS_MODE_LITTLE_ENDIAN,
        "CS_MODE_MIPS64+CS_MODE_BIG_ENDIAN": CS_MODE_MIPS64+CS_MODE_BIG_ENDIAN,
    }

    if arch not in archs or mode not in modes:
        print("ERROR: unsupported arch/mode: %s, %s" % (arch, mode))
        return

    md = Cs(archs[arch], modes[mode])

    if arch in ('CS_ARCH_ARM', 'CS_ARCH_PPC'):
        md.syntax = CS_OPT_SYNTAX_NOREGNAME

    # This one file is disassembled with AT&T syntax.
    if fname.endswith('3DNow.s.cs'):
        md.syntax = CS_OPT_SYNTAX_ATT

    for line in lines[1:]:
        # ignore comment lines (leading '#') and blank lines.
        if line.startswith('#') or not line.strip():
            continue

        # "0x0f,0x0e = femms" -> "0f0e"
        code = line.split(' = ')[0]
        hex_code = code.replace('0x', '').replace(',', '').strip()
        hex_data = binascii.unhexlify(hex_code)

        # Only the first decoded instruction is reported.
        x = list(md.disasm(hex_data, 0))
        if len(x) > 0:
            if x[0].op_str != '':
                cs_output = "%s %s" % (x[0].mnemonic, x[0].op_str)
            else:
                cs_output = x[0].mnemonic
        else:
            cs_output = 'FAILED to disassemble'

        print("\t%s = %s" % (hex_code, cs_output))
178*9a0e4156SSadaf Ebrahimi
179*9a0e4156SSadaf Ebrahimi
if __name__ == '__main__':
    # A command-line argument names the single file to process; without
    # one, file names are read from stdin, one per line.
    if len(sys.argv) != 1:
        test_file(sys.argv[1])
    else:
        for raw_name in sys.stdin.readlines():
            test_file(raw_name.strip())
188*9a0e4156SSadaf Ebrahimi
189