Rizin
unix-like reverse engineering framework and cli tools
disasm_mc.py
Go to the documentation of this file.
1 #!/usr/bin/python
2 # Test tool to disassemble MC files. By Nguyen Anh Quynh, 2017
3 import array, os.path, sys
4 from capstone import *
5 
6 
7 # convert all hex numbers to decimal numbers in a text
def normalize_hex(a):
    """Return the text *a* with every hex literal rewritten in decimal.

    A hex literal is ``0x`` followed by one or more hex digits (either
    case).  Each literal is converted on its own in a single left-to-right
    pass, so a short literal that happens to be a prefix of a longer one
    (``0x1`` and ``0x10``) can no longer corrupt the longer one, which the
    previous find-then-replace-everywhere loop did.

    A bare ``0x`` that is not followed by any hex digit is not a number; it
    is left in place instead of raising ``ValueError``.

    Args:
        a: the text to normalize.

    Returns:
        A new string with the literals replaced; *a* itself when it holds
        no hex literal.
    """
    # Local import so this helper does not depend on the file's import block.
    import re

    return re.sub(
        r'0x[0-9a-fA-F]+',
        lambda match: str(int(match.group(0), 16)),
        a,
    )
22 
23 
24 def test_file(fname):
25  print("Test %s" %fname);
26  f = open(fname)
27  lines = f.readlines()
28  f.close()
29 
30  if not lines[0].startswith('# '):
31  print("ERROR: decoding information is missing")
32  return
33 
34  # skip '# ' at the front, then split line to get out hexcode
35  # Note: option can be '', or 'None'
36  #print lines[0]
37  #print lines[0][2:].split(', ')
38  (arch, mode, option) = lines[0][2:].split(', ')
39  mode = mode.replace(' ', '')
40  option = option.strip()
41 
42  archs = {
43  "CS_ARCH_ARM": CS_ARCH_ARM,
44  "CS_ARCH_ARM64": CS_ARCH_ARM64,
45  "CS_ARCH_MIPS": CS_ARCH_MIPS,
46  "CS_ARCH_PPC": CS_ARCH_PPC,
47  "CS_ARCH_SPARC": CS_ARCH_SPARC,
48  "CS_ARCH_SYSZ": CS_ARCH_SYSZ,
49  "CS_ARCH_X86": CS_ARCH_X86,
50  "CS_ARCH_XCORE": CS_ARCH_XCORE,
51  "CS_ARCH_M68K": CS_ARCH_M68K,
52  }
53 
54  modes = {
55  "CS_MODE_16": CS_MODE_16,
56  "CS_MODE_32": CS_MODE_32,
57  "CS_MODE_64": CS_MODE_64,
58  "CS_MODE_MIPS32": CS_MODE_MIPS32,
59  "CS_MODE_MIPS64": CS_MODE_MIPS64,
60  "0": CS_MODE_ARM,
61  "CS_MODE_ARM": CS_MODE_ARM,
62  "CS_MODE_THUMB": CS_MODE_THUMB,
63  "CS_MODE_ARM+CS_MODE_V8": CS_MODE_ARM+CS_MODE_V8,
64  "CS_MODE_THUMB+CS_MODE_V8": CS_MODE_THUMB+CS_MODE_V8,
65  "CS_MODE_THUMB+CS_MODE_MCLASS": CS_MODE_THUMB+CS_MODE_MCLASS,
66  "CS_MODE_LITTLE_ENDIAN": CS_MODE_LITTLE_ENDIAN,
67  "CS_MODE_BIG_ENDIAN": CS_MODE_BIG_ENDIAN,
68  "CS_MODE_64+CS_MODE_LITTLE_ENDIAN": CS_MODE_64+CS_MODE_LITTLE_ENDIAN,
69  "CS_MODE_64+CS_MODE_BIG_ENDIAN": CS_MODE_64+CS_MODE_BIG_ENDIAN,
70  "CS_MODE_MIPS32+CS_MODE_MICRO": CS_MODE_MIPS32+CS_MODE_MICRO,
71  "CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN": CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN,
72  "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN+CS_MODE_MICRO": CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN,
73  "CS_MODE_BIG_ENDIAN+CS_MODE_V9": CS_MODE_BIG_ENDIAN + CS_MODE_V9,
74  "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN": CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN,
75  "CS_MODE_MIPS32+CS_MODE_LITTLE_ENDIAN": CS_MODE_MIPS32+CS_MODE_LITTLE_ENDIAN,
76  "CS_MODE_MIPS64+CS_MODE_LITTLE_ENDIAN": CS_MODE_MIPS64+CS_MODE_LITTLE_ENDIAN,
77  "CS_MODE_MIPS64+CS_MODE_BIG_ENDIAN": CS_MODE_MIPS64+CS_MODE_BIG_ENDIAN,
78  }
79 
80  options = {
81  "CS_OPT_SYNTAX_ATT": CS_OPT_SYNTAX_ATT,
82  "CS_OPT_SYNTAX_NOREGNAME": CS_OPT_SYNTAX_NOREGNAME,
83  }
84 
85  mc_modes = {
86  ("CS_ARCH_X86", "CS_MODE_32"): ['-triple=i386'],
87  ("CS_ARCH_X86", "CS_MODE_64"): ['-triple=x86_64'],
88  ("CS_ARCH_ARM", "CS_MODE_ARM"): ['-triple=armv7'],
89  ("CS_ARCH_ARM", "CS_MODE_THUMB"): ['-triple=thumbv7'],
90  ("CS_ARCH_ARM", "CS_MODE_ARM+CS_MODE_V8"): ['-triple=armv8'],
91  ("CS_ARCH_ARM", "CS_MODE_THUMB+CS_MODE_V8"): ['-triple=thumbv8'],
92  ("CS_ARCH_ARM", "CS_MODE_THUMB+CS_MODE_MCLASS"): ['-triple=thumbv7m'],
93  ("CS_ARCH_ARM64", "0"): ['-triple=aarch64'],
94  ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN"): ['-triple=mips'],
95  ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_MICRO"): ['-triple=mipsel', '-mattr=+micromips'],
96  ("CS_ARCH_MIPS", "CS_MODE_MIPS64"): ['-triple=mips64el'],
97  ("CS_ARCH_MIPS", "CS_MODE_MIPS32"): ['-triple=mipsel'],
98  ("CS_ARCH_MIPS", "CS_MODE_MIPS64+CS_MODE_BIG_ENDIAN"): ['-triple=mips64'],
99  ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN"): ['-triple=mips', '-mattr=+micromips'],
100  ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN+CS_MODE_MICRO"): ['-triple=mips', '-mattr=+micromips'],
101  ("CS_ARCH_PPC", "CS_MODE_BIG_ENDIAN"): ['-triple=powerpc64'],
102  ('CS_ARCH_SPARC', 'CS_MODE_BIG_ENDIAN'): ['-triple=sparc'],
103  ('CS_ARCH_SPARC', 'CS_MODE_BIG_ENDIAN+CS_MODE_V9'): ['-triple=sparcv9'],
104  ('CS_ARCH_SYSZ', '0'): ['-triple=s390x', '-mcpu=z196'],
105  }
106 
107  #if not option in ('', 'None'):
108  # print archs[arch], modes[mode], options[option]
109 
110  #print(arch, mode, option)
111  md = Cs(archs[arch], modes[mode])
112 
113  if arch == 'CS_ARCH_ARM' or arch == 'CS_ARCH_PPC' :
114  md.syntax = CS_OPT_SYNTAX_NOREGNAME
115 
116  if fname.endswith('3DNow.s.cs'):
117  md.syntax = CS_OPT_SYNTAX_ATT
118 
119  for line in lines[1:]:
120  # ignore all the input lines having # in front.
121  if line.startswith('#'):
122  continue
123  #print("Check %s" %line)
124  code = line.split(' = ')[0]
125  asm = ''.join(line.split(' = ')[1:])
126  hex_code = code.replace('0x', '')
127  hex_code = hex_code.replace(',', '')
128  hex_data = hex_code.decode('hex')
129  #hex_bytes = array.array('B', hex_data)
130 
131  x = list(md.disasm(hex_data, 0))
132  if len(x) > 0:
133  if x[0].op_str != '':
134  cs_output = "%s %s" %(x[0].mnemonic, x[0].op_str)
135  else:
136  cs_output = x[0].mnemonic
137  else:
138  cs_output = 'FAILED to disassemble'
139 
140  cs_output2 = normalize_hex(cs_output)
141  cs_output2 = cs_output2.replace(' ', '')
142 
143  if arch == 'CS_ARCH_MIPS':
144  # normalize register alias names
145  cs_output2 = cs_output2.replace('$at', '$1')
146  cs_output2 = cs_output2.replace('$v0', '$2')
147  cs_output2 = cs_output2.replace('$v1', '$3')
148 
149  cs_output2 = cs_output2.replace('$a0', '$4')
150  cs_output2 = cs_output2.replace('$a1', '$5')
151  cs_output2 = cs_output2.replace('$a2', '$6')
152  cs_output2 = cs_output2.replace('$a3', '$7')
153 
154  cs_output2 = cs_output2.replace('$t0', '$8')
155  cs_output2 = cs_output2.replace('$t1', '$9')
156  cs_output2 = cs_output2.replace('$t2', '$10')
157  cs_output2 = cs_output2.replace('$t3', '$11')
158  cs_output2 = cs_output2.replace('$t4', '$12')
159  cs_output2 = cs_output2.replace('$t5', '$13')
160  cs_output2 = cs_output2.replace('$t6', '$14')
161  cs_output2 = cs_output2.replace('$t7', '$15')
162  cs_output2 = cs_output2.replace('$t8', '$24')
163  cs_output2 = cs_output2.replace('$t9', '$25')
164 
165  cs_output2 = cs_output2.replace('$s0', '$16')
166  cs_output2 = cs_output2.replace('$s1', '$17')
167  cs_output2 = cs_output2.replace('$s2', '$18')
168  cs_output2 = cs_output2.replace('$s3', '$19')
169  cs_output2 = cs_output2.replace('$s4', '$20')
170  cs_output2 = cs_output2.replace('$s5', '$21')
171  cs_output2 = cs_output2.replace('$s6', '$22')
172  cs_output2 = cs_output2.replace('$s7', '$23')
173 
174  cs_output2 = cs_output2.replace('$k0', '$26')
175  cs_output2 = cs_output2.replace('$k1', '$27')
176 
177  print("\t%s = %s" %(hex_code, cs_output))
178 
179 
if __name__ == '__main__':
    # Usage: pass one input file (<input-file.s.cs>) as the first argument,
    # or pass no argument and feed one file name per line on standard input.
    if len(sys.argv) != 1:
        test_file(sys.argv[1])
    else:
        for entry in sys.stdin.readlines():
            test_file(entry.strip())
188 
size_t len
Definition: 6502dis.c:15
static void list(RzEgg *egg)
Definition: rz-gg.c:52
while(len< limit &&buf1[len]==buf2[len])++len
def test_file(fname)
Definition: disasm_mc.py:24
def normalize_hex(a)
Definition: disasm_mc.py:8
static int
Definition: sfsocketcall.h:114