Rizin
UNIX-like reverse engineering framework and CLI tools
test_mc.py
Go to the documentation of this file.
1 #!/usr/bin/python
2 # Test tool to compare Capstone output with llvm-mc. By Nguyen Anh Quynh, 2014
3 import array, os.path, sys
4 from subprocess import Popen, PIPE, STDOUT
5 from capstone import *
6 
7 
8 # convert all hex numbers to decimal numbers in a text
def normalize_hex(a):
    """Return *a* with every ``0x``-prefixed hex literal rewritten in decimal.

    Each literal is converted exactly where it appears, so a short literal
    that is a textual prefix of a longer one (``0x1`` next to ``0x10``)
    cannot corrupt the longer one.  A bare ``0x`` that is not followed by
    any hex digit is left untouched instead of raising ``ValueError``.

    Only the lowercase ``0x`` prefix is recognized; the digits themselves
    may be upper or lower case.
    """
    import re  # function-scope import keeps the file's import block untouched

    return re.sub(r'0x([0-9a-fA-F]+)',
                  lambda match: str(int(match.group(1), 16)),
                  a)
23 
24 
def run_mc(arch, hexcode, option, syntax=None):
    """Disassemble *hexcode* with ``llvm-mc`` and return the cleaned-up text.

    arch    -- Capstone ``CS_ARCH_*`` constant; selects the arch-specific
               comment stripping and, for MIPS, adds ``-mattr=+msa``.
    hexcode -- text written to llvm-mc's stdin.
    option  -- list of extra llvm-mc arguments (e.g. ``['-triple=i386']``).
    syntax  -- optional single llvm-mc flag inserted before *option*.

    Returns the normalized second line of llvm-mc's output, or the string
    'FAILED to disassemble (MC)' when the first output line contains
    'invalid' or when llvm-mc printed fewer than two lines.
    """
    def normalize(text):
        # lowercase and collapse every run of whitespace (tabs included)
        text = ' '.join(text.lower().split())
        if arch == CS_ARCH_X86:
            # remove comment after '# '
            i = text.find('# ')
            if i != -1:
                return text[:i].strip()
        if arch == CS_ARCH_ARM64:
            # remove comment after '// '
            i = text.find('// ')
            if i != -1:
                return text[:i].strip()
        # remove some redundant spaces
        text = text.replace('{ ', '{')
        text = text.replace(' }', '}')
        return text.strip()

    # Build the argument vector once; the order matches what each of the
    # former hand-written Popen calls used: base flags, MIPS MSA attribute,
    # syntax flag, then the caller's options.
    command = ['llvm-mc', '-disassemble', '-print-imm-hex']
    if arch == CS_ARCH_MIPS:
        command.append('-mattr=+msa')
    if syntax:
        command.append(syntax)
    command.extend(option)

    # universal_newlines=True makes the pipes text-based, so the str input
    # and the str split below work on both Python 2 and Python 3.
    p = Popen(command, stdout=PIPE, stdin=PIPE, stderr=STDOUT,
              universal_newlines=True)
    output = p.communicate(input=hexcode)[0]
    lines = output.split('\n')

    # The disassembly is expected on the second line; treat a missing second
    # line as a failure instead of raising IndexError.
    if 'invalid' in lines[0] or len(lines) < 2:
        return 'FAILED to disassemble (MC)'
    return normalize(lines[1].strip())
66 
# (alias, numeric name) pairs applied, in this order, to Capstone's MIPS
# output so that it can be compared textually with llvm-mc's output.
_MIPS_REG_ALIASES = (
    ('$at', '$1'),
    ('$v0', '$2'), ('$v1', '$3'),
    ('$a0', '$4'), ('$a1', '$5'), ('$a2', '$6'), ('$a3', '$7'),
    ('$t0', '$8'), ('$t1', '$9'), ('$t2', '$10'), ('$t3', '$11'),
    ('$t4', '$12'), ('$t5', '$13'), ('$t6', '$14'), ('$t7', '$15'),
    ('$t8', '$24'), ('$t9', '$25'),
    ('$s0', '$16'), ('$s1', '$17'), ('$s2', '$18'), ('$s3', '$19'),
    ('$s4', '$20'), ('$s5', '$21'), ('$s6', '$22'), ('$s7', '$23'),
    ('$k0', '$26'), ('$k1', '$27'),
)


def _mips_aliases_to_numbers(text):
    """Replace every MIPS register alias in *text* with its numeric name."""
    for alias, numeric in _MIPS_REG_ALIASES:
        text = text.replace(alias, numeric)
    return text


def test_file(fname):
    """Compare Capstone's disassembly with llvm-mc's for each case in *fname*.

    The first line of the file must look like ``# ARCH, MODE, OPTION``.
    Every following line that does not start with ``#`` is a test case of
    the form ``<hex bytes> = <expected assembly>``; blank lines are skipped.

    For each case the bytes are disassembled with Capstone and with llvm-mc,
    both outputs are normalized, and a "Mismatch" report is printed only when
    Capstone's output differs from llvm-mc's output AND from the expected
    assembly recorded in the file.  Nothing is returned.

    Raises KeyError if the header names an arch/mode that is not in the
    lookup tables below.
    """
    import binascii  # function-scope import; hex decoding for Python 2 and 3

    print("Test %s" % fname)
    with open(fname) as f:
        lines = f.readlines()

    # An empty file has no header line either.
    if not lines or not lines[0].startswith('# '):
        print("ERROR: decoding information is missing")
        return

    # skip '# ' at the front, then split the header into its three fields
    # Note: the third field can be '', or 'None'; it is parsed but not used.
    (arch, mode, _option) = lines[0][2:].split(', ')
    mode = mode.replace(' ', '')

    archs = {
        "CS_ARCH_ARM": CS_ARCH_ARM,
        "CS_ARCH_ARM64": CS_ARCH_ARM64,
        "CS_ARCH_MIPS": CS_ARCH_MIPS,
        "CS_ARCH_PPC": CS_ARCH_PPC,
        "CS_ARCH_SPARC": CS_ARCH_SPARC,
        "CS_ARCH_SYSZ": CS_ARCH_SYSZ,
        "CS_ARCH_X86": CS_ARCH_X86,
        "CS_ARCH_XCORE": CS_ARCH_XCORE,
        "CS_ARCH_M68K": CS_ARCH_M68K,
    }

    modes = {
        "CS_MODE_16": CS_MODE_16,
        "CS_MODE_32": CS_MODE_32,
        "CS_MODE_64": CS_MODE_64,
        "CS_MODE_MIPS32": CS_MODE_MIPS32,
        "CS_MODE_MIPS64": CS_MODE_MIPS64,
        "0": CS_MODE_ARM,
        "CS_MODE_ARM": CS_MODE_ARM,
        "CS_MODE_THUMB": CS_MODE_THUMB,
        "CS_MODE_ARM+CS_MODE_V8": CS_MODE_ARM+CS_MODE_V8,
        "CS_MODE_THUMB+CS_MODE_V8": CS_MODE_THUMB+CS_MODE_V8,
        "CS_MODE_THUMB+CS_MODE_MCLASS": CS_MODE_THUMB+CS_MODE_MCLASS,
        "CS_MODE_LITTLE_ENDIAN": CS_MODE_LITTLE_ENDIAN,
        "CS_MODE_BIG_ENDIAN": CS_MODE_BIG_ENDIAN,
        "CS_MODE_64+CS_MODE_LITTLE_ENDIAN": CS_MODE_64+CS_MODE_LITTLE_ENDIAN,
        "CS_MODE_64+CS_MODE_BIG_ENDIAN": CS_MODE_64+CS_MODE_BIG_ENDIAN,
        "CS_MODE_MIPS32+CS_MODE_MICRO": CS_MODE_MIPS32+CS_MODE_MICRO,
        "CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN": CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN,
        "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN+CS_MODE_MICRO": CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN,
        "CS_MODE_BIG_ENDIAN+CS_MODE_V9": CS_MODE_BIG_ENDIAN + CS_MODE_V9,
        "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN": CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN,
        "CS_MODE_MIPS32+CS_MODE_LITTLE_ENDIAN": CS_MODE_MIPS32+CS_MODE_LITTLE_ENDIAN,
        "CS_MODE_MIPS64+CS_MODE_LITTLE_ENDIAN": CS_MODE_MIPS64+CS_MODE_LITTLE_ENDIAN,
        "CS_MODE_MIPS64+CS_MODE_BIG_ENDIAN": CS_MODE_MIPS64+CS_MODE_BIG_ENDIAN,
    }

    # llvm-mc arguments for each (arch, mode) pair named in a file header
    mc_modes = {
        ("CS_ARCH_X86", "CS_MODE_32"): ['-triple=i386'],
        ("CS_ARCH_X86", "CS_MODE_64"): ['-triple=x86_64'],
        ("CS_ARCH_ARM", "CS_MODE_ARM"): ['-triple=armv7'],
        ("CS_ARCH_ARM", "CS_MODE_THUMB"): ['-triple=thumbv7'],
        ("CS_ARCH_ARM", "CS_MODE_ARM+CS_MODE_V8"): ['-triple=armv8'],
        ("CS_ARCH_ARM", "CS_MODE_THUMB+CS_MODE_V8"): ['-triple=thumbv8'],
        ("CS_ARCH_ARM", "CS_MODE_THUMB+CS_MODE_MCLASS"): ['-triple=thumbv7m'],
        ("CS_ARCH_ARM64", "0"): ['-triple=aarch64'],
        ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN"): ['-triple=mips'],
        ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_MICRO"): ['-triple=mipsel', '-mattr=+micromips'],
        ("CS_ARCH_MIPS", "CS_MODE_MIPS64"): ['-triple=mips64el'],
        ("CS_ARCH_MIPS", "CS_MODE_MIPS32"): ['-triple=mipsel'],
        ("CS_ARCH_MIPS", "CS_MODE_MIPS64+CS_MODE_BIG_ENDIAN"): ['-triple=mips64'],
        ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN"): ['-triple=mips', '-mattr=+micromips'],
        ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN+CS_MODE_MICRO"): ['-triple=mips', '-mattr=+micromips'],
        ("CS_ARCH_PPC", "CS_MODE_BIG_ENDIAN"): ['-triple=powerpc64'],
        ('CS_ARCH_SPARC', 'CS_MODE_BIG_ENDIAN'): ['-triple=sparc'],
        ('CS_ARCH_SPARC', 'CS_MODE_BIG_ENDIAN+CS_MODE_V9'): ['-triple=sparcv9'],
        ('CS_ARCH_SYSZ', '0'): ['-triple=s390x', '-mcpu=z196'],
    }

    md = Cs(archs[arch], modes[mode])

    mc_option = None
    if arch == 'CS_ARCH_X86':
        # tell llvm-mc to use Intel syntax
        mc_option = '-output-asm-variant=1'

    if arch in ('CS_ARCH_ARM', 'CS_ARCH_PPC'):
        md.syntax = CS_OPT_SYNTAX_NOREGNAME

    if fname.endswith('3DNow.s.cs'):
        md.syntax = CS_OPT_SYNTAX_ATT

    for line in lines[1:]:
        # ignore all the input lines having # in front.
        if line.startswith('#'):
            continue
        # a blank line carries no bytes; decoding it would raise
        if not line.strip():
            continue

        fields = line.split(' = ')
        code = fields[0]
        asm = ''.join(fields[1:])
        hex_code = code.replace('0x', '').replace(',', '')
        # unhexlify works on Python 2 and 3 (str.decode('hex') is 2-only)
        hex_data = binascii.unhexlify(hex_code)

        # Only the first decoded instruction is compared.
        x = list(md.disasm(hex_data, 0))
        if len(x) > 0:
            if x[0].op_str != '':
                cs_output = "%s %s" % (x[0].mnemonic, x[0].op_str)
            else:
                cs_output = x[0].mnemonic
        else:
            cs_output = 'FAILED to disassemble'

        cs_output2 = normalize_hex(cs_output)
        cs_output2 = cs_output2.replace(' ', '')

        if arch == 'CS_ARCH_MIPS':
            # normalize register alias names
            cs_output2 = _mips_aliases_to_numbers(cs_output2)

        # Two input files need llvm-mc arguments that differ from the
        # (arch, mode) table above.
        if fname.endswith('thumb-fp-armv8.s.cs'):
            mc_args = ['-triple=thumbv8']
        elif fname.endswith('mips64-alu-instructions.s.cs'):
            mc_args = ['-triple=mips64el', '-mcpu=mips64r2']
        else:
            mc_args = mc_modes[(arch, mode)]
        mc_output = run_mc(archs[arch], code, mc_args, mc_option)
        mc_output2 = normalize_hex(mc_output)

        if arch == 'CS_ARCH_MIPS':
            mc_output2 = mc_output2.replace(' 0(', '(')

        if arch == 'CS_ARCH_PPC':
            mc_output2 = mc_output2.replace('.+', '')
            mc_output2 = mc_output2.replace('.', '')
            mc_output2 = mc_output2.replace(' 0(', '(')

        mc_output2 = mc_output2.replace(' ', '')
        mc_output2 = mc_output2.replace('opaque', '')

        if cs_output2 != mc_output2:
            # Fall back to the expected assembly stored in the test file
            # before reporting a mismatch.
            asm = asm.replace(' ', '').strip().lower()
            if asm != cs_output2:
                print("Mismatch: %s" % line.strip())
                print("\tMC = %s" % mc_output)
                print("\tCS = %s" % cs_output)
253 
if __name__ == '__main__':
    # With an argument, test only the file named by the first one; with no
    # argument, read one test file name per line from standard input.
    if len(sys.argv) != 1:
        test_file(sys.argv[1])
    else:
        for raw_name in sys.stdin.readlines():
            test_file(raw_name.strip())
262 
size_t len
Definition: 6502dis.c:15
static void list(RzEgg *egg)
Definition: rz-gg.c:52
static void normalize(lzma_mf *mf)
Normalizes hash values.
while(len< limit &&buf1[len]==buf2[len])++len
def test_file(fname)
Definition: test_mc.py:67
def normalize_hex(a)
Definition: test_mc.py:9
def run_mc(arch, hexcode, option, syntax=None)
Definition: test_mc.py:25
static int
Definition: sfsocketcall.h:114