Rizin
UNIX-like reverse engineering framework and CLI tools
test_corpus.py
Go to the documentation of this file.
1 #!/usr/bin/python
2 # Tool to generate fuzzing corpus files (under fuzz/corpus/) from Capstone MC test inputs. By Nguyen Anh Quynh, 2014
3 import sys
4 import os
5 from capstone import *
6 
# Maps the (arch, mode) pair named in an input file's header line to the
# single index byte written at the start of every corpus file generated from
# that input.
# NOTE(review): presumably the fuzz harness uses this byte to select the
# platform to disassemble for -- confirm against the harness before
# renumbering any entry.
_MC_MODES = {
    ("CS_ARCH_X86", "CS_MODE_32"): 0,
    ("CS_ARCH_X86", "CS_MODE_64"): 1,
    ("CS_ARCH_ARM", "CS_MODE_ARM"): 2,
    ("CS_ARCH_ARM", "CS_MODE_THUMB"): 3,
    ("CS_ARCH_ARM", "CS_MODE_ARM+CS_MODE_V8"): 4,
    ("CS_ARCH_ARM", "CS_MODE_THUMB+CS_MODE_V8"): 5,
    ("CS_ARCH_ARM", "CS_MODE_THUMB+CS_MODE_MCLASS"): 6,
    ("CS_ARCH_ARM64", "0"): 7,
    ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN"): 8,
    ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_MICRO"): 9,
    ("CS_ARCH_MIPS", "CS_MODE_MIPS64"): 10,
    ("CS_ARCH_MIPS", "CS_MODE_MIPS32"): 11,
    ("CS_ARCH_MIPS", "CS_MODE_MIPS64+CS_MODE_BIG_ENDIAN"): 12,
    ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN"): 13,
    ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN+CS_MODE_MICRO"): 13,
    ("CS_ARCH_PPC", "CS_MODE_BIG_ENDIAN"): 14,
    ("CS_ARCH_SPARC", "CS_MODE_BIG_ENDIAN"): 15,
    ("CS_ARCH_SPARC", "CS_MODE_BIG_ENDIAN+CS_MODE_V9"): 16,
    ("CS_ARCH_SYSZ", "0"): 17,
    ("CS_ARCH_XCORE", "0"): 18,
    ("CS_ARCH_MIPS", "CS_MODE_MIPS32R6+CS_MODE_BIG_ENDIAN"): 19,
    ("CS_ARCH_MIPS", "CS_MODE_MIPS32R6+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN"): 20,
    ("CS_ARCH_MIPS", "CS_MODE_MIPS32R6"): 21,
    ("CS_ARCH_MIPS", "CS_MODE_MIPS32R6+CS_MODE_MICRO"): 22,
    ("CS_ARCH_M68K", "0"): 23,
    ("CS_ARCH_M680X", "CS_MODE_M680X_6809"): 24,
    ("CS_ARCH_EVM", "0"): 25,
}


def _extract_hex_code(line):
    """Return the bare hex digits encoded on one input line, or None to skip it.

    A usable line looks like ``0x90,0x90 = nop``; a leading ``// `` is
    dropped before parsing.  Lines starting with ``#``, lines whose code part
    is shorter than two characters, and lines whose code part still contains
    ``//`` are skipped.
    """
    if line.startswith('#'):
        return None
    if line.startswith('// '):
        line = line[3:]
    code = line.split(' = ')[0]
    if len(code) < 2 or '//' in code:
        return None
    # Drop the "0x" prefixes, the separating commas and any stray whitespace
    # (including a trailing newline) so that only hex digits remain; the
    # result is also used verbatim in the output file name.
    hex_code = ''.join(code.replace('0x', '').replace(',', '').split())
    return hex_code or None


def test_file(fname):
    """Generate one fuzz corpus file per encoded instruction listed in *fname*.

    The first line of *fname* must be a header of the form
    ``# <arch>, <mode>, <option>``.  For every following line that carries a
    hex encoding, a file ``fuzz/corpus/<basename of fname>_<hexdigits>`` is
    written in binary mode containing one index byte (looked up from the
    header's arch/mode pair) followed by the decoded instruction bytes.

    Nothing is written, and a message is printed instead, when the header is
    missing or malformed or when the arch/mode pair is unknown.

    The ``fuzz/corpus`` directory is not created here; it has to exist
    relative to the current working directory.

    Raises:
        ValueError: if a line's code part is not valid hexadecimal.
        IOError/OSError: if *fname* cannot be read or an output file cannot
            be created.
    """
    print("Test %s" % fname)
    with open(fname) as f:
        lines = f.readlines()

    # An empty file has no header line at all; treat it like a missing one.
    if not lines or not lines[0].startswith('# '):
        print("ERROR: decoding information is missing")
        return

    # Skip the leading '# ', then split into arch, mode and option.  The
    # option field (may be '' or 'None') is not used for corpus generation.
    fields = lines[0][2:].split(', ')
    if len(fields) != 3:
        print("ERROR: decoding information is missing")
        return
    arch = fields[0]
    mode = fields[1].replace(' ', '')

    # The arch/mode pair is fixed for the whole file, so validate it once up
    # front rather than after an output file has already been created.
    if (arch, mode) not in _MC_MODES:
        print("fail %s %s" % (arch, mode))
        return
    prefix = bytearray([_MC_MODES[(arch, mode)]])

    base = os.path.basename(fname)
    for line in lines[1:]:
        hex_code = _extract_hex_code(line)
        if hex_code is None:
            continue
        # Decode before opening the output so that a malformed line does not
        # leave an empty file behind.
        payload = bytearray.fromhex(hex_code)
        with open("fuzz/corpus/%s_%s" % (base, hex_code), 'wb') as fout:
            fout.write(prefix + payload)
116 
117 
if __name__ == '__main__':
    # With a command-line argument, process just that one input file;
    # otherwise read input file names from stdin, one per line.
    if len(sys.argv) != 1:
        test_file(sys.argv[1])
    else:
        for path in sys.stdin.readlines():
            test_file(path.strip())
126 
size_t len
Definition: 6502dis.c:15
def test_file(fname)
Definition: test_corpus.py:7