Rizin
unix-like reverse engineering framework and cli tools
X86DisassemblerDecoder.c
Go to the documentation of this file.
1 /*===-- X86DisassemblerDecoder.c - Disassembler decoder ------------*- C -*-===*
2  *
3  * The LLVM Compiler Infrastructure
4  *
5  * This file is distributed under the University of Illinois Open Source
6  * License. See LICENSE.TXT for details.
7  *
8  *===----------------------------------------------------------------------===*
9  *
10  * This file is part of the X86 Disassembler.
11  * It contains the implementation of the instruction decoder.
12  * Documentation for the disassembler can be found in X86Disassembler.h.
13  *
14  *===----------------------------------------------------------------------===*/
15 
16 /* Capstone Disassembly Engine */
17 /* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013-2015 */
18 
19 #ifdef CAPSTONE_HAS_X86
20 
21 #include <stdarg.h> /* for va_*() */
22 #if defined(CAPSTONE_HAS_OSXKERNEL)
23 #include <libkern/libkern.h>
24 #else
25 #include <stdlib.h> /* for exit() */
26 #endif
27 
28 #include "../../cs_priv.h"
29 #include "../../utils.h"
30 
31 #include "X86DisassemblerDecoder.h"
32 
/*
 * ModRMDecision - Table entry that tells decode() how the ModR/M byte selects
 *   among the instructions sharing one opcode.
 */
struct ModRMDecision {
	/* Split kind; decode() compares it against the MODRM_* constants. */
	uint8_t modrm_type;
	/* Base index into modRMTable; decode() adds a ModR/M-derived offset. */
	uint16_t instructionIDs;
};
40 
43 struct OpcodeDecision {
44  struct ModRMDecision modRMDecisions[256];
45 };
46 
52 struct ContextDecision {
53  struct OpcodeDecision opcodeDecisions[IC_max];
54 };
55 
56 #ifdef CAPSTONE_X86_REDUCE
58 #else
60 #endif
61 
62 //#define GET_INSTRINFO_ENUM
63 #define GET_INSTRINFO_MC_DESC
64 #ifdef CAPSTONE_X86_REDUCE
66 #else
67 #include "X86GenInstrInfo.inc"
68 #endif
69 
70 /*
71  * contextForAttrs - Client for the instruction context table. Takes a set of
72  * attributes and returns the appropriate decode context.
73  *
74  * @param attrMask - Attributes, from the enumeration attributeBits.
75  * @return - The InstructionContext to use when looking up an
76  * an instruction with these attributes.
77  */
78 static InstructionContext contextForAttrs(uint16_t attrMask)
79 {
80  return CONTEXTS_SYM[attrMask];
81 }
82 
83 /*
84  * modRMRequired - Reads the appropriate instruction table to determine whether
85  * the ModR/M byte is required to decode a particular instruction.
86  *
87  * @param type - The opcode type (i.e., how many bytes it has).
88  * @param insnContext - The context for the instruction, as returned by
89  * contextForAttrs.
90  * @param opcode - The last byte of the instruction's opcode, not counting
91  * ModR/M extensions and escapes.
92  * @return - true if the ModR/M byte is required, false otherwise.
93  */
94 static int modRMRequired(OpcodeType type,
95  InstructionContext insnContext,
96  uint16_t opcode)
97 {
98  const struct OpcodeDecision *decision = NULL;
99  const uint8_t *indextable = NULL;
100  uint8_t index;
101 
102  switch (type) {
103  default:
104  case ONEBYTE:
105  decision = ONEBYTE_SYM;
106  indextable = index_x86DisassemblerOneByteOpcodes;
107  break;
108  case TWOBYTE:
109  decision = TWOBYTE_SYM;
110  indextable = index_x86DisassemblerTwoByteOpcodes;
111  break;
112  case THREEBYTE_38:
113  decision = THREEBYTE38_SYM;
114  indextable = index_x86DisassemblerThreeByte38Opcodes;
115  break;
116  case THREEBYTE_3A:
117  decision = THREEBYTE3A_SYM;
118  indextable = index_x86DisassemblerThreeByte3AOpcodes;
119  break;
120 #ifndef CAPSTONE_X86_REDUCE
121  case XOP8_MAP:
122  decision = XOP8_MAP_SYM;
123  indextable = index_x86DisassemblerXOP8Opcodes;
124  break;
125  case XOP9_MAP:
126  decision = XOP9_MAP_SYM;
127  indextable = index_x86DisassemblerXOP9Opcodes;
128  break;
129  case XOPA_MAP:
130  decision = XOPA_MAP_SYM;
131  indextable = index_x86DisassemblerXOPAOpcodes;
132  break;
133  case T3DNOW_MAP:
134  // 3DNow instructions always have ModRM byte
135  return true;
136 #endif
137  }
138 
139  index = indextable[insnContext];
140  if (index)
141  return decision[index - 1].modRMDecisions[opcode].modrm_type != MODRM_ONEENTRY;
142  else
143  return false;
144 }
145 
146 /*
147  * decode - Reads the appropriate instruction table to obtain the unique ID of
148  * an instruction.
149  *
150  * @param type - See modRMRequired().
151  * @param insnContext - See modRMRequired().
152  * @param opcode - See modRMRequired().
153  * @param modRM - The ModR/M byte if required, or any value if not.
154  * @return - The UID of the instruction, or 0 on failure.
155  */
157  InstructionContext insnContext,
158  uint8_t opcode,
159  uint8_t modRM)
160 {
161  const struct ModRMDecision *dec = NULL;
162  const uint8_t *indextable = NULL;
163  uint8_t index;
164 
165  switch (type) {
166  default:
167  case ONEBYTE:
168  indextable = index_x86DisassemblerOneByteOpcodes;
169  index = indextable[insnContext];
170  if (index)
171  dec = &ONEBYTE_SYM[index - 1].modRMDecisions[opcode];
172  else
173  dec = &emptyTable.modRMDecisions[opcode];
174  break;
175  case TWOBYTE:
176  indextable = index_x86DisassemblerTwoByteOpcodes;
177  index = indextable[insnContext];
178  if (index)
179  dec = &TWOBYTE_SYM[index - 1].modRMDecisions[opcode];
180  else
181  dec = &emptyTable.modRMDecisions[opcode];
182  break;
183  case THREEBYTE_38:
184  indextable = index_x86DisassemblerThreeByte38Opcodes;
185  index = indextable[insnContext];
186  if (index)
187  dec = &THREEBYTE38_SYM[index - 1].modRMDecisions[opcode];
188  else
189  dec = &emptyTable.modRMDecisions[opcode];
190  break;
191  case THREEBYTE_3A:
192  indextable = index_x86DisassemblerThreeByte3AOpcodes;
193  index = indextable[insnContext];
194  if (index)
195  dec = &THREEBYTE3A_SYM[index - 1].modRMDecisions[opcode];
196  else
197  dec = &emptyTable.modRMDecisions[opcode];
198  break;
199 #ifndef CAPSTONE_X86_REDUCE
200  case XOP8_MAP:
201  indextable = index_x86DisassemblerXOP8Opcodes;
202  index = indextable[insnContext];
203  if (index)
204  dec = &XOP8_MAP_SYM[index - 1].modRMDecisions[opcode];
205  else
206  dec = &emptyTable.modRMDecisions[opcode];
207  break;
208  case XOP9_MAP:
209  indextable = index_x86DisassemblerXOP9Opcodes;
210  index = indextable[insnContext];
211  if (index)
212  dec = &XOP9_MAP_SYM[index - 1].modRMDecisions[opcode];
213  else
214  dec = &emptyTable.modRMDecisions[opcode];
215  break;
216  case XOPA_MAP:
217  indextable = index_x86DisassemblerXOPAOpcodes;
218  index = indextable[insnContext];
219  if (index)
220  dec = &XOPA_MAP_SYM[index - 1].modRMDecisions[opcode];
221  else
222  dec = &emptyTable.modRMDecisions[opcode];
223  break;
224  case T3DNOW_MAP:
225  indextable = index_x86DisassemblerT3DNOWOpcodes;
226  index = indextable[insnContext];
227  if (index)
228  dec = &T3DNOW_MAP_SYM[index - 1].modRMDecisions[opcode];
229  else
230  dec = &emptyTable.modRMDecisions[opcode];
231  break;
232 #endif
233  }
234 
235  switch (dec->modrm_type) {
236  default:
237  //debug("Corrupt table! Unknown modrm_type");
238  return 0;
239  case MODRM_ONEENTRY:
240  return modRMTable[dec->instructionIDs];
241  case MODRM_SPLITRM:
242  if (modFromModRM(modRM) == 0x3)
243  return modRMTable[dec->instructionIDs+1];
244  return modRMTable[dec->instructionIDs];
245  case MODRM_SPLITREG:
246  if (modFromModRM(modRM) == 0x3)
247  return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8];
248  return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
249  case MODRM_SPLITMISC:
250  if (modFromModRM(modRM) == 0x3)
251  return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8];
252  return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
253  case MODRM_FULL:
254  return modRMTable[dec->instructionIDs+modRM];
255  }
256 }
257 
258 /*
259  * specifierForUID - Given a UID, returns the name and operand specification for
260  * that instruction.
261  *
262  * @param uid - The unique ID for the instruction. This should be returned by
263  * decode(); specifierForUID will not check bounds.
264  * @return - A pointer to the specification for that instruction.
265  */
266 static const struct InstructionSpecifier *specifierForUID(InstrUID uid)
267 {
268  return &INSTRUCTIONS_SYM[uid];
269 }
270 
271 /*
272  * consumeByte - Uses the reader function provided by the user to consume one
273  * byte from the instruction's memory and advance the cursor.
274  *
275  * @param insn - The instruction with the reader function to use. The cursor
276  * for this instruction is advanced.
277  * @param byte - A pointer to a pre-allocated memory buffer to be populated
278  * with the data read.
279  * @return - 0 if the read was successful; nonzero otherwise.
280  */
281 static int consumeByte(struct InternalInstruction *insn, uint8_t *byte)
282 {
283  int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
284 
285  if (!ret)
286  ++(insn->readerCursor);
287 
288  return ret;
289 }
290 
291 /*
292  * lookAtByte - Like consumeByte, but does not advance the cursor.
293  *
294  * @param insn - See consumeByte().
295  * @param byte - See consumeByte().
296  * @return - See consumeByte().
297  */
298 static int lookAtByte(struct InternalInstruction *insn, uint8_t *byte)
299 {
300  return insn->reader(insn->readerArg, byte, insn->readerCursor);
301 }
302 
303 static void unconsumeByte(struct InternalInstruction *insn)
304 {
305  insn->readerCursor--;
306 }
307 
308 #define CONSUME_FUNC(name, type) \
309  static int name(struct InternalInstruction *insn, type *ptr) { \
310  type combined = 0; \
311  unsigned offset; \
312  for (offset = 0; offset < sizeof(type); ++offset) { \
313  uint8_t byte; \
314  int ret = insn->reader(insn->readerArg, \
315  &byte, \
316  insn->readerCursor + offset); \
317  if (ret) \
318  return ret; \
319  combined = combined | (type)((uint64_t)byte << (offset * 8)); \
320  } \
321  *ptr = combined; \
322  insn->readerCursor += sizeof(type); \
323  return 0; \
324  }
325 
326 /*
327  * consume* - Use the reader function provided by the user to consume data
328  * values of various sizes from the instruction's memory and advance the
329  * cursor appropriately. These readers perform endian conversion.
330  *
331  * @param insn - See consumeByte().
332  * @param ptr - A pointer to a pre-allocated memory of appropriate size to
333  * be populated with the data read.
334  * @return - See consumeByte().
335  */
336 CONSUME_FUNC(consumeInt8, int8_t)
337 CONSUME_FUNC(consumeInt16, int16_t)
338 CONSUME_FUNC(consumeInt32, int32_t)
339 CONSUME_FUNC(consumeUInt16, uint16_t)
340 CONSUME_FUNC(consumeUInt32, uint32_t)
341 CONSUME_FUNC(consumeUInt64, uint64_t)
342 
343 /*
344  * setPrefixPresent - Marks that a particular prefix is present at a particular
345  * location.
346  *
347  * @param insn - The instruction to be marked as having the prefix.
348  * @param prefix - The prefix that is present.
349  * @param location - The location where the prefix is located (in the address
350  * space of the instruction's reader).
351  */
352 static void setPrefixPresent(struct InternalInstruction *insn, uint8_t prefix, uint64_t location)
353 {
354  switch (prefix) {
355  case 0x26:
356  insn->isPrefix26 = true;
357  insn->prefix26 = location;
358  break;
359  case 0x2e:
360  insn->isPrefix2e = true;
361  insn->prefix2e = location;
362  break;
363  case 0x36:
364  insn->isPrefix36 = true;
365  insn->prefix36 = location;
366  break;
367  case 0x3e:
368  insn->isPrefix3e = true;
369  insn->prefix3e = location;
370  break;
371  case 0x64:
372  insn->isPrefix64 = true;
373  insn->prefix64 = location;
374  break;
375  case 0x65:
376  insn->isPrefix65 = true;
377  insn->prefix65 = location;
378  break;
379  case 0x66:
380  insn->isPrefix66 = true;
381  insn->prefix66 = location;
382  break;
383  case 0x67:
384  insn->isPrefix67 = true;
385  insn->prefix67 = location;
386  break;
387  case 0xf0:
388  insn->isPrefixf0 = true;
389  insn->prefixf0 = location;
390  break;
391  case 0xf2:
392  insn->isPrefixf2 = true;
393  insn->prefixf2 = location;
394  break;
395  case 0xf3:
396  insn->isPrefixf3 = true;
397  insn->prefixf3 = location;
398  break;
399  default:
400  break;
401  }
402 }
403 
404 /*
405  * isPrefixAtLocation - Queries an instruction to determine whether a prefix is
406  * present at a given location.
407  *
408  * @param insn - The instruction to be queried.
409  * @param prefix - The prefix.
410  * @param location - The location to query.
411  * @return - Whether the prefix is at that location.
412  */
413 static bool isPrefixAtLocation(struct InternalInstruction *insn, uint8_t prefix,
414  uint64_t location)
415 {
416  switch (prefix) {
417  case 0x26:
418  if (insn->isPrefix26 && insn->prefix26 == location)
419  return true;
420  break;
421  case 0x2e:
422  if (insn->isPrefix2e && insn->prefix2e == location)
423  return true;
424  break;
425  case 0x36:
426  if (insn->isPrefix36 && insn->prefix36 == location)
427  return true;
428  break;
429  case 0x3e:
430  if (insn->isPrefix3e && insn->prefix3e == location)
431  return true;
432  break;
433  case 0x64:
434  if (insn->isPrefix64 && insn->prefix64 == location)
435  return true;
436  break;
437  case 0x65:
438  if (insn->isPrefix65 && insn->prefix65 == location)
439  return true;
440  break;
441  case 0x66:
442  if (insn->isPrefix66 && insn->prefix66 == location)
443  return true;
444  break;
445  case 0x67:
446  if (insn->isPrefix67 && insn->prefix67 == location)
447  return true;
448  break;
449  case 0xf0:
450  if (insn->isPrefixf0 && insn->prefixf0 == location)
451  return true;
452  break;
453  case 0xf2:
454  if (insn->isPrefixf2 && insn->prefixf2 == location)
455  return true;
456  break;
457  case 0xf3:
458  if (insn->isPrefixf3 && insn->prefixf3 == location)
459  return true;
460  break;
461  default:
462  break;
463  }
464  return false;
465 }
466 
467 /*
468  * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
469  * instruction as having them. Also sets the instruction's default operand,
470  * address, and other relevant data sizes to report operands correctly.
471  *
472  * @param insn - The instruction whose prefixes are to be read.
473  * @return - 0 if the instruction could be read until the end of the prefix
474  * bytes, and no prefixes conflicted; nonzero otherwise.
475  */
476 static int readPrefixes(struct InternalInstruction *insn)
477 {
478  bool isPrefix = true;
479  uint64_t prefixLocation;
480  uint8_t byte = 0, nextByte;
481 
482  bool hasAdSize = false;
483  bool hasOpSize = false;
484 
485  //initialize to an impossible value
486  insn->necessaryPrefixLocation = insn->readerCursor - 1;
487  while (isPrefix) {
488  if (insn->mode == MODE_64BIT) {
489  // eliminate consecutive redundant REX bytes in front
490  if (consumeByte(insn, &byte))
491  return -1;
492 
493  if ((byte & 0xf0) == 0x40) {
494  while(true) {
495  if (lookAtByte(insn, &byte)) // out of input code
496  return -1;
497  if ((byte & 0xf0) == 0x40) {
498  // another REX prefix, but we only remember the last one
499  if (consumeByte(insn, &byte))
500  return -1;
501  } else
502  break;
503  }
504 
505  // recover the last REX byte if next byte is not a legacy prefix
506  switch (byte) {
507  case 0xf2: /* REPNE/REPNZ */
508  case 0xf3: /* REP or REPE/REPZ */
509  case 0xf0: /* LOCK */
510  case 0x2e: /* CS segment override -OR- Branch not taken */
511  case 0x36: /* SS segment override -OR- Branch taken */
512  case 0x3e: /* DS segment override */
513  case 0x26: /* ES segment override */
514  case 0x64: /* FS segment override */
515  case 0x65: /* GS segment override */
516  case 0x66: /* Operand-size override */
517  case 0x67: /* Address-size override */
518  break;
519  default: /* Not a prefix byte */
520  unconsumeByte(insn);
521  break;
522  }
523  } else {
524  unconsumeByte(insn);
525  }
526  }
527 
528  prefixLocation = insn->readerCursor;
529 
530  /* If we fail reading prefixes, just stop here and let the opcode reader deal with it */
531  if (consumeByte(insn, &byte))
532  return -1;
533 
534  if (insn->readerCursor - 1 == insn->startLocation
535  && (byte == 0xf2 || byte == 0xf3)) {
536 
537  if (lookAtByte(insn, &nextByte))
538  return -1;
539 
540  /*
541  * If the byte is 0xf2 or 0xf3, and any of the following conditions are
542  * met:
543  * - it is followed by a LOCK (0xf0) prefix
544  * - it is followed by an xchg instruction
545  * then it should be disassembled as a xacquire/xrelease not repne/rep.
546  */
547  if (((nextByte == 0xf0) ||
548  ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90)))
549  insn->xAcquireRelease = byte;
550 
551  /*
552  * Also if the byte is 0xf3, and the following condition is met:
553  * - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or
554  * "mov mem, imm" (opcode 0xc6/0xc7) instructions.
555  * then it should be disassembled as an xrelease not rep.
556  */
557  if (byte == 0xf3 &&
558  (nextByte == 0x88 || nextByte == 0x89 ||
559  nextByte == 0xc6 || nextByte == 0xc7))
560  insn->xAcquireRelease = byte;
561 
562  if (insn->mode == MODE_64BIT && (nextByte & 0xf0) == 0x40) {
563  if (consumeByte(insn, &nextByte))
564  return -1;
565  if (lookAtByte(insn, &nextByte))
566  return -1;
567  unconsumeByte(insn);
568  }
569  }
570 
571  switch (byte) {
572  case 0xf2: /* REPNE/REPNZ */
573  case 0xf3: /* REP or REPE/REPZ */
574  case 0xf0: /* LOCK */
575  // only accept the last prefix
576  insn->isPrefixf2 = false;
577  insn->isPrefixf3 = false;
578  insn->isPrefixf0 = false;
579  setPrefixPresent(insn, byte, prefixLocation);
580  insn->prefix0 = byte;
581  break;
582  case 0x2e: /* CS segment override -OR- Branch not taken */
584  // only accept the last prefix
585  insn->isPrefix2e = false;
586  insn->isPrefix36 = false;
587  insn->isPrefix3e = false;
588  insn->isPrefix26 = false;
589  insn->isPrefix64 = false;
590  insn->isPrefix65 = false;
591 
592  setPrefixPresent(insn, byte, prefixLocation);
593  insn->prefix1 = byte;
594  break;
595  case 0x36: /* SS segment override -OR- Branch taken */
597  // only accept the last prefix
598  insn->isPrefix2e = false;
599  insn->isPrefix36 = false;
600  insn->isPrefix3e = false;
601  insn->isPrefix26 = false;
602  insn->isPrefix64 = false;
603  insn->isPrefix65 = false;
604 
605  setPrefixPresent(insn, byte, prefixLocation);
606  insn->prefix1 = byte;
607  break;
608  case 0x3e: /* DS segment override */
610  // only accept the last prefix
611  insn->isPrefix2e = false;
612  insn->isPrefix36 = false;
613  insn->isPrefix3e = false;
614  insn->isPrefix26 = false;
615  insn->isPrefix64 = false;
616  insn->isPrefix65 = false;
617 
618  setPrefixPresent(insn, byte, prefixLocation);
619  insn->prefix1 = byte;
620  break;
621  case 0x26: /* ES segment override */
623  // only accept the last prefix
624  insn->isPrefix2e = false;
625  insn->isPrefix36 = false;
626  insn->isPrefix3e = false;
627  insn->isPrefix26 = false;
628  insn->isPrefix64 = false;
629  insn->isPrefix65 = false;
630 
631  setPrefixPresent(insn, byte, prefixLocation);
632  insn->prefix1 = byte;
633  break;
634  case 0x64: /* FS segment override */
636  // only accept the last prefix
637  insn->isPrefix2e = false;
638  insn->isPrefix36 = false;
639  insn->isPrefix3e = false;
640  insn->isPrefix26 = false;
641  insn->isPrefix64 = false;
642  insn->isPrefix65 = false;
643 
644  setPrefixPresent(insn, byte, prefixLocation);
645  insn->prefix1 = byte;
646  break;
647  case 0x65: /* GS segment override */
649  // only accept the last prefix
650  insn->isPrefix2e = false;
651  insn->isPrefix36 = false;
652  insn->isPrefix3e = false;
653  insn->isPrefix26 = false;
654  insn->isPrefix64 = false;
655  insn->isPrefix65 = false;
656 
657  setPrefixPresent(insn, byte, prefixLocation);
658  insn->prefix1 = byte;
659  break;
660  case 0x66: /* Operand-size override */
661  hasOpSize = true;
662  setPrefixPresent(insn, byte, prefixLocation);
663  insn->prefix2 = byte;
664  break;
665  case 0x67: /* Address-size override */
666  hasAdSize = true;
667  setPrefixPresent(insn, byte, prefixLocation);
668  insn->prefix3 = byte;
669  break;
670  default: /* Not a prefix byte */
671  isPrefix = false;
672  break;
673  }
674 
675  //if (isPrefix)
676  // dbgprintf(insn, "Found prefix 0x%hhx", byte);
677  }
678 
680 
681 
682  if (byte == 0x62) {
683  uint8_t byte1, byte2;
684 
685  if (consumeByte(insn, &byte1)) {
686  //dbgprintf(insn, "Couldn't read second byte of EVEX prefix");
687  return -1;
688  }
689 
690  if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) &&
691  ((~byte1 & 0xc) == 0xc)) {
692  if (lookAtByte(insn, &byte2)) {
693  //dbgprintf(insn, "Couldn't read third byte of EVEX prefix");
694  return -1;
695  }
696 
697  if ((byte2 & 0x4) == 0x4) {
699  } else {
700  unconsumeByte(insn); /* unconsume byte1 */
701  unconsumeByte(insn); /* unconsume byte */
702  insn->necessaryPrefixLocation = insn->readerCursor - 2;
703  }
704 
705  if (insn->vectorExtensionType == TYPE_EVEX) {
706  insn->vectorExtensionPrefix[0] = byte;
707  insn->vectorExtensionPrefix[1] = byte1;
708 
709  if (consumeByte(insn, &insn->vectorExtensionPrefix[2])) {
710  //dbgprintf(insn, "Couldn't read third byte of EVEX prefix");
711  return -1;
712  }
713 
714  if (consumeByte(insn, &insn->vectorExtensionPrefix[3])) {
715  //dbgprintf(insn, "Couldn't read fourth byte of EVEX prefix");
716  return -1;
717  }
718 
719  /* We simulate the REX prefix for simplicity's sake */
720  if (insn->mode == MODE_64BIT) {
721  insn->rexPrefix = 0x40
722  | (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3)
723  | (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2)
724  | (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1)
725  | (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0);
726  }
727  switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) {
728  default:
729  break;
730  case VEX_PREFIX_66:
731  hasOpSize = true;
732  break;
733  }
734  //dbgprintf(insn, "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
735  // insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
736  // insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]);
737  }
738  } else {
739  // BOUND instruction
740  unconsumeByte(insn); /* unconsume byte1 */
741  unconsumeByte(insn); /* unconsume byte */
742  }
743  } else if (byte == 0xc4) {
744  uint8_t byte1;
745 
746  if (lookAtByte(insn, &byte1)) {
747  //dbgprintf(insn, "Couldn't read second byte of VEX");
748  return -1;
749  }
750 
751  if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
753  insn->necessaryPrefixLocation = insn->readerCursor - 1;
754  } else {
755  unconsumeByte(insn);
756  insn->necessaryPrefixLocation = insn->readerCursor - 1;
757  }
758 
759  if (insn->vectorExtensionType == TYPE_VEX_3B) {
760  insn->vectorExtensionPrefix[0] = byte;
761  if (consumeByte(insn, &insn->vectorExtensionPrefix[1]))
762  return -1;
763  if (consumeByte(insn, &insn->vectorExtensionPrefix[2]))
764  return -1;
765 
766  /* We simulate the REX prefix for simplicity's sake */
767  if (insn->mode == MODE_64BIT) {
768  insn->rexPrefix = 0x40
769  | (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3)
770  | (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2)
771  | (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1)
772  | (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0);
773 
774  }
775  switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) {
776  default:
777  break;
778  case VEX_PREFIX_66:
779  hasOpSize = true;
780  break;
781  }
782  }
783  } else if (byte == 0xc5) {
784  uint8_t byte1;
785 
786  if (lookAtByte(insn, &byte1)) {
787  //dbgprintf(insn, "Couldn't read second byte of VEX");
788  return -1;
789  }
790 
791  if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
793  } else {
794  unconsumeByte(insn);
795  }
796 
797  if (insn->vectorExtensionType == TYPE_VEX_2B) {
798  insn->vectorExtensionPrefix[0] = byte;
799  if (consumeByte(insn, &insn->vectorExtensionPrefix[1]))
800  return -1;
801 
802  if (insn->mode == MODE_64BIT) {
803  insn->rexPrefix = 0x40
804  | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2);
805  }
806 
807  switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
808  default:
809  break;
810  case VEX_PREFIX_66:
811  hasOpSize = true;
812  break;
813  }
814  }
815  } else if (byte == 0x8f) {
816  uint8_t byte1;
817 
818  if (lookAtByte(insn, &byte1)) {
819  // dbgprintf(insn, "Couldn't read second byte of XOP");
820  return -1;
821  }
822 
823  if ((byte1 & 0x38) != 0x0) { /* 0 in these 3 bits is a POP instruction. */
825  insn->necessaryPrefixLocation = insn->readerCursor - 1;
826  } else {
827  unconsumeByte(insn);
828  insn->necessaryPrefixLocation = insn->readerCursor - 1;
829  }
830 
831  if (insn->vectorExtensionType == TYPE_XOP) {
832  insn->vectorExtensionPrefix[0] = byte;
833  if (consumeByte(insn, &insn->vectorExtensionPrefix[1]))
834  return -1;
835  if (consumeByte(insn, &insn->vectorExtensionPrefix[2]))
836  return -1;
837 
838  /* We simulate the REX prefix for simplicity's sake */
839  if (insn->mode == MODE_64BIT) {
840  insn->rexPrefix = 0x40
841  | (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3)
842  | (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2)
843  | (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1)
844  | (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0);
845  }
846 
847  switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
848  default:
849  break;
850  case VEX_PREFIX_66:
851  hasOpSize = true;
852  break;
853  }
854  }
855  } else {
856  if (insn->mode == MODE_64BIT) {
857  if ((byte & 0xf0) == 0x40) {
858  uint8_t opcodeByte;
859 
860  while(true) {
861  if (lookAtByte(insn, &opcodeByte)) // out of input code
862  return -1;
863  if ((opcodeByte & 0xf0) == 0x40) {
864  // another REX prefix, but we only remember the last one
865  if (consumeByte(insn, &byte))
866  return -1;
867  } else
868  break;
869  }
870 
871  insn->rexPrefix = byte;
872  insn->necessaryPrefixLocation = insn->readerCursor - 2;
873  // dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
874  } else {
875  unconsumeByte(insn);
876  insn->necessaryPrefixLocation = insn->readerCursor - 1;
877  }
878  } else {
879  unconsumeByte(insn);
880  insn->necessaryPrefixLocation = insn->readerCursor - 1;
881  }
882  }
883 
884  if (insn->mode == MODE_16BIT) {
885  insn->registerSize = (hasOpSize ? 4 : 2);
886  insn->addressSize = (hasAdSize ? 4 : 2);
887  insn->displacementSize = (hasAdSize ? 4 : 2);
888  insn->immediateSize = (hasOpSize ? 4 : 2);
889  insn->immSize = (hasOpSize ? 4 : 2);
890  } else if (insn->mode == MODE_32BIT) {
891  insn->registerSize = (hasOpSize ? 2 : 4);
892  insn->addressSize = (hasAdSize ? 2 : 4);
893  insn->displacementSize = (hasAdSize ? 2 : 4);
894  insn->immediateSize = (hasOpSize ? 2 : 4);
895  insn->immSize = (hasOpSize ? 2 : 4);
896  } else if (insn->mode == MODE_64BIT) {
897  if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
898  insn->registerSize = 8;
899  insn->addressSize = (hasAdSize ? 4 : 8);
900  insn->displacementSize = 4;
901  insn->immediateSize = 4;
902  insn->immSize = 4;
903  } else if (insn->rexPrefix) {
904  insn->registerSize = (hasOpSize ? 2 : 4);
905  insn->addressSize = (hasAdSize ? 4 : 8);
906  insn->displacementSize = (hasOpSize ? 2 : 4);
907  insn->immediateSize = (hasOpSize ? 2 : 4);
908  insn->immSize = (hasOpSize ? 2 : 4);
909  } else {
910  insn->registerSize = (hasOpSize ? 2 : 4);
911  insn->addressSize = (hasAdSize ? 4 : 8);
912  insn->displacementSize = (hasOpSize ? 2 : 4);
913  insn->immediateSize = (hasOpSize ? 2 : 4);
914  insn->immSize = (hasOpSize ? 4 : 8);
915  }
916  }
917 
918  return 0;
919 }
920 
921 static int readModRM(struct InternalInstruction *insn);
922 
923 /*
924  * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
925  * extended or escape opcodes).
926  *
927  * @param insn - The instruction whose opcode is to be read.
928  * @return - 0 if the opcode could be read successfully; nonzero otherwise.
929  */
930 static int readOpcode(struct InternalInstruction *insn)
931 {
932  /* Determine the length of the primary opcode */
933  uint8_t current;
934 
935  // printf(">>> readOpcode() = %x\n", insn->readerCursor);
936 
937  insn->opcodeType = ONEBYTE;
938  insn->firstByte = 0x00;
939 
940  if (insn->vectorExtensionType == TYPE_EVEX) {
941  switch (mmFromEVEX2of4(insn->vectorExtensionPrefix[1])) {
942  default:
943  // dbgprintf(insn, "Unhandled mm field for instruction (0x%hhx)",
944  // mmFromEVEX2of4(insn->vectorExtensionPrefix[1]));
945  return -1;
946  case VEX_LOB_0F:
947  insn->opcodeType = TWOBYTE;
948  return consumeByte(insn, &insn->opcode);
949  case VEX_LOB_0F38:
950  insn->opcodeType = THREEBYTE_38;
951  return consumeByte(insn, &insn->opcode);
952  case VEX_LOB_0F3A:
953  insn->opcodeType = THREEBYTE_3A;
954  return consumeByte(insn, &insn->opcode);
955  }
956  } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
957  switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) {
958  default:
959  // dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
960  // mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]));
961  return -1;
962  case VEX_LOB_0F:
963  insn->twoByteEscape = 0x0f;
964  insn->opcodeType = TWOBYTE;
965  return consumeByte(insn, &insn->opcode);
966  case VEX_LOB_0F38:
967  insn->twoByteEscape = 0x0f;
968  insn->threeByteEscape = 0x38;
969  insn->opcodeType = THREEBYTE_38;
970  return consumeByte(insn, &insn->opcode);
971  case VEX_LOB_0F3A:
972  insn->twoByteEscape = 0x0f;
973  insn->threeByteEscape = 0x3a;
974  insn->opcodeType = THREEBYTE_3A;
975  return consumeByte(insn, &insn->opcode);
976  }
977  } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
978  insn->twoByteEscape = 0x0f;
979  insn->opcodeType = TWOBYTE;
980  return consumeByte(insn, &insn->opcode);
981  } else if (insn->vectorExtensionType == TYPE_XOP) {
982  switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) {
983  default:
984  // dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
985  // mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]));
986  return -1;
987  case XOP_MAP_SELECT_8:
988  // FIXME: twoByteEscape?
989  insn->opcodeType = XOP8_MAP;
990  return consumeByte(insn, &insn->opcode);
991  case XOP_MAP_SELECT_9:
992  // FIXME: twoByteEscape?
993  insn->opcodeType = XOP9_MAP;
994  return consumeByte(insn, &insn->opcode);
995  case XOP_MAP_SELECT_A:
996  // FIXME: twoByteEscape?
997  insn->opcodeType = XOPA_MAP;
998  return consumeByte(insn, &insn->opcode);
999  }
1000  }
1001 
1002  if (consumeByte(insn, &current))
1003  return -1;
1004 
1005  // save this first byte for MOVcr, MOVdr, MOVrc, MOVrd
1006  insn->firstByte = current;
1007 
1008  if (current == 0x0f) {
1009  // dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
1010 
1011  insn->twoByteEscape = current;
1012 
1013  if (consumeByte(insn, &current))
1014  return -1;
1015 
1016  if (current == 0x38) {
1017  // dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
1018 
1019  insn->threeByteEscape = current;
1020 
1021  if (consumeByte(insn, &current))
1022  return -1;
1023 
1024  insn->opcodeType = THREEBYTE_38;
1025  } else if (current == 0x3a) {
1026  // dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
1027 
1028  insn->threeByteEscape = current;
1029 
1030  if (consumeByte(insn, &current))
1031  return -1;
1032 
1033  insn->opcodeType = THREEBYTE_3A;
1034  } else {
1035 #ifndef CAPSTONE_X86_REDUCE
1036  switch(current) {
1037  default:
1038  // dbgprintf(insn, "Didn't find a three-byte escape prefix");
1039  insn->opcodeType = TWOBYTE;
1040  break;
1041  case 0x0e: // HACK for femms. to be handled properly in next version 3.x
1042  insn->opcodeType = T3DNOW_MAP;
1043  // this encode does not have ModRM
1044  insn->consumedModRM = true;
1045  break;
1046  case 0x0f:
1047  // 3DNow instruction has weird format: ModRM/SIB/displacement + opcode
1048  if (readModRM(insn))
1049  return -1;
1050  // next is 3DNow opcode
1051  if (consumeByte(insn, &current))
1052  return -1;
1053  insn->opcodeType = T3DNOW_MAP;
1054  break;
1055  }
1056 #endif
1057  }
1058  }
1059 
1060  /*
1061  * At this point we have consumed the full opcode.
1062  * Anything we consume from here on must be unconsumed.
1063  */
1064 
1065  insn->opcode = current;
1066 
1067  return 0;
1068 }
1069 
1070 // Hacky for FEMMS
1071 #define GET_INSTRINFO_ENUM
1072 #ifndef CAPSTONE_X86_REDUCE
1073 #include "X86GenInstrInfo.inc"
1074 #else
1075 #include "X86GenInstrInfo_reduce.inc"
1076 #endif
1077 
1078 /*
1079  * getIDWithAttrMask - Determines the ID of an instruction, consuming
1080  * the ModR/M byte as appropriate for extended and escape opcodes,
1081  * and using a supplied attribute mask.
1082  *
1083  * @param instructionID - A pointer whose target is filled in with the ID of the
1084  * instruction.
1085  * @param insn - The instruction whose ID is to be determined.
1086  * @param attrMask - The attribute mask to search.
1087  * @return - 0 if the ModR/M could be read when needed or was not
1088  * needed; nonzero otherwise.
1089  */
1090 static int getIDWithAttrMask(uint16_t *instructionID,
1091  struct InternalInstruction *insn,
1092  uint16_t attrMask)
1093 {
1094  bool hasModRMExtension;
1095 
1096  InstructionContext instructionClass;
1097 
1098 #ifndef CAPSTONE_X86_REDUCE
1099  // HACK for femms. to be handled properly in next version 3.x
1100  if (insn->opcode == 0x0e && insn->opcodeType == T3DNOW_MAP) {
1101  *instructionID = X86_FEMMS;
1102  return 0;
1103  }
1104 #endif
1105 
1106  if (insn->opcodeType == T3DNOW_MAP)
1107  instructionClass = IC_OF;
1108  else
1109  instructionClass = contextForAttrs(attrMask);
1110 
1111  hasModRMExtension = modRMRequired(insn->opcodeType,
1112  instructionClass,
1113  insn->opcode) != 0;
1114 
1115  if (hasModRMExtension) {
1116  if (readModRM(insn))
1117  return -1;
1118 
1119  *instructionID = decode(insn->opcodeType,
1120  instructionClass,
1121  insn->opcode,
1122  insn->modRM);
1123  } else {
1124  *instructionID = decode(insn->opcodeType,
1125  instructionClass,
1126  insn->opcode,
1127  0);
1128  }
1129 
1130  return 0;
1131 }
1132 
1133 /*
1134  * is16BitEquivalent - Determines whether two instruction names refer to
1135  * equivalent instructions but one is 16-bit whereas the other is not.
1136  *
1137  * @param orig - The instruction ID that is not 16-bit
1138  * @param equiv - The instruction ID that is 16-bit
1139  */
1140 static bool is16BitEquivalent(unsigned orig, unsigned equiv)
1141 {
1142  size_t i;
1143  uint16_t idx;
1144 
1145  if ((idx = x86_16_bit_eq_lookup[orig]) != 0) {
1146  for (i = idx - 1; i < ARR_SIZE(x86_16_bit_eq_tbl) && x86_16_bit_eq_tbl[i].first == orig; i++) {
1147  if (x86_16_bit_eq_tbl[i].second == equiv)
1148  return true;
1149  }
1150  }
1151 
1152  return false;
1153 }
1154 
1155 /*
1156  * is64Bit - Determines whether this instruction is a 64-bit instruction.
1157  *
1158  * @param name - The instruction that is not 16-bit
1159  */
1160 static bool is64Bit(uint16_t id)
1161 {
1162  return is_64bit_insn[id];
1163 }
1164 
1165 /*
1166  * getID - Determines the ID of an instruction, consuming the ModR/M byte as
1167  * appropriate for extended and escape opcodes. Determines the attributes and
1168  * context for the instruction before doing so.
1169  *
1170  * @param insn - The instruction whose ID is to be determined.
1171  * @return - 0 if the ModR/M could be read when needed or was not needed;
1172  * nonzero otherwise.
1173  */
1174 static int getID(struct InternalInstruction *insn)
1175 {
1176  uint16_t attrMask;
1177  uint16_t instructionID;
1178 
1179  // printf(">>> getID()\n");
1180  attrMask = ATTR_NONE;
1181 
1182  if (insn->mode == MODE_64BIT)
1183  attrMask |= ATTR_64BIT;
1184 
1185  if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
1186  attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX;
1187 
1188  if (insn->vectorExtensionType == TYPE_EVEX) {
1189  switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) {
1190  case VEX_PREFIX_66:
1191  attrMask |= ATTR_OPSIZE;
1192  break;
1193  case VEX_PREFIX_F3:
1194  attrMask |= ATTR_XS;
1195  break;
1196  case VEX_PREFIX_F2:
1197  attrMask |= ATTR_XD;
1198  break;
1199  }
1200 
1201  if (zFromEVEX4of4(insn->vectorExtensionPrefix[3]))
1202  attrMask |= ATTR_EVEXKZ;
1203  if (bFromEVEX4of4(insn->vectorExtensionPrefix[3]))
1204  attrMask |= ATTR_EVEXB;
1205  if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]))
1206  attrMask |= ATTR_EVEXK;
1207  if (lFromEVEX4of4(insn->vectorExtensionPrefix[3]))
1208  attrMask |= ATTR_EVEXL;
1209  if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
1210  attrMask |= ATTR_EVEXL2;
1211  } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
1212  switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) {
1213  case VEX_PREFIX_66:
1214  attrMask |= ATTR_OPSIZE;
1215  break;
1216  case VEX_PREFIX_F3:
1217  attrMask |= ATTR_XS;
1218  break;
1219  case VEX_PREFIX_F2:
1220  attrMask |= ATTR_XD;
1221  break;
1222  }
1223 
1224  if (lFromVEX3of3(insn->vectorExtensionPrefix[2]))
1225  attrMask |= ATTR_VEXL;
1226  } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
1227  switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
1228  case VEX_PREFIX_66:
1229  attrMask |= ATTR_OPSIZE;
1230  break;
1231  case VEX_PREFIX_F3:
1232  attrMask |= ATTR_XS;
1233  break;
1234  case VEX_PREFIX_F2:
1235  attrMask |= ATTR_XD;
1236  break;
1237  }
1238 
1239  if (lFromVEX2of2(insn->vectorExtensionPrefix[1]))
1240  attrMask |= ATTR_VEXL;
1241  } else if (insn->vectorExtensionType == TYPE_XOP) {
1242  switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
1243  case VEX_PREFIX_66:
1244  attrMask |= ATTR_OPSIZE;
1245  break;
1246  case VEX_PREFIX_F3:
1247  attrMask |= ATTR_XS;
1248  break;
1249  case VEX_PREFIX_F2:
1250  attrMask |= ATTR_XD;
1251  break;
1252  }
1253 
1254  if (lFromXOP3of3(insn->vectorExtensionPrefix[2]))
1255  attrMask |= ATTR_VEXL;
1256  } else {
1257  return -1;
1258  }
1259  } else {
1260  if (insn->mode != MODE_16BIT && isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) {
1261  attrMask |= ATTR_OPSIZE;
1262  } else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation)) {
1263  attrMask |= ATTR_ADSIZE;
1264  } else if (insn->mode != MODE_16BIT && isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation)) {
1265  attrMask |= ATTR_XS;
1266  } else if (insn->mode != MODE_16BIT && isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation)) {
1267  attrMask |= ATTR_XD;
1268  }
1269  }
1270 
1271  if (insn->rexPrefix & 0x08)
1272  attrMask |= ATTR_REXW;
1273 
1274  /*
1275  * JCXZ/JECXZ need special handling for 16-bit mode because the meaning
1276  * of the AdSize prefix is inverted w.r.t. 32-bit mode.
1277  */
1278  if (insn->mode == MODE_16BIT && insn->opcodeType == ONEBYTE &&
1279  insn->opcode == 0xE3)
1280  attrMask ^= ATTR_ADSIZE;
1281 
1282  if (getIDWithAttrMask(&instructionID, insn, attrMask))
1283  return -1;
1284 
1285  /* The following clauses compensate for limitations of the tables. */
1286  if (insn->mode != MODE_64BIT &&
1288  /*
1289  * The tables can't distinquish between cases where the W-bit is used to
1290  * select register size and cases where its a required part of the opcode.
1291  */
1292  if ((insn->vectorExtensionType == TYPE_EVEX &&
1293  wFromEVEX3of4(insn->vectorExtensionPrefix[2])) ||
1294  (insn->vectorExtensionType == TYPE_VEX_3B &&
1295  wFromVEX3of3(insn->vectorExtensionPrefix[2])) ||
1296  (insn->vectorExtensionType == TYPE_XOP &&
1297  wFromXOP3of3(insn->vectorExtensionPrefix[2]))) {
1298  uint16_t instructionIDWithREXW;
1299  if (getIDWithAttrMask(&instructionIDWithREXW,
1300  insn, attrMask | ATTR_REXW)) {
1301  insn->instructionID = instructionID;
1302  insn->spec = specifierForUID(instructionID);
1303 
1304  return 0;
1305  }
1306 
1307  // If not a 64-bit instruction. Switch the opcode.
1308  if (!is64Bit(instructionIDWithREXW)) {
1309  insn->instructionID = instructionIDWithREXW;
1310  insn->spec = specifierForUID(instructionIDWithREXW);
1311 
1312  return 0;
1313  }
1314  }
1315  }
1316 
1317  /*
1318  * Absolute moves need special handling.
1319  * -For 16-bit mode because the meaning of the AdSize and OpSize prefixes are
1320  * inverted w.r.t.
1321  * -For 32-bit mode we need to ensure the ADSIZE prefix is observed in
1322  * any position.
1323  */
1324  if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) {
1325  /* Make sure we observed the prefixes in any position. */
1326  if (insn->isPrefix67)
1327  attrMask |= ATTR_ADSIZE;
1328  if (insn->isPrefix66)
1329  attrMask |= ATTR_OPSIZE;
1330 
1331  /* In 16-bit, invert the attributes. */
1332  if (insn->mode == MODE_16BIT)
1333  attrMask ^= ATTR_ADSIZE | ATTR_OPSIZE;
1334 
1335  if (getIDWithAttrMask(&instructionID, insn, attrMask))
1336  return -1;
1337 
1338  insn->instructionID = instructionID;
1339  insn->spec = specifierForUID(instructionID);
1340 
1341  return 0;
1342  }
1343 
1344  if ((insn->mode == MODE_16BIT || insn->isPrefix66) &&
1345  !(attrMask & ATTR_OPSIZE)) {
1346  /*
1347  * The instruction tables make no distinction between instructions that
1348  * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
1349  * particular spot (i.e., many MMX operations). In general we're
1350  * conservative, but in the specific case where OpSize is present but not
1351  * in the right place we check if there's a 16-bit operation.
1352  */
1353 
1354  const struct InstructionSpecifier *spec;
1355  uint16_t instructionIDWithOpsize;
1356 
1357  spec = specifierForUID(instructionID);
1358 
1359  if (getIDWithAttrMask(&instructionIDWithOpsize,
1360  insn, attrMask | ATTR_OPSIZE)) {
1361  /*
1362  * ModRM required with OpSize but not present; give up and return version
1363  * without OpSize set
1364  */
1365 
1366  insn->instructionID = instructionID;
1367  insn->spec = spec;
1368  return 0;
1369  }
1370 
1371  if (is16BitEquivalent(instructionID, instructionIDWithOpsize) &&
1372  (insn->mode == MODE_16BIT) ^ insn->isPrefix66) {
1373  insn->instructionID = instructionIDWithOpsize;
1374  insn->spec = specifierForUID(instructionIDWithOpsize);
1375  } else {
1376  insn->instructionID = instructionID;
1377  insn->spec = spec;
1378  }
1379  return 0;
1380  }
1381 
1382  if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
1383  insn->rexPrefix & 0x01) {
1384  /*
1385  * NOOP shouldn't decode as NOOP if REX.b is set. Instead
1386  * it should decode as XCHG %r8, %eax.
1387  */
1388 
1389  const struct InstructionSpecifier *spec;
1390  uint16_t instructionIDWithNewOpcode;
1391  const struct InstructionSpecifier *specWithNewOpcode;
1392 
1393  spec = specifierForUID(instructionID);
1394 
1395  /* Borrow opcode from one of the other XCHGar opcodes */
1396  insn->opcode = 0x91;
1397 
1398  if (getIDWithAttrMask(&instructionIDWithNewOpcode,
1399  insn,
1400  attrMask)) {
1401  insn->opcode = 0x90;
1402 
1403  insn->instructionID = instructionID;
1404  insn->spec = spec;
1405  return 0;
1406  }
1407 
1408  specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode);
1409 
1410  /* Change back */
1411  insn->opcode = 0x90;
1412 
1413  insn->instructionID = instructionIDWithNewOpcode;
1414  insn->spec = specWithNewOpcode;
1415 
1416  return 0;
1417  }
1418 
1419  insn->instructionID = instructionID;
1420  insn->spec = specifierForUID(insn->instructionID);
1421 
1422  return 0;
1423 }
1424 
1425 /*
1426  * readSIB - Consumes the SIB byte to determine addressing information for an
1427  * instruction.
1428  *
1429  * @param insn - The instruction whose SIB byte is to be read.
1430  * @return - 0 if the SIB byte was successfully read; nonzero otherwise.
1431  */
1432 static int readSIB(struct InternalInstruction *insn)
1433 {
1434  SIBIndex sibIndexBase = SIB_INDEX_NONE;
1435  SIBBase sibBaseBase = SIB_BASE_NONE;
1436  uint8_t index, base;
1437 
1438  // dbgprintf(insn, "readSIB()");
1439 
1440  if (insn->consumedSIB)
1441  return 0;
1442 
1443  insn->consumedSIB = true;
1444 
1445  switch (insn->addressSize) {
1446  case 2:
1447  // dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
1448  return -1;
1449  case 4:
1450  sibIndexBase = SIB_INDEX_EAX;
1451  sibBaseBase = SIB_BASE_EAX;
1452  break;
1453  case 8:
1454  sibIndexBase = SIB_INDEX_RAX;
1455  sibBaseBase = SIB_BASE_RAX;
1456  break;
1457  }
1458 
1459  if (consumeByte(insn, &insn->sib))
1460  return -1;
1461 
1462  index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
1463  if (insn->vectorExtensionType == TYPE_EVEX)
1464  index |= v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4;
1465 
1466  switch (index) {
1467  case 0x4:
1468  insn->sibIndex = SIB_INDEX_NONE;
1469  break;
1470  default:
1471  insn->sibIndex = (SIBIndex)(sibIndexBase + index);
1472  if (insn->sibIndex == SIB_INDEX_sib ||
1473  insn->sibIndex == SIB_INDEX_sib64)
1474  insn->sibIndex = SIB_INDEX_NONE;
1475  break;
1476  }
1477 
1478  switch (scaleFromSIB(insn->sib)) {
1479  case 0:
1480  insn->sibScale = 1;
1481  break;
1482  case 1:
1483  insn->sibScale = 2;
1484  break;
1485  case 2:
1486  insn->sibScale = 4;
1487  break;
1488  case 3:
1489  insn->sibScale = 8;
1490  break;
1491  }
1492 
1493  base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
1494 
1495  switch (base) {
1496  case 0x5:
1497  case 0xd:
1498  switch (modFromModRM(insn->modRM)) {
1499  case 0x0:
1500  insn->eaDisplacement = EA_DISP_32;
1501  insn->sibBase = SIB_BASE_NONE;
1502  break;
1503  case 0x1:
1504  insn->eaDisplacement = EA_DISP_8;
1505  insn->sibBase = (SIBBase)(sibBaseBase + base);
1506  break;
1507  case 0x2:
1508  insn->eaDisplacement = EA_DISP_32;
1509  insn->sibBase = (SIBBase)(sibBaseBase + base);
1510  break;
1511  case 0x3:
1512  //debug("Cannot have Mod = 0b11 and a SIB byte");
1513  return -1;
1514  }
1515  break;
1516  default:
1517  insn->sibBase = (SIBBase)(sibBaseBase + base);
1518  break;
1519  }
1520 
1521  return 0;
1522 }
1523 
1524 /*
1525  * readDisplacement - Consumes the displacement of an instruction.
1526  *
1527  * @param insn - The instruction whose displacement is to be read.
1528  * @return - 0 if the displacement byte was successfully read; nonzero
1529  * otherwise.
1530  */
1531 static int readDisplacement(struct InternalInstruction *insn)
1532 {
1533  int8_t d8;
1534  int16_t d16;
1535  int32_t d32;
1536 
1537  // dbgprintf(insn, "readDisplacement()");
1538 
1539  if (insn->consumedDisplacement)
1540  return 0;
1541 
1542  insn->consumedDisplacement = true;
1543  insn->displacementOffset = (uint8_t)(insn->readerCursor - insn->startLocation);
1544 
1545  switch (insn->eaDisplacement) {
1546  case EA_DISP_NONE:
1547  insn->consumedDisplacement = false;
1548  break;
1549  case EA_DISP_8:
1550  if (consumeInt8(insn, &d8))
1551  return -1;
1552  insn->displacement = d8;
1553  break;
1554  case EA_DISP_16:
1555  if (consumeInt16(insn, &d16))
1556  return -1;
1557  insn->displacement = d16;
1558  break;
1559  case EA_DISP_32:
1560  if (consumeInt32(insn, &d32))
1561  return -1;
1562  insn->displacement = d32;
1563  break;
1564  }
1565 
1566  return 0;
1567 }
1568 
1569 /*
1570  * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
1571  * displacement) for an instruction and interprets it.
1572  *
1573  * @param insn - The instruction whose addressing information is to be read.
1574  * @return - 0 if the information was successfully read; nonzero otherwise.
1575  */
1576 static int readModRM(struct InternalInstruction *insn)
1577 {
1578  uint8_t mod, rm, reg;
1579 
1580  // dbgprintf(insn, "readModRM()");
1581 
1582  // already got ModRM byte?
1583  if (insn->consumedModRM)
1584  return 0;
1585 
1586  insn->modRMOffset = (uint8_t)(insn->readerCursor - insn->startLocation);
1587 
1588  if (consumeByte(insn, &insn->modRM))
1589  return -1;
1590 
1591  // mark that we already got ModRM
1592  insn->consumedModRM = true;
1593 
1594  // save original ModRM for later reference
1595  insn->orgModRM = insn->modRM;
1596 
1597  // handle MOVcr, MOVdr, MOVrc, MOVrd by pretending they have MRM.mod = 3
1598  if ((insn->firstByte == 0x0f && insn->opcodeType == TWOBYTE) &&
1599  (insn->opcode >= 0x20 && insn->opcode <= 0x23 ))
1600  insn->modRM |= 0xC0;
1601 
1602  mod = modFromModRM(insn->modRM);
1603  rm = rmFromModRM(insn->modRM);
1604  reg = regFromModRM(insn->modRM);
1605 
1606  /*
1607  * This goes by insn->registerSize to pick the correct register, which messes
1608  * up if we're using (say) XMM or 8-bit register operands. That gets fixed in
1609  * fixupReg().
1610  */
1611  switch (insn->registerSize) {
1612  case 2:
1613  insn->regBase = MODRM_REG_AX;
1614  insn->eaRegBase = EA_REG_AX;
1615  break;
1616  case 4:
1617  insn->regBase = MODRM_REG_EAX;
1618  insn->eaRegBase = EA_REG_EAX;
1619  break;
1620  case 8:
1621  insn->regBase = MODRM_REG_RAX;
1622  insn->eaRegBase = EA_REG_RAX;
1623  break;
1624  }
1625 
1626  reg |= rFromREX(insn->rexPrefix) << 3;
1627  rm |= bFromREX(insn->rexPrefix) << 3;
1628  if (insn->vectorExtensionType == TYPE_EVEX) {
1629  reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
1630  rm |= xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
1631  }
1632 
1633  insn->reg = (Reg)(insn->regBase + reg);
1634 
1635  switch (insn->addressSize) {
1636  case 2:
1637  insn->eaBaseBase = EA_BASE_BX_SI;
1638 
1639  switch (mod) {
1640  case 0x0:
1641  if (rm == 0x6) {
1642  insn->eaBase = EA_BASE_NONE;
1643  insn->eaDisplacement = EA_DISP_16;
1644  if (readDisplacement(insn))
1645  return -1;
1646  } else {
1647  insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1648  insn->eaDisplacement = EA_DISP_NONE;
1649  }
1650  break;
1651  case 0x1:
1652  insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1653  insn->eaDisplacement = EA_DISP_8;
1654  insn->displacementSize = 1;
1655  if (readDisplacement(insn))
1656  return -1;
1657  break;
1658  case 0x2:
1659  insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1660  insn->eaDisplacement = EA_DISP_16;
1661  if (readDisplacement(insn))
1662  return -1;
1663  break;
1664  case 0x3:
1665  insn->eaBase = (EABase)(insn->eaRegBase + rm);
1666  insn->eaDisplacement = EA_DISP_NONE;
1667  if (readDisplacement(insn))
1668  return -1;
1669  break;
1670  }
1671  break;
1672  case 4:
1673  case 8:
1674  insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
1675 
1676  switch (mod) {
1677  case 0x0:
1678  insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
1679  switch (rm) {
1680  case 0x14:
1681  case 0x4:
1682  case 0xc: /* in case REXW.b is set */
1683  insn->eaBase = (insn->addressSize == 4 ?
1684  EA_BASE_sib : EA_BASE_sib64);
1685  if (readSIB(insn) || readDisplacement(insn))
1686  return -1;
1687  break;
1688  case 0x5:
1689  case 0xd:
1690  insn->eaBase = EA_BASE_NONE;
1691  insn->eaDisplacement = EA_DISP_32;
1692  if (readDisplacement(insn))
1693  return -1;
1694  break;
1695  default:
1696  insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1697  break;
1698  }
1699 
1700  break;
1701  case 0x1:
1702  insn->displacementSize = 1;
1703  /* FALLTHROUGH */
1704  case 0x2:
1705  insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
1706  switch (rm) {
1707  case 0x14:
1708  case 0x4:
1709  case 0xc: /* in case REXW.b is set */
1710  insn->eaBase = EA_BASE_sib;
1711  if (readSIB(insn) || readDisplacement(insn))
1712  return -1;
1713  break;
1714  default:
1715  insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1716  if (readDisplacement(insn))
1717  return -1;
1718  break;
1719  }
1720  break;
1721  case 0x3:
1722  insn->eaDisplacement = EA_DISP_NONE;
1723  insn->eaBase = (EABase)(insn->eaRegBase + rm);
1724  break;
1725  }
1726  break;
1727  } /* switch (insn->addressSize) */
1728 
1729  return 0;
1730 }
1731 
1732 #define GENERIC_FIXUP_FUNC(name, base, prefix) \
1733  static uint8_t name(struct InternalInstruction *insn, \
1734  OperandType type, \
1735  uint8_t index, \
1736  uint8_t *valid) { \
1737  *valid = 1; \
1738  switch (type) { \
1739  default: \
1740  *valid = 0; \
1741  return 0; \
1742  case TYPE_Rv: \
1743  return base + index; \
1744  case TYPE_R8: \
1745  if (insn->rexPrefix && \
1746  index >= 4 && index <= 7) { \
1747  return prefix##_SPL + (index - 4); \
1748  } else { \
1749  return prefix##_AL + index; \
1750  } \
1751  case TYPE_R16: \
1752  return prefix##_AX + index; \
1753  case TYPE_R32: \
1754  return prefix##_EAX + index; \
1755  case TYPE_R64: \
1756  return prefix##_RAX + index; \
1757  case TYPE_XMM512: \
1758  return prefix##_ZMM0 + index; \
1759  case TYPE_XMM256: \
1760  return prefix##_YMM0 + index; \
1761  case TYPE_XMM128: \
1762  case TYPE_XMM64: \
1763  case TYPE_XMM32: \
1764  case TYPE_XMM: \
1765  return prefix##_XMM0 + index; \
1766  case TYPE_VK1: \
1767  case TYPE_VK8: \
1768  case TYPE_VK16: \
1769  if (index > 7) \
1770  *valid = 0; \
1771  return prefix##_K0 + index; \
1772  case TYPE_MM64: \
1773  return prefix##_MM0 + (index & 0x7); \
1774  case TYPE_SEGMENTREG: \
1775  if (index > 5) \
1776  *valid = 0; \
1777  return prefix##_ES + index; \
1778  case TYPE_DEBUGREG: \
1779  return prefix##_DR0 + index; \
1780  case TYPE_CONTROLREG: \
1781  return prefix##_CR0 + index; \
1782  } \
1783  }
1784 
1785 
1786 /*
1787  * fixup*Value - Consults an operand type to determine the meaning of the
1788  * reg or R/M field. If the operand is an XMM operand, for example, an
1789  * operand would be XMM0 instead of AX, which readModRM() would otherwise
1790  * misinterpret it as.
1791  *
1792  * @param insn - The instruction containing the operand.
1793  * @param type - The operand type.
1794  * @param index - The existing value of the field as reported by readModRM().
1795  * @param valid - The address of a uint8_t. The target is set to 1 if the
1796  * field is valid for the register class; 0 if not.
1797  * @return - The proper value.
1798  */
1799 GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG)
1800 GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
1801 
1802 /*
1803  * fixupReg - Consults an operand specifier to determine which of the
1804  * fixup*Value functions to use in correcting readModRM()'ss interpretation.
1805  *
1806  * @param insn - See fixup*Value().
1807  * @param op - The operand specifier.
1808  * @return - 0 if fixup was successful; -1 if the register returned was
1809  * invalid for its class.
1810  */
1811 static int fixupReg(struct InternalInstruction *insn,
1812  const struct OperandSpecifier *op)
1813 {
1814  uint8_t valid;
1815 
1816  // dbgprintf(insn, "fixupReg()");
1817 
1818  switch ((OperandEncoding)op->encoding) {
1819  default:
1820  //debug("Expected a REG or R/M encoding in fixupReg");
1821  return -1;
1822  case ENCODING_VVVV:
1823  insn->vvvv = (Reg)fixupRegValue(insn,
1824  (OperandType)op->type,
1825  insn->vvvv,
1826  &valid);
1827  if (!valid)
1828  return -1;
1829  break;
1830  case ENCODING_REG:
1831  insn->reg = (Reg)fixupRegValue(insn,
1832  (OperandType)op->type,
1833  (uint8_t)(insn->reg - insn->regBase),
1834  &valid);
1835  if (!valid)
1836  return -1;
1837  break;
1839  if (insn->eaBase >= insn->eaRegBase) {
1840  insn->eaBase = (EABase)fixupRMValue(insn,
1841  (OperandType)op->type,
1842  (uint8_t)(insn->eaBase - insn->eaRegBase),
1843  &valid);
1844  if (!valid)
1845  return -1;
1846  }
1847  break;
1848  }
1849 
1850  return 0;
1851 }
1852 
1853 /*
1854  * readOpcodeRegister - Reads an operand from the opcode field of an
1855  * instruction and interprets it appropriately given the operand width.
1856  * Handles AddRegFrm instructions.
1857  *
1858  * @param insn - the instruction whose opcode field is to be read.
1859  * @param size - The width (in bytes) of the register being specified.
1860  * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
1861  * RAX.
1862  * @return - 0 on success; nonzero otherwise.
1863  */
1864 static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size)
1865 {
1866  // dbgprintf(insn, "readOpcodeRegister()");
1867 
1868  if (size == 0)
1869  size = insn->registerSize;
1870 
1871  insn->operandSize = size;
1872 
1873  switch (size) {
1874  case 1:
1875  insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
1876  | (insn->opcode & 7)));
1877  if (insn->rexPrefix &&
1878  insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
1879  insn->opcodeRegister < MODRM_REG_AL + 0x8) {
1880  insn->opcodeRegister = (Reg)(MODRM_REG_SPL
1881  + (insn->opcodeRegister - MODRM_REG_AL - 4));
1882  }
1883 
1884  break;
1885  case 2:
1886  insn->opcodeRegister = (Reg)(MODRM_REG_AX
1887  + ((bFromREX(insn->rexPrefix) << 3)
1888  | (insn->opcode & 7)));
1889  break;
1890  case 4:
1891  insn->opcodeRegister = (Reg)(MODRM_REG_EAX
1892  + ((bFromREX(insn->rexPrefix) << 3)
1893  | (insn->opcode & 7)));
1894  break;
1895  case 8:
1896  insn->opcodeRegister = (Reg)(MODRM_REG_RAX
1897  + ((bFromREX(insn->rexPrefix) << 3)
1898  | (insn->opcode & 7)));
1899  break;
1900  }
1901 
1902  return 0;
1903 }
1904 
1905 /*
1906  * readImmediate - Consumes an immediate operand from an instruction, given the
1907  * desired operand size.
1908  *
1909  * @param insn - The instruction whose operand is to be read.
1910  * @param size - The width (in bytes) of the operand.
1911  * @return - 0 if the immediate was successfully consumed; nonzero
1912  * otherwise.
1913  */
1914 static int readImmediate(struct InternalInstruction *insn, uint8_t size)
1915 {
1916  uint8_t imm8;
1917  uint16_t imm16;
1918  uint32_t imm32;
1919  uint64_t imm64;
1920 
1921  // dbgprintf(insn, "readImmediate()");
1922 
1923  if (insn->numImmediatesConsumed == 2) {
1924  //debug("Already consumed two immediates");
1925  return -1;
1926  }
1927 
1928  if (size == 0)
1929  size = insn->immediateSize;
1930  else
1931  insn->immediateSize = size;
1932  insn->immediateOffset = (uint8_t)(insn->readerCursor - insn->startLocation);
1933 
1934  switch (size) {
1935  case 1:
1936  if (consumeByte(insn, &imm8))
1937  return -1;
1938  insn->immediates[insn->numImmediatesConsumed] = imm8;
1939  break;
1940  case 2:
1941  if (consumeUInt16(insn, &imm16))
1942  return -1;
1943  insn->immediates[insn->numImmediatesConsumed] = imm16;
1944  break;
1945  case 4:
1946  if (consumeUInt32(insn, &imm32))
1947  return -1;
1948  insn->immediates[insn->numImmediatesConsumed] = imm32;
1949  break;
1950  case 8:
1951  if (consumeUInt64(insn, &imm64))
1952  return -1;
1953  insn->immediates[insn->numImmediatesConsumed] = imm64;
1954  break;
1955  }
1956 
1957  insn->numImmediatesConsumed++;
1958 
1959  return 0;
1960 }
1961 
1962 /*
1963  * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix.
1964  *
1965  * @param insn - The instruction whose operand is to be read.
1966  * @return - 0 if the vvvv was successfully consumed; nonzero
1967  * otherwise.
1968  */
1969 static int readVVVV(struct InternalInstruction *insn)
1970 {
1971  int vvvv;
1972  // dbgprintf(insn, "readVVVV()");
1973 
1974  if (insn->vectorExtensionType == TYPE_EVEX)
1975  vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 |
1977  else if (insn->vectorExtensionType == TYPE_VEX_3B)
1978  vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]);
1979  else if (insn->vectorExtensionType == TYPE_VEX_2B)
1980  vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]);
1981  else if (insn->vectorExtensionType == TYPE_XOP)
1982  vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]);
1983  else
1984  return -1;
1985 
1986  if (insn->mode != MODE_64BIT)
1987  vvvv &= 0x7;
1988 
1989  insn->vvvv = vvvv;
1990 
1991  return 0;
1992 }
1993 
1994 /*
1995  * readMaskRegister - Reads an mask register from the opcode field of an
1996  * instruction.
1997  *
1998  * @param insn - The instruction whose opcode field is to be read.
1999  * @return - 0 on success; nonzero otherwise.
2000  */
2001 static int readMaskRegister(struct InternalInstruction *insn)
2002 {
2003  // dbgprintf(insn, "readMaskRegister()");
2004 
2005  if (insn->vectorExtensionType != TYPE_EVEX)
2006  return -1;
2007 
2009 
2010  return 0;
2011 }
2012 
2013 /*
2014  * readOperands - Consults the specifier for an instruction and consumes all
2015  * operands for that instruction, interpreting them as it goes.
2016  *
2017  * @param insn - The instruction whose operands are to be read and interpreted.
2018  * @return - 0 if all operands could be read; nonzero otherwise.
2019  */
2020 static int readOperands(struct InternalInstruction *insn)
2021 {
2022  int index;
2023  int hasVVVV, needVVVV;
2024  int sawRegImm = 0;
2025 
2026  // printf(">>> readOperands(): ID = %u\n", insn->instructionID);
2027  /* If non-zero vvvv specified, need to make sure one of the operands
2028  uses it. */
2029  hasVVVV = !readVVVV(insn);
2030  needVVVV = hasVVVV && (insn->vvvv != 0);
2031 
2032  for (index = 0; index < X86_MAX_OPERANDS; ++index) {
2033  //printf(">>> encoding[%u] = %u\n", index, x86OperandSets[insn->spec->operands][index].encoding);
2034  switch (x86OperandSets[insn->spec->operands][index].encoding) {
2035  case ENCODING_NONE:
2036  case ENCODING_SI:
2037  case ENCODING_DI:
2038  break;
2039  case ENCODING_REG:
2041  if (readModRM(insn))
2042  return -1;
2043  if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
2044  return -1;
2045  // Apply the AVX512 compressed displacement scaling factor.
2046  if (x86OperandSets[insn->spec->operands][index].encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
2047  insn->displacement *= (int64_t)1 << (x86OperandSets[insn->spec->operands][index].encoding - ENCODING_RM);
2048  break;
2049  case ENCODING_CB:
2050  case ENCODING_CW:
2051  case ENCODING_CD:
2052  case ENCODING_CP:
2053  case ENCODING_CO:
2054  case ENCODING_CT:
2055  // dbgprintf(insn, "We currently don't hande code-offset encodings");
2056  return -1;
2057  case ENCODING_IB:
2058  if (sawRegImm) {
2059  /* Saw a register immediate so don't read again and instead split the
2060  previous immediate. FIXME: This is a hack. */
2061  insn->immediates[insn->numImmediatesConsumed] =
2062  insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
2063  ++insn->numImmediatesConsumed;
2064  break;
2065  }
2066  if (readImmediate(insn, 1))
2067  return -1;
2068  if (x86OperandSets[insn->spec->operands][index].type == TYPE_XMM128 ||
2069  x86OperandSets[insn->spec->operands][index].type == TYPE_XMM256)
2070  sawRegImm = 1;
2071  break;
2072  case ENCODING_IW:
2073  if (readImmediate(insn, 2))
2074  return -1;
2075  break;
2076  case ENCODING_ID:
2077  if (readImmediate(insn, 4))
2078  return -1;
2079  break;
2080  case ENCODING_IO:
2081  if (readImmediate(insn, 8))
2082  return -1;
2083  break;
2084  case ENCODING_Iv:
2085  if (readImmediate(insn, insn->immediateSize))
2086  return -1;
2087  break;
2088  case ENCODING_Ia:
2089  if (readImmediate(insn, insn->addressSize))
2090  return -1;
2091  /* Direct memory-offset (moffset) immediate will get mapped
2092  to memory operand later. We want the encoding info to
2093  reflect that as well. */
2094  insn->displacementOffset = insn->immediateOffset;
2095  insn->consumedDisplacement = true;
2096  insn->displacementSize = insn->immediateSize;
2097  insn->displacement = insn->immediates[insn->numImmediatesConsumed - 1];
2098  insn->immediateOffset = 0;
2099  insn->immediateSize = 0;
2100  break;
2101  case ENCODING_RB:
2102  if (readOpcodeRegister(insn, 1))
2103  return -1;
2104  break;
2105  case ENCODING_RW:
2106  if (readOpcodeRegister(insn, 2))
2107  return -1;
2108  break;
2109  case ENCODING_RD:
2110  if (readOpcodeRegister(insn, 4))
2111  return -1;
2112  break;
2113  case ENCODING_RO:
2114  if (readOpcodeRegister(insn, 8))
2115  return -1;
2116  break;
2117  case ENCODING_Rv:
2118  if (readOpcodeRegister(insn, 0))
2119  return -1;
2120  break;
2121  case ENCODING_FP:
2122  break;
2123  case ENCODING_VVVV:
2124  needVVVV = 0; /* Mark that we have found a VVVV operand. */
2125  if (!hasVVVV)
2126  return -1;
2127  if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
2128  return -1;
2129  break;
2130  case ENCODING_WRITEMASK:
2131  if (readMaskRegister(insn))
2132  return -1;
2133  break;
2134  case ENCODING_DUP:
2135  break;
2136  default:
2137  // dbgprintf(insn, "Encountered an operand with an unknown encoding.");
2138  return -1;
2139  }
2140  }
2141 
2142  /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */
2143  if (needVVVV) return -1;
2144 
2145  return 0;
2146 }
2147 
2148 // return True if instruction is illegal to use with prefixes
2149 // This also check & fix the isPrefixNN when a prefix is irrelevant.
2150 static bool checkPrefix(struct InternalInstruction *insn)
2151 {
2152  // LOCK prefix
2153  if (insn->isPrefixf0) {
2154  switch(insn->instructionID) {
2155  default:
2156  // invalid LOCK
2157  return true;
2158 
2159  // nop dword [rax]
2160  case X86_NOOPL:
2161 
2162  // DEC
2163  case X86_DEC16m:
2164  case X86_DEC32m:
2165  case X86_DEC64m:
2166  case X86_DEC8m:
2167 
2168  // ADC
2169  case X86_ADC16mi:
2170  case X86_ADC16mi8:
2171  case X86_ADC16mr:
2172  case X86_ADC32mi:
2173  case X86_ADC32mi8:
2174  case X86_ADC32mr:
2175  case X86_ADC64mi32:
2176  case X86_ADC64mi8:
2177  case X86_ADC64mr:
2178  case X86_ADC8mi:
2179  case X86_ADC8mi8:
2180  case X86_ADC8mr:
2181  case X86_ADC8rm:
2182  case X86_ADC16rm:
2183  case X86_ADC32rm:
2184  case X86_ADC64rm:
2185 
2186  // ADD
2187  case X86_ADD16mi:
2188  case X86_ADD16mi8:
2189  case X86_ADD16mr:
2190  case X86_ADD32mi:
2191  case X86_ADD32mi8:
2192  case X86_ADD32mr:
2193  case X86_ADD64mi32:
2194  case X86_ADD64mi8:
2195  case X86_ADD64mr:
2196  case X86_ADD8mi:
2197  case X86_ADD8mi8:
2198  case X86_ADD8mr:
2199  case X86_ADD8rm:
2200  case X86_ADD16rm:
2201  case X86_ADD32rm:
2202  case X86_ADD64rm:
2203 
2204  // AND
2205  case X86_AND16mi:
2206  case X86_AND16mi8:
2207  case X86_AND16mr:
2208  case X86_AND32mi:
2209  case X86_AND32mi8:
2210  case X86_AND32mr:
2211  case X86_AND64mi32:
2212  case X86_AND64mi8:
2213  case X86_AND64mr:
2214  case X86_AND8mi:
2215  case X86_AND8mi8:
2216  case X86_AND8mr:
2217  case X86_AND8rm:
2218  case X86_AND16rm:
2219  case X86_AND32rm:
2220  case X86_AND64rm:
2221 
2222 
2223  // BTC
2224  case X86_BTC16mi8:
2225  case X86_BTC16mr:
2226  case X86_BTC32mi8:
2227  case X86_BTC32mr:
2228  case X86_BTC64mi8:
2229  case X86_BTC64mr:
2230 
2231  // BTR
2232  case X86_BTR16mi8:
2233  case X86_BTR16mr:
2234  case X86_BTR32mi8:
2235  case X86_BTR32mr:
2236  case X86_BTR64mi8:
2237  case X86_BTR64mr:
2238 
2239  // BTS
2240  case X86_BTS16mi8:
2241  case X86_BTS16mr:
2242  case X86_BTS32mi8:
2243  case X86_BTS32mr:
2244  case X86_BTS64mi8:
2245  case X86_BTS64mr:
2246 
2247  // CMPXCHG
2248  case X86_CMPXCHG16B:
2249  case X86_CMPXCHG16rm:
2250  case X86_CMPXCHG32rm:
2251  case X86_CMPXCHG64rm:
2252  case X86_CMPXCHG8rm:
2253  case X86_CMPXCHG8B:
2254 
2255  // INC
2256  case X86_INC16m:
2257  case X86_INC32m:
2258  case X86_INC64m:
2259  case X86_INC8m:
2260 
2261  // NEG
2262  case X86_NEG16m:
2263  case X86_NEG32m:
2264  case X86_NEG64m:
2265  case X86_NEG8m:
2266 
2267  // NOT
2268  case X86_NOT16m:
2269  case X86_NOT32m:
2270  case X86_NOT64m:
2271  case X86_NOT8m:
2272 
2273  // OR
2274  case X86_OR16mi:
2275  case X86_OR16mi8:
2276  case X86_OR16mr:
2277  case X86_OR32mi:
2278  case X86_OR32mi8:
2279  case X86_OR32mr:
2280  case X86_OR32mrLocked:
2281  case X86_OR64mi32:
2282  case X86_OR64mi8:
2283  case X86_OR64mr:
2284  case X86_OR8mi8:
2285  case X86_OR8mi:
2286  case X86_OR8mr:
2287  case X86_OR8rm:
2288  case X86_OR16rm:
2289  case X86_OR32rm:
2290  case X86_OR64rm:
2291 
2292  // SBB
2293  case X86_SBB16mi:
2294  case X86_SBB16mi8:
2295  case X86_SBB16mr:
2296  case X86_SBB32mi:
2297  case X86_SBB32mi8:
2298  case X86_SBB32mr:
2299  case X86_SBB64mi32:
2300  case X86_SBB64mi8:
2301  case X86_SBB64mr:
2302  case X86_SBB8mi:
2303  case X86_SBB8mi8:
2304  case X86_SBB8mr:
2305 
2306  // SUB
2307  case X86_SUB16mi:
2308  case X86_SUB16mi8:
2309  case X86_SUB16mr:
2310  case X86_SUB32mi:
2311  case X86_SUB32mi8:
2312  case X86_SUB32mr:
2313  case X86_SUB64mi32:
2314  case X86_SUB64mi8:
2315  case X86_SUB64mr:
2316  case X86_SUB8mi8:
2317  case X86_SUB8mi:
2318  case X86_SUB8mr:
2319  case X86_SUB8rm:
2320  case X86_SUB16rm:
2321  case X86_SUB32rm:
2322  case X86_SUB64rm:
2323 
2324  // XADD
2325  case X86_XADD16rm:
2326  case X86_XADD32rm:
2327  case X86_XADD64rm:
2328  case X86_XADD8rm:
2329 
2330  // XCHG
2331  case X86_XCHG16rm:
2332  case X86_XCHG32rm:
2333  case X86_XCHG64rm:
2334  case X86_XCHG8rm:
2335 
2336  // XOR
2337  case X86_XOR16mi:
2338  case X86_XOR16mi8:
2339  case X86_XOR16mr:
2340  case X86_XOR32mi:
2341  case X86_XOR32mi8:
2342  case X86_XOR32mr:
2343  case X86_XOR64mi32:
2344  case X86_XOR64mi8:
2345  case X86_XOR64mr:
2346  case X86_XOR8mi8:
2347  case X86_XOR8mi:
2348  case X86_XOR8mr:
2349  case X86_XOR8rm:
2350  case X86_XOR16rm:
2351  case X86_XOR32rm:
2352  case X86_XOR64rm:
2353 
2354  // this instruction can be used with LOCK prefix
2355  return false;
2356  }
2357  }
2358 
2359  // REPNE prefix
2360  if (insn->isPrefixf2) {
2361  // 0xf2 can be a part of instruction encoding, but not really a prefix.
2362  // In such a case, clear it.
2363  if (insn->twoByteEscape == 0x0f) {
2364  insn->prefix0 = 0;
2365  }
2366  }
2367 
2368  // no invalid prefixes
2369  return false;
2370 }
2371 
2372 /*
2373  * decodeInstruction - Reads and interprets a full instruction provided by the
2374  * user.
2375  *
2376  * @param insn - A pointer to the instruction to be populated. Must be
2377  * pre-allocated.
2378  * @param reader - The function to be used to read the instruction's bytes.
2379  * @param readerArg - A generic argument to be passed to the reader to store
2380  * any internal state.
2381  * @param startLoc - The address (in the reader's address space) of the first
2382  * byte in the instruction.
2383  * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
2384  * decode the instruction in.
2385  * @return - 0 if instruction is valid; nonzero if not.
2386  */
2387 int decodeInstruction(struct InternalInstruction *insn,
2389  const void *readerArg,
2390  uint64_t startLoc,
2392 {
2393  insn->reader = reader;
2394  insn->readerArg = readerArg;
2395  insn->startLocation = startLoc;
2396  insn->readerCursor = startLoc;
2397  insn->mode = mode;
2398 
2399  if (readPrefixes(insn) ||
2400  readOpcode(insn) ||
2401  getID(insn) ||
2402  insn->instructionID == 0 ||
2403  checkPrefix(insn) ||
2404  readOperands(insn))
2405  return -1;
2406 
2407  insn->length = (size_t)(insn->readerCursor - insn->startLocation);
2408 
2409  // instruction length must be <= 15 to be valid
2410  if (insn->length > 15)
2411  return -1;
2412 
2413  if (insn->operandSize == 0)
2414  insn->operandSize = insn->registerSize;
2415 
2416  insn->operands = &x86OperandSets[insn->spec->operands][0];
2417 
2418  return 0;
2419 }
2420 
2421 #endif
#define TWOBYTE_SYM
#define T3DNOW_MAP_SYM
#define XOP9_MAP_SYM
#define CASE_ENCODING_RM
#define CONTEXTS_SYM
#define X86_MAX_OPERANDS
#define INSTRUCTIONS_SYM
#define THREEBYTE3A_SYM
#define XOP8_MAP_SYM
#define THREEBYTE38_SYM
#define XOPA_MAP_SYM
uint16_t InstrUID
#define ONEBYTE_SYM
#define rFromEVEX2of4(evex)
#define lFromEVEX4of4(evex)
#define l2FromEVEX4of4(evex)
#define rFromVEX2of3(vex)
#define zFromEVEX4of4(evex)
#define rFromREX(rex)
#define bFromXOP2of3(xop)
#define xFromVEX2of3(vex)
@ SIB_INDEX_NONE
#define mmmmmFromVEX2of3(vex)
#define rmFromModRM(modRM)
int decodeInstruction(struct InternalInstruction *insn, byteReader_t reader, const void *readerArg, uint64_t startLoc, DisassemblerMode mode)
#define baseFromSIB(sib)
#define bFromEVEX4of4(evex)
#define rFromVEX2of2(vex)
#define ppFromEVEX3of4(evex)
#define v2FromEVEX4of4(evex)
#define modFromModRM(modRM)
#define rFromXOP2of3(xop)
#define wFromREX(rex)
#define lFromXOP3of3(xop)
@ TYPE_NO_VEX_XOP
@ SEG_OVERRIDE_ES
@ SEG_OVERRIDE_CS
@ SEG_OVERRIDE_GS
@ SEG_OVERRIDE_SS
@ SEG_OVERRIDE_FS
@ SEG_OVERRIDE_DS
#define lFromVEX2of2(vex)
#define scaleFromSIB(sib)
#define regFromModRM(modRM)
#define vvvvFromVEX2of2(vex)
#define ppFromXOP3of3(xop)
#define vvvvFromVEX3of3(vex)
#define r2FromEVEX2of4(evex)
@ XOP_MAP_SELECT_9
@ XOP_MAP_SELECT_A
@ XOP_MAP_SELECT_8
#define mmFromEVEX2of4(evex)
#define xFromXOP2of3(xop)
#define wFromEVEX3of4(evex)
#define bFromVEX2of3(vex)
#define wFromVEX3of3(vex)
int(* byteReader_t)(const struct reader_info *arg, uint8_t *byte, uint64_t address)
#define mmmmmFromXOP2of3(xop)
#define aaaFromEVEX4of4(evex)
#define lFromVEX3of3(vex)
#define ppFromVEX3of3(vex)
#define bFromEVEX2of4(evex)
#define xFromEVEX2of4(evex)
#define vvvvFromXOP3of3(vex)
#define ppFromVEX2of2(vex)
#define indexFromSIB(sib)
#define wFromXOP3of3(xop)
#define vvvvFromEVEX3of4(evex)
#define xFromREX(rex)
#define bFromREX(rex)
lzma_index ** i
Definition: index.h:629
#define NULL
Definition: cris-opc.c:27
int mod(int a, int b)
Definition: crypto_rot.c:8
int(* decode)(const ut8 *, ebc_command_t *cmd)
Definition: ebc_disas.c:88
unsigned short prefix[65536]
Definition: gun.c:163
voidpf void uLong size
Definition: ioapi.h:138
const char int mode
Definition: ioapi.h:137
static const char d32[]
Definition: iob_net.c:42
#define reg(n)
int type
Definition: mipsasm.c:17
int idx
Definition: setup.py:197
#define ARR_SIZE(a)
Definition: ocaml.c:13
int id
Definition: op.c:540
unsigned short uint16_t
Definition: sftypes.h:30
long int64_t
Definition: sftypes.h:32
int int32_t
Definition: sftypes.h:33
int size_t
Definition: sftypes.h:40
unsigned int uint32_t
Definition: sftypes.h:29
unsigned long uint64_t
Definition: sftypes.h:28
short int16_t
Definition: sftypes.h:34
unsigned char uint8_t
Definition: sftypes.h:31
char int8_t
Definition: sftypes.h:35
const struct InstructionSpecifier * spec
VectorExtensionType vectorExtensionType
const struct OperandSpecifier * operands
SegmentOverride segmentOverride
void reader(void *n)
Definition: main.c:8
bool valid
Definition: core.c:77
Definition: dis.c:32