Rizin
UNIX-like reverse engineering framework and CLI tools
asm.c
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: 2009-2021 pancake <pancake@nopcode.org>
2 // SPDX-FileCopyrightText: 2009-2021 nibble <nibble.ds@gmail.com>
3 // SPDX-License-Identifier: LGPL-3.0-only
4 
5 #include "rz_util/rz_print.h"
6 #include <rz_vector.h>
7 #include <rz_util/rz_strbuf.h>
8 #include <rz_regex.h>
9 #include <rz_util/rz_assert.h>
10 #include <rz_list.h>
11 #include <stdio.h>
12 #include <rz_core.h>
13 #include <rz_types.h>
14 #include <rz_util.h>
15 #include <rz_asm.h>
16 #define USE_R2 1
17 #include <spp.h>
18 #include <config.h>
19 
21 
/**
 * \brief Tells whether the text at \p c starts like a numeric literal.
 *
 * A non-ASCII first byte (for instance the lead byte of a UTF-8 sequence) is
 * never numeric. Otherwise the text qualifies when rz_num_is_hex_prefix()
 * accepts it or when its first character is a hexadecimal digit.
 *
 * \param c Text to inspect; must not be NULL.
 * \return true when the text starts like a number, false otherwise.
 */
static bool is_num(const char *c) {
	rz_return_val_if_fail(c, false);
	const char first = *c;
	if (isascii(first)) {
		return rz_num_is_hex_prefix(c) || isxdigit(first);
	}
	return false; // UTF-8
}
37 
/**
 * \brief Tells whether the text at \p c starts with a number or a letter.
 *
 * A non-ASCII first byte is rejected. Otherwise the result is true when
 * is_num() accepts the text or its first character is alphabetic.
 *
 * \param c Text to inspect; must not be NULL.
 * \return true for a numeric or alphabetic start, false otherwise.
 */
static bool is_alpha_num(const char *c) {
	rz_return_val_if_fail(c, false);
	const char first = c[0];
	if (!isascii(first)) {
		return false; // UTF-8
	}
	if (is_num(c)) {
		return true;
	}
	return isalpha(first) != 0;
}
52 
/**
 * \brief Tells whether the text at \p c starts with a separator.
 *
 * Single-character separators are ( ) [ ] { } , . # : and the space.
 * The two-character sequences "||", "==" and "<=" also count ("::" is already
 * covered by the single ':'). A non-ASCII first byte is never a separator.
 *
 * \param c NUL-terminated text; the second character is only read when the
 *          first one is '|', '=' or '<'.
 * \return true when a separator starts at \p c.
 */
static bool is_separator(const char *c) {
	if (!isascii(*c)) {
		return false; // UTF-8
	}
	switch (c[0]) {
	case '(':
	case ')':
	case '[':
	case ']':
	case '{':
	case '}':
	case ',':
	case '.':
	case '#':
	case ':':
	case ' ':
		return true;
	case '|':
		return c[1] == '|';
	case '=':
	case '<':
		// "==" and "<=" are separators; a lone '=' or '<' is not
		return c[1] == '=';
	default:
		return false;
	}
}
63 
/**
 * \brief Tells whether the first character of \p c is an operator.
 *
 * The operator characters are + - / > < * % | & = and !.
 * A non-ASCII first byte or an empty string is never an operator.
 *
 * \param c NUL-terminated text; only the first character is inspected.
 * \return true when the first character is one of the operator characters.
 */
static bool is_operator(const char *c) {
	static const char operators[] = "+-/><*%|&=!";
	const char ch = *c;
	// strchr() would also match the terminating NUL, so exclude it explicitly
	if (!isascii(ch) || ch == '\0') {
		return false; // UTF-8 or end of string
	}
	return strchr(operators, ch) != NULL;
}
70 
// Reports whether `name` is found in any register hash table of `regset`.
// `regset` is indexed from 0 up to RZ_REG_TYPE_LAST - 1; entries without a
// table (`ht_regs` unset) are skipped.
// Returns false when `regset` is NULL or no table reports `name` as found.
// NOTE(review): the listing's embedded numbering jumps from 71 to 73, so one
// original line may be missing right after the signature.
71 static bool is_register(const char *name, RZ_BORROW const RzRegSet *regset) {
73  if (!regset) {
74  return false;
75  }
76 
77  bool found = false;
78  for (ut32 i = 0; i < RZ_REG_TYPE_LAST; ++i) {
79  if (regset[i].ht_regs) {
// Only the `found` flag is consulted; the looked-up value itself is ignored.
80  ht_pp_find(regset[i].ht_regs, name, &found);
81  if (found) {
82  return true;
83  }
84  }
85  }
86  return false;
87 }
88 
// Table of dot-directives, terminated by a NULL sentinel.
// replace_directives() walks it until the sentinel and hands each entry to
// replace_directives_for() as the token to rewrite.
89 static char *directives[] = {
90  ".include", ".error", ".warning",
91  ".echo", ".if", ".ifeq", ".endif",
92  ".else", ".set", ".get", NULL
93 };
94 
96 
97 static void parseHeap(RzParse *p, RzStrBuf *s) {
98  char *op_buf_asm = rz_strbuf_get(s);
99  char *out = rz_parse_pseudocode(p, op_buf_asm);
100  if (out) {
101  rz_strbuf_set(s, out);
102  free(out);
103  }
104 }
105 
106 /* pseudo.c - private api */
107 static int rz_asm_pseudo_align(RzAsmCode *acode, RzAsmOp *op, char *input) {
108  acode->code_align = rz_num_math(NULL, input);
109  return 0;
110 }
111 
112 static int rz_asm_pseudo_string(RzAsmOp *op, char *input, int zero) {
113  int len = strlen(input) - 1;
114  if (len < 1) {
115  return 0;
116  }
117  // TODO: if not starting with '"'.. give up
118  if (input[len] == '"') {
119  input[len] = 0;
120  }
121  if (*input == '"') {
122  input++;
123  }
124  len = rz_str_unescape(input) + zero;
125  rz_strbuf_set(&op->buf, input); // uh?
126  return len;
127 }
128 
129 static inline int rz_asm_pseudo_arch(RzAsm *a, char *input) {
130  if (!rz_asm_use(a, input)) {
131  RZ_LOG_ERROR("Unknown asm plugin name '%s'\n", input);
132  return -1;
133  }
134  return 0;
135 }
136 
137 static inline int rz_asm_pseudo_bits(RzAsm *a, char *input) {
138  if (!(rz_asm_set_bits(a, rz_num_math(NULL, input)))) {
139  RZ_LOG_ERROR("Unsupported bits (%s) value for the selected asm plugin.\n", input);
140  return -1;
141  }
142  return 0;
143 }
144 
// Handler called by rz_asm_massemble() for the ".org" directive.
// As visible here it only returns 0, i.e. it emits no bytes.
// NOTE(review): the listing's embedded numbering skips line 146, so a
// statement may be missing from this body.
145 static inline int rz_asm_pseudo_org(RzAsm *a, char *input) {
147  return 0;
148 }
149 
150 static inline int rz_asm_pseudo_intN(RzAsm *a, RzAsmOp *op, char *input, int n) {
151  ut16 s;
152  ut32 i;
153  ut64 s64 = rz_num_math(NULL, input);
154  if (n != 8 && s64 >> (n * 8)) {
155  RZ_LOG_ERROR("Cannot write a number that does not fit within a int%d type.\n", (n * 8));
156  return 0;
157  }
158  // XXX honor endian here
159  ut8 *buf = (ut8 *)rz_strbuf_get(&op->buf);
160  if (!buf) {
161  return 0;
162  }
163  if (n == 2) {
164  s = (ut16)(st16)s64;
165  rz_write_ble16(buf, s, a->big_endian);
166  } else if (n == 4) {
167  i = (ut32)(st32)s64;
168  rz_write_ble32(buf, i, a->big_endian);
169  } else if (n == 8) {
170  rz_write_ble64(buf, (ut64)s64, a->big_endian);
171  } else {
172  return 0;
173  }
174  return n;
175 }
176 
177 static inline int rz_asm_pseudo_int16(RzAsm *a, RzAsmOp *op, char *input) {
178  return rz_asm_pseudo_intN(a, op, input, 2);
179 }
180 
181 static inline int rz_asm_pseudo_int32(RzAsm *a, RzAsmOp *op, char *input) {
182  return rz_asm_pseudo_intN(a, op, input, 4);
183 }
184 
185 static inline int rz_asm_pseudo_int64(RzAsm *a, RzAsmOp *op, char *input) {
186  return rz_asm_pseudo_intN(a, op, input, 8);
187 }
188 
// Handler called by rz_asm_massemble() for the ".byte"/".int8" directives.
// Commas in `input` are turned into spaces, then each word is evaluated with
// rz_num_math() and stored as one byte of a temporary buffer.
// Returns `len`, the number of bytes produced (0 on allocation failure).
// NOTE(review): the embedded numbering skips lines 192-193 and 203. As shown,
// `len` is never changed from 0 and `buf` is freed without being used; the
// missing lines presumably count/split the words and copy `buf` into `op`.
189 static inline int rz_asm_pseudo_byte(RzAsmOp *op, char *input) {
190  int i, len = 0;
191  rz_str_replace_char(input, ',', ' ');
194  ut8 *buf = malloc(len);
195  if (!buf) {
196  return 0;
197  }
198  for (i = 0; i < len; i++) {
199  const char *word = rz_str_word_get0(input, i);
200  int num = (int)rz_num_math(NULL, word);
// Stored into a ut8, so only the low byte of each value is kept.
201  buf[i] = num;
202  }
204  free(buf);
205  return len;
206 }
207 
// Handler called by rz_asm_massemble() for the ".fill" directive.
// `input` is parsed as "repeat,size,value" (decimal integers). The total size
// is computed as size * sizeof(int) * repeat, and a temporary buffer of that
// many bytes is filled with copies of `value`.
// Returns the computed size, or 0 when it is not positive.
// NOTE(review): the embedded numbering skips line 218; as shown, `buf` is
// freed without being copied into `op`. The sscanf() result is not checked,
// so unparsed fields keep their initial 0.
208 static inline int rz_asm_pseudo_fill(RzAsmOp *op, char *input) {
209  int i, repeat = 0, size = 0, value = 0;
210  sscanf(input, "%d,%d,%d", &repeat, &size, &value); // use rz_num?
211  size *= (sizeof(value) * repeat);
212  if (size > 0) {
213  ut8 *buf = malloc(size);
214  if (buf) {
// `size` is a multiple of sizeof(value), so every memcpy stays in bounds.
215  for (i = 0; i < size; i += sizeof(value)) {
216  memcpy(&buf[i], &value, sizeof(value));
217  }
219  free(buf);
220  }
221  } else {
222  size = 0;
223  }
224  return size;
225 }
226 
// Handler called by rz_asm_massemble() for the ".incbin" directive.
// Reads the file named by `input` with rz_file_slurp() and stores its content
// (starting at offset `skip`) in `op->buf_inc`, replacing any previous buffer.
// Returns `count`, or -1 (after logging an error) when the file cannot be read.
// NOTE(review): the embedded numbering skips lines 231, 233 and 234; the
// declarations of `skip` and `count` are not visible in this listing.
227 static inline int rz_asm_pseudo_incbin(RzAsmOp *op, char *input) {
228  size_t bytes_read = 0;
229  rz_str_replace_char(input, ',', ' ');
230  // int len = rz_str_word_count (input);
232  // const char *filename = rz_str_word_get0 (input, 0);
235  char *content = rz_file_slurp(input, &bytes_read);
236  if (!content) {
237  RZ_LOG_ERROR("Could not open '%s'.\n", input);
238  return -1;
239  }
// Clamp `skip` to the number of bytes read.
240  if (skip > 0) {
241  skip = skip > bytes_read ? bytes_read : skip;
242  }
// A positive `count` larger than the file becomes 0; a non-positive one means the whole file.
243  if (count > 0) {
244  count = count > bytes_read ? 0 : count;
245  } else {
246  count = bytes_read;
247  }
248  // Need to handle arbitrary amount of data
249  rz_buf_free(op->buf_inc);
250  op->buf_inc = rz_buf_new_with_string(content + skip);
251  // Terminate the original buffer
252  free(content);
253  return count;
254 }
255 
256 static void plugin_fini(RzAsm *a) {
257  if (a->cur && a->cur->fini && !a->cur->fini(a->plugin_data)) {
258  RZ_LOG_ERROR("asm plugin '%s' failed to terminate.\n", a->cur->name);
259  }
260 }
261 
263  int i;
264  RzAsm *a = RZ_NEW0(RzAsm);
265  if (!a) {
266  return NULL;
267  }
268  a->dataalign = 1;
269  a->bits = RZ_SYS_BITS;
270  a->bitshift = 0;
271  a->syntax = RZ_ASM_SYNTAX_INTEL;
272  a->plugins = rz_list_newf(NULL);
273  if (!a->plugins) {
274  free(a);
275  return NULL;
276  }
277  for (i = 0; i < RZ_ARRAY_SIZE(asm_static_plugins); i++) {
279  }
280  return a;
281 }
282 
283 RZ_API bool rz_asm_setup(RzAsm *a, const char *arch, int bits, int big_endian) {
284  rz_return_val_if_fail(a && arch, false);
285  bool ret = !rz_asm_use(a, arch);
286  return ret | !rz_asm_set_bits(a, bits);
287 }
288 
289 // TODO: spaghetti
290 RZ_API int rz_asm_sub_names_input(RzAsm *a, const char *f) {
291  rz_return_val_if_fail(a && f, false);
292  if (!a->ifilter) {
293  a->ifilter = rz_parse_new();
294  }
295  if (!rz_parse_use(a->ifilter, f)) {
296  rz_parse_free(a->ifilter);
297  a->ifilter = NULL;
298  return false;
299  }
300  return true;
301 }
302 
303 RZ_API int rz_asm_sub_names_output(RzAsm *a, const char *f) {
304  if (!a->ofilter) {
305  a->ofilter = rz_parse_new();
306  }
307  if (!rz_parse_use(a->ofilter, f)) {
308  rz_parse_free(a->ofilter);
309  a->ofilter = NULL;
310  return false;
311  }
312  return true;
313 }
314 
316  if (!a) {
317  return;
318  }
319  plugin_fini(a);
320  if (a->plugins) {
321  rz_list_free(a->plugins);
322  a->plugins = NULL;
323  }
324  rz_syscall_free(a->syscall);
325  free(a->cpu);
326  free(a->features);
327  sdb_free(a->pair);
328  ht_pp_free(a->flags);
329  a->pair = NULL;
330  free(a);
331 }
332 
334  if (!p->name) {
335  return false;
336  }
337  if (rz_asm_is_valid(a, p->name)) {
338  return false;
339  }
340  rz_list_append(a->plugins, p);
341  return true;
342 }
343 
344 RZ_API int rz_asm_del(RzAsm *a, const char *name) {
345  /* TODO: Implement rz_asm_del */
346  return false;
347 }
348 
349 RZ_API bool rz_asm_is_valid(RzAsm *a, const char *name) {
350  RzAsmPlugin *h;
351  RzListIter *iter;
352  if (!name || !*name) {
353  return false;
354  }
355  rz_list_foreach (a->plugins, iter, h) {
356  if (!strcmp(h->name, name)) {
357  return true;
358  }
359  }
360  return false;
361 }
362 
363 RZ_API bool rz_asm_use_assembler(RzAsm *a, const char *name) {
364  RzAsmPlugin *h;
365  RzListIter *iter;
366  if (a) {
367  if (name && *name) {
368  rz_list_foreach (a->plugins, iter, h) {
369  if (h->assemble && !strcmp(h->name, name)) {
370  a->acur = h;
371  return true;
372  }
373  }
374  }
375  a->acur = NULL;
376  }
377  return false;
378 }
379 
// Walks the nodes of the plugin configuration `pcfg` in order to register them
// in the core's global configuration, warning about each node that cannot be added.
// NOTE(review): the embedded numbering skips line 394, where the call that
// actually adds the node (and opens the `if`) presumably lives.
// NOTE(review): `rz_asm->core` is dereferenced below although only `pcfg` and
// `rz_asm` are asserted; rz_asm_use() only calls this when `core` is non-NULL.
386 static void set_plugin_configs(RZ_BORROW RzAsm *rz_asm, RZ_BORROW RzConfig *pcfg) {
387  rz_return_if_fail(pcfg && rz_asm);
388 
389  RzConfig *conf = ((RzCore *)(rz_asm->core))->config;
390  RzConfigNode *n;
391  RzListIter *it;
392  rz_list_foreach_iter(pcfg->nodes, it) {
393  n = it->data;
395  RZ_LOG_WARN("Failed to add \"%s\" to the global config.\n", n->name)
396  }
397  }
398 }
399 
407  rz_return_if_fail(pcfg && rz_asm && rz_asm->core);
408 
409  RzConfig *conf = ((RzCore *)(rz_asm->core))->config;
410  RzConfigNode *n;
411  RzListIter *it;
412  rz_list_foreach_iter(pcfg->nodes, it) {
413  n = it->data;
414  if (!rz_config_rm(conf, n->name)) {
415  RZ_LOG_WARN("Failed to remove \"%s\" from the global config.", n->name)
416  }
417  }
418 }
419 
420 // TODO: this can be optimized using rz_str_hash()
// Selects the asm plugin whose name equals `name` as the current plugin of `a`.
// Returns true when the plugin is (or already was) selected; returns false when
// `a` or `name` is NULL, when the plugin's init callback fails, or when no
// plugin matches (in which case the opcode description database `a->pair` is
// released and cleared).
// NOTE(review): the embedded numbering skips line 446 inside the `if (file)`
// block, so one statement is not visible in this listing.
429 RZ_API bool rz_asm_use(RzAsm *a, const char *name) {
430  RzAsmPlugin *h;
431  RzListIter *iter;
432  if (!a || !name) {
433  return false;
434  }
435  RzCore *core = a->core;
// Shortcut: compares `name` against the ARCH of the current plugin, whereas
// the lookup below matches against plugin NAMES.
436  if (a->cur && !strcmp(a->cur->arch, name)) {
437  return true;
438  }
439  rz_list_foreach (a->plugins, iter, h) {
440  if (h->arch && h->name && !strcmp(h->name, name)) {
// Only when the architecture changes: finalize the old plugin and reload the
// per-architecture opcode description database "<opcodes_dir>/<arch>.sdb".
441  if (!a->cur || (a->cur && strcmp(a->cur->arch, h->arch))) {
442  plugin_fini(a);
443  char *opcodes_dir = rz_path_system(RZ_SDB_OPCODES);
444  char *file = rz_str_newf("%s/%s.sdb", opcodes_dir, h->arch);
445  if (file) {
447  sdb_free(a->pair);
448  a->pair = sdb_new(NULL, file, 0);
449  free(file);
450  }
451  free(opcodes_dir);
452  }
453  if (h->init && !h->init(&a->plugin_data)) {
454  RZ_LOG_ERROR("asm plugin '%s' failed to initialize.\n", h->name);
455  return false;
456  }
457 
// Swap plugin-specific config nodes in the core config: the config is
// unlocked around each update and locked again afterwards.
458  if (a->cur && a->cur->get_config && core) {
459  rz_config_lock(core->config, false);
460  unset_plugins_config(a, a->cur->get_config());
461  rz_config_lock(core->config, true);
462  }
463  if (h->get_config && core) {
464  rz_config_lock(core->config, false);
465  set_plugin_configs(a, h->get_config());
466  rz_config_lock(core->config, true);
467  }
468  a->cur = h;
469  return true;
470  }
471  }
472  sdb_free(a->pair);
473  a->pair = NULL;
474  return false;
475 }
476 
478  if (a) {
479  free(a->cpu);
480  a->cpu = cpu ? strdup(cpu) : NULL;
481  }
482 }
483 
484 static bool has_bits(RzAsmPlugin *h, int bits) {
485  return (h && h->bits && (bits & h->bits));
486 }
487 
489  if (has_bits(a->cur, bits)) {
490  a->bits = bits; // TODO : use OR? :)
491  return true;
492  }
493  return false;
494 }
495 
497  rz_return_val_if_fail(a && a->cur, false);
498  a->big_endian = false; // little endian by default
499  switch (a->cur->endian) {
500  case RZ_SYS_ENDIAN_NONE:
501  case RZ_SYS_ENDIAN_BI:
502  // TODO: not yet implemented
503  a->big_endian = b;
504  break;
506  a->big_endian = false;
507  break;
508  case RZ_SYS_ENDIAN_BIG:
509  a->big_endian = true;
510  break;
511  default:
512  RZ_LOG_DEBUG("The asm plugin doesn't specify endianness.\n");
513  break;
514  }
515  return a->big_endian;
516 }
517 
519  // TODO: move into rz_arch ?
520  switch (syntax) {
522  case RZ_ASM_SYNTAX_INTEL:
523  case RZ_ASM_SYNTAX_MASM:
524  case RZ_ASM_SYNTAX_ATT:
525  case RZ_ASM_SYNTAX_JZ:
526  a->syntax = syntax;
527  return true;
528  default:
529  return false;
530  }
531 }
532 
534  a->pc = pc;
535  return true;
536 }
537 
538 static bool __isInvalid(RzAsmOp *op) {
539  const char *buf_asm = rz_strbuf_get(&op->buf_asm);
540  return (buf_asm && *buf_asm && !strcmp(buf_asm, "invalid"));
541 }
542 
545  rz_return_val_if_fail(a && buf && op, -1);
546  if (len < 1) {
547  return 0;
548  }
549 
550  int ret = op->payload = 0;
551  op->size = 4;
552  op->bitsize = 0;
553  rz_asm_op_set_asm(op, "");
554  if (a->pcalign) {
555  const int mod = a->pc % a->pcalign;
556  if (mod) {
557  op->size = a->pcalign - mod;
558  rz_strbuf_set(&op->buf_asm, "unaligned");
559  return -1;
560  }
561  }
562  if (a->cur && a->cur->disassemble) {
563  // shift buf N bits
564  if (a->bitshift > 0) {
565  ut8 *tmp = calloc(len, 1);
566  if (tmp) {
567  rz_mem_copybits_delta(tmp, 0, buf, a->bitshift, (len * 8) - a->bitshift);
568  ret = a->cur->disassemble(a, op, tmp, len);
569  free(tmp);
570  }
571  } else {
572  ret = a->cur->disassemble(a, op, buf, len);
573  }
574  }
575  if (ret < 0) {
576  ret = 0;
577  }
578  if (op->bitsize > 0) {
579  op->size = op->bitsize / 8;
580  a->bitshift += op->bitsize % 8;
581  int count = a->bitshift / 8;
582  if (count > 0) {
583  op->size = op->size + count;
584  a->bitshift %= 8;
585  }
586  }
587 
588  if (op->size < 1 || __isInvalid(op)) {
589  if (a->invhex) {
590  if (a->bits == 16) {
591  ut16 b = rz_read_le16(buf);
592  rz_strbuf_set(&op->buf_asm, sdb_fmt(".word 0x%04x", b));
593  } else {
594  ut32 b = rz_read_le32(buf);
595  rz_strbuf_set(&op->buf_asm, sdb_fmt(".dword 0x%08x", b));
596  }
597  // TODO: something for 64bits too?
598  } else {
599  rz_strbuf_set(&op->buf_asm, "invalid");
600  }
601  }
602  if (a->ofilter) {
603  parseHeap(a->ofilter, &op->buf_asm);
604  }
605  int opsz = (op->size > 0) ? RZ_MAX(0, RZ_MIN(len, op->size)) : 1;
606  rz_asm_op_set_buf(op, buf, opsz);
607  return ret;
608 }
609 
// Assembler callback type: encodes the text `buf` into `op` on behalf of `a`.
// rz_asm_assemble() treats a positive return value as the instruction size.
610 typedef int (*Ase)(RzAsm *a, RzAsmOp *op, const char *buf);
611 
613  if (!a || !h->arch || !h->assemble || !has_bits(h, a->bits)) {
614  return false;
615  }
616  return (!strncmp(a->cur->arch, h->arch, strlen(a->cur->arch)));
617 }
618 
619 static Ase findAssembler(RzAsm *a, const char *kw) {
620  Ase ase = NULL;
621  RzAsmPlugin *h;
622  RzListIter *iter;
623  if (a->acur && a->acur->assemble) {
624  return a->acur->assemble;
625  }
626  rz_list_foreach (a->plugins, iter, h) {
627  if (assemblerMatches(a, h)) {
628  if (kw) {
629  if (strstr(h->name, kw)) {
630  return h->assemble;
631  }
632  } else {
633  ase = h->assemble;
634  }
635  }
636  }
637  return ase;
638 }
639 
// Rewrites every line of `str` that contains the directive `token` into the
// form "<{...}>\n", where "..." is the text from the character after the
// token's first character (p + 1, i.e. without the leading dot of a directive)
// up to the end of that line. Text before the token and lines without the
// token are copied unchanged into a string buffer.
// `str` is modified in place while scanning. When at least one rewrite
// happened, `str` is freed and the newly built string is returned; otherwise
// `str` itself is returned.
// NOTE(review): the embedded numbering skips line 672 (between the `if
// (changes)` block and the final return); as shown, `sb` is not released on
// the no-change path.
// NOTE(review): when a matched line has no trailing newline, `q` becomes NULL
// while `p` keeps its previous non-NULL value, so the next iteration takes the
// `if (p)` branch again with q == NULL — this looks like it can loop forever;
// verify.
640 static char *replace_directives_for(char *str, char *token) {
641  RzStrBuf *sb = rz_strbuf_new("");
642  char *p = NULL;
643  char *q = str;
644  bool changes = false;
645  for (;;) {
646  if (q) {
647  p = strstr(q, token);
648  }
649  if (p) {
// Terminate the directive line at its newline (if any) and remember where the next line starts.
650  char *nl = strchr(p, '\n');
651  if (nl) {
652  *nl++ = 0;
653  }
// Temporarily cut the string at the token so that only the text before it is appended.
654  char _ = *p;
655  *p = 0;
656  rz_strbuf_append(sb, q);
657  *p = _;
658  rz_strbuf_appendf(sb, "<{%s}>\n", p + 1);
659  q = nl;
660  changes = true;
661  } else {
// No further match: copy the remaining text and stop.
662  if (q) {
663  rz_strbuf_append(sb, q);
664  }
665  break;
666  }
667  }
668  if (changes) {
669  free(str);
670  return rz_strbuf_drain(sb);
671  }
673  return str;
674 }
675 
676 static char *replace_directives(char *str) {
677  int i = 0;
678  char *dir = directives[i++];
679  char *o = replace_directives_for(str, dir);
680  while (dir) {
681  o = replace_directives_for(o, dir);
682  dir = directives[i++];
683  }
684  return o;
685 }
686 
688  int i = 0;
689  char *dir = directives[i++];
690  while (dir) {
691  printf("%s\n", dir);
692  dir = directives[i++];
693  }
694 }
695 
696 // returns instruction size
697 RZ_API int rz_asm_assemble(RzAsm *a, RzAsmOp *op, const char *buf) {
698  rz_return_val_if_fail(a && op && buf, 0);
699  int ret = 0;
700  char *b = strdup(buf);
701  if (!b) {
702  return 0;
703  }
704  if (a->ifilter) {
705  char *tmp = rz_parse_pseudocode(a->ifilter, buf);
706  if (tmp) {
707  free(b);
708  b = tmp;
709  }
710  }
711  rz_str_case(b, 0); // to-lower
712  memset(op, 0, sizeof(RzAsmOp));
713  if (a->cur) {
714  Ase ase = NULL;
715  if (!a->cur->assemble) {
716  /* find callback if no assembler support in current plugin */
717  ase = findAssembler(a, ".ks");
718  if (!ase) {
719  ase = findAssembler(a, ".nz");
720  if (!ase) {
721  ase = findAssembler(a, NULL);
722  }
723  }
724  } else {
725  ase = a->cur->assemble;
726  }
727  if (ase) {
728  ret = ase(a, op, b);
729  }
730  }
731  // XXX delete this block, the ase thing should be setting asm, buf and hex
732  if (op && ret > 0) {
733  op->size = ret; // XXX shouldn't be necessary
734  rz_asm_op_set_asm(op, b); // XXX ase should be updating this already, isn't?
735  ut8 *opbuf = (ut8 *)rz_strbuf_get(&op->buf);
736  rz_asm_op_set_buf(op, opbuf, ret);
737  }
738  free(b);
739  return ret;
740 }
741 
742 // TODO: Use RzStrBuf api here pls
744  rz_return_val_if_fail(a && buf && len >= 0, NULL);
745 
746  RzStrBuf *buf_asm;
747  RzAsmCode *acode;
748  ut64 pc = a->pc;
749  ut64 idx;
750  size_t ret;
751  const size_t addrbytes = a->core ? ((RzCore *)a->core)->io->addrbytes : 1;
752 
753  if (!(acode = rz_asm_code_new())) {
754  return NULL;
755  }
756  if (!(acode->bytes = malloc(1 + len))) {
757  return rz_asm_code_free(acode);
758  }
759  memcpy(acode->bytes, buf, len);
760  if (!(buf_asm = rz_strbuf_new(NULL))) {
761  return rz_asm_code_free(acode);
762  }
763  RzAsmOp op;
764  rz_asm_op_init(&op);
765  for (idx = 0; idx + addrbytes <= len; idx += (addrbytes * ret)) {
766  rz_asm_set_pc(a, pc + idx);
767  ret = rz_asm_disassemble(a, &op, buf + idx, len - idx);
768  if (ret < 1) {
769  ret = 1;
770  }
771  if (a->ofilter) {
772  parseHeap(a->ofilter, &op.buf_asm);
773  }
774  rz_strbuf_append(buf_asm, rz_strbuf_get(&op.buf_asm));
775  rz_strbuf_append(buf_asm, "\n");
776  }
777  rz_asm_op_fini(&op);
778  acode->assembly = rz_strbuf_drain(buf_asm);
779  acode->len = idx;
780  return acode;
781 }
782 
784  ut8 *buf = malloc(strlen(hexstr) + 1);
785  if (!buf) {
786  return NULL;
787  }
788  int len = rz_hex_str2bin(hexstr, buf);
789  if (len < 1) {
790  free(buf);
791  return NULL;
792  }
794  if (ret && p) {
795  char *tmp = rz_parse_pseudocode(p, ret->assembly);
796  if (tmp) {
797  free(ret->assembly);
798  ret->assembly = tmp;
799  }
800  }
801  free(buf);
802  return ret;
803 }
804 
805 static void __flag_free_kv(HtPPKv *kv) {
806  free(kv->key);
807  free(kv->value);
808 }
809 
/**
 * \brief Duplicates a flag value, which is a NUL-terminated string.
 * \param v String to copy.
 * \return Newly allocated copy (to be released with free()), or NULL when
 *         the allocation fails.
 */
static void *__dup_val(const void *v) {
	const char *text = v;
	return strdup(text);
}
813 
814 RZ_API RzAsmCode *rz_asm_massemble(RzAsm *a, const char *assembly) {
815  int num, stage, ret, idx, ctr, i, linenum = 0;
816  char *lbuf = NULL, *ptr2, *ptr = NULL, *ptr_start = NULL;
817  const char *asmcpu = NULL;
818  RzAsmCode *acode = NULL;
819  RzAsmOp op = { 0 };
820  ut64 off, pc;
821 
822  char *buf_token = NULL;
823  size_t tokens_size = 32;
824  char **tokens = calloc(sizeof(char *), tokens_size);
825  if (!tokens) {
826  return NULL;
827  }
828  if (!assembly) {
829  free(tokens);
830  return NULL;
831  }
832  ht_pp_free(a->flags);
833  if (!(a->flags = ht_pp_new(__dup_val, __flag_free_kv, NULL))) {
834  free(tokens);
835  return NULL;
836  }
837  if (!(acode = rz_asm_code_new())) {
838  free(tokens);
839  return NULL;
840  }
841  if (!(acode->assembly = malloc(strlen(assembly) + 16))) {
842  free(tokens);
843  return rz_asm_code_free(acode);
844  }
845  rz_str_ncpy(acode->assembly, assembly, sizeof(acode->assembly) - 1);
846  if (!(acode->bytes = calloc(1, 64))) {
847  free(tokens);
848  return rz_asm_code_free(acode);
849  }
850  lbuf = strdup(assembly);
851  acode->code_align = 0;
852 
853  /* consider ,, an alias for a newline */
854  lbuf = rz_str_replace(lbuf, ",,", "\n", true);
855  /* accept ';' as comments when input is multiline */
856  {
857  char *nl = strchr(lbuf, '\n');
858  if (nl) {
859  if (strchr(nl + 1, '\n')) {
860  rz_str_replace_char(lbuf, ';', '#');
861  }
862  }
863  }
864  // XXX: ops like mov eax, $pc+33 fail coz '+' is not a valid number!!!
865  // XXX: must be handled here to be global.. and not arch-specific
866  {
867  char val[32];
868  snprintf(val, sizeof(val), "0x%" PFMT64x, a->pc);
869  lbuf = rz_str_replace(lbuf, "$$", val, 1);
870  }
871  if (a->syscall) {
872  char val[32];
873  char *aa, *p = strstr(lbuf, "$sys.");
874  while (p) {
875  char *sp = (char *)rz_str_closer_chr(p, " \n\r#");
876  if (sp) {
877  char osp = *sp;
878  *sp = 0;
879  aa = strdup(p);
880  *sp = osp;
881  num = rz_syscall_get_num(a->syscall, aa + 5);
882  snprintf(val, sizeof(val), "%d", num);
883  lbuf = rz_str_replace(lbuf, aa, val, 1);
884  free(aa);
885  }
886  p = strstr(p + 5, "$sys.");
887  }
888  }
889  bool labels = !!strchr(lbuf, ':');
890 
891  /* Tokenize */
892  for (tokens[0] = lbuf, ctr = 0;
893  ((ptr = strchr(tokens[ctr], ';')) ||
894  (ptr = strchr(tokens[ctr], '\n')) ||
895  (ptr = strchr(tokens[ctr], '\r')));) {
896  if (ctr + 1 >= tokens_size) {
897  const size_t new_tokens_size = tokens_size * 2;
898  if (sizeof(char *) * new_tokens_size <= sizeof(char *) * tokens_size) {
899  // overflow
900  RZ_LOG_ERROR("Too many tokens while assembling (overflow).\n");
901  goto fail;
902  }
903  char **new_tokens = realloc(tokens, sizeof(char *) * new_tokens_size);
904  if (!new_tokens) {
905  RZ_LOG_ERROR("Cannot reallocate meory for tokens while assembling.\n");
906  goto fail;
907  }
908  tokens_size = new_tokens_size;
909  tokens = new_tokens;
910  }
911  ctr++;
912  *ptr = '\0';
913  tokens[ctr] = ptr + 1;
914  }
915 
916 #define isavrseparator(x) ((x) == ' ' || (x) == '\t' || (x) == '\n' || (x) == '\r' || (x) == ' ' || \
917  (x) == ',' || (x) == ';' || (x) == '[' || (x) == ']' || \
918  (x) == '(' || (x) == ')' || (x) == '{' || (x) == '}')
919 
920  /* Stage 0-2: Parse labels*/
921  /* Stage 3: Assemble */
922 // XXX: stages must be dynamic. until all equs have been resolved
923 #define STAGES 5
924  pc = a->pc;
925  bool inComment = false;
926  for (stage = 0; stage < STAGES; stage++) {
927  if (stage < 2 && !labels) {
928  continue;
929  }
930  inComment = false;
931  rz_asm_set_pc(a, pc);
932  for (idx = ret = i = 0; i <= ctr; i++, idx += ret) {
933  buf_token = tokens[i];
934  if (!buf_token) {
935  continue;
936  }
937  if (inComment) {
938  if (!strncmp(ptr_start, "*/", 2)) {
939  inComment = false;
940  }
941  continue;
942  }
943  // XXX TODO remove arch-specific hacks
944  if (!strncmp(a->cur->arch, "avr", 3)) {
945  for (ptr_start = buf_token; *ptr_start && isavrseparator(*ptr_start); ptr_start++)
946  ;
947  } else {
948  for (ptr_start = buf_token; *ptr_start && IS_SEPARATOR(*ptr_start); ptr_start++)
949  ;
950  }
951  if (!strncmp(ptr_start, "/*", 2)) {
952  if (!strstr(ptr_start + 2, "*/")) {
953  inComment = true;
954  }
955  continue;
956  }
957  /* Comments */ {
958  bool likely_comment = true;
959  char *cptr = strchr(ptr_start, ',');
960  ptr = strchr(ptr_start, '#');
961  // a comma is probably not followed by a comment
962  // 8051 often uses #symbol notation as 2nd arg
963  if (cptr && ptr && cptr < ptr) {
964  likely_comment = false;
965  for (cptr += 1; cptr < ptr; cptr += 1) {
966  if (!isspace(*cptr)) {
967  likely_comment = true;
968  break;
969  }
970  }
971  }
972  // # followed by number literal also
973  // isn't likely to be a comment
974  likely_comment = likely_comment && ptr && !RZ_BETWEEN('0', ptr[1], '9') && ptr[1] != '-';
975  if (likely_comment) {
976  *ptr = '\0';
977  }
978  }
979  rz_asm_set_pc(a, a->pc + ret);
980  off = a->pc;
981  ret = 0;
982  if (!*ptr_start) {
983  continue;
984  }
985  linenum++;
986  /* labels */
987  if (labels && (ptr = strchr(ptr_start, ':'))) {
988  bool is_a_label = true;
989  char *q = ptr_start;
990  while (*q) {
991  if (*q == ' ') {
992  is_a_label = false;
993  break;
994  }
995  q++;
996  }
997  if (is_a_label) {
998  // if (stage != 2) {
999  if (ptr_start[1] && ptr_start[1] != ' ') {
1000  *ptr = 0;
1001  char *p = strdup(ptr_start);
1002  *ptr = ':';
1003  if (acode->code_align) {
1004  off += (acode->code_align - (off % acode->code_align));
1005  }
1006  char *food = rz_str_newf("0x%" PFMT64x, off);
1007  ht_pp_insert(a->flags, ptr_start, food);
1008  rz_asm_code_set_equ(acode, p, food);
1009  free(p);
1010  free(food);
1011  }
1012  //}
1013  ptr_start = ptr + 1;
1014  }
1015  }
1016  if (!*ptr_start) {
1017  ret = 0;
1018  continue;
1019  }
1020  if (*ptr_start == '.') { /* pseudo */
1021  /* TODO: move into a separate function */
1022  ptr = ptr_start;
1023  rz_str_trim(ptr);
1024  if (!strncmp(ptr, ".intel_syntax", 13)) {
1025  a->syntax = RZ_ASM_SYNTAX_INTEL;
1026  } else if (!strncmp(ptr, ".att_syntax", 11)) {
1027  a->syntax = RZ_ASM_SYNTAX_ATT;
1028  } else if (!strncmp(ptr, ".endian", 7)) {
1029  rz_asm_set_big_endian(a, atoi(ptr + 7));
1030  } else if (!strncmp(ptr, ".big_endian", 7 + 4)) {
1031  rz_asm_set_big_endian(a, true);
1032  } else if (!strncmp(ptr, ".lil_endian", 7 + 4) || !strncmp(ptr, "little_endian", 7 + 6)) {
1033  rz_asm_set_big_endian(a, false);
1034  } else if (!strncmp(ptr, ".asciz", 6)) {
1035  rz_str_trim(ptr + 8);
1036  ret = rz_asm_pseudo_string(&op, ptr + 8, 1);
1037  } else if (!strncmp(ptr, ".string ", 8)) {
1038  rz_str_trim(ptr + 8);
1039  char *str = strdup(ptr + 8);
1040  ret = rz_asm_pseudo_string(&op, str, 1);
1041  free(str);
1042  } else if (!strncmp(ptr, ".ascii", 6)) {
1043  ret = rz_asm_pseudo_string(&op, ptr + 7, 0);
1044  } else if (!strncmp(ptr, ".align", 6)) {
1045  ret = rz_asm_pseudo_align(acode, &op, ptr + 7);
1046  } else if (!strncmp(ptr, ".arm", 4)) {
1047  rz_asm_use(a, "arm");
1048  rz_asm_set_bits(a, 32);
1049  ret = 0;
1050  } else if (!strncmp(ptr, ".thumb", 6)) {
1051  rz_asm_use(a, "arm");
1052  rz_asm_set_bits(a, 16);
1053  ret = 0;
1054  } else if (!strncmp(ptr, ".arch ", 6)) {
1055  ret = rz_asm_pseudo_arch(a, ptr + 6);
1056  } else if (!strncmp(ptr, ".bits ", 6)) {
1057  ret = rz_asm_pseudo_bits(a, ptr + 6);
1058  } else if (!strncmp(ptr, ".fill ", 6)) {
1059  ret = rz_asm_pseudo_fill(&op, ptr + 6);
1060  } else if (!strncmp(ptr, ".kernel ", 8)) {
1061  rz_syscall_setup(a->syscall, a->cur->arch, a->bits, asmcpu, ptr + 8);
1062  } else if (!strncmp(ptr, ".cpu ", 5)) {
1063  rz_asm_set_cpu(a, ptr + 5);
1064  } else if (!strncmp(ptr, ".os ", 4)) {
1065  rz_syscall_setup(a->syscall, a->cur->arch, a->bits, asmcpu, ptr + 4);
1066  } else if (!strncmp(ptr, ".hex ", 5)) {
1067  ret = rz_asm_op_set_hex(&op, ptr + 5);
1068  } else if ((!strncmp(ptr, ".int16 ", 7)) || !strncmp(ptr, ".short ", 7)) {
1069  ret = rz_asm_pseudo_int16(a, &op, ptr + 7);
1070  } else if (!strncmp(ptr, ".int32 ", 7)) {
1071  ret = rz_asm_pseudo_int32(a, &op, ptr + 7);
1072  } else if (!strncmp(ptr, ".int64 ", 7)) {
1073  ret = rz_asm_pseudo_int64(a, &op, ptr + 7);
1074  } else if (!strncmp(ptr, ".size", 5)) {
1075  ret = true; // do nothing, ignored
1076  } else if (!strncmp(ptr, ".section", 8)) {
1077  ret = true; // do nothing, ignored
1078  } else if ((!strncmp(ptr, ".byte ", 6)) || (!strncmp(ptr, ".int8 ", 6))) {
1079  ret = rz_asm_pseudo_byte(&op, ptr + 6);
1080  } else if (!strncmp(ptr, ".glob", 5)) { // .global .globl
1081  RZ_LOG_DEBUG(".global directive not yet implemented\n");
1082  ret = 0;
1083  continue;
1084  } else if (!strncmp(ptr, ".equ ", 5)) {
1085  ptr2 = strchr(ptr + 5, ',');
1086  if (!ptr2) {
1087  ptr2 = strchr(ptr + 5, '=');
1088  }
1089  if (!ptr2) {
1090  ptr2 = strchr(ptr + 5, ' ');
1091  }
1092  if (ptr2) {
1093  *ptr2 = '\0';
1094  rz_asm_code_set_equ(acode, ptr + 5, ptr2 + 1);
1095  } else {
1096  RZ_LOG_ERROR("Invalid syntax for '.equ': Use '.equ <word> <word>'\n");
1097  }
1098  } else if (!strncmp(ptr, ".org ", 5)) {
1099  ret = rz_asm_pseudo_org(a, ptr + 5);
1100  } else if (rz_str_startswith(ptr, ".offset ")) {
1101  RZ_LOG_ERROR("Invalid use of the .offset directory. This directive is only supported in rizin -c 'waf'.\n");
1102  } else if (!strncmp(ptr, ".text", 5)) {
1103  acode->code_offset = a->pc;
1104  } else if (!strncmp(ptr, ".data", 5)) {
1105  acode->data_offset = a->pc;
1106  } else if (!strncmp(ptr, ".incbin", 7)) {
1107  if (ptr[7] != ' ') {
1108  RZ_LOG_ERROR("Invalid syntax for '.incbin': Use '.incbin <filename>'\n");
1109  continue;
1110  }
1111  ret = rz_asm_pseudo_incbin(&op, ptr + 8);
1112  } else {
1113  RZ_LOG_ERROR("Unknown directive named '%s'\n", ptr);
1114  goto fail;
1115  }
1116  if (!ret) {
1117  continue;
1118  }
1119  if (ret < 0) {
1120  RZ_LOG_ERROR("Something went wrong when handling the directive '%s'.\n", ptr);
1121  goto fail;
1122  }
1123  } else { /* Instruction */
1124  char *str = ptr_start;
1125  rz_str_trim(str);
1126  if (acode->equs) {
1127  if (!*ptr_start) {
1128  continue;
1129  }
1130  str = rz_asm_code_equ_replace(acode, strdup(ptr_start));
1131  rz_asm_op_fini(&op);
1132  rz_asm_op_init(&op);
1133  ret = rz_asm_assemble(a, &op, str);
1134  free(str);
1135  } else {
1136  if (!*ptr_start) {
1137  continue;
1138  }
1139  rz_asm_op_fini(&op);
1140  rz_asm_op_init(&op);
1141  ret = rz_asm_assemble(a, &op, ptr_start);
1142  }
1143  }
1144  if (stage == STAGES - 1) {
1145  if (ret < 1) {
1146  RZ_LOG_ERROR("Cannot assemble '%s' at line %d\n", ptr_start, linenum);
1147  goto fail;
1148  }
1149  acode->len = idx + ret;
1150  char *newbuf = realloc(acode->bytes, (idx + ret) * 2);
1151  if (!newbuf) {
1152  goto fail;
1153  }
1154  acode->bytes = (ut8 *)newbuf;
1155  memcpy(acode->bytes + idx, rz_strbuf_get(&op.buf), rz_strbuf_length(&op.buf));
1156  memset(acode->bytes + idx + ret, 0, idx + ret);
1157  if (op.buf_inc && rz_buf_size(op.buf_inc) > 1) {
1158  char *inc = rz_buf_to_string(op.buf_inc);
1159  rz_buf_free(op.buf_inc);
1160  op.buf_inc = NULL;
1161  if (inc) {
1162  ret += rz_hex_str2bin(inc, acode->bytes + idx + ret);
1163  free(inc);
1164  }
1165  }
1166  }
1167  }
1168  }
1169  rz_asm_op_fini(&op);
1170  free(lbuf);
1171  free(tokens);
1172  return acode;
1173 fail:
1174  rz_asm_op_fini(&op);
1175  free(lbuf);
1176  free(tokens);
1177  return rz_asm_code_free(acode);
1178 }
1179 
1180 RZ_API int rz_asm_get_offset(RzAsm *a, int type, int idx) { // link to rbin
1181  if (a && a->binb.bin && a->binb.get_offset) {
1182  return a->binb.get_offset(a->binb.bin, type, idx);
1183  }
1184  return -1;
1185 }
1186 
1187 RZ_API char *rz_asm_describe(RzAsm *a, const char *str) {
1188  return (a && a->pair) ? sdb_get(a->pair, str, 0) : NULL;
1189 }
1190 
1192  return a->plugins;
1193 }
1194 
1195 RZ_API bool rz_asm_set_arch(RzAsm *a, const char *name, int bits) {
1196  return rz_asm_use(a, name) ? rz_asm_set_bits(a, bits) : false;
1197 }
1198 
// Sets the program counter of `a` to `addr` and returns the `assembly` text of
// an RzAsmCode result, detaching the string from `code` first so that the
// caller becomes its owner. Returns NULL when no result is available or a
// precondition (`a`, `b`, `l >= 0`) is violated.
// NOTE(review): the embedded numbering skips lines 1203 and 1207 — the
// statement that produces `code` and, presumably, the one that releases it
// are not visible in this listing.
1199 /* to ease the use of the native bindings (not used in rizin) */
1200 RZ_API char *rz_asm_to_string(RzAsm *a, ut64 addr, const ut8 *b, int l) {
1201  rz_return_val_if_fail(a && b && l >= 0, NULL);
1202  rz_asm_set_pc(a, addr);
1204  if (code) {
1205  char *buf_asm = code->assembly;
// Detach the string so it is not owned by `code` any more.
1206  code->assembly = NULL;
1208  return buf_asm;
1209  }
1210  return NULL;
1211 }
1212 
// Sets the program counter of `a` to `addr` and returns the `bytes` buffer of
// an RzAsmCode result; when `l` is non-NULL it receives the result's length.
// Returns NULL when no result is available.
// NOTE(review): the embedded numbering skips lines 1215 and 1221 — the
// statement that produces `code` and whatever happens to `code` before the
// return are not visible in this listing.
1213 RZ_API ut8 *rz_asm_from_string(RzAsm *a, ut64 addr, const char *b, int *l) {
1214  rz_asm_set_pc(a, addr);
1216  if (code) {
1217  ut8 *buf = code->bytes;
1218  if (l) {
1219  *l = code->len;
1220  }
1222  return buf;
1223  }
1224  return NULL;
1225 }
1226 
1229  if (!strcmp(name, "regnum")) {
1230  return RZ_ASM_SYNTAX_REGNUM;
1231  }
1232  if (!strcmp(name, "jz")) {
1233  return RZ_ASM_SYNTAX_JZ;
1234  }
1235  if (!strcmp(name, "intel")) {
1236  return RZ_ASM_SYNTAX_INTEL;
1237  }
1238  if (!strcmp(name, "masm")) {
1239  return RZ_ASM_SYNTAX_MASM;
1240  }
1241  if (!strcmp(name, "att")) {
1242  return RZ_ASM_SYNTAX_ATT;
1243  }
1244  return -1;
1245 }
1246 
1247 RZ_API char *rz_asm_mnemonics(RzAsm *a, int id, bool json) {
1248  rz_return_val_if_fail(a && a->cur, NULL);
1249  if (a->cur->mnemonics) {
1250  return a->cur->mnemonics(a, id, json);
1251  }
1252  return NULL;
1253 }
1254 
1256  rz_return_val_if_fail(a && a->cur, 0);
1257  if (a->cur->mnemonics) {
1258  int i;
1259  for (i = 0; i < 1024; i++) {
1260  char *n = a->cur->mnemonics(a, i, false);
1261  if (n && !strcmp(n, name)) {
1262  return i;
1263  }
1264  free(n);
1265  }
1266  }
1267  return 0;
1268 }
1269 
1270 RZ_API RzAsmCode *rz_asm_rasm_assemble(RzAsm *a, const char *buf, bool use_spp) {
1272  char *lbuf = strdup(buf);
1273  if (!lbuf) {
1274  return NULL;
1275  }
1276  RzAsmCode *acode;
1277  if (use_spp) {
1278  Output out;
1279  out.fout = NULL;
1280  out.cout = rz_strbuf_new("");
1281  rz_strbuf_init(out.cout);
1282  struct Proc proc;
1283  spp_proc_set(&proc, "spp", 1);
1284 
1285  lbuf = replace_directives(lbuf);
1286  spp_eval(lbuf, &out);
1287  free(lbuf);
1288  lbuf = strdup(rz_strbuf_get(out.cout));
1289  }
1290  acode = rz_asm_massemble(a, lbuf);
1291  free(lbuf);
1292  return acode;
1293 }
1294 
1297  if (!s) {
1298  return NULL;
1299  }
1300  s->tokens = rz_vector_new(sizeof(RzAsmToken), NULL, NULL);
1301  s->str = rz_strbuf_new(asm_str);
1302  rz_return_val_if_fail(s->tokens && s->str, NULL);
1303  return s;
1304 }
1305 
1307  if (!toks) {
1308  return;
1309  }
1310  rz_strbuf_free(toks->str);
1311  rz_vector_free(toks->tokens);
1312  free(toks);
1313 }
1314 
1316  rz_return_val_if_fail(toks, NULL);
1317 
1319  if (!newt) {
1320  return NULL;
1321  }
1322  newt->tokens = rz_vector_clone(toks->tokens);
1323  newt->str = rz_strbuf_new(rz_strbuf_get(toks->str));
1324  newt->op_type = toks->op_type;
1325 
1326  if (!(newt->tokens && newt->str)) {
1327  free(newt);
1328  return NULL;
1329  }
1330  return newt;
1331 }
1332 
1334  if (!p) {
1335  return;
1336  }
1338  free(pat->pattern);
1339  rz_regex_free(pat->regex);
1340  free(p);
1341 }
1342 
1352 static RZ_OWN RzAsmToken *asm_token_create(const size_t start, const size_t len, const RzAsmTokenType type, const ut64 val) {
1355  if (!t) {
1356  return NULL;
1357  }
1358 
1359  t->start = start;
1360  t->type = type;
1361  t->len = len;
1362  switch (type) {
1363  default:
1364  break;
1365  case RZ_ASM_TOKEN_NUMBER:
1366  t->val.number = val;
1367  break;
1368  }
1369  return t;
1370 }
1371 
1381 static void add_token(RZ_OUT RzAsmTokenString *toks, const size_t i, const size_t l, const RzAsmTokenType type, const ut64 token_val) {
1382  rz_return_if_fail(toks);
1383  RzAsmToken *t = asm_token_create(i, l, type, token_val);
1384  if (!t) {
1385  RZ_LOG_WARN("Failed to create token. Asm strings will be flawed.\n");
1387  return;
1388  }
1389 
1390  rz_vector_push(toks->tokens, t);
1391 }
1392 
1402 static bool overlaps_with_token(RZ_BORROW RzVector /*<RzAsmTokenString>*/ *toks, const size_t s, const size_t e) {
1403  rz_return_val_if_fail(toks, false);
1404  size_t x, y; // Other tokens start/end
1405  RzAsmToken *it;
1406  rz_vector_foreach(toks, it) {
1407  x = it->start;
1408  y = it->start + it->len - 1;
1409  if (!(s > y || e < x)) { // s:e not outside of x:y
1410  return true;
1411  }
1412  }
1413  return false;
1414 }
1415 
1416 static int cmp_tokens(const RzAsmToken *a, const RzAsmToken *b) {
1417  rz_return_val_if_fail(a && b, 0);
1418  if (a->start < b->start) {
1419  return -1;
1420  } else if (a->start > b->start) {
1421  return 1;
1422  }
1423  return 0;
1424 }
1425 
1427  rz_return_if_fail(toks);
1428  if (rz_vector_len(toks->tokens) == 0) {
1429  RZ_LOG_WARN("No tokens given.\n");
1430  return;
1431  }
1432  bool error = false;
1433  // Check if all characters belong to a token.
1434  RzAsmToken *cur, *prev = NULL;
1435  int i = 0;
1436  ut32 ci, cj, pi, pj; // Current and previous token indices.
1437  rz_vector_foreach(toks->tokens, cur) {
1438  if (i == cur->start) {
1439  prev = cur;
1440  i = cur->start + cur->len;
1441  continue;
1442  }
1443  ci = cur->start;
1444  cj = cur->start + cur->len;
1445  pi = prev ? prev->start : 0;
1446  pj = prev ? prev->start + prev->len : 0;
1447  if (i > cur->start) {
1448  RZ_LOG_WARN("i = %" PFMT32d " Token at %" PFMT32d ":%" PFMT32d " overlaps with token %" PFMT32d ":%" PFMT32d "\n",
1449  i, pi, pj, ci, cj);
1450  error = true;
1451  } else {
1452  RZ_LOG_WARN("i = %" PFMT32d ", Part of asm string is not covered by a token."
1453  " Empty range between token %" PFMT32d ":%" PFMT32d " and token %" PFMT32d ":%" PFMT32d "\n",
1454  i, pi, pj, ci, cj);
1455  error = true;
1456  }
1457  i = cur->start + cur->len;
1458  prev = cur;
1459  }
1460  if (error) {
1461  RZ_LOG_WARN("Parsing errors in asm str: %s\n", rz_strbuf_get(toks->str));
1462  }
1463 }
1464 
1473  rz_return_val_if_fail(asm_str && patterns, NULL);
1474 
1475  const char *str = rz_strbuf_get(asm_str);
1476  RzRegexMatch m[1];
1477  size_t j = 0; // Offset into str. Regex patterns are only searched in substring str[j:].
1478  st64 i = 0; // Start of token in str.
1479  st64 s = 0; // Start of matched token in substring str[j:]
1480  st64 l = 0; // Length of token.
1482  void **it;
1484  RzAsmTokenPattern *pat = *it;
1485  rz_return_val_if_fail(pat && pat->regex, NULL);
1486  j = 0;
1487  if (!pat->regex) {
1488  continue;
1489  }
1490  while (rz_regex_exec(pat->regex, str + j, 1, m, 0) == 0) {
1491  s = m[0].rm_so; // Token start in substring str[j:]
1492  l = m[0].rm_eo - s; // (End in substring str[j:]) - (start in substring str[j:]) = Length of token.
1493  i = j + s; // Start of token in str.
1494  if (overlaps_with_token(toks->tokens, i, i + l - 1)) {
1495  // If this is true a token with higher priority was matched before.
1496  j = i + l;
1497  continue;
1498  }
1499  if (!is_num(str + i)) {
1500  add_token(toks, i, l, pat->type, 0);
1501  j = i + l;
1502  continue;
1503  }
1504  add_token(toks, i, l, pat->type, strtoull(str + i, NULL, 0));
1505  j = i + l;
1506  }
1507  }
1508 
1510  check_token_coverage(toks);
1511 
1512  return toks;
1513 }
1514 
1523 static size_t seek_to_end_of_token(const char *str, size_t i, RzAsmTokenType type) {
1525  size_t j = i;
1526 
1527  switch (type) {
1528  default:
1530  break;
1531  case RZ_ASM_TOKEN_MNEMONIC:
1532  case RZ_ASM_TOKEN_REGISTER:
1533  do {
1534  ++j;
1535  } while (is_alpha_num(str + j));
1536  break;
1537  case RZ_ASM_TOKEN_NUMBER:
1538  do {
1539  if (rz_num_is_hex_prefix(str + j)) {
1540  j += 2;
1541  } else {
1542  ++j;
1543  }
1544  } while (is_num(str + j));
1545  break;
1547  do {
1548  ++j;
1549  } while (is_separator(str + j));
1550  break;
1551  case RZ_ASM_TOKEN_OPERATOR:
1552  do {
1553  ++j;
1554  } while (is_operator(str + j));
1555  break;
1556  case RZ_ASM_TOKEN_UNKNOWN:
1557  do {
1558  ++j;
1559  } while (!isascii(*(str + j)) && !is_operator(str + j) && !is_separator(str + j) && !is_alpha_num(str + j));
1560  }
1561  return j - i;
1562 }
1563 
1574  rz_return_val_if_fail(asm_str, NULL);
1575  if (rz_strbuf_is_empty(asm_str)) {
1576  return NULL;
1577  }
1578  // Splitting the asm string into tokens is relatively straight forward.
1579  //
1580  // The target is to split an asm string into separate tokens of a given type.
1581  // For example:
1582  //
1583  // Asm string: `mov eax, 0x122`
1584  //
1585  // is split into:
1586  // `mov` : Mnemonic token
1587  // ` ` : Separator token
1588  // `eax` : Register token
1589  // `, ` : Separator token
1590  // `0x122` : Number token
1591  //
1592  // In order to do this we associate certain characters with a token type.
1593  //
1594  // E.g. alphanumeric characters are associated with numbers, registers and mnemonics.
1595  // Comma and brackets are interpreted as separators.
1596  // Plus, minus and pipe are associated with the operator token type and so forth.
1597  //
1598  // A sequence of characters of the same type are interpreted as a token.
1599  //
1600  // For example: `lr` could be a mnemonic or a special register.
1601  //
1602  // In this generic method we ignore these ambiguities and parse the first alphabetic token always as mnemonic
1603  // and alphabetic tokens after that as registers/unknowns.
1604  //
1605  // To extract the tokens we set the following variables:
1606  // `i = 0` // Start of token
1607  // `l = 0` // Length of token.
1608  // `i + l` // Is the start of the next token.
1609  //
1610  // Parsing is done sequentially:
1611  // - The character at `str[i]` determines the token type.
1612  // - Iterate over characters from `i` on and stop if a character of another token type appears (char at `str[i + l]`).
1613  // - Create token from `i` to `i + l - 1` with length `l`.
1614  // - Start again from `i + l`
1615 
1616  const char *str = rz_strbuf_get(asm_str);
1617  if (!str) {
1618  return NULL;
1619  }
1621  if (!toks) {
1622  return NULL;
1623  }
1624  // Start of token.
1625  size_t i = 0;
1626  // Length of token.
1627  size_t l = 0;
1628  // Set flag once the mnemonic was parsed
1629  // The mnemonic is the first token in our string which ends with an ' '
1630  // Some mnemonics are not at the beginning of the string
1631  // and have only hexadecimal digits. It is too complicated to handle those.
1632  // In this case the plugin should build its own token strings.
1633  bool mnemonic_parsed = false;
1634 
1635  while (str[i]) {
1636  // Alphanumeric tokens
1637  if (is_alpha_num(str + i)) {
1638  bool is_number = false;
1639  bool prefix_less_hex = false;
1640  if (isxdigit(*(str + i)) && mnemonic_parsed) {
1641  // Registers, mnemonics and hexadecimal numbers can be ambiguous.
1642  // E.g. "eax" could be parsed as hex number token "ea".
1643  // "ac0" could be a prefixless hexnumber or a register.
1644  // To solve this we do:
1645  //
1646  // Step 1:
1647  // Here we try to parse a number and check:
1648  // A. the character after the number token
1649  // B. if the number token starts with the hex prefix "0x"
1650  // Step 2:
1651  // A: If the char after the number token is an alphabetic char (like the "x" in "eax"),
1652  // the token isn't a number.
1653  // B: If it could be a hex number but has no prefix, a flag is set.
1654  // In this case we only mark it as number if it is not in the register profile.
1655 
1657  if (!str[i + l]) { // End of asm string => token is number.
1658  prefix_less_hex = !rz_num_is_hex_prefix(str + i);
1659  is_number = true;
1660  } else if (!isalpha(str[i + l])) { // Next char is something non alphabetic => Treat as number.
1661  prefix_less_hex = !rz_num_is_hex_prefix(str + i);
1662  is_number = true;
1663  }
1664  }
1665 
1666  if (is_number && !prefix_less_hex) {
1667  // Parse tokens which are definitely a number.
1668  add_token(toks, i, l, RZ_ASM_TOKEN_NUMBER, strtoull(str + i, NULL, 0));
1669  } else if (mnemonic_parsed) {
1671  char *op_name = rz_str_ndup(str + i, l);
1672  if (param && is_register(op_name, param->reg_sets)) {
1673  add_token(toks, i, l, RZ_ASM_TOKEN_REGISTER, 0);
1674  } else if (prefix_less_hex) {
1675  // It wasn't a register but still could be a prefixless hex number.
1676  add_token(toks, i, l, RZ_ASM_TOKEN_NUMBER, strtoull(str + i, NULL, 0));
1677  } else {
1678  // Didn't match any of the before. Mark as unknown.
1679  add_token(toks, i, l, RZ_ASM_TOKEN_UNKNOWN, 0);
1680  }
1681  free(op_name);
1682  } else {
1683  mnemonic_parsed = true;
1685  if (*(str + i + l) != ' ') {
1686  // Mnemonics can contain dots and other separators.
1687  // Example ARM asm string: "adc.w r8, sb, sl, ror 31"
1688  // Here we seek past the first separator.
1690  }
1691  add_token(toks, i, l, RZ_ASM_TOKEN_MNEMONIC, 0);
1692  }
1693  } else if (is_operator(str + i)) {
1695  add_token(toks, i, l, RZ_ASM_TOKEN_OPERATOR, 0);
1696  } else if (is_separator(str + i)) {
1698  add_token(toks, i, l, RZ_ASM_TOKEN_SEPARATOR, 0);
1699  } else {
1700  // Unknown tokens. UTF-8 and others.
1702  add_token(toks, i, l, RZ_ASM_TOKEN_UNKNOWN, 0);
1703  }
1704  i = i + l;
1705  }
1706  return toks;
1707 }
1708 
1719  rz_return_val_if_fail(asm_str, NULL);
1720 
1721  return tokenize_asm_generic(asm_str, param);
1722 }
1723 
1742  RzStrBuf *colored_asm;
1743  RzAsmTokenString *ts;
1744  if (toks) {
1745  colored_asm = rz_print_colorize_asm_str(p, toks);
1746  } else {
1747  ts = rz_asm_tokenize_asm_string(asm_str, param);
1748  ts->op_type = param ? param->ana_op_type : 0;
1749  colored_asm = rz_print_colorize_asm_str(p, ts);
1750  }
1751  if (!toks) {
1753  }
1754  return colored_asm;
1755 }
1756 
1764  if (!reg) {
1765  return NULL;
1766  }
1768  param->reg_sets = reg->regset;
1769  param->ana_op_type = ana_op_type;
1770  return param;
1771 }
size_t len
Definition: 6502dis.c:15
ut8 op
Definition: 6502dis.c:13
RZ_API void * rz_asm_code_free(RzAsmCode *acode)
Definition: acode.c:11
RZ_API bool rz_asm_code_set_equ(RzAsmCode *code, const char *key, const char *value)
Definition: acode.c:38
RZ_API RzAsmCode * rz_asm_code_new(void)
Definition: acode.c:7
RZ_API char * rz_asm_code_equ_replace(RzAsmCode *code, char *str)
Definition: acode.c:58
#define e(frag)
static ut32 cpu[32]
Definition: analysis_or1k.c:21
RZ_API void rz_asm_op_fini(RzAsmOp *op)
Definition: aop.c:21
RZ_API void rz_asm_op_set_buf(RzAsmOp *op, const ut8 *buf, int len)
Definition: aop.c:83
RZ_API void rz_asm_op_init(RzAsmOp *op)
Definition: aop.c:15
RZ_API int rz_asm_op_set_hex(RzAsmOp *op, const char *str)
Definition: aop.c:58
RZ_API void rz_asm_op_set_asm(RzAsmOp *op, const char *str)
Definition: aop.c:53
lzma_index ** i
Definition: index.h:629
ut16 val
Definition: armass64_const.h:6
static bool overlaps_with_token(RZ_BORROW RzVector *toks, const size_t s, const size_t e)
Checks if indicies s, e overlap with other tokens start/end.
Definition: asm.c:1402
static RZ_OWN RzAsmToken * asm_token_create(const size_t start, const size_t len, const RzAsmTokenType type, const ut64 val)
Creates a token and returns it.
Definition: asm.c:1352
static char * directives[]
Definition: asm.c:89
RZ_API int rz_asm_mnemonics_byname(RzAsm *a, const char *name)
Definition: asm.c:1255
static int rz_asm_pseudo_byte(RzAsmOp *op, char *input)
Definition: asm.c:189
RZ_API char * rz_asm_to_string(RzAsm *a, ut64 addr, const ut8 *b, int l)
Definition: asm.c:1200
RZ_API RzAsmCode * rz_asm_mdisassemble(RzAsm *a, const ut8 *buf, int len)
Definition: asm.c:743
RZ_API RZ_OWN RzAsmTokenString * rz_asm_tokenize_asm_regex(RZ_BORROW RzStrBuf *asm_str, RzPVector *patterns)
Splits an asm string into tokens by using the given regex patterns.
Definition: asm.c:1472
static bool is_separator(const char *c)
Definition: asm.c:53
static bool is_num(const char *c)
Checks if the first character of c is a digit character OR if the first two chars are a hex prefix.
Definition: asm.c:30
RZ_API bool rz_asm_add(RzAsm *a, RzAsmPlugin *p)
Definition: asm.c:333
RZ_API void rz_asm_list_directives(void)
Definition: asm.c:687
static bool is_alpha_num(const char *c)
Checks if the first character of c is an alphanumeric character OR if it is a hex prefix.
Definition: asm.c:45
static int rz_asm_pseudo_bits(RzAsm *a, char *input)
Definition: asm.c:137
static char * replace_directives(char *str)
Definition: asm.c:676
RZ_API void rz_asm_free(RzAsm *a)
Definition: asm.c:315
static int rz_asm_pseudo_string(RzAsmOp *op, char *input, int zero)
Definition: asm.c:112
RZ_LIB_VERSION(rz_asm)
static bool has_bits(RzAsmPlugin *h, int bits)
Definition: asm.c:484
RZ_API int rz_asm_syntax_from_string(const char *name)
Definition: asm.c:1227
static int rz_asm_pseudo_arch(RzAsm *a, char *input)
Definition: asm.c:129
RZ_API bool rz_asm_set_big_endian(RzAsm *a, bool b)
Definition: asm.c:496
RZ_API bool rz_asm_is_valid(RzAsm *a, const char *name)
Definition: asm.c:349
static void set_plugin_configs(RZ_BORROW RzAsm *rz_asm, RZ_BORROW RzConfig *pcfg)
Copies all config nodes in pcfg to the config in rz_asm.
Definition: asm.c:386
RZ_API RzAsm * rz_asm_new(void)
Definition: asm.c:262
RZ_API void rz_asm_token_pattern_free(void *p)
Definition: asm.c:1333
static RzAsmPlugin * asm_static_plugins[]
Definition: asm.c:95
static void * __dup_val(const void *v)
Definition: asm.c:810
static void __flag_free_kv(HtPPKv *kv)
Definition: asm.c:805
RZ_DEPRECATE RZ_API int rz_asm_set_bits(RzAsm *a, int bits)
Definition: asm.c:488
RZ_API RZ_OWN RzAsmParseParam * rz_asm_get_parse_param(RZ_NULLABLE const RzReg *reg, ut32 ana_op_type)
Does all kinds of NULL checks on the parameters and returns an initialized RzAsmParseParam or NULL on...
Definition: asm.c:1763
RZ_API bool rz_asm_setup(RzAsm *a, const char *arch, int bits, int big_endian)
Definition: asm.c:283
static void check_token_coverage(RzAsmTokenString *toks)
Definition: asm.c:1426
#define isavrseparator(x)
RZ_API char * rz_asm_describe(RzAsm *a, const char *str)
Definition: asm.c:1187
static void unset_plugins_config(RZ_BORROW RzAsm *rz_asm, RZ_BORROW RzConfig *pcfg)
Deletes all copies of pcfg nodes in the RzConfig from rz_asm.
Definition: asm.c:406
static void plugin_fini(RzAsm *a)
Definition: asm.c:256
static size_t seek_to_end_of_token(const char *str, size_t i, RzAsmTokenType type)
Seeks to the end of the token at str + i and returns the length of it.
Definition: asm.c:1523
RZ_API int rz_asm_assemble(RzAsm *a, RzAsmOp *op, const char *buf)
Definition: asm.c:697
RZ_API RzAsmCode * rz_asm_rasm_assemble(RzAsm *a, const char *buf, bool use_spp)
Definition: asm.c:1270
int(* Ase)(RzAsm *a, RzAsmOp *op, const char *buf)
Definition: asm.c:610
static Ase findAssembler(RzAsm *a, const char *kw)
Definition: asm.c:619
RZ_API bool rz_asm_set_syntax(RzAsm *a, int syntax)
Definition: asm.c:518
RZ_DEPRECATE RZ_API RZ_OWN RzStrBuf * rz_asm_colorize_asm_str(RZ_BORROW RzStrBuf *asm_str, RZ_BORROW RzPrint *p, RZ_NULLABLE const RzAsmParseParam *param, RZ_NULLABLE const RzAsmTokenString *toks)
Colors a given asm string and returns it. If toks is not NULL it uses the tokens to color the asm str...
Definition: asm.c:1741
RZ_API char * rz_asm_mnemonics(RzAsm *a, int id, bool json)
Definition: asm.c:1247
#define STAGES
RZ_DEPRECATE RZ_API void rz_asm_set_cpu(RzAsm *a, const char *cpu)
Definition: asm.c:477
static int cmp_tokens(const RzAsmToken *a, const RzAsmToken *b)
Definition: asm.c:1416
RZ_API int rz_asm_set_pc(RzAsm *a, ut64 pc)
Definition: asm.c:533
RZ_API RZ_OWN RzAsmTokenString * rz_asm_token_string_new(const char *asm_str)
Definition: asm.c:1295
RZ_API RzAsmCode * rz_asm_massemble(RzAsm *a, const char *assembly)
Definition: asm.c:814
RZ_API bool rz_asm_use(RzAsm *a, const char *name)
Puts an Asm plugin in use and disables the previous one.
Definition: asm.c:429
static void parseHeap(RzParse *p, RzStrBuf *s)
Definition: asm.c:97
static bool assemblerMatches(RzAsm *a, RzAsmPlugin *h)
Definition: asm.c:612
static void add_token(RZ_OUT RzAsmTokenString *toks, const size_t i, const size_t l, const RzAsmTokenType type, const ut64 token_val)
Creates a token and adds it to the token string vector toks.
Definition: asm.c:1381
static int rz_asm_pseudo_intN(RzAsm *a, RzAsmOp *op, char *input, int n)
Definition: asm.c:150
RZ_API bool rz_asm_set_arch(RzAsm *a, const char *name, int bits)
Definition: asm.c:1195
RZ_API RzAsmCode * rz_asm_mdisassemble_hexstr(RzAsm *a, RzParse *p, const char *hexstr)
Definition: asm.c:783
static int rz_asm_pseudo_int64(RzAsm *a, RzAsmOp *op, char *input)
Definition: asm.c:185
RZ_API int rz_asm_sub_names_input(RzAsm *a, const char *f)
Definition: asm.c:290
static int rz_asm_pseudo_int16(RzAsm *a, RzAsmOp *op, char *input)
Definition: asm.c:177
static int rz_asm_pseudo_fill(RzAsmOp *op, char *input)
Definition: asm.c:208
RZ_API RzList * rz_asm_get_plugins(RzAsm *a)
Definition: asm.c:1191
static bool is_operator(const char *c)
Definition: asm.c:64
RZ_DEPRECATE RZ_API RZ_OWN RzAsmTokenString * rz_asm_tokenize_asm_string(RZ_BORROW RzStrBuf *asm_str, RZ_NULLABLE const RzAsmParseParam *param)
Parses an asm string generically. It parses the string like: <mnemmonic> <op>, <op>....
Definition: asm.c:1718
RZ_API int rz_asm_disassemble(RzAsm *a, RzAsmOp *op, const ut8 *buf, int len)
Definition: asm.c:543
RZ_API int rz_asm_del(RzAsm *a, const char *name)
Definition: asm.c:344
static char * replace_directives_for(char *str, char *token)
Definition: asm.c:640
static int rz_asm_pseudo_incbin(RzAsmOp *op, char *input)
Definition: asm.c:227
RZ_API RZ_OWN RzAsmTokenString * rz_asm_token_string_clone(RZ_OWN RZ_NONNULL RzAsmTokenString *toks)
Definition: asm.c:1315
static bool is_register(const char *name, RZ_BORROW const RzRegSet *regset)
Definition: asm.c:71
static bool __isInvalid(RzAsmOp *op)
Definition: asm.c:538
RZ_API bool rz_asm_use_assembler(RzAsm *a, const char *name)
Definition: asm.c:363
RZ_API int rz_asm_sub_names_output(RzAsm *a, const char *f)
Definition: asm.c:303
RZ_API ut8 * rz_asm_from_string(RzAsm *a, ut64 addr, const char *b, int *l)
Definition: asm.c:1213
static int rz_asm_pseudo_align(RzAsmCode *acode, RzAsmOp *op, char *input)
Definition: asm.c:107
RZ_API void rz_asm_token_string_free(RZ_OWN RzAsmTokenString *toks)
Definition: asm.c:1306
static RZ_OWN RzAsmTokenString * tokenize_asm_generic(RZ_BORROW RzStrBuf *asm_str, RZ_NULLABLE const RzAsmParseParam *param)
Parses an asm string into tokens.
Definition: asm.c:1573
static int rz_asm_pseudo_org(RzAsm *a, char *input)
Definition: asm.c:145
static int rz_asm_pseudo_int32(RzAsm *a, RzAsmOp *op, char *input)
Definition: asm.c:181
RZ_API int rz_asm_get_offset(RzAsm *a, int type, int idx)
Definition: asm.c:1180
static SblHeader sb
Definition: bin_mbn.c:26
int bits(struct state *s, int need)
Definition: blast.c:72
const lzma_allocator const uint8_t size_t uint8_t * out
Definition: block.h:528
#define RZ_ASM_STATIC_PLUGINS
Definition: config.h:21
static int value
Definition: cmd_api.c:93
RZ_API bool rz_config_rm(RzConfig *cfg, RZ_NONNULL const char *name)
Definition: config.c:392
RZ_API bool rz_config_add_node(RZ_BORROW RzConfig *cfg, RZ_OWN RzConfigNode *node)
Appends the given node to the config cfg.
Definition: config.c:362
RZ_API void rz_config_lock(RzConfig *cfg, int l)
Definition: config.c:476
RZ_API RZ_OWN RzConfigNode * rz_config_node_clone(RzConfigNode *n)
Definition: config.c:20
#define RZ_API
#define NULL
Definition: cris-opc.c:27
int mod(int a, int b)
Definition: crypto_rot.c:8
_Use_decl_annotations_ int __cdecl printf(const char *const _Format,...)
Definition: cs_driver.c:93
cs_arch arch
Definition: cstool.c:13
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void count
Definition: sflib.h:98
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void static offset struct stat static buf void long static basep static whence static length const void static len static semflg const void static shmflg const struct timespec struct timespec static rem const char static group const void start
Definition: sflib.h:133
static int is_number(const ut8 *buf, int size)
Definition: data.c:37
uint16_t ut16
uint32_t ut32
const char * v
Definition: dsignal.c:12
RZ_API char * sdb_fmt(const char *fmt,...)
Definition: fmt.c:26
void skip(file *in, unsigned n)
Definition: gzappend.c:202
RZ_API void Ht_() free(HtName_(Ht) *ht)
Definition: ht_inc.c:130
RZ_API const KEY_TYPE bool * found
Definition: ht_inc.h:130
voidpf void uLong size
Definition: ioapi.h:138
voidpf void * buf
Definition: ioapi.h:138
snprintf
Definition: kernel.h:364
#define reg(n)
uint8_t ut8
Definition: lh5801.h:11
return memset(p, 0, total)
void * p
Definition: libc.cpp:67
memcpy(mem, inblock.get(), min(CONTAINING_RECORD(inblock.get(), MEMBLOCK, data) ->size, size))
RZ_API RZ_OWN RzList * rz_list_newf(RzListFree f)
Returns a new initialized RzList pointer and sets the free method.
Definition: list.c:248
RZ_API RZ_BORROW RzListIter * rz_list_append(RZ_NONNULL RzList *list, void *data)
Appends at the end of the list a new element.
Definition: list.c:288
RZ_API void rz_list_free(RZ_NONNULL RzList *list)
Empties the list and frees the list pointer.
Definition: list.c:137
void * realloc(void *ptr, size_t size)
Definition: malloc.c:144
void * malloc(size_t size)
Definition: malloc.c:123
void * calloc(size_t number, size_t size)
Definition: malloc.c:102
static static fork const void static count static fd const char const char static newpath char char char static envp time_t static t const char static mode static whence const char static dir time_t static t unsigned static seconds const char struct utimbuf static buf static inc static sig const char static mode static oldfd struct tms static buf static getgid static geteuid const char static filename static arg static mask struct ustat static ubuf static getppid static setsid static egid sigset_t static set struct timeval struct timezone static tz fd_set fd_set fd_set struct timeval static timeout const char char static bufsiz const char static swapflags void static offset const char static length static mode static who const char struct statfs static buf unsigned unsigned num
Definition: sflib.h:126
return strdup("=SP r13\n" "=LR r14\n" "=PC r15\n" "=A0 r0\n" "=A1 r1\n" "=A2 r2\n" "=A3 r3\n" "=ZF zf\n" "=SF nf\n" "=OF vf\n" "=CF cf\n" "=SN or0\n" "gpr lr .32 56 0\n" "gpr pc .32 60 0\n" "gpr cpsr .32 64 0 ____tfiae_________________qvczn\n" "gpr or0 .32 68 0\n" "gpr tf .1 64.5 0 thumb\n" "gpr ef .1 64.9 0 endian\n" "gpr jf .1 64.24 0 java\n" "gpr qf .1 64.27 0 sticky_overflow\n" "gpr vf .1 64.28 0 overflow\n" "gpr cf .1 64.29 0 carry\n" "gpr zf .1 64.30 0 zero\n" "gpr nf .1 64.31 0 negative\n" "gpr itc .4 64.10 0 if_then_count\n" "gpr gef .4 64.16 0 great_or_equal\n" "gpr r0 .32 0 0\n" "gpr r1 .32 4 0\n" "gpr r2 .32 8 0\n" "gpr r3 .32 12 0\n" "gpr r4 .32 16 0\n" "gpr r5 .32 20 0\n" "gpr r6 .32 24 0\n" "gpr r7 .32 28 0\n" "gpr r8 .32 32 0\n" "gpr r9 .32 36 0\n" "gpr r10 .32 40 0\n" "gpr r11 .32 44 0\n" "gpr r12 .32 48 0\n" "gpr r13 .32 52 0\n" "gpr r14 .32 56 0\n" "gpr r15 .32 60 0\n" "gpr r16 .32 64 0\n" "gpr r17 .32 68 0\n")
int x
Definition: mipsasm.c:20
int n
Definition: mipsasm.c:19
int type
Definition: mipsasm.c:17
Definition: conf.py:1
int idx
Definition: setup.py:197
#define _(String)
Definition: opintl.h:53
int off
Definition: pal.c:13
RZ_API void rz_parse_free(RzParse *p)
Definition: parse.c:40
RZ_API char * rz_parse_pseudocode(RzParse *p, const char *assembly)
Converts the assembly line into pseudocode.
Definition: parse.c:107
RZ_API RzParse * rz_parse_new(void)
Definition: parse.c:16
RZ_API bool rz_parse_use(RzParse *p, const char *name)
Definition: parse.c:56
RZ_API RZ_OWN RzStrBuf * rz_print_colorize_asm_str(RZ_BORROW RzPrint *p, const RzAsmTokenString *toks)
Colorizes a tokenized asm string.
Definition: print.c:1613
static void repeat(struct parse *, sopno, int, int)
Definition: regcomp.c:1155
static RzSocket * s
Definition: rtr.c:28
@ RZ_ASM_SYNTAX_REGNUM
Definition: rz_asm.h:53
@ RZ_ASM_SYNTAX_ATT
Definition: rz_asm.h:51
@ RZ_ASM_SYNTAX_INTEL
Definition: rz_asm.h:50
@ RZ_ASM_SYNTAX_MASM
Definition: rz_asm.h:52
@ RZ_ASM_SYNTAX_JZ
Definition: rz_asm.h:54
#define rz_warn_if_reached()
Definition: rz_assert.h:29
#define rz_return_if_fail(expr)
Definition: rz_assert.h:100
#define rz_return_val_if_fail(expr, val)
Definition: rz_assert.h:108
RZ_API RZ_OWN RzBuffer * rz_buf_new_with_string(RZ_NONNULL const char *msg)
Creates a new buffer from a string.
Definition: buf.c:570
RZ_API void rz_buf_free(RzBuffer *b)
Free all internal data hold by the buffer and the buffer.
Definition: buf.c:1253
RZ_API ut64 rz_buf_size(RZ_NONNULL RzBuffer *b)
Return the size of the buffer.
Definition: buf.c:1225
RZ_API RZ_OWN char * rz_buf_to_string(RZ_NONNULL RzBuffer *b)
Stringify the buffer.
Definition: buf.c:642
static ut16 rz_read_le16(const void *src)
Definition: rz_endian.h:206
static ut32 rz_read_le32(const void *src)
Definition: rz_endian.h:239
static void rz_write_ble32(void *dest, ut32 val, bool big_endian)
Definition: rz_endian.h:540
static void rz_write_ble64(void *dest, ut64 val, bool big_endian)
Definition: rz_endian.h:544
static void rz_write_ble16(void *dest, ut16 val, bool big_endian)
Definition: rz_endian.h:532
RZ_API RZ_OWN char * rz_file_slurp(const char *str, RZ_NULLABLE size_t *usz)
Definition: file.c:454
RZ_API int rz_hex_str2bin(const char *in, ut8 *out)
Convert an input string in into the binary form in out.
Definition: hex.c:444
#define RZ_LOG_WARN(fmtstr,...)
Definition: rz_log.h:56
#define RZ_LOG_DEBUG(fmtstr,...)
Definition: rz_log.h:49
#define RZ_LOG_ERROR(fmtstr,...)
Definition: rz_log.h:58
RZ_API void rz_mem_copybits_delta(ut8 *dst, int doff, const ut8 *src, int soff, int bits)
Definition: mem.c:141
RZ_API ut64 rz_num_math(RzNum *num, const char *str)
Definition: unum.c:456
RZ_API bool rz_num_is_hex_prefix(const char *p)
Checks if the first two chars of p equal "0x".
Definition: unum.c:17
RZ_API RZ_OWN char * rz_path_system(RZ_NULLABLE const char *path)
Return the full system path of the given subpath path.
Definition: path.c:162
RzAsmTokenType
Definition: rz_print.h:45
@ RZ_ASM_TOKEN_MNEMONIC
Definition: rz_print.h:47
@ RZ_ASM_TOKEN_REGISTER
Definition: rz_print.h:50
@ RZ_ASM_TOKEN_OPERATOR
Definition: rz_print.h:48
@ RZ_ASM_TOKEN_NUMBER
Definition: rz_print.h:49
@ RZ_ASM_TOKEN_SEPARATOR
Definition: rz_print.h:51
@ RZ_ASM_TOKEN_UNKNOWN
Definition: rz_print.h:46
@ RZ_REG_TYPE_LAST
Definition: rz_reg.h:34
RZ_API int rz_regex_exec(const RzRegex *preg, const char *string, size_t nmatch, RzRegexMatch __pmatch[], int eflags)
Definition: regexec.c:149
RZ_API void rz_regex_free(RzRegex *)
Definition: regcomp.c:249
RZ_API char * rz_str_newf(const char *fmt,...) RZ_PRINTF_CHECK(1
RZ_API char * rz_str_ndup(RZ_NULLABLE const char *ptr, int len)
Create new copy of string ptr limited to size len.
Definition: str.c:1006
RZ_API int rz_str_word_count(const char *string)
Definition: str.c:643
RZ_API void rz_str_case(char *str, bool up)
Definition: str.c:341
RZ_API size_t rz_str_ncpy(char *dst, const char *src, size_t n)
Secure string copy with null terminator.
Definition: str.c:923
RZ_API char * rz_str_replace(char *str, const char *key, const char *val, int g)
Definition: str.c:1110
RZ_API int rz_str_word_set0(char *str)
Definition: str.c:423
RZ_API void rz_str_trim(RZ_NONNULL RZ_INOUT char *str)
Removes whitespace characters (space, tab, newline etc.) from the beginning and end of a string.
Definition: str_trim.c:190
RZ_API bool rz_str_startswith(RZ_NONNULL const char *str, RZ_NONNULL const char *needle)
Checks if a string starts with a specifc sequence of characters (case sensitive)
Definition: str.c:3286
RZ_API int rz_str_replace_char(char *s, int a, int b)
Definition: str.c:169
RZ_API int rz_str_unescape(char *buf)
Definition: str.c:1300
RZ_API const char * rz_str_word_get0(const char *str, int idx)
Definition: str.c:598
RZ_API const char * rz_str_closer_chr(const char *b, const char *s)
Definition: str.c:3111
#define IS_SEPARATOR(x)
Definition: rz_str_util.h:6
RZ_API RZ_OWN char * rz_strbuf_drain(RzStrBuf *sb)
Definition: strbuf.c:342
RZ_API const char * rz_strbuf_set(RzStrBuf *sb, const char *s)
Definition: strbuf.c:153
RZ_API char * rz_strbuf_get(RzStrBuf *sb)
Definition: strbuf.c:321
RZ_API bool rz_strbuf_append(RzStrBuf *sb, const char *s)
Definition: strbuf.c:222
RZ_API RzStrBuf * rz_strbuf_new(const char *s)
Definition: strbuf.c:8
RZ_API void rz_strbuf_free(RzStrBuf *sb)
Definition: strbuf.c:358
RZ_API bool rz_strbuf_appendf(RzStrBuf *sb, const char *fmt,...) RZ_PRINTF_CHECK(2
RZ_API void rz_strbuf_init(RzStrBuf *sb)
Definition: strbuf.c:33
RZ_API bool rz_strbuf_is_empty(RzStrBuf *sb)
Definition: strbuf.c:24
RZ_API int rz_strbuf_length(RzStrBuf *sb)
Definition: strbuf.c:28
#define RZ_SYS_ENDIAN_BIG
Definition: rz_types.h:527
#define RZ_NULLABLE
Definition: rz_types.h:65
#define RZ_OWN
Definition: rz_types.h:62
#define RZ_SYS_ENDIAN_LITTLE
Definition: rz_types.h:526
#define RZ_NEW0(x)
Definition: rz_types.h:284
#define RZ_SYS_ENDIAN_BI
Definition: rz_types.h:528
#define RZ_SYS_ENDIAN_NONE
Definition: rz_types.h:525
#define RZ_OUT
Definition: rz_types.h:51
#define RZ_SYS_BITS
Definition: rz_types.h:520
#define RZ_NONNULL
Definition: rz_types.h:64
#define RZ_NEW(x)
Definition: rz_types.h:285
#define RZ_ARRAY_SIZE(x)
Definition: rz_types.h:300
#define PFMT64x
Definition: rz_types.h:393
#define PFMT32d
Definition: rz_types.h:408
#define RZ_BORROW
Definition: rz_types.h:63
#define RZ_DEPRECATE
Definition: rz_types.h:66
#define RZ_MIN(x, y)
#define st64
Definition: rz_types_base.h:10
#define RZ_MAX(x, y)
#define RZ_BETWEEN(x, y, z)
#define st16
Definition: rz_types_base.h:14
#define st32
Definition: rz_types_base.h:12
#define RZ_SDB_OPCODES
Definition: rz_userconf.h:84
RZ_API void * rz_vector_push(RzVector *vec, void *x)
Definition: vector.c:197
#define rz_vector_foreach(vec, it)
Definition: rz_vector.h:169
RZ_API void rz_vector_free(RzVector *vec)
Definition: vector.c:75
RZ_API RzVector * rz_vector_clone(RzVector *vec)
Definition: vector.c:101
RZ_API RzVector * rz_vector_new(size_t elem_size, RzVectorFree free, void *free_user)
Definition: vector.c:42
static size_t rz_vector_len(const RzVector *vec)
Definition: rz_vector.h:82
int(* RzVectorComparator)(const void *a, const void *b)
Definition: rz_vector.h:41
RZ_API void rz_vector_sort(RzVector *vec, RzVectorComparator cmp, bool reverse)
Sort function for RzVector.
Definition: vector.c:285
#define rz_pvector_foreach(vec, it)
Definition: rz_vector.h:334
#define isspace(c)
Definition: safe-ctype.h:141
#define isalpha(c)
Definition: safe-ctype.h:125
#define isxdigit(c)
Definition: safe-ctype.h:145
RZ_API Sdb * sdb_new(const char *path, const char *name, int lock)
Definition: sdb.c:47
RZ_API char * sdb_get(Sdb *s, const char *key, ut32 *cas)
Definition: sdb.c:290
RZ_API bool sdb_free(Sdb *s)
Definition: sdb.c:206
static int
Definition: sfsocketcall.h:114
int size_t
Definition: sftypes.h:40
#define b(i)
Definition: sha256.c:42
#define f(i)
Definition: sha256.c:46
#define c(i)
Definition: sha256.c:43
#define a(i)
Definition: sha256.c:41
#define h(i)
Definition: sha256.c:48
S_API void spp_eval(char *buf, Output *out)
Definition: spp.c:109
S_API void spp_proc_set(SppProc *p, const char *arg, int fail)
Definition: spp.c:298
Definition: spp.h:92
Definition: spp.h:128
ut32 ana_op_type
Analysis op type (see: _RzAnalysisOpType) of the token string to parse.
Definition: rz_print.h:80
const RzRegSet * reg_sets
Array of reg sets used to look up register names during parsing.
Definition: rz_print.h:79
Pattern for an asm string token.
Definition: rz_print.h:86
RzAsmTokenType type
Definition: rz_print.h:87
char * pattern
Definition: rz_print.h:88
RzRegex * regex
Definition: rz_print.h:89
A tokenized asm string.
Definition: rz_print.h:72
RzVector * tokens
Definition: rz_print.h:75
ut32 op_type
RzAnalysisOpType. Mnemonic color depends on this.
Definition: rz_print.h:73
RzStrBuf * str
Definition: rz_print.h:74
A token of an asm string holding metadata.
Definition: rz_print.h:60
size_t len
Definition: rz_print.h:62
size_t start
Definition: rz_print.h:61
union RzAsmToken::@310 val
RzAsmTokenType type
Definition: rz_print.h:63
ut64 number
Definition: rz_print.h:65
Definition: inftree9.h:24
Definition: gzappend.c:170
Definition: z80asm.h:102
ut64 code_offset
Definition: rz_asm.h:86
char * assembly
Definition: rz_asm.h:81
ut8 * bytes
Definition: rz_asm.h:80
int code_align
Definition: rz_asm.h:88
RzList * equs
Definition: rz_asm.h:85
ut64 data_offset
Definition: rz_asm.h:87
RzConfig * config
Definition: rz_core.h:300
void * data
Definition: rz_list.h:14
struct Proc * proc
RZ_API void rz_syscall_free(RzSyscall *s)
Frees an RzSyscall type.
Definition: syscall.c:79
RZ_API int rz_syscall_get_num(RzSyscall *s, const char *str)
Definition: syscall.c:376
RZ_API bool rz_syscall_setup(RzSyscall *s, const char *arch, int bits, const char *cpu, const char *os)
Definition: syscall.c:234
#define fail(test)
Definition: tests.h:29
Definition: dis.c:32
void error(const char *msg)
Definition: untgz.c:593
ut64(WINAPI *w32_GetEnabledXStateFeatures)()
static int sp
Definition: z80asm.c:91
static int addr
Definition: z80asm.c:58
static bool input(void *ud, zip_uint8_t *data, zip_uint64_t length)