Rizin
unix-like reverse engineering framework and cli tools
parse_common.c File Reference

Go to the source code of this file.

Classes

struct  RzPseudoGrammar
 
struct  RzPseudoDirect
 
struct  RzPseudoReplace
 
struct  RzPseudoConfig
 

Macros

#define RZ_PSEUDO_DEFINE_GRAMMAR(x, y)    { .mnemonic = x, .mnemonic_length = sizeof(x) - 1, .grammar = y }
 
#define RZ_PSEUDO_DEFINE_DIRECT(x, y)    { .expected = x, .pseudo = y }
 
#define RZ_PSEUDO_DEFINE_REPLACE(x, y, f)    { .expected = x, .replace = y, .flag = f }
 
#define RZ_PSEUDO_DEFINE_CONFIG(d, l, r, m, t)
 
#define RZ_PSEUDO_DEFINE_CONFIG_NO_DIRECT(l, r, m, t)
 
#define RZ_PSEUDO_DEFINE_CONFIG_ONLY_LEXICON(l, m, t)
 

Functions

static bool rz_pseudo_convert (const RzPseudoConfig *config, const char *assembly, RzStrBuf *sb)
 

Detailed Description

This file contains common code that can be used to convert any assembly code into pseudo code via a generic grammar.

The grammar is quite simple. Let's start with an example.

Take the following assembly:

; intel x86 asm
; rax = rax + 10
add rax, 10

The associated grammar is "1 += 2": the number 1 is replaced with "rax" and the number 2 with "10".

another example:

; mips asm
; t0 = 4097 << 16
lui t0, 4097

The associated grammar is "1 = 2 << #16"; note the # symbol. The characters that follow # are copied to the output as-is, without being treated as argument indexes, up to the next whitespace or the end of the line.

The developer has to provide a tokenize method that splits the assembly into token strings, and a lexicon that maps each mnemonic to its grammar.

Definition in file parse_common.c.

Macro Definition Documentation

◆ RZ_PSEUDO_DEFINE_CONFIG

#define RZ_PSEUDO_DEFINE_CONFIG (   d,
  l,
  r,
  m,
  t
)
Value:
{ \
.direct = d, \
.direct_length = RZ_ARRAY_SIZE(d), \
.replace = r, \
.replace_length = RZ_ARRAY_SIZE(r), \
.lexicon = l, \
.lexicon_length = RZ_ARRAY_SIZE(l), \
.max_args = m, \
.tokenize = t, \
}
#define r
Definition: crypto_rc6.c:12
#define RZ_ARRAY_SIZE(x)
Definition: rz_types.h:300
#define d(i)
Definition: sha256.c:44

Definition at line 67 of file parse_common.c.

◆ RZ_PSEUDO_DEFINE_CONFIG_NO_DIRECT

#define RZ_PSEUDO_DEFINE_CONFIG_NO_DIRECT (   l,
  r,
  m,
  t
)
Value:
{ \
.direct = NULL, \
.direct_length = 0, \
.replace = r, \
.replace_length = RZ_ARRAY_SIZE(r), \
.lexicon = l, \
.lexicon_length = RZ_ARRAY_SIZE(l), \
.max_args = m, \
.tokenize = t, \
}
#define NULL
Definition: cris-opc.c:27

Definition at line 79 of file parse_common.c.

◆ RZ_PSEUDO_DEFINE_CONFIG_ONLY_LEXICON

#define RZ_PSEUDO_DEFINE_CONFIG_ONLY_LEXICON (   l,
  m,
  t
)
Value:
{ \
.direct = NULL, \
.direct_length = 0, \
.replace = NULL, \
.replace_length = 0, \
.lexicon = l, \
.lexicon_length = RZ_ARRAY_SIZE(l), \
.max_args = m, \
.tokenize = t, \
}

Definition at line 91 of file parse_common.c.

◆ RZ_PSEUDO_DEFINE_DIRECT

#define RZ_PSEUDO_DEFINE_DIRECT (   x,
  y
)     { .expected = x, .pseudo = y }

Definition at line 61 of file parse_common.c.

◆ RZ_PSEUDO_DEFINE_GRAMMAR

#define RZ_PSEUDO_DEFINE_GRAMMAR (   x,
  y
)     { .mnemonic = x, .mnemonic_length = sizeof(x) - 1, .grammar = y }

Definition at line 58 of file parse_common.c.

◆ RZ_PSEUDO_DEFINE_REPLACE

#define RZ_PSEUDO_DEFINE_REPLACE (   x,
  y,
  f 
)     { .expected = x, .replace = y, .flag = f }

Definition at line 64 of file parse_common.c.

Function Documentation

◆ rz_pseudo_convert()

static bool rz_pseudo_convert ( const RzPseudoConfig *  config,
const char *  assembly,
RzStrBuf *  sb 
)
static

Definition at line 103 of file parse_common.c.

103  {
104  rz_return_val_if_fail(config && config->tokenize && config->lexicon, false);
105 
106  size_t i, p;
107  const char *tmp = NULL;
108  const RzPseudoGrammar *gr = NULL;
109  const RzPseudoReplace *rp = NULL;
110 
111  if (!strcmp(assembly, "invalid")) {
112  return true;
113  } else if (!strncmp(assembly, "trunc", 5)) {
114  return true;
115  } else if (!strcmp(assembly, "nop")) {
116  return true;
117  }
118  size_t length = strlen(assembly);
119 
120  for (i = 0; i < config->direct_length; ++i) {
121  tmp = config->direct[i].expected;
122  if (!strcmp(assembly, tmp)) {
123  rz_strbuf_set(sb, config->direct[i].pseudo);
124  return true;
125  }
126  }
127 
128  size_t mnemonic_length = length;
129  if ((tmp = strchr(assembly, ' '))) {
130  mnemonic_length = tmp - assembly;
131  }
132  for (i = 0; i < config->lexicon_length; ++i) {
133  gr = &config->lexicon[i];
134  if (gr->mnemonic_length == mnemonic_length && !strncmp(gr->mnemonic, assembly, mnemonic_length)) {
135  break;
136  }
137  gr = NULL;
138  }
139  if (!gr) {
140  rz_strbuf_setf(sb, "asm(\"%s\")", assembly);
141  return true;
142  }
143 
144  RzList *tokens = config->tokenize(assembly, length);
145  if (!tokens) {
146  rz_strbuf_setf(sb, "asm(\"%s\")", assembly);
147  return true;
148  }
149 
150  for (i = 0, p = 0; gr->grammar[p]; ++p) {
151  int index = gr->grammar[p] - '0';
152  if (index > 0 && index < config->max_args) {
153  tmp = (const char *)rz_list_get_n(tokens, index);
154  if (!tmp) {
155  tmp = "?";
156  }
157  rz_strbuf_append_n(sb, gr->grammar + i, p - i);
158  i = p + 1;
159  rz_strbuf_append(sb, tmp);
160  } else if (gr->grammar[p] == '#') {
161  rz_strbuf_append_n(sb, gr->grammar + i, p - i);
162  i = p + 1;
163  p++;
164  while (gr->grammar[p] && !IS_WHITESPACE(gr->grammar[p])) {
165  ++p;
166  }
167  }
168  }
169 
170  if (i < p) {
171  rz_strbuf_append_n(sb, gr->grammar + i, p - i);
172  }
173 
174  char *result = rz_strbuf_drain_nofree(sb);
175  for (int i = 0; i < config->replace_length; ++i) {
176  rp = &config->replace[i];
177  result = rz_str_replace(result, rp->expected, rp->replace, rp->flag);
178  }
179  rz_strbuf_set(sb, result);
180  free(result);
181 
182  rz_list_free(tokens);
183  return true;
184 }
lzma_index ** i
Definition: index.h:629
static SblHeader sb
Definition: bin_mbn.c:26
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void static offset struct stat static buf void long static basep static whence static length const void static len static semflg const void static shmflg const struct timespec struct timespec static rem const char static group const void length
Definition: sflib.h:133
RZ_API void Ht_() free(HtName_(Ht) *ht)
Definition: ht_inc.c:130
static char * rp[]
Definition: i8080dis.c:36
void * p
Definition: libc.cpp:67
RZ_API RZ_BORROW void * rz_list_get_n(RZ_NONNULL const RzList *list, ut32 n)
Returns the N-th element of the list.
Definition: list.c:574
RZ_API void rz_list_free(RZ_NONNULL RzList *list)
Empties the list and frees the list pointer.
Definition: list.c:137
#define rz_return_val_if_fail(expr, val)
Definition: rz_assert.h:108
RZ_API char * rz_str_replace(char *str, const char *key, const char *val, int g)
Definition: str.c:1110
#define IS_WHITESPACE(x)
Definition: rz_str_util.h:13
RZ_API RZ_OWN char * rz_strbuf_drain_nofree(RzStrBuf *sb)
Definition: strbuf.c:349
RZ_API const char * rz_strbuf_set(RzStrBuf *sb, const char *s)
Definition: strbuf.c:153
RZ_API bool rz_strbuf_append(RzStrBuf *sb, const char *s)
Definition: strbuf.c:222
RZ_API const char * rz_strbuf_setf(RzStrBuf *sb, const char *fmt,...) RZ_PRINTF_CHECK(2
RZ_API bool rz_strbuf_append_n(RzStrBuf *sb, const char *s, size_t l)
Definition: strbuf.c:229
const char * mnemonic
Definition: parse_common.c:31
const char * grammar
Definition: parse_common.c:33
size_t mnemonic_length
Definition: parse_common.c:32

References free(), RzPseudoGrammar::grammar, i, IS_WHITESPACE, length, RzPseudoGrammar::mnemonic, RzPseudoGrammar::mnemonic_length, NULL, p, rp, rz_list_free(), rz_list_get_n(), rz_return_val_if_fail, rz_str_replace(), rz_strbuf_append(), rz_strbuf_append_n(), rz_strbuf_drain_nofree(), rz_strbuf_set(), rz_strbuf_setf(), sb, and autogen_x86imm::tmp.

Referenced by parse().