Rizin
unix-like reverse engineering framework and cli tools
filter.c
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: 2015 pancake <pancake@nopcode.org>
2 // SPDX-License-Identifier: LGPL-3.0-only
3 
4 #include <rz_bin.h>
5 #include "i/private.h"
6 
7 static char *__hashify(char *s, ut64 vaddr) {
9 
10  char *os = s;
11  while (*s) {
12  if (!IS_PRINTABLE(*s)) {
13  if (vaddr && vaddr != UT64_MAX) {
14  char *ret = rz_str_newf("_%" PFMT64d, vaddr);
15  if (ret) {
16  free(os);
17  }
18  return ret;
19  }
20  ut32 hash = sdb_hash(s);
21  char *ret = rz_str_newf("%x", hash);
22  if (ret) {
23  free(os);
24  }
25  return ret;
26  }
27  s++;
28  }
29  return os;
30 }
31 
32 // - name should be allocated on the heap
33 RZ_API char *rz_bin_filter_name(RzBinFile *bf, HtPU *db, ut64 vaddr, char *name) {
35 
36  char *resname = name;
37  char *uname = rz_str_newf("%" PFMT64x ".%s", vaddr, name);
38  int count = 0;
39  HtPUKv *kv = ht_pu_find_kv(db, name, NULL);
40  if (kv) {
41  count = ++kv->value;
42  } else {
43  count = 1;
44  ht_pu_insert(db, name, 1ULL);
45  }
46 
47  bool found;
48  ht_pu_find(db, uname, &found);
49  if (found) {
50  // TODO: symbol is dupped, so symbol can be removed!
51  free(uname);
52  return resname;
53  }
54 
55  HtPUKv tmp = {
56  .key = uname,
57  .key_len = strlen(uname),
58  .value = 1ULL,
59  .value_len = sizeof(ut64)
60  };
61  ht_pu_insert_kv(db, &tmp, false);
62 
63  if (vaddr) {
64  char *p = __hashify(resname, vaddr);
65  if (p) {
66  resname = p;
67  }
68  }
69  if (count > 1) {
70  char *p = rz_str_appendf(resname, "_%d", count - 1);
71  if (p) {
72  resname = p;
73  }
74 
75  // two symbols at different addresses and same name
76  // eprintf ("Symbol '%s' dupped!\n", sym->name);
77  }
78  return resname;
79 }
80 
81 RZ_API void rz_bin_filter_sym(RzBinFile *bf, HtPP *ht, ut64 vaddr, RzBinSymbol *sym) {
82  rz_return_if_fail(ht && sym && sym->name);
83  const char *name = sym->dname ? sym->dname : sym->name;
84 
85  if (bf && bf->o && bf->o->lang && !sym->dname) {
86  char *dn = rz_bin_demangle(bf, NULL, name, sym->vaddr, false);
87  if (RZ_STR_ISNOTEMPTY(dn)) {
88  sym->dname = dn;
89  // extract class information from demangled symbol name
90  char *p = strchr(dn, '.');
91  if (p) {
92  if (IS_UPPER(*dn)) {
93  free(sym->classname);
94  sym->classname = strdup(dn);
95  sym->classname[p - dn] = 0;
96  } else if (IS_UPPER(p[1])) {
97  free(sym->classname);
98  sym->classname = strdup(p + 1);
99  p = strchr(sym->classname, '.');
100  if (p) {
101  *p = 0;
102  }
103  }
104  }
105  }
106  }
107 
108  const char *uname = sdb_fmt("%" PFMT64x ".%c.%s", vaddr, sym->is_imported ? 'i' : 's', name);
109  bool res = ht_pp_insert(ht, uname, sym);
110  if (!res) {
111  return;
112  }
113  sym->dup_count = 0;
114 
115  const char *oname = sdb_fmt("o.0.%c.%s", sym->is_imported ? 'i' : 's', name);
116  RzBinSymbol *prev_sym = ht_pp_find(ht, oname, NULL);
117  if (!prev_sym) {
118  if (!ht_pp_insert(ht, oname, sym)) {
119  RZ_LOG_WARN("Failed to insert dup_count in ht");
120  return;
121  }
122  } else {
123  sym->dup_count = prev_sym->dup_count + 1;
124  ht_pp_update(ht, oname, sym);
125  }
126 }
127 
128 RZ_API void rz_bin_filter_symbols(RzBinFile *bf, RzList /*<RzBinSymbol *>*/ *list) {
129  HtPP *ht = ht_pp_new0();
130  if (!ht) {
131  return;
132  }
133 
134  RzListIter *iter;
135  RzBinSymbol *sym;
136  rz_list_foreach (list, iter, sym) {
137  if (sym && sym->name && *sym->name) {
138  rz_bin_filter_sym(bf, ht, sym->vaddr, sym);
139  }
140  }
141  ht_pp_free(ht);
142 }
143 
144 RZ_API void rz_bin_filter_sections(RzBinFile *bf, RzList /*<RzBinSection *>*/ *list) {
145  RzBinSection *sec;
146  HtPU *db = ht_pu_new0();
147  RzListIter *iter;
148  rz_list_foreach (list, iter, sec) {
149  char *p = rz_bin_filter_name(bf, db, sec->vaddr, sec->name);
150  if (p) {
151  sec->name = p;
152  }
153  }
154  ht_pu_free(db);
155 }
156 
/**
 * \brief Heuristic telling whether a string looks like noise rather than text.
 *
 * Every character is classified as digit, lowercase letter, uppercase letter
 * or "other"; a backslash is counted twice as "other". Strings of at most two
 * characters and strings starting with '_' are never reported. For the
 * remaining ones the string is reported when any of these holds:
 *  - it is shorter than 10 characters;
 *  - "other" characters are at least as many as letters and digits together;
 *  - it contains fewer than 3 lowercase letters.
 *
 * \param str NUL-terminated string to inspect, must not be NULL
 * \return true when the string is considered a false positive
 */
static bool false_positive(const char *str) {
	int up = 0; // uppercase letters
	int lo = 0; // lowercase letters
	int nm = 0; // digits
	int ot = 0; // everything else, backslashes weighted twice
	int ln = 0; // total length

	for (; str[ln]; ln++) {
		const char c = str[ln];
		if (c >= '0' && c <= '9') {
			nm++;
		} else if (c >= 'a' && c <= 'z') {
			lo++;
		} else if (c >= 'A' && c <= 'Z') {
			up++;
		} else {
			ot++;
		}
		if (c == '\\') {
			ot++;
		}
	}

	if (ln <= 2 || str[0] == '_') {
		return false;
	}
	if (ln < 10) {
		return true;
	}
	if (ot >= (nm + up + lo)) {
		return true;
	}
	return lo < 3;
}
207 
208 RZ_API bool rz_bin_strpurge(RzBin *bin, const char *str, ut64 refaddr) {
209  bool purge = false;
210  if (bin->strpurge) {
211  char *addrs = strdup(bin->strpurge);
212  if (addrs) {
213  int splits = rz_str_split(addrs, ',');
214  int i;
215  char *ptr;
216  char *range_sep;
217  ut64 addr, from, to;
218  for (i = 0, ptr = addrs; i < splits; i++, ptr += strlen(ptr) + 1) {
219  if (!strcmp(ptr, "true") && false_positive(str)) {
220  purge = true;
221  continue;
222  }
223  bool bang = false;
224  if (*ptr == '!') {
225  bang = true;
226  ptr++;
227  }
228  if (!strcmp(ptr, "all")) {
229  purge = !bang;
230  continue;
231  }
232  range_sep = strchr(ptr, '-');
233  if (range_sep) {
234  *range_sep = 0;
235  from = rz_num_get(NULL, ptr);
236  ptr = range_sep + 1;
237  to = rz_num_get(NULL, ptr);
238  if (refaddr >= from && refaddr <= to) {
239  purge = !bang;
240  continue;
241  }
242  }
243  addr = rz_num_get(NULL, ptr);
244  if (addr != 0 || *ptr == '0') {
245  if (refaddr == addr) {
246  purge = !bang;
247  continue;
248  }
249  }
250  }
251  free(addrs);
252  }
253  }
254  return purge;
255 }
256 
/**
 * \brief Computes how often a character occurs in a string, as a percentage.
 *
 * \param ch Character to look for
 * \param str NUL-terminated string to scan, must not be NULL
 * \return Integer percentage (rounded down) of the characters of \p str equal
 *         to \p ch, or 0 for an empty string
 */
static int get_char_ratio(char ch, const char *str) {
	int total = 0;
	int hits = 0;
	for (const char *p = str; *p; p++) {
		total++;
		if (*p == ch) {
			hits++;
		}
	}
	if (total == 0) {
		return 0;
	}
	return hits * 100 / total;
}
267 
268 static bool bin_strfilter(RzBin *bin, const char *str) {
269  int i;
270  bool got_uppercase, in_esc_seq;
271  switch (bin->strfilter) {
272  case 'U': // only uppercase strings
273  got_uppercase = false;
274  in_esc_seq = false;
275  for (i = 0; str[i]; i++) {
276  signed char ch = str[i];
277  if (ch == ' ' ||
278  (in_esc_seq && (ch == 't' || ch == 'n' || ch == 'r'))) {
279  goto loop_end;
280  }
281  if (ch < 0 || IS_LOWER(ch)) {
282  return false;
283  }
284  if (IS_UPPER(ch)) {
285  got_uppercase = true;
286  }
287  loop_end:
288  in_esc_seq = in_esc_seq ? false : ch == '\\';
289  }
290  if (get_char_ratio(str[0], str) >= 60) {
291  return false;
292  }
293  if (str[0] && get_char_ratio(str[1], str) >= 60) {
294  return false;
295  }
296  if (!got_uppercase) {
297  return false;
298  }
299  break;
300  case 'a': // only alphanumeric - plain ascii
301  for (i = 0; str[i]; i++) {
302  char ch = str[i];
303  if (ch < 1 || !IS_PRINTABLE(ch)) {
304  return false;
305  }
306  }
307  break;
308  case 'e': // emails
309  if (str && *str) {
310  if (!strchr(str + 1, '@')) {
311  return false;
312  }
313  if (!strchr(str + 1, '.')) {
314  return false;
315  }
316  } else {
317  return false;
318  }
319  break;
320  case 'f': // format-string
321  if (str && *str) {
322  if (!strchr(str + 1, '%')) {
323  return false;
324  }
325  } else {
326  return false;
327  }
328  break;
329  case 'u': // URLs
330  if (!strstr(str, "://")) {
331  return false;
332  }
333  break;
334  case 'i': // IPV4
335  {
336  int segment = 0;
337  int segmentsum = 0;
338  bool prevd = false;
339  for (i = 0; str[i]; i++) {
340  char ch = str[i];
341  if (IS_DIGIT(ch)) {
342  segmentsum = segmentsum * 10 + (ch - '0');
343  if (segment == 3) {
344  return true;
345  }
346  prevd = true;
347  } else if (ch == '.') {
348  if (prevd == true && segmentsum < 256) {
349  segment++;
350  segmentsum = 0;
351  } else {
352  segmentsum = 0;
353  segment = 0;
354  }
355  prevd = false;
356  } else {
357  segmentsum = 0;
358  prevd = false;
359  segment = 0;
360  }
361  }
362  return false;
363  }
364  case 'p': // path
365  if (str[0] != '/') {
366  return false;
367  }
368  break;
369  case '8': // utf8
370  for (i = 0; str[i]; i++) {
371  char ch = str[i];
372  if (ch < 0) {
373  return true;
374  }
375  }
376  return false;
377  }
378  return true;
379 }
380 
385 RZ_API bool rz_bin_string_filter(RzBin *bin, const char *str, int len, ut64 addr) {
386  if (len >= 0 && (len < bin->minstrlen || (bin->maxstrlen > 0 && len > bin->maxstrlen))) {
387  return false;
388  }
390  return false;
391  }
392  return true;
393 }
size_t len
Definition: 6502dis.c:15
lzma_index ** i
Definition: index.h:629
RZ_API void rz_bin_filter_symbols(RzBinFile *bf, RzList *list)
Definition: filter.c:128
static char * __hashify(char *s, ut64 vaddr)
Definition: filter.c:7
RZ_API void rz_bin_filter_sections(RzBinFile *bf, RzList *list)
Definition: filter.c:144
RZ_API char * rz_bin_filter_name(RzBinFile *bf, HtPU *db, ut64 vaddr, char *name)
Definition: filter.c:33
static bool bin_strfilter(RzBin *bin, const char *str)
Definition: filter.c:268
static int get_char_ratio(char ch, const char *str)
Definition: filter.c:257
RZ_API void rz_bin_filter_sym(RzBinFile *bf, HtPP *ht, ut64 vaddr, RzBinSymbol *sym)
Definition: filter.c:81
static bool false_positive(const char *str)
Definition: filter.c:157
RZ_API bool rz_bin_string_filter(RzBin *bin, const char *str, int len, ut64 addr)
Definition: filter.c:385
RZ_API bool rz_bin_strpurge(RzBin *bin, const char *str, ut64 refaddr)
Definition: filter.c:208
RZ_API RZ_OWN char * rz_bin_demangle(RZ_NULLABLE RzBinFile *bf, RZ_NULLABLE const char *language, RZ_NULLABLE const char *symbol, ut64 vaddr, bool libs)
Demangles a symbol based on the language or the RzBinFile data.
Definition: bin.c:1295
#define RZ_API
#define NULL
Definition: cris-opc.c:27
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void count
Definition: sflib.h:98
#define ut8
Definition: dcpu16.h:8
#define false
uint32_t ut32
RZ_API char * sdb_fmt(const char *fmt,...)
Definition: fmt.c:26
RZ_API void Ht_() free(HtName_(Ht) *ht)
Definition: ht_inc.c:130
RZ_API const KEY_TYPE bool * found
Definition: ht_inc.h:130
uint8_t ut8
Definition: lh5801.h:11
void * p
Definition: libc.cpp:67
static void list(RzEgg *egg)
Definition: rz-gg.c:52
return strdup("=SP r13\n" "=LR r14\n" "=PC r15\n" "=A0 r0\n" "=A1 r1\n" "=A2 r2\n" "=A3 r3\n" "=ZF zf\n" "=SF nf\n" "=OF vf\n" "=CF cf\n" "=SN or0\n" "gpr lr .32 56 0\n" "gpr pc .32 60 0\n" "gpr cpsr .32 64 0 ____tfiae_________________qvczn\n" "gpr or0 .32 68 0\n" "gpr tf .1 64.5 0 thumb\n" "gpr ef .1 64.9 0 endian\n" "gpr jf .1 64.24 0 java\n" "gpr qf .1 64.27 0 sticky_overflow\n" "gpr vf .1 64.28 0 overflow\n" "gpr cf .1 64.29 0 carry\n" "gpr zf .1 64.30 0 zero\n" "gpr nf .1 64.31 0 negative\n" "gpr itc .4 64.10 0 if_then_count\n" "gpr gef .4 64.16 0 great_or_equal\n" "gpr r0 .32 0 0\n" "gpr r1 .32 4 0\n" "gpr r2 .32 8 0\n" "gpr r3 .32 12 0\n" "gpr r4 .32 16 0\n" "gpr r5 .32 20 0\n" "gpr r6 .32 24 0\n" "gpr r7 .32 28 0\n" "gpr r8 .32 32 0\n" "gpr r9 .32 36 0\n" "gpr r10 .32 40 0\n" "gpr r11 .32 44 0\n" "gpr r12 .32 48 0\n" "gpr r13 .32 52 0\n" "gpr r14 .32 56 0\n" "gpr r15 .32 60 0\n" "gpr r16 .32 64 0\n" "gpr r17 .32 68 0\n")
static const char struct stat static buf struct stat static buf static vhangup int struct rusage static rusage struct sysinfo static info unsigned static __unused uname
Definition: sflib.h:153
const char * name
Definition: op.c:541
static RzSocket * s
Definition: rtr.c:28
#define rz_return_if_fail(expr)
Definition: rz_assert.h:100
#define rz_return_val_if_fail(expr, val)
Definition: rz_assert.h:108
#define RZ_LOG_WARN(fmtstr,...)
Definition: rz_log.h:56
RZ_API ut64 rz_num_get(RzNum *num, const char *str)
Definition: unum.c:172
#define RZ_STR_ISNOTEMPTY(x)
Definition: rz_str.h:68
RZ_API char * rz_str_appendf(char *ptr, const char *fmt,...) RZ_PRINTF_CHECK(2
RZ_API char * rz_str_newf(const char *fmt,...) RZ_PRINTF_CHECK(1
RZ_API size_t rz_str_split(char *str, char ch)
Split string str in place by using ch as a delimiter.
Definition: str.c:406
#define IS_UPPER(c)
Definition: rz_str_util.h:14
#define IS_LOWER(c)
Definition: rz_str_util.h:15
#define IS_DIGIT(x)
Definition: rz_str_util.h:11
#define IS_PRINTABLE(x)
Definition: rz_str_util.h:10
#define PFMT64d
Definition: rz_types.h:394
#define PFMT64x
Definition: rz_types.h:393
#define UT64_MAX
Definition: rz_types_base.h:86
RZ_API ut32 sdb_hash(const char *key)
Definition: util.c:22
static struct sockaddr static addrlen static backlog const void static flags void struct sockaddr from
Definition: sfsocketcall.h:123
static struct sockaddr static addrlen static backlog const void static flags void struct sockaddr socklen_t static fromlen const void const struct sockaddr to
Definition: sfsocketcall.h:125
int value
Definition: sparc-opc.c:1837
Definition: malloc.c:26
Definition: z80asm.h:102
XX curplugin == o->plugin.
Definition: rz_bin.h:298
RzBinObject * o
Definition: rz_bin.h:305
RzBinLanguage lang
Definition: rz_bin.h:290
char * name
Definition: rz_bin.h:619
bool is_imported
Definition: rz_bin.h:684
char * name
Definition: rz_bin.h:675
char * classname
Definition: rz_bin.h:678
char * dname
Definition: rz_bin.h:676
ut64(WINAPI *w32_GetEnabledXStateFeatures)()
static int sp
Definition: z80asm.c:91
static int addr
Definition: z80asm.c:58