Rizin
unix-like reverse engineering framework and cli tools
search.c
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: 2008-2016 pancake <pancake@nopcode.org>
2 // SPDX-License-Identifier: LGPL-3.0-only
3 
4 #include <rz_search.h>
5 #include <rz_list.h>
6 #include <ctype.h>
7 
8 // Experimental search engine (fails, because stops at first hit of every block read
9 #define USE_BMH 0
10 
11 RZ_LIB_VERSION(rz_search);
12 
13 typedef struct {
15  int len;
16  ut8 data[];
18 
21  if (!s) {
22  return NULL;
23  }
24  if (!rz_search_set_mode(s, mode)) {
25  free(s);
26  eprintf("Cannot init search for mode %d\n", mode);
27  return false;
28  }
29  s->inverse = false;
30  s->data = NULL;
31  s->user = NULL;
32  s->callback = NULL;
33  s->align = 0;
34  s->distance = 0;
35  s->contiguous = 0;
36  s->overlap = false;
37  s->pattern_size = 0;
38  s->string_max = 255;
39  s->string_min = 3;
40  s->hits = rz_list_newf(free);
41  s->maxhits = 0;
42  // TODO: review those mempool sizes. ensure never gets NULL
43  s->kws = rz_list_newf(free);
44  if (!s->kws) {
46  return NULL;
47  }
48  s->kws->free = (RzListFree)rz_search_keyword_free;
49  return s;
50 }
51 
53  if (!s) {
54  return NULL;
55  }
56  rz_list_free(s->hits);
57  rz_list_free(s->kws);
58  // rz_io_free(s->iob.io); this is supposed to be a weak reference
59  free(s->data);
60  free(s);
61  return NULL;
62 }
63 
65  if (max < min) {
66  return false;
67  }
68  s->string_min = min;
69  s->string_max = max;
70  return true;
71 }
72 
74  eprintf("TODO: import librz/core/cmd_search.c /m implementation into rsearch\n");
75  return false;
76 }
77 
79  s->update = NULL;
80  switch (mode) {
81  case RZ_SEARCH_KEYWORD: s->update = rz_search_mybinparse_update; break;
82  case RZ_SEARCH_REGEXP: s->update = rz_search_regexp_update; break;
83  case RZ_SEARCH_AES: s->update = rz_search_aes_update; break;
84  case RZ_SEARCH_PRIV_KEY: s->update = rz_search_privkey_update; break;
85  case RZ_SEARCH_STRING: s->update = rz_search_strings_update; break;
86  case RZ_SEARCH_DELTAKEY: s->update = rz_search_deltakey_update; break;
87  case RZ_SEARCH_MAGIC: s->update = rz_search_magic_update; break;
88  }
89  if (s->update || mode == RZ_SEARCH_PATTERN) {
90  s->mode = mode;
91  return true;
92  }
93  return false;
94 }
95 
98  RzSearchKeyword *kw;
99  rz_list_foreach (s->kws, iter, kw) {
100  kw->count = 0;
101  kw->last = 0;
102  }
103  return true;
104 }
105 
106 // Returns 2 if search.maxhits is reached, 0 on error, otherwise 1
108  if (s->align && (addr % s->align)) {
109  eprintf("0x%08" PFMT64x " unaligned\n", addr);
110  return 1;
111  }
112  if (!s->contiguous) {
113  if (kw->last && addr == kw->last) {
114  kw->count--;
115  kw->last = s->bckwrds ? addr : addr + kw->keyword_length;
116  eprintf("0x%08" PFMT64x " Sequential hit ignored.\n", addr);
117  return 1;
118  }
119  }
120  // kw->last is used by string search, the right endpoint of last match (forward search), to honor search.overlap
121  kw->last = s->bckwrds ? addr : addr + kw->keyword_length;
122 
123  if (s->callback) {
124  int ret = s->callback(kw, s->user, addr);
125  kw->count++;
126  s->nhits++;
127  // If callback returns 0 or larger than 1, forwards it; otherwise returns 2 if search.maxhits is reached
128  return !ret || ret > 1 ? ret : s->maxhits && s->nhits >= s->maxhits ? 2
129  : 1;
130  }
131  kw->count++;
132  s->nhits++;
134  if (hit) {
135  hit->kw = kw;
136  hit->addr = addr;
137  rz_list_append(s->hits, hit);
138  }
139  return s->maxhits && s->nhits >= s->maxhits ? 2 : 1;
140 }
141 
142 // TODO support search across block boundaries
143 // Supported search variants: backward, overlap
145  RzListIter *iter;
146  int longest = 0, i, j;
147  RzSearchKeyword *kw;
148  RzSearchLeftover *left;
149  const int old_nhits = s->nhits;
150  rz_list_foreach (s->kws, iter, kw) {
151  longest = RZ_MAX(longest, kw->keyword_length + 1);
152  }
153  if (!longest) {
154  return 0;
155  }
156  if (s->data) {
157  left = s->data;
158  if (left->end != from) {
159  left->len = 0;
160  }
161  } else {
162  left = malloc(sizeof(RzSearchLeftover) + (size_t)2 * (longest - 1));
163  if (!left) {
164  return -1;
165  }
166  s->data = left;
167  left->len = 0;
168  if (s->bckwrds) {
169  rz_list_foreach (s->kws, iter, kw) {
170  ut8 *i = kw->bin_keyword, *j = kw->bin_keyword + kw->keyword_length;
171  for (; i < j; i++) {
172  *i = -*i;
173  }
174  }
175  }
176  }
177  if (s->bckwrds) {
178  // XXX Change function signature from const ut8 * to ut8 *
179  ut8 *i = (ut8 *)buf, *j = i + len;
180  while (i < j) {
181  ut8 t = *i;
182  *i++ = *--j;
183  *j = t;
184  }
185  }
186 
187  ut64 len1 = left->len + RZ_MIN(longest - 1, len);
188  memcpy(left->data + left->len, buf, len1 - left->len);
189  rz_list_foreach (s->kws, iter, kw) {
190  ut8 *a = kw->bin_keyword;
191  i = s->overlap || !kw->count ? 0 : s->bckwrds ? kw->last - from < left->len ? from + left->len - kw->last : 0
192  : from - kw->last < left->len ? kw->last + left->len - from
193  : 0;
194  for (; i + kw->keyword_length < len1 && i < left->len; i++) {
195  if ((ut8)(left->data[i + 1] - left->data[i]) == a[0]) {
196  j = 1;
197  while (j < kw->keyword_length && (ut8)(left->data[i + j + 1] - left->data[i + j]) == a[j]) {
198  j++;
199  }
200  if (j == kw->keyword_length) {
201  int t = rz_search_hit_new(s, kw, s->bckwrds ? from - kw->keyword_length - 1 - i + left->len : from + i - left->len);
202  kw->last += s->bckwrds ? 0 : 1;
203  if (!t) {
204  return -1;
205  }
206  if (t > 1) {
207  return s->nhits - old_nhits;
208  }
209  if (!s->overlap) {
210  i += kw->keyword_length;
211  }
212  }
213  }
214  }
215  i = s->overlap || !kw->count ? 0 : s->bckwrds ? from > kw->last ? from - kw->last : 0
216  : from < kw->last ? kw->last - from
217  : 0;
218  for (; i + kw->keyword_length < len; i++) {
219  if ((ut8)(buf[i + 1] - buf[i]) == a[0]) {
220  j = 1;
221  while (j < kw->keyword_length && (ut8)(buf[i + j + 1] - buf[i + j]) == a[j]) {
222  j++;
223  }
224  if (j == kw->keyword_length) {
225  int t = rz_search_hit_new(s, kw, s->bckwrds ? from - kw->keyword_length - 1 - i : from + i);
226  kw->last += s->bckwrds ? 0 : 1;
227  if (!t) {
228  return -1;
229  }
230  if (t > 1) {
231  return s->nhits - old_nhits;
232  }
233  if (!s->overlap) {
234  i += kw->keyword_length;
235  }
236  }
237  }
238  }
239  }
240  if (len < longest - 1) {
241  if (len1 < longest) {
242  left->len = len1;
243  } else {
244  left->len = longest - 1;
245  memmove(left->data, left->data + len1 - longest + 1, longest - 1);
246  }
247  } else {
248  left->len = longest - 1;
249  memcpy(left->data, buf + len - longest + 1, longest - 1);
250  }
251  left->end = s->bckwrds ? from - len : from + len;
252 
253  return s->nhits - old_nhits;
254 }
255 
256 #if 0
257 // Boyer-Moore-Horspool pattern matching
258 // Supported search variants: icase, overlap
259 static int rz_search_horspool(RzSearch *s, RzSearchKeyword *kw, ut64 from, const ut8 *buf, int len) {
260  ut64 bad_char_shift[UT8_MAX + 1];
261  int i, j, m = kw->keyword_length - 1, count = 0;
262  ut8 ch;
263 
264  for (i = 0; i < RZ_ARRAY_SIZE (bad_char_shift); i++) {
265  bad_char_shift[i] = kw->keyword_length;
266  }
267  for (i = 0; i < m; i++) {
268  ch = kw->bin_keyword[i];
269  bad_char_shift[kw->icase ? tolower (ch) : ch] = m - i;
270  }
271 
272  for (i = 0; i + m < len; ) {
273  next:
274  for (j = m; ; j--) {
275  ut8 a = buf[i + j], b = kw->bin_keyword[j];
276  if (kw->icase) {
277  a = tolower (a);
278  b = tolower (b);
279  }
280  if (a != b) break;
281  if (i == 0) {
282  if (!rz_search_hit_new (s, kw, from + i)) {
283  return -1;
284  }
285  kw->count++;
286  count++;
287  if (!s->overlap) {
288  i += kw->keyword_length;
289  goto next;
290  }
291  }
292  }
293  ch = buf[i + m];
294  i += bad_char_shift[kw->icase ? tolower (ch) : ch];
295  }
296 
297  return false;
298 }
299 #endif
300 
301 static bool brute_force_match(RzSearch *s, RzSearchKeyword *kw, const ut8 *buf, int i) {
302  int j = 0;
303  if (s->distance) { // slow path, more work in the loop
304  int dist = 0;
305  if (kw->binmask_length > 0) {
306  for (; j < kw->keyword_length; j++) {
307  int k = j % kw->binmask_length;
308  ut8 a = buf[i + j], b = kw->bin_keyword[j];
309  if (kw->icase) {
310  a = tolower(a);
311  b = tolower(b);
312  }
313  if ((a & kw->bin_binmask[k]) != (b & kw->bin_binmask[k])) {
314  dist++;
315  }
316  }
317  } else if (kw->icase) {
318  for (; j < kw->keyword_length; j++) {
319  if (tolower(buf[i + j]) != tolower(kw->bin_keyword[j])) {
320  dist++;
321  }
322  }
323  } else {
324  for (; j < kw->keyword_length; j++) {
325  if (buf[i + j] != kw->bin_keyword[j]) {
326  dist++;
327  }
328  }
329  }
330  return dist <= s->distance;
331  }
332 
333  if (kw->binmask_length > 0) {
334  for (; j < kw->keyword_length; j++) {
335  int k = j % kw->binmask_length;
336  ut8 a = buf[i + j], b = kw->bin_keyword[j];
337  if (kw->icase) {
338  a = tolower(a);
339  b = tolower(b);
340  }
341  if ((a & kw->bin_binmask[k]) != (b & kw->bin_binmask[k])) {
342  break;
343  }
344  }
345  } else if (kw->icase) {
346  while (j < kw->keyword_length &&
347  tolower(buf[i + j]) == tolower(kw->bin_keyword[j])) {
348  j++;
349  }
350  } else {
351  while (j < kw->keyword_length && buf[i + j] == kw->bin_keyword[j]) {
352  j++;
353  }
354  }
355  return j == kw->keyword_length;
356 }
357 
358 // Supported search variants: backward, binmask, icase, inverse, overlap
360  RzSearchKeyword *kw;
361  RzListIter *iter;
362  RzSearchLeftover *left;
363  int longest = 0, i;
364  const int old_nhits = s->nhits;
365 
366  rz_list_foreach (s->kws, iter, kw) {
367  longest = RZ_MAX(longest, kw->keyword_length);
368  }
369  if (!longest) {
370  return 0;
371  }
372  if (s->data) {
373  left = s->data;
374  if (left->end != from) {
375  left->len = 0;
376  }
377  } else {
378  left = malloc(sizeof(RzSearchLeftover) + (size_t)2 * (longest - 1));
379  if (!left) {
380  return -1;
381  }
382  s->data = left;
383  left->len = 0;
384  }
385  if (s->bckwrds) {
386  // XXX Change function signature from const ut8 * to ut8 *
387  ut8 *i = (ut8 *)buf, *j = i + len;
388  while (i < j) {
389  ut8 t = *i;
390  *i++ = *--j;
391  *j = t;
392  }
393  }
394 
395  ut64 len1 = left->len + RZ_MIN(longest - 1, len);
396  memcpy(left->data + left->len, buf, len1 - left->len);
397  rz_list_foreach (s->kws, iter, kw) {
398  i = s->overlap || !kw->count ? 0 : s->bckwrds ? kw->last - from < left->len ? from + left->len - kw->last : 0
399  : from - kw->last < left->len ? kw->last + left->len - from
400  : 0;
401  for (; i + kw->keyword_length <= len1 && i < left->len; i++) {
402  if (brute_force_match(s, kw, left->data, i) != s->inverse) {
403  int t = rz_search_hit_new(s, kw, s->bckwrds ? from - kw->keyword_length - i + left->len : from + i - left->len);
404  if (!t) {
405  return -1;
406  }
407  if (t > 1) {
408  return s->nhits - old_nhits;
409  }
410  if (!s->overlap) {
411  i += kw->keyword_length - 1;
412  }
413  }
414  }
415  i = s->overlap || !kw->count ? 0 : s->bckwrds ? from > kw->last ? from - kw->last : 0
416  : from < kw->last ? kw->last - from
417  : 0;
418  for (; i + kw->keyword_length <= len; i++) {
419  if (brute_force_match(s, kw, buf, i) != s->inverse) {
420  int t = rz_search_hit_new(s, kw, s->bckwrds ? from - kw->keyword_length - i : from + i);
421  if (!t) {
422  return -1;
423  }
424  if (t > 1) {
425  return s->nhits - old_nhits;
426  }
427  if (!s->overlap) {
428  i += kw->keyword_length - 1;
429  }
430  }
431  }
432  }
433  if (len < longest - 1) {
434  if (len1 < longest) {
435  left->len = len1;
436  } else {
437  left->len = longest - 1;
438  memmove(left->data, left->data + len1 - longest + 1, longest - 1);
439  }
440  } else {
441  left->len = longest - 1;
442  memcpy(left->data, buf + len - longest + 1, longest - 1);
443  }
444  left->end = s->bckwrds ? from - len : from + len;
445 
446  return s->nhits - old_nhits;
447 }
448 
450  if (dist >= RZ_SEARCH_DISTANCE_MAX) {
451  eprintf("Invalid distance\n");
452  s->distance = 0;
453  } else {
454  s->distance = (dist > 0) ? dist : 0;
455  }
456 }
457 
458 // deprecate? or standarize with ->align ??
460  s->pattern_size = size;
461 }
462 
463 RZ_API void rz_search_set_callback(RzSearch *s, RzSearchCallback(callback), void *user) {
464  s->callback = callback;
465  s->user = user;
466 }
467 
468 // backward search: from points to the right endpoint
469 // forward search: from points to the left endpoint
471  int ret = -1;
472  if (s->update) {
473  if (s->maxhits && s->nhits >= s->maxhits) {
474  return 0;
475  }
476  ret = s->update(s, from, buf, len);
477  } else {
478  eprintf("rz_search_update: No search method defined\n");
479  }
480  return ret;
481 }
482 
484  return rz_search_update(s, from, buf, len);
485 }
486 
487 static int listcb(RzSearchKeyword *k, void *user, ut64 addr) {
489  if (!hit) {
490  return 0;
491  }
492  hit->kw = k;
493  hit->addr = addr;
494  rz_list_append(user, hit);
495  return 1;
496 }
497 
499  RzList *ret = rz_list_new();
502  return ret;
503 }
504 
505 /* --- keywords --- */
507  if (!kw || !kw->keyword_length) {
508  return false;
509  }
510  kw->kwidx = s->n_kws++;
511  rz_list_append(s->kws, kw);
512  return true;
513 }
514 
515 // Reverse bin_keyword & bin_binmask for backward search
517  RzListIter *iter;
518  RzSearchKeyword *kw;
519  // Precondition: !kw->binmask_length || kw->keyword_length % kw->binmask_length == 0
520  rz_list_foreach (s->kws, iter, kw) {
521  ut8 *i = kw->bin_keyword, *j = kw->bin_keyword + kw->keyword_length;
522  while (i < j) {
523  ut8 t = *i;
524  *i++ = *--j;
525  *j = t;
526  }
527  i = kw->bin_binmask;
528  j = kw->bin_binmask + kw->binmask_length;
529  while (i < j) {
530  ut8 t = *i;
531  *i++ = *--j;
532  *j = t;
533  }
534  }
535 }
536 
538  s->nhits = 0;
539  if (!rz_search_set_mode(s, mode)) {
540  eprintf("Cannot init search for mode %d\n", mode);
541  }
542 }
543 
545  rz_list_purge(s->kws);
546  rz_list_purge(s->hits);
547  RZ_FREE(s->data);
548 }
size_t len
Definition: 6502dis.c:15
RZ_API int rz_search_aes_update(RzSearch *s, ut64 from, const ut8 *buf, int len)
Definition: aes-find.c:43
lzma_index ** i
Definition: index.h:629
#define RZ_API
#define NULL
Definition: cris-opc.c:27
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void count
Definition: sflib.h:98
uint32_t ut32
const char * k
Definition: dsignal.c:11
int max
Definition: enough.c:225
RZ_API void Ht_() free(HtName_(Ht) *ht)
Definition: ht_inc.c:130
voidpf void uLong size
Definition: ioapi.h:138
const char int mode
Definition: ioapi.h:137
voidpf void * buf
Definition: ioapi.h:138
RZ_API void rz_search_keyword_free(RzSearchKeyword *kw)
Definition: keyword.c:49
uint8_t ut8
Definition: lh5801.h:11
memcpy(mem, inblock.get(), min(CONTAINING_RECORD(inblock.get(), MEMBLOCK, data) ->size, size))
static int hit(RzSearchKeyword *kw, void *user, ut64 addr)
Definition: rz-find.c:58
RZ_API RZ_OWN RzList * rz_list_newf(RzListFree f)
Returns a new initialized RzList pointer and sets the free method.
Definition: list.c:248
RZ_API RZ_OWN RzList * rz_list_new(void)
Returns a new initialized RzList pointer (free method is not initialized)
Definition: list.c:235
RZ_API RZ_BORROW RzListIter * rz_list_append(RZ_NONNULL RzList *list, void *data)
Appends at the end of the list a new element.
Definition: list.c:288
RZ_API void rz_list_free(RZ_NONNULL RzList *list)
Empties the list and frees the list pointer.
Definition: list.c:137
RZ_API void rz_list_purge(RZ_NONNULL RzList *list)
Empties the list without freeing the list pointer.
Definition: list.c:120
void * malloc(size_t size)
Definition: malloc.c:123
RZ_API int rz_search_privkey_update(RzSearch *s, ut64 from, const ut8 *buf, int len)
Definition: privkey-find.c:71
#define min(a, b)
Definition: qsort.h:83
RZ_API int rz_search_regexp_update(RzSearch *s, ut64 from, const ut8 *buf, int len)
Definition: regexp.c:8
#define eprintf(x, y...)
Definition: rlcc.c:7
static RzSocket * s
Definition: rtr.c:28
void(* RzListFree)(void *ptr)
Definition: rz_list.h:11
@ RZ_SEARCH_REGEXP
Definition: rz_search.h:18
@ RZ_SEARCH_STRING
Definition: rz_search.h:20
@ RZ_SEARCH_AES
Definition: rz_search.h:22
@ RZ_SEARCH_PATTERN
Definition: rz_search.h:19
@ RZ_SEARCH_MAGIC
Definition: rz_search.h:25
@ RZ_SEARCH_DELTAKEY
Definition: rz_search.h:24
@ RZ_SEARCH_KEYWORD
Definition: rz_search.h:17
@ RZ_SEARCH_PRIV_KEY
Definition: rz_search.h:23
int(* RzSearchCallback)(RzSearchKeyword *kw, void *user, ut64 where)
Definition: rz_search.h:52
#define RZ_SEARCH_DISTANCE_MAX
Definition: rz_search.h:29
#define RZ_NEW0(x)
Definition: rz_types.h:284
#define RZ_ARRAY_SIZE(x)
Definition: rz_types.h:300
#define RZ_FREE(x)
Definition: rz_types.h:369
#define PFMT64x
Definition: rz_types.h:393
#define RZ_MIN(x, y)
#define RZ_MAX(x, y)
#define UT8_MAX
#define tolower(c)
Definition: safe-ctype.h:149
RZ_API void rz_search_string_prepare_backward(RzSearch *s)
Definition: search.c:516
RZ_API RzList * rz_search_find(RzSearch *s, ut64 addr, const ut8 *buf, int len)
Definition: search.c:498
RZ_API int rz_search_update(RzSearch *s, ut64 from, const ut8 *buf, long len)
Definition: search.c:470
RZ_API void rz_search_set_callback(RzSearch *s, RzSearchCallback(callback), void *user)
Definition: search.c:463
RZ_LIB_VERSION(rz_search)
RZ_API int rz_search_begin(RzSearch *s)
Definition: search.c:96
static bool brute_force_match(RzSearch *s, RzSearchKeyword *kw, const ut8 *buf, int i)
Definition: search.c:301
static int listcb(RzSearchKeyword *k, void *user, ut64 addr)
Definition: search.c:487
RZ_API int rz_search_set_mode(RzSearch *s, int mode)
Definition: search.c:78
RZ_API void rz_search_kw_reset(RzSearch *s)
Definition: search.c:544
RZ_API int rz_search_update_i(RzSearch *s, ut64 from, const ut8 *buf, long len)
Definition: search.c:483
RZ_API RzSearch * rz_search_new(int mode)
Definition: search.c:19
RZ_API int rz_search_hit_new(RzSearch *s, RzSearchKeyword *kw, ut64 addr)
Definition: search.c:107
RZ_API int rz_search_deltakey_update(RzSearch *s, ut64 from, const ut8 *buf, int len)
Definition: search.c:144
RZ_API int rz_search_mybinparse_update(RzSearch *s, ut64 from, const ut8 *buf, int len)
Definition: search.c:359
RZ_API int rz_search_set_string_limits(RzSearch *s, ut32 min, ut32 max)
Definition: search.c:64
RZ_API RzSearch * rz_search_free(RzSearch *s)
Definition: search.c:52
RZ_API void rz_search_reset(RzSearch *s, int mode)
Definition: search.c:537
RZ_API void rz_search_pattern_size(RzSearch *s, int size)
Definition: search.c:459
RZ_API int rz_search_kw_add(RzSearch *s, RzSearchKeyword *kw)
Definition: search.c:506
RZ_API void rz_search_set_distance(RzSearch *s, int dist)
Definition: search.c:449
RZ_API int rz_search_magic_update(RzSearch *s, ut64 from, const ut8 *buf, int len)
Definition: search.c:73
static struct sockaddr static addrlen static backlog const void static flags void struct sockaddr from
Definition: sfsocketcall.h:123
#define b(i)
Definition: sha256.c:42
#define a(i)
Definition: sha256.c:41
RZ_API int rz_search_strings_update(RzSearch *s, ut64 from, const ut8 *buf, int len)
Definition: strings.c:62
ut8 data[]
Definition: search.c:16
ut64(WINAPI *w32_GetEnabledXStateFeatures)()
static int addr
Definition: z80asm.c:58