Rizin
unix-like reverse engineering framework and cli tools
str_search.c
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: 2021 borzacchiello <lucaborza@gmail.com>
2 // SPDX-License-Identifier: LGPL-3.0-only
3 
5 #include <rz_util/rz_utf8.h>
6 #include <rz_util/rz_utf16.h>
7 #include <rz_util/rz_utf32.h>
8 #include <rz_util/rz_ebcdic.h>
9 
10 typedef enum {
15 
16 typedef struct {
17  int num_ascii;
19  int num_chars;
21 
22 // clang-format off
// Per-byte character class table (values 0..6), one entry per byte value 0x00..0xff.
// Reading the rows against ASCII/Latin-1 code points:
//   0 = control bytes (0x00-0x1f except tab/LF/CR, 0x7f, 0x80-0x9f)
//   1 = whitespace (tab, LF, CR, space, 0xa0)
//   2 = digits '0'-'9'
//   3 = 'A'-'Z'
//   4 = 'a'-'z'
//   5 = most bytes in 0xc0-0xff
//   6 = every other byte (punctuation and symbols)
// NOTE(review): presumably used by score() to turn runes into the class pair
// passed to compute_index(); the line doing that is not visible in this listing.
23 static const ut8 LATIN1_CLASS[256] = {
24  0,0,0,0,0,0,0,0, 0,1,1,0,0,1,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
25  1,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, 2,2,2,2,2,2,2,2, 2,2,6,6,6,6,6,6,
26  6,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, 3,3,3,6,6,6,6,6,
27  6,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4, 4,4,4,6,6,6,6,0,
28 
29  0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
30  1,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6,
31  5,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,6,
32  5,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,6, 5,5,5,5,5,5,5,5,
33 };
// 7x7 weight matrix stored row-major and addressed through compute_index(x, y) = x * 7 + y.
// score() adds the selected entry to its running total; an entry of 0 is
// treated there as a -100 penalty instead. Row 0 and column 0 are all zero, so any
// pair involving class 0 is penalized.
34 static const ut8 LATIN1[49] = {
35  0,0,0,0,0,0,0,
36  0,1,6,6,6,1,3,
37  0,6,12,6,6,1,3,
38  0,6,6,18,9,1,3,
39  0,6,6,9,18,1,3,
40  0,1,1,1,1,1,1,
41  0,3,3,3,3,1,3,
42 };
43 // clang-format on
44 
45 static inline int compute_index(ut8 x, ut8 y) {
46  return (x * 7 + y);
47 }
48 
// Scores a sequence of runes by walking every pair of adjacent runes
// (src[0], src[1]) and accumulating a weight per pair.
//
// buff  runes to score.
// len   number of runes in buff; with len <= 1 the loop body never runs and 0 is returned.
//
// Returns the accumulated score (kept in an int, widened to st64 on return).
// NOTE(review): with len == 0 the loop bound is computed as buff - 1, which points
// before the array even though it is never dereferenced.
49 static st64 score(RzRune *buff, const int len) {
50  int score = 0;
51  for (RzRune *src = buff, *end = buff + len - 1; src < end; ++src) {
52  RzRune b1 = src[0], b2 = src[1];
// NOTE(review): c1 and c2 used below are defined on a line that is missing from
// this listing; presumably they are the LATIN1_CLASS classes of b1 and b2 - confirm.
// A first rune outside the 7-bit range costs 6 points.
54  if (b1 > 0x7f) {
55  score -= 6;
56  }
57 
// Look up the weight of this class pair in the 7x7 LATIN1 matrix.
58  ut8 i = compute_index(c1, c2);
59  assert(i < 49);
60  ut8 y = LATIN1[i];
// A zero weight is turned into a heavy penalty; any other weight is added as is.
61  if (y == 0) {
62  score += -100;
63  } else {
64  score += y;
65  }
66  }
67  return score;
68 }
69 
74  if (!str) {
75  return;
76  }
77  free(str->string);
78  free(str);
79 }
80 
/**
 * Tells whether a character is one of the control characters that have a C
 * escape sequence (\b \v \f \n \r \t \a ESC) or a backslash.
 *
 * \param ch Character to test.
 * \return true when ch belongs to the set, false otherwise (including '\0').
 */
static inline bool is_c_escape_sequence(char ch) {
	// strchr() treats the terminating NUL as part of the searched string, so
	// looking up '\0' would return a non-NULL pointer; reject it explicitly.
	if (ch == '\0') {
		return false;
	}
	return strchr("\b\v\f\n\r\t\a\033\\", ch) != NULL;
}
84 
86  UTF8StringInfo res = {
87  .num_ascii = 0,
88  .num_ascii_extended = 0,
89  .num_chars = 0
90  };
91 
92  const ut8 *str_ptr = str;
93  const ut8 *str_end = str + size;
94  RzRune ch = 0;
95  while (str_ptr < str_end) {
96  int ch_bytes = rz_utf8_decode(str_ptr, str_end - str_ptr, &ch);
97  if (!ch_bytes) {
98  break;
99  }
100 
101  res.num_chars += 1;
102  if (ch < 0x80u) {
103  res.num_ascii += 1;
104  }
105  if (ch < 0x100u) {
106  res.num_ascii_extended += 1;
107  }
108 
109  str_ptr += ch_bytes;
110  }
111 
112  return res;
113 }
114 
116 
117  switch (str_type) {
118  case RZ_STRING_ENC_8BIT: {
119  for (int i = 0; i < size; i++) {
120  char ch = str[i];
121  if (!is_c_escape_sequence(ch)) {
122  if (!IS_PRINTABLE(str[i])) {
123  return SKIP_STRING;
124  }
125  }
126  }
127  break;
128  }
129  case RZ_STRING_ENC_UTF8:
133  case RZ_STRING_ENC_UTF32BE: {
134  int num_blocks = 0;
135  int *block_list = rz_utf_block_list((const ut8 *)str, size - 1, NULL);
136  if (block_list) {
137  for (int i = 0; block_list[i] != -1; i++) {
138  num_blocks++;
139  }
140  }
141  free(block_list);
142 
144  if (str_info.num_ascii_extended == str_info.num_chars) {
145  return STRING_OK;
146  }
147 
148  int expected_ascii = num_blocks ? str_info.num_chars / num_blocks : 0;
149  if (opt->check_ascii_freq && str_info.num_ascii > expected_ascii) {
150  return RETRY_ASCII;
151  }
152 
153  if (num_blocks > opt->max_uni_blocks) {
154  return SKIP_STRING;
155  }
156  break;
157  }
158  default:
159  break;
160  }
161 
162  return STRING_OK;
163 }
164 
// Returns how many bytes immediately before a string look like a byte-order mark
// for the given encoding, so the caller can extend the string backwards over it.
//
// str_type   encoding of the detected string; selects which marker is looked for.
// buf        buffer containing the string.
// str_start  offset of the string's first byte inside buf.
//
// Returns 2 or 4 when the matching marker precedes the string, 0 otherwise.
// NOTE(review): the four `case` labels are missing from this listing; judging by the
// byte patterns they are presumably UTF16LE, UTF16BE, UTF32LE and UTF32BE, in that
// order - confirm against the full file.
165 static ut64 adjust_offset(RzStrEnc str_type, const ut8 *buf, const ut64 str_start) {
166  switch (str_type) {
// Two-byte marker ff fe: only checked when at least two bytes precede the string.
168  if (str_start > 1) {
169  const ut8 *p = buf + str_start - 2;
170  if (p[0] == 0xff && p[1] == 0xfe) {
171  return 2; // \xff\xfe
172  }
173  }
174  break;
// Two-byte marker fe ff.
176  if (str_start > 1) {
177  const ut8 *p = buf + str_start - 2;
178  if (p[0] == 0xfe && p[1] == 0xff) {
179  return 2; // \xfe\xff
180  }
181  }
182  break;
// Four-byte marker ff fe 00 00: only checked when at least four bytes precede the string.
184  if (str_start > 3) {
185  const ut8 *p = buf + str_start - 4;
186  if (p[0] == 0xff && p[1] == 0xfe && !p[2] && !p[3]) {
187  return 4; // \xff\xfe\x00\x00
188  }
189  }
190  break;
// Four-byte marker 00 00 fe ff.
192  if (str_start > 3) {
193  const ut8 *p = buf + str_start - 4;
194  if (!p[0] && !p[1] && p[2] == 0xfe && p[3] == 0xff) {
195  return 4; // \x00\x00\xfe\xff
196  }
197  }
198  break;
199  default:
200  break;
201  }
202 
// No marker found (or an encoding without one).
203  return 0;
204 }
205 
// Tries to extract one string of encoding `str_type` starting at address `needle`,
// re-encoding every accepted rune into `strbuf` with rz_utf8_encode().
//
// buf         bytes being scanned; buf[0] corresponds to address `from`
//             (all reads are written as `buf + needle - from`).
// from, to    address bounds; decoding stops once `needle` reaches `to`.
// needle      address at which decoding starts.
// str_type    encoding used for decoding. Encodings not listed explicitly fall back
//             to rz_utf8_decode(), and the type becomes RZ_STRING_ENC_UTF8 as soon
//             as a sequence longer than one byte is decoded.
// ascii_only  when true, a rune above 0x7f ends the string.
// opt         scan options; buf_size and min_str_length are read here.
// strbuf      scratch output buffer (the caller in this file allocates
//             opt->buf_size bytes for it).
//
// Returns a RzDetectedString describing the string, or NULL when fewer than
// opt->min_str_length runes were collected, when reduce_false_positives() says
// SKIP_STRING, or when `ds` is NULL.
206 static RzDetectedString *process_one_string(const ut8 *buf, const ut64 from, ut64 needle, const ut64 to,
207  RzStrEnc str_type, bool ascii_only, const RzUtilStrScanOptions *opt, ut8 *strbuf) {
208 
210 
// Remember where the string starts; needle is advanced while decoding.
211  ut64 str_addr = needle;
212  int rc = 0, i = 0, runes = 0;
213 
// Inside the loop rc first holds the number of input bytes consumed by the decoder,
// then is overwritten with the value returned by rz_utf8_encode(); that second value
// is what advances the output index i.
// NOTE(review): the `- 4` presumably reserves room for one maximal UTF-8 sequence;
// opt->buf_size is a size_t, so a buf_size below 4 would wrap around - confirm callers.
214  /* Eat a whole C string */
215  for (i = 0; i < opt->buf_size - 4 && needle < to; i += rc) {
216  RzRune r = 0;
217 
// UTF-32 decoders: any non-zero result is normalized to 4 consumed bytes.
// UTF-16 decoders: a result of 1 is widened to 2 consumed bytes.
218  if (str_type == RZ_STRING_ENC_UTF32LE) {
219  rc = rz_utf32le_decode(buf + needle - from, to - needle, &r);
220  if (rc) {
221  rc = 4;
222  }
223  } else if (str_type == RZ_STRING_ENC_UTF16LE) {
224  rc = rz_utf16le_decode(buf + needle - from, to - needle, &r);
225  if (rc == 1) {
226  rc = 2;
227  }
228  } else if (str_type == RZ_STRING_ENC_UTF32BE) {
229  rc = rz_utf32be_decode(buf + needle - from, to - needle, &r);
230  if (rc) {
231  rc = 4;
232  }
233  } else if (str_type == RZ_STRING_ENC_UTF16BE) {
234  rc = rz_utf16be_decode(buf + needle - from, to - needle, &r);
235  if (rc == 1) {
236  rc = 2;
237  }
// EBCDIC family: each converter is handed a single input byte.
238  } else if (str_type == RZ_STRING_ENC_IBM037) {
239  rc = rz_str_ibm037_to_unicode(*(buf + needle - from), &r);
240  } else if (str_type == RZ_STRING_ENC_IBM290) {
241  rc = rz_str_ibm290_to_unicode(*(buf + needle - from), &r);
242  } else if (str_type == RZ_STRING_ENC_EBCDIC_ES) {
243  rc = rz_str_ebcdic_es_to_unicode(*(buf + needle - from), &r);
244  } else if (str_type == RZ_STRING_ENC_EBCDIC_UK) {
245  rc = rz_str_ebcdic_uk_to_unicode(*(buf + needle - from), &r);
246  } else if (str_type == RZ_STRING_ENC_EBCDIC_US) {
247  rc = rz_str_ebcdic_us_to_unicode(*(buf + needle - from), &r);
248  } else {
// Fallback: decode as UTF-8 and promote the string type once a multi-byte
// sequence shows up.
249  rc = rz_utf8_decode(buf + needle - from, to - needle, &r);
250  if (rc > 1) {
251  str_type = RZ_STRING_ENC_UTF8;
252  }
253  }
254 
// A failed decode, or a non-ASCII rune in ascii_only mode, ends the string;
// the offending byte is stepped over.
255  /* Invalid sequence detected */
256  if (!rc || (ascii_only && r > 0x7f)) {
257  needle++;
258  break;
259  }
260 
261  needle += rc;
262 
// Printable runes other than a backslash are copied to the output as UTF-8.
263  if (rz_rune_is_printable(r) && r != '\\') {
// NOTE(review): for UTF-32 input the rune 0xff is replaced by 0 before encoding;
// the reason is not evident from this listing.
264  if (str_type == RZ_STRING_ENC_UTF32LE || str_type == RZ_STRING_ENC_UTF32BE) {
265  if (r == 0xff) {
266  r = 0;
267  }
268  }
269  rc = rz_utf8_encode(strbuf + i, r);
270  runes++;
// Escape-sequence characters (and the backslash excluded above) are also copied,
// but only while at least 32 bytes of headroom remain in the output buffer.
// NOTE(review): every character accepted by is_c_escape_sequence() is <= '\\' (92),
// so the `r < 93` test looks redundant here.
271  } else if (r && r < 0x100 && is_c_escape_sequence((char)r)) {
272  if ((i + 32) < opt->buf_size && r < 93) {
273  rc = rz_utf8_encode(strbuf + i, r);
274  } else {
275  // string too long
276  break;
277  }
278  runes++;
279  } else {
280  /* \0 marks the end of C-strings */
281  break;
282  }
283  }
284 
// i is the number of bytes written to strbuf.
285  int strbuf_size = i;
286  if (runes >= opt->min_str_length) {
287  FalsePositiveResult false_positive_result = reduce_false_positives(opt, strbuf, strbuf_size, str_type);
288  if (false_positive_result == SKIP_STRING) {
289  return NULL;
290  } else if (false_positive_result == RETRY_ASCII) {
// Start over from the original address, this time stopping at the first non-ASCII rune.
291  return process_one_string(buf, from, str_addr, to, str_type, true, opt, strbuf);
292  }
293 
// NOTE(review): the statement that creates `ds` is missing from this listing.
295  if (!ds) {
296  return NULL;
297  }
298  ds->type = str_type;
299  ds->length = runes;
300  ds->size = needle - str_addr;
301  ds->addr = str_addr;
302 
// Extend the reported range backwards over a preceding byte-order mark, if any.
303  ut64 off_adj = adjust_offset(str_type, buf, ds->addr - from);
304  ds->addr -= off_adj;
305  ds->size += off_adj;
306 
// NOTE(review): the result of rz_str_ndup() is not checked before returning.
307  ds->string = rz_str_ndup((const char *)strbuf, strbuf_size);
308  return ds;
309  }
310 
// Too few runes were collected to count as a string.
311  return NULL;
312 }
313 
314 static inline bool can_be_utf16_le(const ut8 *buf, ut64 size) {
315  int rc = rz_utf8_decode(buf, size, NULL);
316  if (!rc || (size - rc) < 5) {
317  return false;
318  }
319  char *w = (char *)buf + rc;
320  return !w[0] && w[1] && !w[2] && w[3] && !w[4];
321 }
322 
323 static inline bool can_be_utf16_be(const ut8 *buf, ut64 size) {
324  if (size < 7) {
325  return false;
326  }
327  return !buf[0] && buf[1] && !buf[2] && buf[3] && !buf[4] && buf[5] && !buf[6];
328 }
329 
330 static inline bool can_be_utf32_le(const ut8 *buf, ut64 size) {
331  int rc = rz_utf8_decode(buf, size, NULL);
332  if (!rc || (size - rc) < 5) {
333  return false;
334  }
335  char *w = (char *)buf + rc;
336  return !w[0] && !w[1] && !w[2] && w[3] && !w[4];
337 }
338 
339 static inline bool can_be_utf32_be(const ut8 *buf, ut64 size) {
340  if (size < 7) {
341  return false;
342  }
343  return !buf[0] && !buf[1] && !buf[2] && buf[3] && !buf[4] && !buf[5] && !buf[6];
344 }
345 
346 static inline bool can_be_ebcdic(const ut8 *buf, ut64 size) {
347  return buf[0] < 0x20 || buf[0] > 0x3f;
348 }
349 
363  const ut64 from, const ut64 to, RzStrEnc type) {
364  rz_return_val_if_fail(opt && list && buf, -1);
365 
366  if (from == to) {
367  return 0;
368  } else if (from > to) {
369  RZ_LOG_ERROR("rz_scan_strings: Invalid range to find strings 0x%" PFMT64x " .. 0x%" PFMT64x "\n", from, to);
370  return -1;
371  }
372 
373  ut64 needle = 0;
374  int count = 0;
375  RzStrEnc str_type = type;
376 
377  ut8 *strbuf = calloc(opt->buf_size, 1);
378  if (!strbuf) {
379  return -1;
380  }
381 
382  needle = from;
383  const ut8 *ptr = NULL;
384  ut64 size = 0;
385  int skip_ibm037 = 0;
386  while (needle < to) {
387  ptr = buf + needle - from;
388  size = to - needle;
389  --skip_ibm037;
390  if (type == RZ_STRING_ENC_GUESS) {
391  if (can_be_utf32_le(ptr, size)) {
392  str_type = RZ_STRING_ENC_UTF32LE;
393  } else if (can_be_utf16_le(ptr, size)) {
394  str_type = RZ_STRING_ENC_UTF16LE;
395  } else if (can_be_utf32_be(ptr, size)) {
396  if (to - needle > 3 && can_be_utf32_le(ptr + 3, size - 3)) {
397  // The string can be either utf32-le or utf32-be
398  RzDetectedString *ds_le = process_one_string(buf, from, needle + 3, to, RZ_STRING_ENC_UTF32LE, false, opt, strbuf);
399  RzDetectedString *ds_be = process_one_string(buf, from, needle, to, RZ_STRING_ENC_UTF32BE, false, opt, strbuf);
400 
401  RzDetectedString *to_add = NULL;
402  RzDetectedString *to_delete = NULL;
403  ut64 needle_offset = 0;
404 
405  if (!ds_le && !ds_be) {
406  needle++;
407  continue;
408  } else if (!ds_be) {
409  to_add = ds_le;
410  needle_offset = ds_le->size + 3;
411  } else if (!ds_le) {
412  to_add = ds_be;
413  needle_offset = ds_be->size;
414  } else if (!opt->prefer_big_endian) {
415  to_add = ds_le;
416  to_delete = ds_be;
417  needle_offset = ds_le->size + 3;
418  } else {
419  to_add = ds_be;
420  to_delete = ds_le;
421  needle_offset = ds_le->size;
422  }
423 
424  count++;
425  needle += needle_offset;
426  rz_list_append(list, to_add);
427  rz_detected_string_free(to_delete);
428  continue;
429  }
430  str_type = RZ_STRING_ENC_UTF32BE;
431  } else if (can_be_utf16_be(ptr, size)) {
432  if (to - needle > 1 && can_be_utf16_le(ptr + 1, size - 1)) {
433  // The string can be either utf16-le or utf16-be
434  RzDetectedString *ds_le = process_one_string(buf, from, needle + 1, to, RZ_STRING_ENC_UTF16LE, false, opt, strbuf);
435  RzDetectedString *ds_be = process_one_string(buf, from, needle, to, RZ_STRING_ENC_UTF16BE, false, opt, strbuf);
436 
437  RzDetectedString *to_add = NULL;
438  RzDetectedString *to_delete = NULL;
439  ut64 needle_offset = 0;
440 
441  if (!ds_le && !ds_be) {
442  needle++;
443  continue;
444  } else if (!ds_be) {
445  to_add = ds_le;
446  needle_offset = ds_le->size + 1;
447  } else if (!ds_le) {
448  to_add = ds_be;
449  needle_offset = ds_be->size;
450  } else if (!opt->prefer_big_endian) {
451  to_add = ds_le;
452  to_delete = ds_be;
453  needle_offset = ds_le->size + 1;
454  } else {
455  to_add = ds_be;
456  to_delete = ds_le;
457  needle_offset = ds_le->size;
458  }
459 
460  count++;
461  needle += needle_offset;
462  rz_list_append(list, to_add);
463  rz_detected_string_free(to_delete);
464  continue;
465  }
466  str_type = RZ_STRING_ENC_UTF16BE;
467  } else if (can_be_ebcdic(ptr, size) && skip_ibm037 < 0) {
468  ut8 sz = RZ_MIN(size, 15);
469  RzRune runes[15] = { 0 };
470  int i = 0;
471  for (; i < sz; i++) {
472  rz_str_ibm037_to_unicode(ptr[i], &runes[i]);
473  if (!rz_rune_is_printable(runes[i])) {
474  break;
475  }
476  }
477  int s = score(runes, i);
478  if (s >= 36) {
479  str_type = RZ_STRING_ENC_IBM037;
480  } else {
481  skip_ibm037 = i + 1;
482  continue;
483  }
484  } else {
485  int rc = rz_utf8_decode(ptr, size, NULL);
486  if (!rc) {
487  needle++;
488  continue;
489  } else {
490  str_type = RZ_STRING_ENC_8BIT;
491  }
492  }
493  } else if (type == RZ_STRING_ENC_UTF8) {
494  str_type = RZ_STRING_ENC_8BIT; // initial assumption
495  }
496 
497  RzDetectedString *ds = process_one_string(buf, from, needle, to, str_type, false, opt, strbuf);
498  if (!ds) {
499  needle++;
500  continue;
501  }
502  if (str_type == RZ_STRING_ENC_IBM037) {
503  skip_ibm037 = 0;
504  }
505 
506  count++;
507  rz_list_append(list, ds);
508  needle += ds->size;
509  }
510  free(strbuf);
511  return count;
512 }
526  const ut64 from, const ut64 to, RzStrEnc type) {
527  rz_return_val_if_fail(opt && list && buf_to_scan, -1);
528 
529  if (from == to) {
530  return 0;
531  } else if (from > to) {
532  RZ_LOG_ERROR("rz_scan_strings: Invalid range to find strings 0x%" PFMT64x " .. 0x%" PFMT64x "\n", from, to);
533  return -1;
534  } else if (type == RZ_STRING_ENC_MUTF8 || type == RZ_STRING_ENC_BASE64) {
535  RZ_LOG_ERROR("rz_scan_strings: %s search type is not supported.\n", rz_str_enc_as_string(type));
536  return -1;
537  }
538 
539  ut64 len = to - from;
540  ut8 *buf = calloc(len, 1);
541  if (!buf) {
542  return -1;
543  }
544 
545  rz_buf_read_at(buf_to_scan, from, buf, len);
546 
547  int count = rz_scan_strings_raw(buf, list, opt, from, to, type);
548 
549  free(buf);
550  return count;
551 }
size_t len
Definition: 6502dis.c:15
lzma_index ** i
Definition: index.h:629
lzma_index * src
Definition: index.h:567
lsl lsr asr ror lsl lsr asr ror lsl lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror c1
lsl lsr asr ror lsl lsr asr ror lsl lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror c2
#define RZ_API
#define NULL
Definition: cris-opc.c:27
#define r
Definition: crypto_rc6.c:12
#define w
Definition: crypto_rc6.c:13
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void count
Definition: sflib.h:98
RZ_API void Ht_() free(HtName_(Ht) *ht)
Definition: ht_inc.c:130
voidpf void uLong size
Definition: ioapi.h:138
voidpf void * buf
Definition: ioapi.h:138
uint8_t ut8
Definition: lh5801.h:11
void * p
Definition: libc.cpp:67
static void list(RzEgg *egg)
Definition: rz-gg.c:52
RZ_API RZ_BORROW RzListIter * rz_list_append(RZ_NONNULL RzList *list, void *data)
Appends at the end of the list a new element.
Definition: list.c:288
void * calloc(size_t number, size_t size)
Definition: malloc.c:102
assert(limit<=UINT32_MAX/2)
int x
Definition: mipsasm.c:20
int type
Definition: mipsasm.c:17
static RzSocket * s
Definition: rtr.c:28
#define rz_return_val_if_fail(expr, val)
Definition: rz_assert.h:108
RZ_API st64 rz_buf_read_at(RZ_NONNULL RzBuffer *b, ut64 addr, RZ_NONNULL RZ_OUT ut8 *buf, ut64 len)
Read len bytes of the buffer at the specified address.
Definition: buf.c:1136
RZ_API int rz_str_ibm037_to_unicode(const ut8 src, RZ_NONNULL RZ_OUT RzRune *dst)
Convert an ibm037 char into an unicode RzRune.
Definition: ebcdic.c:457
RZ_API int rz_str_ibm290_to_unicode(const ut8 src, RZ_NONNULL RZ_OUT RzRune *dst)
Convert an ibm290 char into an unicode RzRune.
Definition: ebcdic.c:516
RZ_API int rz_str_ebcdic_es_to_unicode(const ut8 src, RZ_NONNULL RZ_OUT RzRune *dst)
Convert an ebcdic_es char into an unicode RzRune.
Definition: ebcdic.c:650
RZ_API int rz_str_ebcdic_us_to_unicode(const ut8 src, RZ_NONNULL RZ_OUT RzRune *dst)
Convert an ebcdic_us char into an unicode RzRune.
Definition: ebcdic.c:607
RZ_API int rz_str_ebcdic_uk_to_unicode(const ut8 src, RZ_NONNULL RZ_OUT RzRune *dst)
Convert an ebcdic_uk char into an unicode RzRune.
Definition: ebcdic.c:563
#define RZ_LOG_ERROR(fmtstr,...)
Definition: rz_log.h:58
RZ_API char * rz_str_ndup(RZ_NULLABLE const char *ptr, int len)
Create new copy of string ptr limited to size len.
Definition: str.c:1006
RZ_API const char * rz_str_enc_as_string(RzStrEnc enc)
Definition: str.c:44
RzStrEnc
Definition: rz_str.h:19
@ RZ_STRING_ENC_UTF32LE
Definition: rz_str.h:24
@ RZ_STRING_ENC_8BIT
Definition: rz_str.h:20
@ RZ_STRING_ENC_BASE64
Definition: rz_str.h:27
@ RZ_STRING_ENC_EBCDIC_US
Definition: rz_str.h:31
@ RZ_STRING_ENC_MUTF8
Definition: rz_str.h:22
@ RZ_STRING_ENC_UTF32BE
Definition: rz_str.h:26
@ RZ_STRING_ENC_UTF8
Definition: rz_str.h:21
@ RZ_STRING_ENC_GUESS
Definition: rz_str.h:33
@ RZ_STRING_ENC_UTF16LE
Definition: rz_str.h:23
@ RZ_STRING_ENC_UTF16BE
Definition: rz_str.h:25
@ RZ_STRING_ENC_EBCDIC_UK
Definition: rz_str.h:30
@ RZ_STRING_ENC_IBM037
Definition: rz_str.h:28
@ RZ_STRING_ENC_IBM290
Definition: rz_str.h:29
@ RZ_STRING_ENC_EBCDIC_ES
Definition: rz_str.h:32
#define IS_PRINTABLE(x)
Definition: rz_str_util.h:10
#define RZ_NEW0(x)
Definition: rz_types.h:284
#define RZ_NONNULL
Definition: rz_types.h:64
#define PFMT64x
Definition: rz_types.h:393
#define RZ_MIN(x, y)
#define st64
Definition: rz_types_base.h:10
RZ_API int rz_utf16le_decode(const ut8 *ptr, int ptrlen, RzRune *ch)
Definition: utf16.c:36
RZ_API int rz_utf16be_decode(const ut8 *ptr, int ptrlen, RzRune *ch)
Definition: utf16.c:41
RZ_API int rz_utf32le_decode(const ut8 *ptr, int ptrlen, RzRune *ch)
Definition: utf32.c:39
RZ_API int rz_utf32be_decode(const ut8 *ptr, int ptrlen, RzRune *ch)
Definition: utf32.c:44
RZ_API int * rz_utf_block_list(const ut8 *str, int len, int **freq_list)
Definition: utf8.c:754
RZ_API int rz_utf8_decode(const ut8 *ptr, int ptrlen, RzRune *ch)
Definition: utf8.c:492
RZ_API bool rz_rune_is_printable(const RzRune c)
Returns true when the RzRune is a printable symbol.
Definition: utf8.c:606
ut32 RzRune
Definition: rz_utf8.h:13
RZ_API int rz_utf8_encode(ut8 *ptr, const RzRune ch)
Definition: utf8.c:535
static struct sockaddr static addrlen static backlog const void static flags void struct sockaddr from
Definition: sfsocketcall.h:123
static struct sockaddr static addrlen static backlog const void static flags void struct sockaddr socklen_t static fromlen const void const struct sockaddr to
Definition: sfsocketcall.h:125
static const ut8 LATIN1[49]
Definition: str_search.c:34
RZ_API int rz_scan_strings(RZ_NONNULL RzBuffer *buf_to_scan, RZ_NONNULL RzList *list, RZ_NONNULL const RzUtilStrScanOptions *opt, const ut64 from, const ut64 to, RzStrEnc type)
Look for strings in an RzBuffer.
Definition: str_search.c:525
static int compute_index(ut8 x, ut8 y)
Definition: str_search.c:45
static UTF8StringInfo calculate_utf8_string_info(ut8 *str, int size)
Definition: str_search.c:85
static const ut8 LATIN1_CLASS[256]
Definition: str_search.c:23
static bool can_be_utf16_le(const ut8 *buf, ut64 size)
Definition: str_search.c:314
RZ_API int rz_scan_strings_raw(RZ_NONNULL const ut8 *buf, RZ_NONNULL RzList *list, RZ_NONNULL const RzUtilStrScanOptions *opt, const ut64 from, const ut64 to, RzStrEnc type)
Look for strings in an RzBuffer.
Definition: str_search.c:362
RZ_API void rz_detected_string_free(RzDetectedString *str)
Definition: str_search.c:73
static bool can_be_utf16_be(const ut8 *buf, ut64 size)
Definition: str_search.c:323
static RzDetectedString * process_one_string(const ut8 *buf, const ut64 from, ut64 needle, const ut64 to, RzStrEnc str_type, bool ascii_only, const RzUtilStrScanOptions *opt, ut8 *strbuf)
Definition: str_search.c:206
static FalsePositiveResult reduce_false_positives(const RzUtilStrScanOptions *opt, ut8 *str, int size, RzStrEnc str_type)
Definition: str_search.c:115
static st64 score(RzRune *buff, const int len)
Definition: str_search.c:49
FalsePositiveResult
Definition: str_search.c:10
@ RETRY_ASCII
Definition: str_search.c:12
@ SKIP_STRING
Definition: str_search.c:11
@ STRING_OK
Definition: str_search.c:13
static bool is_c_escape_sequence(char ch)
Definition: str_search.c:81
static ut64 adjust_offset(RzStrEnc str_type, const ut8 *buf, const ut64 str_start)
Definition: str_search.c:165
static bool can_be_ebcdic(const ut8 *buf, ut64 size)
Definition: str_search.c:346
static bool can_be_utf32_be(const ut8 *buf, ut64 size)
Definition: str_search.c:339
static bool can_be_utf32_le(const ut8 *buf, ut64 size)
Definition: str_search.c:330
RzStrEnc type
String type.
Definition: rz_str_search.h:21
ut32 length
Length of string in chars.
Definition: rz_str_search.h:20
char * string
Pointer to the string.
Definition: rz_str_search.h:17
ut64 addr
Address of the string in the RzBuffer.
Definition: rz_str_search.h:18
ut32 size
Size of buffer containing the string in bytes.
Definition: rz_str_search.h:19
size_t min_str_length
Minimum string length.
Definition: rz_str_search.h:30
size_t max_uni_blocks
Maximum number of unicode blocks.
Definition: rz_str_search.h:29
bool check_ascii_freq
If true, perform check on ASCII frequencies when looking for false positives.
Definition: rz_str_search.h:32
size_t buf_size
Maximum size of a detected string.
Definition: rz_str_search.h:28
int num_ascii_extended
Definition: str_search.c:18
ut64(WINAPI *w32_GetEnabledXStateFeatures)()