Rizin
unix-like reverse engineering framework and cli tools
rz_str_search.h File Reference
#include <rz_util/rz_str.h>
#include <rz_util/rz_assert.h>
#include <rz_util/rz_buf.h>
#include <rz_list.h>

Go to the source code of this file.

Classes

struct  RzDetectedString
 
struct  RzUtilStrScanOptions
 

Functions

RZ_API void rz_detected_string_free (RzDetectedString *str)
 
RZ_API int rz_scan_strings_raw (RZ_NONNULL const ut8 *buf, RZ_NONNULL RzList *list, RZ_NONNULL const RzUtilStrScanOptions *opt, const ut64 from, const ut64 to, RzStrEnc type)
 Look for strings in a raw buffer. More...
 
RZ_API int rz_scan_strings (RZ_NONNULL RzBuffer *buf_to_scan, RZ_NONNULL RzList *list, RZ_NONNULL const RzUtilStrScanOptions *opt, const ut64 from, const ut64 to, RzStrEnc type)
 Look for strings in an RzBuffer. More...
 

Function Documentation

◆ rz_detected_string_free()

RZ_API void rz_detected_string_free ( RzDetectedString *str)

Free a RzDetectedString

Definition at line 73 of file str_search.c.

73  {
74  if (!str) {
75  return;
76  }
77  free(str->string);
78  free(str);
79 }
RZ_API void Ht_() free(HtName_(Ht) *ht)
Definition: ht_inc.c:130

References free(), and cmd_descs_generate::str.

Referenced by rz_scan_strings_raw(), and to_bin_string().

◆ rz_scan_strings()

RZ_API int rz_scan_strings ( RZ_NONNULL RzBuffer *buf_to_scan,
RZ_NONNULL RzList *list,
RZ_NONNULL const RzUtilStrScanOptions *opt,
const ut64  from,
const ut64  to,
RzStrEnc  type 
)

Look for strings in an RzBuffer.

Parameters
buf_to_scan  Pointer to an RzBuffer to scan
list  Pointer to a list that will be populated with the found strings
opt  Pointer to a RzUtilStrScanOptions that specifies search parameters
from  Minimum address to scan
to  Maximum address to scan
type  Type of strings to search
Returns
Number of strings found

Used to look for strings in a given RzBuffer. The function can also automatically detect string types.

Definition at line 525 of file str_search.c.

526  {
527  rz_return_val_if_fail(opt && list && buf_to_scan, -1);
528 
529  if (from == to) {
530  return 0;
531  } else if (from > to) {
532  RZ_LOG_ERROR("rz_scan_strings: Invalid range to find strings 0x%" PFMT64x " .. 0x%" PFMT64x "\n", from, to);
533  return -1;
534  } else if (type == RZ_STRING_ENC_MUTF8 || type == RZ_STRING_ENC_BASE64) {
535  RZ_LOG_ERROR("rz_scan_strings: %s search type is not supported.\n", rz_str_enc_as_string(type));
536  return -1;
537  }
538 
539  ut64 len = to - from;
540  ut8 *buf = calloc(len, 1);
541  if (!buf) {
542  return -1;
543  }
544 
545  rz_buf_read_at(buf_to_scan, from, buf, len);
546 
547  int count = rz_scan_strings_raw(buf, list, opt, from, to, type);
548 
549  free(buf);
550  return count;
551 }
size_t len
Definition: 6502dis.c:15
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void count
Definition: sflib.h:98
voidpf void * buf
Definition: ioapi.h:138
uint8_t ut8
Definition: lh5801.h:11
static void list(RzEgg *egg)
Definition: rz-gg.c:52
void * calloc(size_t number, size_t size)
Definition: malloc.c:102
int type
Definition: mipsasm.c:17
#define rz_return_val_if_fail(expr, val)
Definition: rz_assert.h:108
RZ_API st64 rz_buf_read_at(RZ_NONNULL RzBuffer *b, ut64 addr, RZ_NONNULL RZ_OUT ut8 *buf, ut64 len)
Read len bytes of the buffer at the specified address.
Definition: buf.c:1136
#define RZ_LOG_ERROR(fmtstr,...)
Definition: rz_log.h:58
RZ_API const char * rz_str_enc_as_string(RzStrEnc enc)
Definition: str.c:44
@ RZ_STRING_ENC_BASE64
Definition: rz_str.h:27
@ RZ_STRING_ENC_MUTF8
Definition: rz_str.h:22
#define PFMT64x
Definition: rz_types.h:393
static struct sockaddr static addrlen static backlog const void static flags void struct sockaddr from
Definition: sfsocketcall.h:123
static struct sockaddr static addrlen static backlog const void static flags void struct sockaddr socklen_t static fromlen const void const struct sockaddr to
Definition: sfsocketcall.h:125
RZ_API int rz_scan_strings_raw(RZ_NONNULL const ut8 *buf, RZ_NONNULL RzList *list, RZ_NONNULL const RzUtilStrScanOptions *opt, const ut64 from, const ut64 to, RzStrEnc type)
Look for strings in a raw buffer.
Definition: str_search.c:362
ut64(WINAPI *w32_GetEnabledXStateFeatures)()

References calloc(), count, free(), from, len, list(), PFMT64x, rz_buf_read_at(), RZ_LOG_ERROR, rz_return_val_if_fail, rz_scan_strings_raw(), rz_str_enc_as_string(), RZ_STRING_ENC_BASE64, RZ_STRING_ENC_MUTF8, to, type, and ut64().

Referenced by meta_string_guess_add().

◆ rz_scan_strings_raw()

RZ_API int rz_scan_strings_raw ( RZ_NONNULL const ut8 *buf,
RZ_NONNULL RzList *list,
RZ_NONNULL const RzUtilStrScanOptions *opt,
const ut64  from,
const ut64  to,
RzStrEnc  type 
)

Look for strings in a raw buffer.

Parameters
buf  Pointer to a raw buffer to scan
list  Pointer to a list that will be populated with the found strings
opt  Pointer to a RzUtilStrScanOptions that specifies search parameters
from  Minimum address to scan
to  Maximum address to scan
type  Type of strings to search
Returns
Number of strings found

Used to look for strings in a given raw buffer. The function can also automatically detect string types.

Definition at line 362 of file str_search.c.

363  {
364  rz_return_val_if_fail(opt && list && buf, -1);
365 
366  if (from == to) {
367  return 0;
368  } else if (from > to) {
369  RZ_LOG_ERROR("rz_scan_strings: Invalid range to find strings 0x%" PFMT64x " .. 0x%" PFMT64x "\n", from, to);
370  return -1;
371  }
372 
373  ut64 needle = 0;
374  int count = 0;
375  RzStrEnc str_type = type;
376 
377  ut8 *strbuf = calloc(opt->buf_size, 1);
378  if (!strbuf) {
379  return -1;
380  }
381 
382  needle = from;
383  const ut8 *ptr = NULL;
384  ut64 size = 0;
385  int skip_ibm037 = 0;
386  while (needle < to) {
387  ptr = buf + needle - from;
388  size = to - needle;
389  --skip_ibm037;
390  if (type == RZ_STRING_ENC_GUESS) {
391  if (can_be_utf32_le(ptr, size)) {
392  str_type = RZ_STRING_ENC_UTF32LE;
393  } else if (can_be_utf16_le(ptr, size)) {
394  str_type = RZ_STRING_ENC_UTF16LE;
395  } else if (can_be_utf32_be(ptr, size)) {
396  if (to - needle > 3 && can_be_utf32_le(ptr + 3, size - 3)) {
397  // The string can be either utf32-le or utf32-be
398  RzDetectedString *ds_le = process_one_string(buf, from, needle + 3, to, RZ_STRING_ENC_UTF32LE, false, opt, strbuf);
399  RzDetectedString *ds_be = process_one_string(buf, from, needle, to, RZ_STRING_ENC_UTF32BE, false, opt, strbuf);
400 
401  RzDetectedString *to_add = NULL;
402  RzDetectedString *to_delete = NULL;
403  ut64 needle_offset = 0;
404 
405  if (!ds_le && !ds_be) {
406  needle++;
407  continue;
408  } else if (!ds_be) {
409  to_add = ds_le;
410  needle_offset = ds_le->size + 3;
411  } else if (!ds_le) {
412  to_add = ds_be;
413  needle_offset = ds_be->size;
414  } else if (!opt->prefer_big_endian) {
415  to_add = ds_le;
416  to_delete = ds_be;
417  needle_offset = ds_le->size + 3;
418  } else {
419  to_add = ds_be;
420  to_delete = ds_le;
421  needle_offset = ds_le->size;
422  }
423 
424  count++;
425  needle += needle_offset;
426  rz_list_append(list, to_add);
427  rz_detected_string_free(to_delete);
428  continue;
429  }
430  str_type = RZ_STRING_ENC_UTF32BE;
431  } else if (can_be_utf16_be(ptr, size)) {
432  if (to - needle > 1 && can_be_utf16_le(ptr + 1, size - 1)) {
433  // The string can be either utf16-le or utf16-be
434  RzDetectedString *ds_le = process_one_string(buf, from, needle + 1, to, RZ_STRING_ENC_UTF16LE, false, opt, strbuf);
435  RzDetectedString *ds_be = process_one_string(buf, from, needle, to, RZ_STRING_ENC_UTF16BE, false, opt, strbuf);
436 
437  RzDetectedString *to_add = NULL;
438  RzDetectedString *to_delete = NULL;
439  ut64 needle_offset = 0;
440 
441  if (!ds_le && !ds_be) {
442  needle++;
443  continue;
444  } else if (!ds_be) {
445  to_add = ds_le;
446  needle_offset = ds_le->size + 1;
447  } else if (!ds_le) {
448  to_add = ds_be;
449  needle_offset = ds_be->size;
450  } else if (!opt->prefer_big_endian) {
451  to_add = ds_le;
452  to_delete = ds_be;
453  needle_offset = ds_le->size + 1;
454  } else {
455  to_add = ds_be;
456  to_delete = ds_le;
457  needle_offset = ds_le->size;
458  }
459 
460  count++;
461  needle += needle_offset;
462  rz_list_append(list, to_add);
463  rz_detected_string_free(to_delete);
464  continue;
465  }
466  str_type = RZ_STRING_ENC_UTF16BE;
467  } else if (can_be_ebcdic(ptr, size) && skip_ibm037 < 0) {
468  ut8 sz = RZ_MIN(size, 15);
469  RzRune runes[15] = { 0 };
470  int i = 0;
471  for (; i < sz; i++) {
472  rz_str_ibm037_to_unicode(ptr[i], &runes[i]);
473  if (!rz_rune_is_printable(runes[i])) {
474  break;
475  }
476  }
477  int s = score(runes, i);
478  if (s >= 36) {
479  str_type = RZ_STRING_ENC_IBM037;
480  } else {
481  skip_ibm037 = i + 1;
482  continue;
483  }
484  } else {
485  int rc = rz_utf8_decode(ptr, size, NULL);
486  if (!rc) {
487  needle++;
488  continue;
489  } else {
490  str_type = RZ_STRING_ENC_8BIT;
491  }
492  }
493  } else if (type == RZ_STRING_ENC_UTF8) {
494  str_type = RZ_STRING_ENC_8BIT; // initial assumption
495  }
496 
497  RzDetectedString *ds = process_one_string(buf, from, needle, to, str_type, false, opt, strbuf);
498  if (!ds) {
499  needle++;
500  continue;
501  }
502  if (str_type == RZ_STRING_ENC_IBM037) {
503  skip_ibm037 = 0;
504  }
505 
506  count++;
507  rz_list_append(list, ds);
508  needle += ds->size;
509  }
510  free(strbuf);
511  return count;
512 }
lzma_index ** i
Definition: index.h:629
#define NULL
Definition: cris-opc.c:27
voidpf void uLong size
Definition: ioapi.h:138
RZ_API RZ_BORROW RzListIter * rz_list_append(RZ_NONNULL RzList *list, void *data)
Appends at the end of the list a new element.
Definition: list.c:288
static RzSocket * s
Definition: rtr.c:28
RZ_API int rz_str_ibm037_to_unicode(const ut8 src, RZ_NONNULL RZ_OUT RzRune *dst)
Convert an ibm037 char into an unicode RzRune.
Definition: ebcdic.c:457
RzStrEnc
Definition: rz_str.h:19
@ RZ_STRING_ENC_UTF32LE
Definition: rz_str.h:24
@ RZ_STRING_ENC_8BIT
Definition: rz_str.h:20
@ RZ_STRING_ENC_UTF32BE
Definition: rz_str.h:26
@ RZ_STRING_ENC_UTF8
Definition: rz_str.h:21
@ RZ_STRING_ENC_GUESS
Definition: rz_str.h:33
@ RZ_STRING_ENC_UTF16LE
Definition: rz_str.h:23
@ RZ_STRING_ENC_UTF16BE
Definition: rz_str.h:25
@ RZ_STRING_ENC_IBM037
Definition: rz_str.h:28
#define RZ_MIN(x, y)
RZ_API int rz_utf8_decode(const ut8 *ptr, int ptrlen, RzRune *ch)
Definition: utf8.c:492
RZ_API bool rz_rune_is_printable(const RzRune c)
Returns true when the RzRune is a printable symbol.
Definition: utf8.c:606
ut32 RzRune
Definition: rz_utf8.h:13
static bool can_be_utf16_le(const ut8 *buf, ut64 size)
Definition: str_search.c:314
RZ_API void rz_detected_string_free(RzDetectedString *str)
Definition: str_search.c:73
static bool can_be_utf16_be(const ut8 *buf, ut64 size)
Definition: str_search.c:323
static RzDetectedString * process_one_string(const ut8 *buf, const ut64 from, ut64 needle, const ut64 to, RzStrEnc str_type, bool ascii_only, const RzUtilStrScanOptions *opt, ut8 *strbuf)
Definition: str_search.c:206
static st64 score(RzRune *buff, const int len)
Definition: str_search.c:49
static bool can_be_ebcdic(const ut8 *buf, ut64 size)
Definition: str_search.c:346
static bool can_be_utf32_be(const ut8 *buf, ut64 size)
Definition: str_search.c:339
static bool can_be_utf32_le(const ut8 *buf, ut64 size)
Definition: str_search.c:330
ut32 size
Size of buffer containing the string in bytes.
Definition: rz_str_search.h:19

References calloc(), can_be_ebcdic(), can_be_utf16_be(), can_be_utf16_le(), can_be_utf32_be(), can_be_utf32_le(), count, free(), from, i, list(), NULL, PFMT64x, process_one_string(), rz_detected_string_free(), rz_list_append(), RZ_LOG_ERROR, RZ_MIN, rz_return_val_if_fail, rz_rune_is_printable(), rz_str_ibm037_to_unicode(), RZ_STRING_ENC_8BIT, RZ_STRING_ENC_GUESS, RZ_STRING_ENC_IBM037, RZ_STRING_ENC_UTF16BE, RZ_STRING_ENC_UTF16LE, RZ_STRING_ENC_UTF32BE, RZ_STRING_ENC_UTF32LE, RZ_STRING_ENC_UTF8, rz_utf8_decode(), s, score(), RzDetectedString::size, to, type, and ut64().

Referenced by rz_scan_strings(), and string_scan_range().