Rizin
unix-like reverse engineering framework and cli tools
str_search.c File Reference

Go to the source code of this file.

Classes

struct  UTF8StringInfo
 

Enumerations

enum  FalsePositiveResult { SKIP_STRING , RETRY_ASCII , STRING_OK }
 

Functions

static int compute_index (ut8 x, ut8 y)
 
static st64 score (RzRune *buff, const int len)
 
RZ_API void rz_detected_string_free (RzDetectedString *str)
 
static bool is_c_escape_sequence (char ch)
 
static UTF8StringInfo calculate_utf8_string_info (ut8 *str, int size)
 
static FalsePositiveResult reduce_false_positives (const RzUtilStrScanOptions *opt, ut8 *str, int size, RzStrEnc str_type)
 
static ut64 adjust_offset (RzStrEnc str_type, const ut8 *buf, const ut64 str_start)
 
static RzDetectedString * process_one_string (const ut8 *buf, const ut64 from, ut64 needle, const ut64 to, RzStrEnc str_type, bool ascii_only, const RzUtilStrScanOptions *opt, ut8 *strbuf)
 
static bool can_be_utf16_le (const ut8 *buf, ut64 size)
 
static bool can_be_utf16_be (const ut8 *buf, ut64 size)
 
static bool can_be_utf32_le (const ut8 *buf, ut64 size)
 
static bool can_be_utf32_be (const ut8 *buf, ut64 size)
 
static bool can_be_ebcdic (const ut8 *buf, ut64 size)
 
RZ_API int rz_scan_strings_raw (RZ_NONNULL const ut8 *buf, RZ_NONNULL RzList *list, RZ_NONNULL const RzUtilStrScanOptions *opt, const ut64 from, const ut64 to, RzStrEnc type)
 Look for strings in a raw buffer. More...
 
RZ_API int rz_scan_strings (RZ_NONNULL RzBuffer *buf_to_scan, RZ_NONNULL RzList *list, RZ_NONNULL const RzUtilStrScanOptions *opt, const ut64 from, const ut64 to, RzStrEnc type)
 Look for strings in an RzBuffer. More...
 

Variables

static const ut8 LATIN1_CLASS [256]
 
static const ut8 LATIN1 [49]
 

Enumeration Type Documentation

◆ FalsePositiveResult

Enumerator
SKIP_STRING 
RETRY_ASCII 
STRING_OK 

Definition at line 10 of file str_search.c.

10  {
13  STRING_OK,
FalsePositiveResult
Definition: str_search.c:10
@ RETRY_ASCII
Definition: str_search.c:12
@ SKIP_STRING
Definition: str_search.c:11
@ STRING_OK
Definition: str_search.c:13

Function Documentation

◆ adjust_offset()

static ut64 adjust_offset ( RzStrEnc  str_type,
const ut8 * buf,
const ut64  str_start 
)
static

Definition at line 165 of file str_search.c.

165  {
166  switch (str_type) {
168  if (str_start > 1) {
169  const ut8 *p = buf + str_start - 2;
170  if (p[0] == 0xff && p[1] == 0xfe) {
171  return 2; // \xff\xfe
172  }
173  }
174  break;
176  if (str_start > 1) {
177  const ut8 *p = buf + str_start - 2;
178  if (p[0] == 0xfe && p[1] == 0xff) {
179  return 2; // \xfe\xff
180  }
181  }
182  break;
184  if (str_start > 3) {
185  const ut8 *p = buf + str_start - 4;
186  if (p[0] == 0xff && p[1] == 0xfe && !p[2] && !p[3]) {
187  return 4; // \xff\xfe\x00\x00
188  }
189  }
190  break;
192  if (str_start > 3) {
193  const ut8 *p = buf + str_start - 4;
194  if (!p[0] && !p[1] && p[2] == 0xfe && p[3] == 0xff) {
195  return 4; // \x00\x00\xfe\xff
196  }
197  }
198  break;
199  default:
200  break;
201  }
202 
203  return 0;
204 }
voidpf void * buf
Definition: ioapi.h:138
uint8_t ut8
Definition: lh5801.h:11
void * p
Definition: libc.cpp:67
@ RZ_STRING_ENC_UTF32LE
Definition: rz_str.h:24
@ RZ_STRING_ENC_UTF32BE
Definition: rz_str.h:26
@ RZ_STRING_ENC_UTF16LE
Definition: rz_str.h:23
@ RZ_STRING_ENC_UTF16BE
Definition: rz_str.h:25

References p, RZ_STRING_ENC_UTF16BE, RZ_STRING_ENC_UTF16LE, RZ_STRING_ENC_UTF32BE, and RZ_STRING_ENC_UTF32LE.

Referenced by process_one_string().

◆ calculate_utf8_string_info()

static UTF8StringInfo calculate_utf8_string_info ( ut8 * str,
int  size 
)
static

Definition at line 85 of file str_search.c.

85  {
86  UTF8StringInfo res = {
87  .num_ascii = 0,
88  .num_ascii_extended = 0,
89  .num_chars = 0
90  };
91 
92  const ut8 *str_ptr = str;
93  const ut8 *str_end = str + size;
94  RzRune ch = 0;
95  while (str_ptr < str_end) {
96  int ch_bytes = rz_utf8_decode(str_ptr, str_end - str_ptr, &ch);
97  if (!ch_bytes) {
98  break;
99  }
100 
101  res.num_chars += 1;
102  if (ch < 0x80u) {
103  res.num_ascii += 1;
104  }
105  if (ch < 0x100u) {
106  res.num_ascii_extended += 1;
107  }
108 
109  str_ptr += ch_bytes;
110  }
111 
112  return res;
113 }
voidpf void uLong size
Definition: ioapi.h:138
RZ_API int rz_utf8_decode(const ut8 *ptr, int ptrlen, RzRune *ch)
Definition: utf8.c:492
ut32 RzRune
Definition: rz_utf8.h:13
int num_ascii_extended
Definition: str_search.c:18

References UTF8StringInfo::num_ascii, UTF8StringInfo::num_ascii_extended, UTF8StringInfo::num_chars, rz_utf8_decode(), and cmd_descs_generate::str.

Referenced by reduce_false_positives().

◆ can_be_ebcdic()

static bool can_be_ebcdic ( const ut8 * buf,
ut64  size 
)
inline static

Definition at line 346 of file str_search.c.

346  {
347  return buf[0] < 0x20 || buf[0] > 0x3f;
348 }

Referenced by rz_scan_strings_raw().

◆ can_be_utf16_be()

static bool can_be_utf16_be ( const ut8 * buf,
ut64  size 
)
inline static

Definition at line 323 of file str_search.c.

323  {
324  if (size < 7) {
325  return false;
326  }
327  return !buf[0] && buf[1] && !buf[2] && buf[3] && !buf[4] && buf[5] && !buf[6];
328 }

Referenced by rz_scan_strings_raw().

◆ can_be_utf16_le()

static bool can_be_utf16_le ( const ut8 * buf,
ut64  size 
)
inline static

Definition at line 314 of file str_search.c.

314  {
315  int rc = rz_utf8_decode(buf, size, NULL);
316  if (!rc || (size - rc) < 5) {
317  return false;
318  }
319  char *w = (char *)buf + rc;
320  return !w[0] && w[1] && !w[2] && w[3] && !w[4];
321 }
#define NULL
Definition: cris-opc.c:27
#define w
Definition: crypto_rc6.c:13

References NULL, rz_utf8_decode(), and w.

Referenced by rz_scan_strings_raw().

◆ can_be_utf32_be()

static bool can_be_utf32_be ( const ut8 * buf,
ut64  size 
)
inline static

Definition at line 339 of file str_search.c.

339  {
340  if (size < 7) {
341  return false;
342  }
343  return !buf[0] && !buf[1] && !buf[2] && buf[3] && !buf[4] && !buf[5] && !buf[6];
344 }

Referenced by rz_scan_strings_raw().

◆ can_be_utf32_le()

static bool can_be_utf32_le ( const ut8 * buf,
ut64  size 
)
inline static

Definition at line 330 of file str_search.c.

330  {
331  int rc = rz_utf8_decode(buf, size, NULL);
332  if (!rc || (size - rc) < 5) {
333  return false;
334  }
335  char *w = (char *)buf + rc;
336  return !w[0] && !w[1] && !w[2] && w[3] && !w[4];
337 }

References NULL, rz_utf8_decode(), and w.

Referenced by rz_scan_strings_raw().

◆ compute_index()

static int compute_index ( ut8  x,
ut8  y 
)
inline static

Definition at line 45 of file str_search.c.

45  {
46  return (x * 7 + y);
47 }
int x
Definition: mipsasm.c:20

References x.

Referenced by score().

◆ is_c_escape_sequence()

static bool is_c_escape_sequence ( char  ch)
inline static

Definition at line 81 of file str_search.c.

81  {
82  return strchr("\b\v\f\n\r\t\a\033\\", ch);
83 }

Referenced by process_one_string(), and reduce_false_positives().

◆ process_one_string()

static RzDetectedString* process_one_string ( const ut8 * buf,
const ut64  from,
ut64  needle,
const ut64  to,
RzStrEnc  str_type,
bool  ascii_only,
const RzUtilStrScanOptions * opt,
ut8 * strbuf 
)
static

Definition at line 206 of file str_search.c.

207  {
208 
210 
211  ut64 str_addr = needle;
212  int rc = 0, i = 0, runes = 0;
213 
214  /* Eat a whole C string */
215  for (i = 0; i < opt->buf_size - 4 && needle < to; i += rc) {
216  RzRune r = 0;
217 
218  if (str_type == RZ_STRING_ENC_UTF32LE) {
219  rc = rz_utf32le_decode(buf + needle - from, to - needle, &r);
220  if (rc) {
221  rc = 4;
222  }
223  } else if (str_type == RZ_STRING_ENC_UTF16LE) {
224  rc = rz_utf16le_decode(buf + needle - from, to - needle, &r);
225  if (rc == 1) {
226  rc = 2;
227  }
228  } else if (str_type == RZ_STRING_ENC_UTF32BE) {
229  rc = rz_utf32be_decode(buf + needle - from, to - needle, &r);
230  if (rc) {
231  rc = 4;
232  }
233  } else if (str_type == RZ_STRING_ENC_UTF16BE) {
234  rc = rz_utf16be_decode(buf + needle - from, to - needle, &r);
235  if (rc == 1) {
236  rc = 2;
237  }
238  } else if (str_type == RZ_STRING_ENC_IBM037) {
239  rc = rz_str_ibm037_to_unicode(*(buf + needle - from), &r);
240  } else if (str_type == RZ_STRING_ENC_IBM290) {
241  rc = rz_str_ibm290_to_unicode(*(buf + needle - from), &r);
242  } else if (str_type == RZ_STRING_ENC_EBCDIC_ES) {
243  rc = rz_str_ebcdic_es_to_unicode(*(buf + needle - from), &r);
244  } else if (str_type == RZ_STRING_ENC_EBCDIC_UK) {
245  rc = rz_str_ebcdic_uk_to_unicode(*(buf + needle - from), &r);
246  } else if (str_type == RZ_STRING_ENC_EBCDIC_US) {
247  rc = rz_str_ebcdic_us_to_unicode(*(buf + needle - from), &r);
248  } else {
249  rc = rz_utf8_decode(buf + needle - from, to - needle, &r);
250  if (rc > 1) {
251  str_type = RZ_STRING_ENC_UTF8;
252  }
253  }
254 
255  /* Invalid sequence detected */
256  if (!rc || (ascii_only && r > 0x7f)) {
257  needle++;
258  break;
259  }
260 
261  needle += rc;
262 
263  if (rz_rune_is_printable(r) && r != '\\') {
264  if (str_type == RZ_STRING_ENC_UTF32LE || str_type == RZ_STRING_ENC_UTF32BE) {
265  if (r == 0xff) {
266  r = 0;
267  }
268  }
269  rc = rz_utf8_encode(strbuf + i, r);
270  runes++;
271  } else if (r && r < 0x100 && is_c_escape_sequence((char)r)) {
272  if ((i + 32) < opt->buf_size && r < 93) {
273  rc = rz_utf8_encode(strbuf + i, r);
274  } else {
275  // string too long
276  break;
277  }
278  runes++;
279  } else {
280  /* \0 marks the end of C-strings */
281  break;
282  }
283  }
284 
285  int strbuf_size = i;
286  if (runes >= opt->min_str_length) {
287  FalsePositiveResult false_positive_result = reduce_false_positives(opt, strbuf, strbuf_size, str_type);
288  if (false_positive_result == SKIP_STRING) {
289  return NULL;
290  } else if (false_positive_result == RETRY_ASCII) {
291  return process_one_string(buf, from, str_addr, to, str_type, true, opt, strbuf);
292  }
293 
295  if (!ds) {
296  return NULL;
297  }
298  ds->type = str_type;
299  ds->length = runes;
300  ds->size = needle - str_addr;
301  ds->addr = str_addr;
302 
303  ut64 off_adj = adjust_offset(str_type, buf, ds->addr - from);
304  ds->addr -= off_adj;
305  ds->size += off_adj;
306 
307  ds->string = rz_str_ndup((const char *)strbuf, strbuf_size);
308  return ds;
309  }
310 
311  return NULL;
312 }
lzma_index ** i
Definition: index.h:629
#define r
Definition: crypto_rc6.c:12
#define rz_return_val_if_fail(expr, val)
Definition: rz_assert.h:108
RZ_API int rz_str_ibm037_to_unicode(const ut8 src, RZ_NONNULL RZ_OUT RzRune *dst)
Convert an ibm037 char into an unicode RzRune.
Definition: ebcdic.c:457
RZ_API int rz_str_ibm290_to_unicode(const ut8 src, RZ_NONNULL RZ_OUT RzRune *dst)
Convert an ibm290 char into an unicode RzRune.
Definition: ebcdic.c:516
RZ_API int rz_str_ebcdic_es_to_unicode(const ut8 src, RZ_NONNULL RZ_OUT RzRune *dst)
Convert an ebcdic_es char into an unicode RzRune.
Definition: ebcdic.c:650
RZ_API int rz_str_ebcdic_us_to_unicode(const ut8 src, RZ_NONNULL RZ_OUT RzRune *dst)
Convert an ebcdic_us char into an unicode RzRune.
Definition: ebcdic.c:607
RZ_API int rz_str_ebcdic_uk_to_unicode(const ut8 src, RZ_NONNULL RZ_OUT RzRune *dst)
Convert an ebcdic_uk char into an unicode RzRune.
Definition: ebcdic.c:563
RZ_API char * rz_str_ndup(RZ_NULLABLE const char *ptr, int len)
Create new copy of string ptr limited to size len.
Definition: str.c:1006
@ RZ_STRING_ENC_EBCDIC_US
Definition: rz_str.h:31
@ RZ_STRING_ENC_UTF8
Definition: rz_str.h:21
@ RZ_STRING_ENC_GUESS
Definition: rz_str.h:33
@ RZ_STRING_ENC_EBCDIC_UK
Definition: rz_str.h:30
@ RZ_STRING_ENC_IBM037
Definition: rz_str.h:28
@ RZ_STRING_ENC_IBM290
Definition: rz_str.h:29
@ RZ_STRING_ENC_EBCDIC_ES
Definition: rz_str.h:32
#define RZ_NEW0(x)
Definition: rz_types.h:284
RZ_API int rz_utf16le_decode(const ut8 *ptr, int ptrlen, RzRune *ch)
Definition: utf16.c:36
RZ_API int rz_utf16be_decode(const ut8 *ptr, int ptrlen, RzRune *ch)
Definition: utf16.c:41
RZ_API int rz_utf32le_decode(const ut8 *ptr, int ptrlen, RzRune *ch)
Definition: utf32.c:39
RZ_API int rz_utf32be_decode(const ut8 *ptr, int ptrlen, RzRune *ch)
Definition: utf32.c:44
RZ_API bool rz_rune_is_printable(const RzRune c)
Returns true when the RzRune is a printable symbol.
Definition: utf8.c:606
RZ_API int rz_utf8_encode(ut8 *ptr, const RzRune ch)
Definition: utf8.c:535
static struct sockaddr static addrlen static backlog const void static flags void struct sockaddr from
Definition: sfsocketcall.h:123
static struct sockaddr static addrlen static backlog const void static flags void struct sockaddr socklen_t static fromlen const void const struct sockaddr to
Definition: sfsocketcall.h:125
static RzDetectedString * process_one_string(const ut8 *buf, const ut64 from, ut64 needle, const ut64 to, RzStrEnc str_type, bool ascii_only, const RzUtilStrScanOptions *opt, ut8 *strbuf)
Definition: str_search.c:206
static FalsePositiveResult reduce_false_positives(const RzUtilStrScanOptions *opt, ut8 *str, int size, RzStrEnc str_type)
Definition: str_search.c:115
static bool is_c_escape_sequence(char ch)
Definition: str_search.c:81
static ut64 adjust_offset(RzStrEnc str_type, const ut8 *buf, const ut64 str_start)
Definition: str_search.c:165
RzStrEnc type
String type.
Definition: rz_str_search.h:21
ut32 length
Length of string in chars.
Definition: rz_str_search.h:20
char * string
Pointer to the string.
Definition: rz_str_search.h:17
ut64 addr
Address of the string in the RzBuffer.
Definition: rz_str_search.h:18
ut32 size
Size of buffer containing the string in bytes.
Definition: rz_str_search.h:19
size_t min_str_length
Minimum string length.
Definition: rz_str_search.h:30
size_t buf_size
Maximum size of a detected string.
Definition: rz_str_search.h:28
ut64(WINAPI *w32_GetEnabledXStateFeatures)()

References RzDetectedString::addr, adjust_offset(), RzUtilStrScanOptions::buf_size, from, i, is_c_escape_sequence(), RzDetectedString::length, RzUtilStrScanOptions::min_str_length, NULL, r, reduce_false_positives(), RETRY_ASCII, RZ_NEW0, rz_return_val_if_fail, rz_rune_is_printable(), rz_str_ebcdic_es_to_unicode(), rz_str_ebcdic_uk_to_unicode(), rz_str_ebcdic_us_to_unicode(), rz_str_ibm037_to_unicode(), rz_str_ibm290_to_unicode(), rz_str_ndup(), RZ_STRING_ENC_EBCDIC_ES, RZ_STRING_ENC_EBCDIC_UK, RZ_STRING_ENC_EBCDIC_US, RZ_STRING_ENC_GUESS, RZ_STRING_ENC_IBM037, RZ_STRING_ENC_IBM290, RZ_STRING_ENC_UTF16BE, RZ_STRING_ENC_UTF16LE, RZ_STRING_ENC_UTF32BE, RZ_STRING_ENC_UTF32LE, RZ_STRING_ENC_UTF8, rz_utf16be_decode(), rz_utf16le_decode(), rz_utf32be_decode(), rz_utf32le_decode(), rz_utf8_decode(), rz_utf8_encode(), RzDetectedString::size, SKIP_STRING, RzDetectedString::string, to, RzDetectedString::type, and ut64().

Referenced by rz_scan_strings_raw().

◆ reduce_false_positives()

static FalsePositiveResult reduce_false_positives ( const RzUtilStrScanOptions * opt,
ut8 * str,
int  size,
RzStrEnc  str_type 
)
static

Definition at line 115 of file str_search.c.

115  {
116 
117  switch (str_type) {
118  case RZ_STRING_ENC_8BIT: {
119  for (int i = 0; i < size; i++) {
120  char ch = str[i];
121  if (!is_c_escape_sequence(ch)) {
122  if (!IS_PRINTABLE(str[i])) {
123  return SKIP_STRING;
124  }
125  }
126  }
127  break;
128  }
129  case RZ_STRING_ENC_UTF8:
133  case RZ_STRING_ENC_UTF32BE: {
134  int num_blocks = 0;
135  int *block_list = rz_utf_block_list((const ut8 *)str, size - 1, NULL);
136  if (block_list) {
137  for (int i = 0; block_list[i] != -1; i++) {
138  num_blocks++;
139  }
140  }
141  free(block_list);
142 
144  if (str_info.num_ascii_extended == str_info.num_chars) {
145  return STRING_OK;
146  }
147 
148  int expected_ascii = num_blocks ? str_info.num_chars / num_blocks : 0;
149  if (opt->check_ascii_freq && str_info.num_ascii > expected_ascii) {
150  return RETRY_ASCII;
151  }
152 
153  if (num_blocks > opt->max_uni_blocks) {
154  return SKIP_STRING;
155  }
156  break;
157  }
158  default:
159  break;
160  }
161 
162  return STRING_OK;
163 }
RZ_API void Ht_() free(HtName_(Ht) *ht)
Definition: ht_inc.c:130
@ RZ_STRING_ENC_8BIT
Definition: rz_str.h:20
#define IS_PRINTABLE(x)
Definition: rz_str_util.h:10
RZ_API int * rz_utf_block_list(const ut8 *str, int len, int **freq_list)
Definition: utf8.c:754
static UTF8StringInfo calculate_utf8_string_info(ut8 *str, int size)
Definition: str_search.c:85
size_t max_uni_blocks
Maximum number of unicode blocks.
Definition: rz_str_search.h:29
bool check_ascii_freq
If true, perform check on ASCII frequencies when looking for false positives.
Definition: rz_str_search.h:32

References calculate_utf8_string_info(), RzUtilStrScanOptions::check_ascii_freq, free(), i, is_c_escape_sequence(), IS_PRINTABLE, RzUtilStrScanOptions::max_uni_blocks, NULL, UTF8StringInfo::num_ascii, UTF8StringInfo::num_ascii_extended, UTF8StringInfo::num_chars, RETRY_ASCII, RZ_STRING_ENC_8BIT, RZ_STRING_ENC_UTF16BE, RZ_STRING_ENC_UTF16LE, RZ_STRING_ENC_UTF32BE, RZ_STRING_ENC_UTF32LE, RZ_STRING_ENC_UTF8, rz_utf_block_list(), SKIP_STRING, cmd_descs_generate::str, and STRING_OK.

Referenced by process_one_string().

◆ rz_detected_string_free()

RZ_API void rz_detected_string_free ( RzDetectedString * str)

Free a RzDetectedString

Definition at line 73 of file str_search.c.

73  {
74  if (!str) {
75  return;
76  }
77  free(str->string);
78  free(str);
79 }

References free(), and cmd_descs_generate::str.

Referenced by rz_scan_strings_raw(), and to_bin_string().

◆ rz_scan_strings()

RZ_API int rz_scan_strings ( RZ_NONNULL RzBuffer * buf_to_scan,
RZ_NONNULL RzList * list,
RZ_NONNULL const RzUtilStrScanOptions * opt,
const ut64  from,
const ut64  to,
RzStrEnc  type 
)

Look for strings in an RzBuffer.

Parameters
buf_to_scan: Pointer to a RzBuffer to scan
list: Pointer to a list that will be populated with the found strings
opt: Pointer to a RzUtilStrScanOptions that specifies search parameters
from: Minimum address to scan
to: Maximum address to scan
type: Type of strings to search
Returns
Number of strings found

Used to look for strings in a given RzBuffer. The function can also automatically detect string types.

Definition at line 525 of file str_search.c.

526  {
527  rz_return_val_if_fail(opt && list && buf_to_scan, -1);
528 
529  if (from == to) {
530  return 0;
531  } else if (from > to) {
532  RZ_LOG_ERROR("rz_scan_strings: Invalid range to find strings 0x%" PFMT64x " .. 0x%" PFMT64x "\n", from, to);
533  return -1;
534  } else if (type == RZ_STRING_ENC_MUTF8 || type == RZ_STRING_ENC_BASE64) {
535  RZ_LOG_ERROR("rz_scan_strings: %s search type is not supported.\n", rz_str_enc_as_string(type));
536  return -1;
537  }
538 
539  ut64 len = to - from;
540  ut8 *buf = calloc(len, 1);
541  if (!buf) {
542  return -1;
543  }
544 
545  rz_buf_read_at(buf_to_scan, from, buf, len);
546 
547  int count = rz_scan_strings_raw(buf, list, opt, from, to, type);
548 
549  free(buf);
550  return count;
551 }
size_t len
Definition: 6502dis.c:15
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void count
Definition: sflib.h:98
static void list(RzEgg *egg)
Definition: rz-gg.c:52
void * calloc(size_t number, size_t size)
Definition: malloc.c:102
int type
Definition: mipsasm.c:17
RZ_API st64 rz_buf_read_at(RZ_NONNULL RzBuffer *b, ut64 addr, RZ_NONNULL RZ_OUT ut8 *buf, ut64 len)
Read len bytes of the buffer at the specified address.
Definition: buf.c:1136
#define RZ_LOG_ERROR(fmtstr,...)
Definition: rz_log.h:58
RZ_API const char * rz_str_enc_as_string(RzStrEnc enc)
Definition: str.c:44
@ RZ_STRING_ENC_BASE64
Definition: rz_str.h:27
@ RZ_STRING_ENC_MUTF8
Definition: rz_str.h:22
#define PFMT64x
Definition: rz_types.h:393
RZ_API int rz_scan_strings_raw(RZ_NONNULL const ut8 *buf, RZ_NONNULL RzList *list, RZ_NONNULL const RzUtilStrScanOptions *opt, const ut64 from, const ut64 to, RzStrEnc type)
Look for strings in a raw buffer.
Definition: str_search.c:362

References calloc(), count, free(), from, len, list(), PFMT64x, rz_buf_read_at(), RZ_LOG_ERROR, rz_return_val_if_fail, rz_scan_strings_raw(), rz_str_enc_as_string(), RZ_STRING_ENC_BASE64, RZ_STRING_ENC_MUTF8, to, type, and ut64().

Referenced by meta_string_guess_add().

◆ rz_scan_strings_raw()

RZ_API int rz_scan_strings_raw ( RZ_NONNULL const ut8 * buf,
RZ_NONNULL RzList * list,
RZ_NONNULL const RzUtilStrScanOptions * opt,
const ut64  from,
const ut64  to,
RzStrEnc  type 
)

Look for strings in a raw buffer.

Parameters
buf: Pointer to a raw buffer to scan
list: Pointer to a list that will be populated with the found strings
opt: Pointer to a RzUtilStrScanOptions that specifies search parameters
from: Minimum address to scan
to: Maximum address to scan
type: Type of strings to search
Returns
Number of strings found

Used to look for strings in a given raw buffer. The function can also automatically detect string types.

Definition at line 362 of file str_search.c.

363  {
364  rz_return_val_if_fail(opt && list && buf, -1);
365 
366  if (from == to) {
367  return 0;
368  } else if (from > to) {
369  RZ_LOG_ERROR("rz_scan_strings: Invalid range to find strings 0x%" PFMT64x " .. 0x%" PFMT64x "\n", from, to);
370  return -1;
371  }
372 
373  ut64 needle = 0;
374  int count = 0;
375  RzStrEnc str_type = type;
376 
377  ut8 *strbuf = calloc(opt->buf_size, 1);
378  if (!strbuf) {
379  return -1;
380  }
381 
382  needle = from;
383  const ut8 *ptr = NULL;
384  ut64 size = 0;
385  int skip_ibm037 = 0;
386  while (needle < to) {
387  ptr = buf + needle - from;
388  size = to - needle;
389  --skip_ibm037;
390  if (type == RZ_STRING_ENC_GUESS) {
391  if (can_be_utf32_le(ptr, size)) {
392  str_type = RZ_STRING_ENC_UTF32LE;
393  } else if (can_be_utf16_le(ptr, size)) {
394  str_type = RZ_STRING_ENC_UTF16LE;
395  } else if (can_be_utf32_be(ptr, size)) {
396  if (to - needle > 3 && can_be_utf32_le(ptr + 3, size - 3)) {
397  // The string can be either utf32-le or utf32-be
398  RzDetectedString *ds_le = process_one_string(buf, from, needle + 3, to, RZ_STRING_ENC_UTF32LE, false, opt, strbuf);
399  RzDetectedString *ds_be = process_one_string(buf, from, needle, to, RZ_STRING_ENC_UTF32BE, false, opt, strbuf);
400 
401  RzDetectedString *to_add = NULL;
402  RzDetectedString *to_delete = NULL;
403  ut64 needle_offset = 0;
404 
405  if (!ds_le && !ds_be) {
406  needle++;
407  continue;
408  } else if (!ds_be) {
409  to_add = ds_le;
410  needle_offset = ds_le->size + 3;
411  } else if (!ds_le) {
412  to_add = ds_be;
413  needle_offset = ds_be->size;
414  } else if (!opt->prefer_big_endian) {
415  to_add = ds_le;
416  to_delete = ds_be;
417  needle_offset = ds_le->size + 3;
418  } else {
419  to_add = ds_be;
420  to_delete = ds_le;
421  needle_offset = ds_le->size;
422  }
423 
424  count++;
425  needle += needle_offset;
426  rz_list_append(list, to_add);
427  rz_detected_string_free(to_delete);
428  continue;
429  }
430  str_type = RZ_STRING_ENC_UTF32BE;
431  } else if (can_be_utf16_be(ptr, size)) {
432  if (to - needle > 1 && can_be_utf16_le(ptr + 1, size - 1)) {
433  // The string can be either utf16-le or utf16-be
434  RzDetectedString *ds_le = process_one_string(buf, from, needle + 1, to, RZ_STRING_ENC_UTF16LE, false, opt, strbuf);
435  RzDetectedString *ds_be = process_one_string(buf, from, needle, to, RZ_STRING_ENC_UTF16BE, false, opt, strbuf);
436 
437  RzDetectedString *to_add = NULL;
438  RzDetectedString *to_delete = NULL;
439  ut64 needle_offset = 0;
440 
441  if (!ds_le && !ds_be) {
442  needle++;
443  continue;
444  } else if (!ds_be) {
445  to_add = ds_le;
446  needle_offset = ds_le->size + 1;
447  } else if (!ds_le) {
448  to_add = ds_be;
449  needle_offset = ds_be->size;
450  } else if (!opt->prefer_big_endian) {
451  to_add = ds_le;
452  to_delete = ds_be;
453  needle_offset = ds_le->size + 1;
454  } else {
455  to_add = ds_be;
456  to_delete = ds_le;
457  needle_offset = ds_le->size;
458  }
459 
460  count++;
461  needle += needle_offset;
462  rz_list_append(list, to_add);
463  rz_detected_string_free(to_delete);
464  continue;
465  }
466  str_type = RZ_STRING_ENC_UTF16BE;
467  } else if (can_be_ebcdic(ptr, size) && skip_ibm037 < 0) {
468  ut8 sz = RZ_MIN(size, 15);
469  RzRune runes[15] = { 0 };
470  int i = 0;
471  for (; i < sz; i++) {
472  rz_str_ibm037_to_unicode(ptr[i], &runes[i]);
473  if (!rz_rune_is_printable(runes[i])) {
474  break;
475  }
476  }
477  int s = score(runes, i);
478  if (s >= 36) {
479  str_type = RZ_STRING_ENC_IBM037;
480  } else {
481  skip_ibm037 = i + 1;
482  continue;
483  }
484  } else {
485  int rc = rz_utf8_decode(ptr, size, NULL);
486  if (!rc) {
487  needle++;
488  continue;
489  } else {
490  str_type = RZ_STRING_ENC_8BIT;
491  }
492  }
493  } else if (type == RZ_STRING_ENC_UTF8) {
494  str_type = RZ_STRING_ENC_8BIT; // initial assumption
495  }
496 
497  RzDetectedString *ds = process_one_string(buf, from, needle, to, str_type, false, opt, strbuf);
498  if (!ds) {
499  needle++;
500  continue;
501  }
502  if (str_type == RZ_STRING_ENC_IBM037) {
503  skip_ibm037 = 0;
504  }
505 
506  count++;
507  rz_list_append(list, ds);
508  needle += ds->size;
509  }
510  free(strbuf);
511  return count;
512 }
RZ_API RZ_BORROW RzListIter * rz_list_append(RZ_NONNULL RzList *list, void *data)
Appends at the end of the list a new element.
Definition: list.c:288
static RzSocket * s
Definition: rtr.c:28
RzStrEnc
Definition: rz_str.h:19
#define RZ_MIN(x, y)
static bool can_be_utf16_le(const ut8 *buf, ut64 size)
Definition: str_search.c:314
RZ_API void rz_detected_string_free(RzDetectedString *str)
Definition: str_search.c:73
static bool can_be_utf16_be(const ut8 *buf, ut64 size)
Definition: str_search.c:323
static st64 score(RzRune *buff, const int len)
Definition: str_search.c:49
static bool can_be_ebcdic(const ut8 *buf, ut64 size)
Definition: str_search.c:346
static bool can_be_utf32_be(const ut8 *buf, ut64 size)
Definition: str_search.c:339
static bool can_be_utf32_le(const ut8 *buf, ut64 size)
Definition: str_search.c:330

References calloc(), can_be_ebcdic(), can_be_utf16_be(), can_be_utf16_le(), can_be_utf32_be(), can_be_utf32_le(), count, free(), from, i, list(), NULL, PFMT64x, process_one_string(), rz_detected_string_free(), rz_list_append(), RZ_LOG_ERROR, RZ_MIN, rz_return_val_if_fail, rz_rune_is_printable(), rz_str_ibm037_to_unicode(), RZ_STRING_ENC_8BIT, RZ_STRING_ENC_GUESS, RZ_STRING_ENC_IBM037, RZ_STRING_ENC_UTF16BE, RZ_STRING_ENC_UTF16LE, RZ_STRING_ENC_UTF32BE, RZ_STRING_ENC_UTF32LE, RZ_STRING_ENC_UTF8, rz_utf8_decode(), s, score(), RzDetectedString::size, to, type, and ut64().

Referenced by rz_scan_strings(), and string_scan_range().

◆ score()

static st64 score ( RzRune * buff,
const int  len 
)
static

Definition at line 49 of file str_search.c.

49  {
50  int score = 0;
51  for (RzRune *src = buff, *end = buff + len - 1; src < end; ++src) {
52  RzRune b1 = src[0], b2 = src[1];
54  if (b1 > 0x7f) {
55  score -= 6;
56  }
57 
58  ut8 i = compute_index(c1, c2);
59  assert(i < 49);
60  ut8 y = LATIN1[i];
61  if (y == 0) {
62  score += -100;
63  } else {
64  score += y;
65  }
66  }
67  return score;
68 }
lzma_index * src
Definition: index.h:567
lsl lsr asr ror lsl lsr asr ror lsl lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror c1
lsl lsr asr ror lsl lsr asr ror lsl lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror c2
assert(limit<=UINT32_MAX/2)
static const ut8 LATIN1[49]
Definition: str_search.c:34
static int compute_index(ut8 x, ut8 y)
Definition: str_search.c:45
static const ut8 LATIN1_CLASS[256]
Definition: str_search.c:23

References assert(), b1, b2, c1, c2, compute_index(), test_evm::end, i, LATIN1, LATIN1_CLASS, len, and src.

Referenced by rz_scan_strings_raw().

Variable Documentation

◆ LATIN1

const ut8 LATIN1[49]
static
Initial value:
= {
0,0,0,0,0,0,0,
0,1,6,6,6,1,3,
0,6,12,6,6,1,3,
0,6,6,18,9,1,3,
0,6,6,9,18,1,3,
0,1,1,1,1,1,1,
0,3,3,3,3,1,3,
}

Definition at line 34 of file str_search.c.

Referenced by score().

◆ LATIN1_CLASS

const ut8 LATIN1_CLASS[256]
static
Initial value:
= {
0,0,0,0,0,0,0,0, 0,1,1,0,0,1,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
1,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, 2,2,2,2,2,2,2,2, 2,2,6,6,6,6,6,6,
6,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, 3,3,3,6,6,6,6,6,
6,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4, 4,4,4,6,6,6,6,0,
0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
1,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6,
5,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,6,
5,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,6, 5,5,5,5,5,5,5,5,
}

Definition at line 23 of file str_search.c.

Referenced by score().