Rizin
unix-like reverse engineering framework and cli tools
data.c
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: 2012-2017 pancake <pancake@nopcode.org>
2 // SPDX-License-Identifier: LGPL-3.0-only
3 
4 #include <rz_analysis.h>
5 
6 #define MINLEN 1
7 static int is_string(const ut8 *buf, int size, int *len) {
8  int i;
9  if (size < 1) {
10  return 0;
11  }
12  if (size > 3 && buf[0] && !buf[1] && buf[2] && !buf[3]) {
13  *len = 1; // XXX: TODO: Measure wide string length
14  return 2; // is wide
15  }
16  for (i = 0; i < size; i++) {
17  if (!buf[i] && i > MINLEN) {
18  *len = i;
19  return 1;
20  }
21  if (buf[i] == 10 || buf[i] == 13 || buf[i] == 9) {
22  continue;
23  }
24  if (buf[i] < 32 || buf[i] > 127) {
25  // not ascii text
26  return 0;
27  }
28  if (!IS_PRINTABLE(buf[i])) {
29  *len = i;
30  return 0;
31  }
32  }
33  *len = i;
34  return 1;
35 }
36 
// Returns the value `n` narrowed to int when it is below UT32_MAX, otherwise 0;
// the caller in this file treats a zero result as "not a number".
// NOTE(review): the statement that declares and fills `n` is missing from this
// listing (the embedded numbering jumps from 37 to 39). Presumably it decodes
// `size` bytes of `buf` with rz_mem_get_num -- confirm against the real file.
37 static int is_number(const ut8 *buf, int size) {
39  return (n < UT32_MAX) ? (int)n : 0;
40 }
41 
42 static int is_null(const ut8 *buf, int size) {
43  const char zero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
44  return (!memcmp(buf, &zero, size)) ? 1 : 0;
45 }
46 
47 static int is_invalid(const ut8 *buf, int size) {
48  if (size < 1) {
49  return 1;
50  }
51  if (size > 8) {
52  size = 8;
53  }
54  return (!memcmp(buf, "\xff\xff\xff\xff\xff\xff\xff\xff", size)) ? 1 : 0;
55 }
56 
57 #define USE_IS_VALID_OFFSET 1
// Decides whether the value `n` taken from `buf` can be treated as a pointer.
// Returns 1 when `n` is zero (reported as a null pointer), `n` itself when the
// IO binding's is_valid_offset callback accepts it, and 0 otherwise.
// `size` is capped to sizeof(buf2) (32 bytes) before it is used.
// USE_IS_VALID_OFFSET is defined to 1 right above this function, so only the
// first preprocessor branch is compiled; the #else branch is the alternative
// that range-checks `n` and reads the target bytes through read_at.
58 static ut64 is_pointer(RzAnalysis *analysis, const ut8 *buf, int size) {
59  ut64 n;
60  ut8 buf2[32];
61  RzIOBind *iob = &analysis->iob;
62  if (size > sizeof(buf2)) {
63  size = sizeof(buf2);
64  }
// NOTE(review): original line 65, which has to assign `n`, is missing from this
// listing (numbering jumps from 64 to 66). Presumably it is
// `n = rz_mem_get_num(buf, size);` -- confirm against the real file.
66  if (!n) {
67  return 1; // null pointer
68  }
69 #if USE_IS_VALID_OFFSET
70  int r = iob->is_valid_offset(iob->io, n, 0);
71  return r ? n : 0LL;
72 #else
73  // optimization to ignore very low and very high pointers
74  // this makes disasm 5x faster, but can result in some false positives
75  // we should compare with current offset, to avoid
76  // short/long references. and discard invalid ones
77  if (n < 0x1000)
78  return 0; // probably wrong
79  if (n > 0xffffffffffffLL)
80  return 0; // probably wrong
81 
82  if (iob->read_at(iob->io, n, buf2, size) != size)
83  return 0;
84  return is_invalid(buf2, size) ? 0 : n;
85 #endif
86 }
87 
88 static bool is_bin(const ut8 *buf, int size) {
89  // TODO: add more magic signatures heres
90  if ((size >= 4 && !memcmp(buf, "\xcf\xfa\xed\xfe", 4))) {
91  return true;
92  }
93  if ((size >= 4 && !memcmp(buf, "\x7f\x45\x4c\x46", 4))) { // \x7fELF
94  return true;
95  }
96  if ((size >= 2 && !memcmp(buf, "MZ", 2))) {
97  return true;
98  }
99  return false;
100 }
101 
102 // TODO: add is_flag, is comment?
103 
// Body of rz_analysis_data_to_string. The index at the end of this page gives
// the signature as:
//   RZ_API char *rz_analysis_data_to_string(RzAnalysisData *d, RzConsPrintablePalette *pal)
// NOTE(review): this listing lost original lines 104, 112, 118, 137, 144, 147,
// 165, 174, 181, 184, 187 and 190, so the signature line, the creation of `sb`
// and every `case` label of the switch are not visible here.
// Visible behaviour: the text starts with d->addr, then up to 8 bytes of d->buf
// in hex (".." appended when d->len is larger), a space, and a description
// selected by d->type. When `pal` is non-NULL its color strings are emitted
// around the fields. The drained string buffer is returned; NULL is returned
// when `d` is NULL or the buffer cannot be set up.
105  int i, len, mallocsz = 1024;
106  ut32 n32;
107 
108  if (!d) {
109  return NULL;
110  }
111 
// NOTE(review): line 112 (missing) must create `sb`; presumably via rz_strbuf_new.
113  if (!sb) {
114  return NULL;
115  }
116  if (!rz_strbuf_reserve(sb, mallocsz)) {
117  RZ_LOG_ERROR("Cannot allocate %d byte(s)\n", mallocsz);
// NOTE(review): line 118 (missing) presumably releases `sb` before returning.
119  return NULL;
120  }
121  if (pal) {
122  const char *k = pal->offset;
123  rz_strbuf_appendf(sb, "%s0x%08" PFMT64x Color_RESET " ", k, d->addr);
124  } else {
125  rz_strbuf_appendf(sb, "0x%08" PFMT64x " ", d->addr);
126  }
// n32 is d->ptr truncated to 32 bits; the number branch below compares it with
// d->ptr to choose between the 32-bit and the 64-bit format.
127  n32 = (ut32)d->ptr;
128  len = RZ_MIN(d->len, 8);
129  for (i = 0; i < len; i++) {
130  rz_strbuf_appendf(sb, "%02x", d->buf[i]);
131  }
132  if (i > 0 && d->len > len) {
133  rz_strbuf_append(sb, "..");
134  }
135  rz_strbuf_append(sb, " ");
136  switch (d->type) {
// (case label missing from the listing) string: prints d->str in quotes
138  if (pal) {
139  rz_strbuf_appendf(sb, "%sstring \"%s\"" Color_RESET, pal->comment, d->str);
140  } else {
141  rz_strbuf_appendf(sb, "string \"%s\"", d->str);
142  }
143  break;
// (case label missing from the listing) wide string: no contents are printed
145  rz_strbuf_append(sb, "wide string");
146  break;
// (case label missing from the listing) number: decimal and hex form of d->ptr
148  if (pal) {
149  const char *k = pal->num;
150  if (n32 == d->ptr) {
151  rz_strbuf_appendf(sb, "%snumber %d (0x%x)" Color_RESET, k, n32, n32);
152  } else {
153  rz_strbuf_appendf(sb, "%snumber %" PFMT64d " (0x%" PFMT64x ")" Color_RESET,
154  k, d->ptr, d->ptr);
155  }
156  } else {
157  if (n32 == d->ptr) {
158  rz_strbuf_appendf(sb, "number %d 0x%x", n32, n32);
159  } else {
160  rz_strbuf_appendf(sb, "number %" PFMT64d " 0x%" PFMT64x,
161  d->ptr, d->ptr);
162  }
163  }
164  break;
// (case label missing from the listing) pointer: prints d->ptr as an address.
// NOTE(review): the colored variant does not append Color_RESET, unlike the
// other colored branches, and both variants yield two spaces after "pointer".
166  rz_strbuf_append(sb, "pointer ");
167  if (pal) {
168  const char *k = pal->offset;
169  rz_strbuf_appendf(sb, " %s0x%08" PFMT64x, k, d->ptr);
170  } else {
171  rz_strbuf_appendf(sb, " 0x%08" PFMT64x, d->ptr);
172  }
173  break;
// (case label missing from the listing)
175  if (pal) {
176  rz_strbuf_appendf(sb, "%sinvalid" Color_RESET, pal->invalid);
177  } else {
178  rz_strbuf_append(sb, "invalid");
179  }
180  break;
// (case label missing from the listing)
182  rz_strbuf_append(sb, "header");
183  break;
// (case label missing from the listing)
185  rz_strbuf_append(sb, "sequence");
186  break;
// (case label missing from the listing)
188  rz_strbuf_append(sb, "pattern");
189  break;
// (case label missing from the listing)
191  if (pal) {
192  rz_strbuf_appendf(sb, "%sunknown" Color_RESET, pal->invalid);
193  } else {
194  rz_strbuf_append(sb, "unknown");
195  }
196  break;
197  default:
198  if (pal) {
199  rz_strbuf_appendf(sb, "%s(null)" Color_RESET, pal->b0x00);
200  } else {
201  rz_strbuf_append(sb, "(null)");
202  }
203  break;
204  }
205  return rz_strbuf_drain(sb);
206 }
207 
// Builds an RzAnalysisData describing a string found at `addr`.
// `p` is the string data, `len` its length (0 means "use strlen(p)") and
// `type` is stored as the data type. In the visible (else) branch the bytes are
// copied into a NUL-terminated ad->str, duplicated into ad->buf, and ad->len is
// set to len + 1. ad->ptr is always 0. Returns NULL when an allocation fails.
// NOTE(review): original lines 220, 225 and 232 are missing from this listing.
208 RZ_API RzAnalysisData *rz_analysis_data_new_string(ut64 addr, const char *p, int len, int type) {
209  RzAnalysisData *ad = RZ_NEW0(RzAnalysisData);
210  if (!ad) {
211  return NULL;
212  }
213  ad->str = NULL;
214  ad->addr = addr;
215  ad->type = type;
216  if (len == 0) {
217  len = strlen(p);
218  }
219 
// NOTE(review): line 220 (missing) opens the `if` whose else-branch follows;
// presumably it tests for the wide-string type -- confirm.
221  /* TODO: add support for wide strings */
222  } else {
223  ad->str = malloc(len + 1);
224  if (!ad->str) {
// NOTE(review): line 225 (missing) presumably frees `ad` before returning.
226  return NULL;
227  }
228  memcpy(ad->str, p, len);
229  ad->str[len] = 0;
230  ad->buf = malloc(len + 1);
231  if (!ad->buf) {
// NOTE(review): line 232 (missing) presumably releases `ad` and ad->str.
233  RZ_LOG_ERROR("Cannot allocate %d byte(s)\n", len + 1);
234  return NULL;
235  }
236  memcpy(ad->buf, ad->str, len + 1);
237  ad->len = len + 1; // string length + \x00
238  }
239  ad->ptr = 0L;
240  return ad;
241 }
242 
// Builds an RzAnalysisData of the given `type` for the data at `addr`.
// ad->buf is pointed at the embedded ad->sbuf storage, its first 8 bytes are
// zeroed, and at most 8 bytes of `buf` (when non-NULL) are copied into it.
// `n` is stored in ad->ptr. Returns NULL when allocation fails or `len` < 1.
// NOTE(review): original lines 252, 262 and 263 are missing from this listing.
243 RZ_API RzAnalysisData *rz_analysis_data_new(ut64 addr, int type, ut64 n, const ut8 *buf, int len) {
244  RzAnalysisData *ad = RZ_NEW0(RzAnalysisData);
245  int l = RZ_MIN(len, 8);
246  if (!ad) {
247  return NULL;
248  }
249  ad->buf = (ut8 *)&(ad->sbuf);
250  memset(ad->buf, 0, 8);
251  if (l < 1) {
// NOTE(review): line 252 (missing) presumably frees `ad` before returning.
253  return NULL;
254  }
255  if (buf) {
256  memcpy(ad->buf, buf, l);
257  }
258  ad->addr = addr;
259  ad->type = type;
260  ad->str = NULL;
261  switch (type) {
// NOTE(review): lines 262-263 (missing) hold the case labels for which the
// full, unclamped `len` is recorded; every other type records the clamped `l`.
264  ad->len = len;
265  break;
266  default:
267  ad->len = l;
268  }
269  ad->ptr = n;
270  return ad;
271 }
272 
273 RZ_API void rz_analysis_data_free(RzAnalysisData *d) {
274  if (d) {
275  if (d->buf != (ut8 *)&(d->sbuf)) {
276  free(d->buf);
277  }
278  free(d->str);
279  free(d);
280  }
281 }
282 
// Classifies the bytes at `addr` and returns a descriptor for them.
// `word` is `wordsize` when non-zero, otherwise min(8, analysis->bits / 8).
// Buffers shorter than 4 bytes yield NULL. The checks run in this order:
// all-0xff word, ascending sequence / repeated byte over up to 64 bytes,
// all-zero word, executable magic, pointer, string, number.
// NOTE(review): original lines 293, 313, 317, 322, 325, 330, 340 and 343 are
// missing from this listing. They are the statements executed when a check
// matches (presumably `return rz_analysis_data_new(...)` with the matching
// type) plus the final fallback return -- confirm against the real file.
283 RZ_API RzAnalysisData *rz_analysis_data(RzAnalysis *analysis, ut64 addr, const ut8 *buf, int size, int wordsize) {
284  ut64 dst = 0;
285  int n, nsize = 0;
286  int bits = analysis->bits;
287  int word = wordsize ? wordsize : RZ_MIN(8, bits / 8);
288 
289  if (size < 4) {
290  return NULL;
291  }
292  if (size >= word && is_invalid(buf, word)) {
294  }
295  {
// is_sequence counts the current run of bytes that each equal the previous
// byte + 1 (reset on a break); is_pattern counts bytes equal to buf[0].
// NOTE(review): `ch`/`ch2` are plain char while buf[i] is ut8, so where char is
// signed, bytes >= 0x80 cannot compare equal -- confirm whether that is intended.
296  int i, len = RZ_MIN(size, 64);
297  int is_pattern = 0;
298  int is_sequence = 0;
299  char ch = buf[0];
300  char ch2 = ch + 1;
301  for (i = 1; i < len; i++) {
302  if (ch2 == buf[i]) {
303  ch2++;
304  is_sequence++;
305  } else {
306  is_sequence = 0;
307  }
308  if (ch == buf[i]) {
309  is_pattern++;
310  }
311  }
312  if (is_sequence > len - 2) {
314  buf, is_sequence);
315  }
316  if (is_pattern > len - 2) {
318  buf, is_pattern);
319  }
320  }
321  if (size >= word && is_null(buf, word)) {
323  }
324  if (is_bin(buf, size)) {
326  }
327  if (size >= word) {
328  dst = is_pointer(analysis, buf, word);
329  if (dst) {
331  }
332  }
// is_string returns 1 for ASCII text and 2 for a wide-string hit; nsize
// receives the measured length.
333  switch (is_string(buf, size, &nsize)) {
334  case 1: return rz_analysis_data_new_string(addr, (const char *)buf, nsize, RZ_ANALYSIS_DATA_TYPE_STRING);
335  case 2: return rz_analysis_data_new_string(addr, (const char *)buf, nsize, RZ_ANALYSIS_DATA_TYPE_WIDE_STRING);
336  }
337  if (size >= word) {
338  n = is_number(buf, word);
339  if (n) {
341  }
342  }
344 }
345 
// Guesses what a buffer mostly contains by classifying it piece by piece with
// rz_analysis_data and tallying the results.
// Returns a static string: "unknown" when nothing was examined, "invalid" or
// "code" when the matching tally exceeds 60% of the iterations, "text" when
// the string tally exceeds 40%, and "data" otherwise.
// NOTE(review): original lines 364, 368, 374 and 378 (the `case` labels) are
// missing from this listing; the comments below name each branch by the
// counter it updates.
// NOTE(review): `word` is a->bits / 8; if that is 0 the `i += word` steps do
// not advance `i` -- confirm callers always have bits >= 8.
346 RZ_API const char *rz_analysis_data_kind(RzAnalysis *a, ut64 addr, const ut8 *buf, int len) {
347  int inv = 0;
348  int unk = 0;
349  int str = 0;
350  int num = 0;
351  int i, j;
352  RzAnalysisData *data;
353  int word = a->bits / 8;
// i is the byte offset into buf, j counts loop iterations (the denominator).
354  for (i = j = 0; i < len; j++) {
355  if (str && !buf[i]) {
356  str++;
357  }
358  data = rz_analysis_data(a, addr + i, buf + i, len - i, 0);
359  if (!data) {
360  i += word;
361  continue;
362  }
363  switch (data->type) {
// (case label missing) branch counted as invalid
365  inv++;
366  i += word;
367  break;
// (case label missing) branch counted as number, only for values above 1000
369  if (data->ptr > 1000) {
370  num++;
371  }
372  i += word;
373  break;
// (case label missing) branch counted as unknown
375  unk++;
376  i += word;
377  break;
// (case label missing) branch counted as string; skips the whole string
379  if (data->len > 0) {
380  i += data->len;
381  } else {
382  i += word;
383  }
384  str++;
385  break;
386  default:
387  i += word;
388  }
389  rz_analysis_data_free(data);
390  }
391  if (j < 1) {
392  return "unknown";
393  }
394  if ((inv * 100 / j) > 60) {
395  return "invalid";
396  }
// Both a majority of unknown pieces and a majority of numbers report "code".
397  if ((unk * 100 / j) > 60) {
398  return "code";
399  }
400  if ((num * 100 / j) > 60) {
401  return "code";
402  }
403  if ((str * 100 / j) > 40) {
404  return "text";
405  }
406  return "data";
407 }
408 
// Body of rz_analysis_datatype_to_string. The index at the end of this page
// gives the signature as:
//   RZ_API const char *rz_analysis_datatype_to_string(RzAnalysisDataType t)
// Maps a data type enumerator to a static name; returns NULL for the first
// (label-less here) case and for any value not handled by the switch.
// NOTE(review): the signature line (409) and every `case` label except the
// OBJECT one are missing from this listing. Presumably each return pairs with
// the like-named RZ_ANALYSIS_DATATYPE_* enumerator -- confirm.
410  switch (t) {
412  return NULL;
414  return "array";
415  case RZ_ANALYSIS_DATATYPE_OBJECT: // instance
416  return "object";
418  return "string";
420  return "class";
422  return "boolean";
424  return "int16";
426  return "int32";
428  return "int64";
430  return "float";
431  }
432  return NULL;
433 }
size_t len
Definition: 6502dis.c:15
lzma_index ** i
Definition: index.h:629
static SblHeader sb
Definition: bin_mbn.c:26
int bits(struct state *s, int need)
Definition: blast.c:72
#define RZ_API
#define NULL
Definition: cris-opc.c:27
#define r
Definition: crypto_rc6.c:12
#define MINLEN
Definition: data.c:6
RZ_API RzAnalysisData * rz_analysis_data_new(ut64 addr, int type, ut64 n, const ut8 *buf, int len)
Definition: data.c:243
static int is_null(const ut8 *buf, int size)
Definition: data.c:42
RZ_API const char * rz_analysis_datatype_to_string(RzAnalysisDataType t)
Definition: data.c:409
RZ_API RzAnalysisData * rz_analysis_data(RzAnalysis *analysis, ut64 addr, const ut8 *buf, int size, int wordsize)
Definition: data.c:283
RZ_API const char * rz_analysis_data_kind(RzAnalysis *a, ut64 addr, const ut8 *buf, int len)
Definition: data.c:346
static ut64 is_pointer(RzAnalysis *analysis, const ut8 *buf, int size)
Definition: data.c:58
RZ_API RzAnalysisData * rz_analysis_data_new_string(ut64 addr, const char *p, int len, int type)
Definition: data.c:208
RZ_API void rz_analysis_data_free(RzAnalysisData *d)
Definition: data.c:273
static int is_number(const ut8 *buf, int size)
Definition: data.c:37
static int is_string(const ut8 *buf, int size, int *len)
Definition: data.c:7
static bool is_bin(const ut8 *buf, int size)
Definition: data.c:88
RZ_API char * rz_analysis_data_to_string(RzAnalysisData *d, RzConsPrintablePalette *pal)
Definition: data.c:104
static int is_invalid(const ut8 *buf, int size)
Definition: data.c:47
uint32_t ut32
const char * k
Definition: dsignal.c:11
RZ_API void Ht_() free(HtName_(Ht) *ht)
Definition: ht_inc.c:130
voidpf void uLong size
Definition: ioapi.h:138
voidpf void * buf
Definition: ioapi.h:138
uint8_t ut8
Definition: lh5801.h:11
return memset(p, 0, total)
void * p
Definition: libc.cpp:67
memcpy(mem, inblock.get(), min(CONTAINING_RECORD(inblock.get(), MEMBLOCK, data) ->size, size))
void * malloc(size_t size)
Definition: malloc.c:123
static static fork const void static count static fd const char const char static newpath char char char static envp time_t static t const char static mode static whence const char static dir time_t static t unsigned static seconds const char struct utimbuf static buf static inc static sig const char static mode static oldfd struct tms static buf static getgid static geteuid const char static filename static arg static mask struct ustat static ubuf static getppid static setsid static egid sigset_t static set struct timeval struct timezone static tz fd_set fd_set fd_set struct timeval static timeout const char char static bufsiz const char static swapflags void static offset const char static length static mode static who const char struct statfs static buf unsigned unsigned num
Definition: sflib.h:126
char * dst
Definition: lz4.h:724
static uint32_t const uint8_t * buf2
Definition: memcmplen.h:43
int n
Definition: mipsasm.c:19
int type
Definition: mipsasm.c:17
@ RZ_ANALYSIS_DATA_TYPE_INVALID
Definition: rz_analysis.h:78
@ RZ_ANALYSIS_DATA_TYPE_PATTERN
Definition: rz_analysis.h:81
@ RZ_ANALYSIS_DATA_TYPE_UNKNOWN
Definition: rz_analysis.h:73
@ RZ_ANALYSIS_DATA_TYPE_NUMBER
Definition: rz_analysis.h:77
@ RZ_ANALYSIS_DATA_TYPE_STRING
Definition: rz_analysis.h:74
@ RZ_ANALYSIS_DATA_TYPE_WIDE_STRING
Definition: rz_analysis.h:75
@ RZ_ANALYSIS_DATA_TYPE_HEADER
Definition: rz_analysis.h:79
@ RZ_ANALYSIS_DATA_TYPE_POINTER
Definition: rz_analysis.h:76
@ RZ_ANALYSIS_DATA_TYPE_SEQUENCE
Definition: rz_analysis.h:80
@ RZ_ANALYSIS_DATA_TYPE_NULL
Definition: rz_analysis.h:72
@ RZ_ANALYSIS_DATATYPE_FLOAT
Definition: rz_analysis.h:806
@ RZ_ANALYSIS_DATATYPE_INT64
Definition: rz_analysis.h:805
@ RZ_ANALYSIS_DATATYPE_STRING
Definition: rz_analysis.h:800
@ RZ_ANALYSIS_DATATYPE_NULL
Definition: rz_analysis.h:797
@ RZ_ANALYSIS_DATATYPE_INT32
Definition: rz_analysis.h:804
@ RZ_ANALYSIS_DATATYPE_ARRAY
Definition: rz_analysis.h:798
@ RZ_ANALYSIS_DATATYPE_CLASS
Definition: rz_analysis.h:801
@ RZ_ANALYSIS_DATATYPE_BOOLEAN
Definition: rz_analysis.h:802
@ RZ_ANALYSIS_DATATYPE_OBJECT
Definition: rz_analysis.h:799
@ RZ_ANALYSIS_DATATYPE_INT16
Definition: rz_analysis.h:803
enum rz_analysis_data_type_t RzAnalysisDataType
#define Color_RESET
Definition: rz_cons.h:617
#define RZ_LOG_ERROR(fmtstr,...)
Definition: rz_log.h:58
RZ_API ut64 rz_mem_get_num(const ut8 *b, int size)
Definition: mem.c:152
#define IS_PRINTABLE(x)
Definition: rz_str_util.h:10
RZ_API RZ_OWN char * rz_strbuf_drain(RzStrBuf *sb)
Definition: strbuf.c:342
RZ_API bool rz_strbuf_reserve(RzStrBuf *sb, size_t len)
Definition: strbuf.c:67
RZ_API bool rz_strbuf_append(RzStrBuf *sb, const char *s)
Definition: strbuf.c:222
RZ_API RzStrBuf * rz_strbuf_new(const char *s)
Definition: strbuf.c:8
RZ_API void rz_strbuf_free(RzStrBuf *sb)
Definition: strbuf.c:358
RZ_API bool rz_strbuf_appendf(RzStrBuf *sb, const char *fmt,...) RZ_PRINTF_CHECK(2
#define PFMT64d
Definition: rz_types.h:394
#define RZ_NEW0(x)
Definition: rz_types.h:284
#define PFMT64x
Definition: rz_types.h:393
#define RZ_MIN(x, y)
#define UT32_MAX
Definition: rz_types_base.h:99
static int
Definition: sfsocketcall.h:114
#define d(i)
Definition: sha256.c:44
#define a(i)
Definition: sha256.c:41
RzIOBind iob
Definition: rz_analysis.h:574
RzIOReadAt read_at
Definition: rz_io.h:240
RzIOIsValidOff is_valid_offset
Definition: rz_io.h:257
RzIO * io
Definition: rz_io.h:232
ut64(WINAPI *w32_GetEnabledXStateFeatures)()
static int addr
Definition: z80asm.c:58
#define L
Definition: zip_err_str.c:7