Rizin
unix-like reverse engineering framework and cli tools
hex.c
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: 2007-2020 pancake <pancake@nopcode.org>
2 // SPDX-License-Identifier: LGPL-3.0-only
3 
4 #include "rz_types.h"
5 #include "rz_util.h"
6 #include <stdio.h>
7 #include <ctype.h>
8 
9 /* int c; ret = hex_to_byte(&c, 'c'); */
11  if (IS_DIGIT(c)) {
12  *val = (ut8)(*val) * 16 + (c - '0');
13  } else if (c >= 'A' && c <= 'F') {
14  *val = (ut8)(*val) * 16 + (c - 'A' + 10);
15  } else if (c >= 'a' && c <= 'f') {
16  *val = (ut8)(*val) * 16 + (c - 'a' + 10);
17  } else {
18  return 1;
19  }
20  return 0;
21 }
22 
23 RZ_API char *rz_hex_from_py_str(char *out, const char *code) {
24  if (!strncmp(code, "'''", 3)) {
25  const char *s = code + 2;
26  return rz_hex_from_c_str(out, &s);
27  }
28  return rz_hex_from_c_str(out, &code);
29 }
30 
/**
 * \brief Step over a Python '#' comment.
 *
 * \param code Text to inspect
 * \return \p code itself when it does not start with '#'; otherwise the
 *         character after the next newline, or - when no newline follows -
 *         the character right after the '#'
 */
static const char *skip_comment_py(const char *code) {
	if (*code == '#') {
		const char *newline = strchr(code, '\n');
		return (newline ? newline : code) + 1;
	}
	return code;
}
41 
42 RZ_API char *rz_hex_from_py_array(char *out, const char *code) {
43  const char abc[] = "0123456789abcdef";
44  if (*code != '[' || !strchr(code, ']')) {
45  return NULL;
46  }
47  code++;
48  for (; *code; code++) {
49  char *comma = strchr(code, ',');
50  if (!comma) {
51  comma = strchr(code, ']');
52  }
53  if (!comma) {
54  break;
55  }
56  char *_word = rz_str_ndup(code, comma - code);
57  const char *word = _word;
58  while (*word == ' ' || *word == '\t' || *word == '\n') {
59  word++;
60  word = skip_comment_py(word);
61  }
62  if (IS_DIGIT(*word)) {
63  ut8 n = (ut8)rz_num_math(NULL, word);
64  *out++ = abc[(n >> 4) & 0xf];
65  *out++ = abc[n & 0xf];
66  }
67  free(_word);
68  code = comma;
69  if (*code == ']') {
70  break;
71  }
72  }
73  return out;
74 }
75 
/*
 * Convert Python source holding a list or string literal into a hex string.
 *
 * Returns a malloc()ed, NUL-terminated buffer of strlen(code) * 3 bytes, or
 * NULL when code is NULL, the allocation fails, or `out` ended up NULL.
 *
 * NOTE(review): listing lines 91, 94 and 96 are missing from this copy, so the
 * scan loop and both branches look empty here. Presumably they skip comments
 * and call the array / string converters - confirm against the upstream file.
 * NOTE(review): for an empty `code` the requested size is 0 while the code
 * below still stores a terminator - verify.
 */
76 RZ_API char *rz_hex_from_py(const char *code) {
77  if (!code) {
78  return NULL;
79  }
80  char *const ret = malloc(strlen(code) * 3);
81  if (!ret) {
82  return NULL;
83  }
84  *ret = '\0';
85  char *out = ret;
 // start scanning at the first '=' when there is one
86  const char *tmp_code = strchr(code, '=');
87  if (tmp_code) {
88  code = tmp_code;
89  }
 // advance to the first '[', single quote or double quote (or the end of the text)
90  for (; *code && *code != '[' && *code != '\'' && *code != '"'; code++) {
92  }
93  if (*code == '[') {
95  } else if (*code == '"' || *code == '\'') {
97  }
 // a NULL cursor signals failure: release the buffer
98  if (!out) {
99  free(ret);
100  return NULL;
101  }
102  *out = '\0';
103  return ret;
104 }
105 
106 RZ_API char *rz_hex_from_c_str(char *out, const char **code) {
107  const char abc[] = "0123456789abcdefABCDEF";
108  const char *iter = *code;
109  if (*iter != '\'' && *iter != '"') {
110  return NULL;
111  }
112  const char end_char = *iter;
113  iter++;
114  for (; *iter && *iter != end_char; iter++) {
115  if (*iter == '\\') {
116  iter++;
117  switch (iter[0]) {
118  case 'e':
119  *out++ = '1';
120  *out++ = 'b';
121  break;
122  case 'r':
123  *out++ = '0';
124  *out++ = 'd';
125  break;
126  case 'n':
127  *out++ = '0';
128  *out++ = 'a';
129  break;
130  case 'x': {
131  ut8 c1 = iter[1];
132  ut8 c2 = iter[2];
133  iter += 2;
134  if (c1 == '\0' || c2 == '\0') {
135  return NULL;
136  } else if (strchr(abc, c1) && strchr(abc, c2)) {
137  *out++ = tolower(c1);
138  *out++ = tolower(c2);
139  } else {
140  return NULL;
141  }
142  break;
143  }
144  default:
145  if (iter[0] == end_char) {
146  *out++ = abc[*iter >> 4];
147  *out++ = abc[*iter & 0xf];
148  }
149  return NULL;
150  }
151  } else {
152  *out++ = abc[*iter >> 4];
153  *out++ = abc[*iter & 0xf];
154  }
155  }
156  *code = iter;
157  return out;
158 }
159 
/**
 * \brief Step over a C comment located at the start of \p code.
 *
 * \param code Text to inspect
 * \return For a block comment: the character after its closing marker, or
 *         \p code unchanged (after printing a warning) when it is never
 *         closed. For a line comment: the character after the newline, or
 *         \p code unchanged when no newline follows. Anything else is
 *         returned unchanged.
 */
const char *skip_comment_c(const char *code) {
	if (!strncmp(code, "/*", 2)) {
		// search behind the opener so that "/*/" is not taken as a full comment
		const char *end = strstr(code + 2, "*/");
		if (end) {
			code = end + 2;
		} else {
			eprintf("Missing closing comment\n");
		}
	} else if (!strncmp(code, "//", 2)) {
		const char *end = strchr(code, '\n');
		if (end) {
			// resume right after the newline; skipping one more character
			// dropped the first byte of the next line and could step past
			// the terminating NUL
			code = end + 1;
		}
	}
	return code;
}
176 
177 RZ_API char *rz_hex_from_c_array(char *out, const char *code) {
178  const char abc[] = "0123456789abcdef";
179  if (*code != '{' || !strchr(code, '}')) {
180  return NULL;
181  }
182  code++;
183  for (; *code; code++) {
184  const char *comma = strchr(code, ',');
185  if (!comma) {
186  comma = strchr(code, '}');
187  }
188  char *_word = rz_str_ndup(code, comma - code);
189  const char *word = _word;
190  word = skip_comment_c(word);
191  while (*word == ' ' || *word == '\t' || *word == '\n') {
192  word++;
193  word = skip_comment_c(word);
194  }
195  if (IS_DIGIT(*word)) {
196  ut8 n = (ut8)rz_num_math(NULL, word);
197  *out++ = abc[(n >> 4) & 0xf];
198  *out++ = abc[n & 0xf];
199  }
200  free(_word);
201  code = comma;
202  if (code && *code == '}') {
203  break;
204  }
205  }
206  return out;
207 }
208 
209 /* convert:
210  * char *foo = "\x41\x23\x42\x1b";
211  * into:
212  * 4123421b
213  */
/*
 * Returns a malloc()ed, NUL-terminated buffer of strlen(code) * 3 bytes, or
 * NULL when code is NULL, the allocation fails, or `out` ended up NULL.
 *
 * NOTE(review): listing lines 229, 232 and 238 are missing from this copy, so
 * the scan loop, the '{' branch and part of the string loop look empty here.
 * Presumably they skip comments and call the array / string converters -
 * confirm against the upstream file.
 * NOTE(review): for an empty `code` the requested size is 0 while the code
 * below still stores a terminator - verify.
 */
214 RZ_API char *rz_hex_from_c(const char *code) {
215  if (!code) {
216  return NULL;
217  }
218  char *const ret = malloc(strlen(code) * 3);
219  if (!ret) {
220  return NULL;
221  }
222  *ret = '\0';
223  char *out = ret;
 // start scanning at the first '=' when there is one
224  const char *tmp_code = strchr(code, '=');
225  if (tmp_code) {
226  code = tmp_code;
227  }
 // advance to the first '{' or '"' (or the end of the text)
228  for (; *code != '\0' && *code != '{' && *code != '"'; code++) {
230  }
231  if (*code == '{') {
233  } else if (*code == '"') {
234  const char *s1, *s2;
235  s1 = code;
 // keep going while another '"' is found that is not located after the next ';'
236  do {
237  code = s1;
239  if (!out) {
240  break;
241  }
242  s1 = strchr(code + 1, '"');
243  s2 = strchr(code + 1, ';');
244  } while (s1 && s2 && (s1 <= s2));
245  }
 // a NULL cursor signals failure: release the buffer
246  if (!out) {
247  free(ret);
248  return NULL;
249  }
250  *out = '\0';
251  return ret;
252 }
253 
254 RZ_API char *rz_hex_from_js(const char *code) {
255  char *s1 = strchr(code, '\'');
256  char *s2 = strchr(code, '"');
257 
258  /* there are no strings in the input */
259  if (!(s1 || s2)) {
260  return NULL;
261  }
262 
263  char *start, *end;
264  if (s1 < s2) {
265  start = s1;
266  end = strchr(start + 1, '\'');
267  } else {
268  start = s2;
269  end = strchr(start + 1, '"');
270  }
271 
272  /* the string isn't properly terminated */
273  if (!end) {
274  return NULL;
275  }
276 
277  char *str = rz_str_ndup(start + 1, end - start - 1);
278 
279  /* assuming base64 input, output will always be shorter */
280  ut8 *b64d = malloc(end - start);
281  if (!b64d) {
282  free(str);
283  return NULL;
284  }
285 
286  rz_base64_decode(b64d, str, end - start - 1);
287  if (!b64d) {
288  free(str);
289  free(b64d);
290  return NULL;
291  }
292 
293  // TODO: use rz_str_bin2hex
294  int i, len = strlen((const char *)b64d);
295  char *out = malloc(len * 2 + 1);
296  if (!out) {
297  free(str);
298  free(b64d);
299  return NULL;
300  }
301  for (i = 0; i < len; i++) {
302  sprintf(&out[i * 2], "%02x", b64d[i]);
303  }
304  out[len * 2] = '\0';
305 
306  free(str);
307  free(b64d);
308  return out;
309 }
310 
311 /* convert
312  * "\x41\x23\x42\x1b"
313  * "\x41\x23\x42\x1b"
314  * into
315  * 4123421b4123421b
316  */
/*
 * Returns a calloc()ed, NUL-terminated buffer of strlen(code) * 3 bytes, or
 * NULL when code is NULL, the allocation fails, or `out` is NULL after the
 * first step.
 *
 * NOTE(review): listing lines 327 and 336 are missing from this copy;
 * presumably each converts one string literal before the search for the next
 * '"' - confirm against the upstream file.
 */
317 RZ_API char *rz_hex_no_code(const char *code) {
318  if (!code) {
319  return NULL;
320  }
321  char *const ret = calloc(1, strlen(code) * 3);
322  if (!ret) {
323  return NULL;
324  }
325  *ret = '\0';
326  char *out = ret;
 // move on to the next double quote after the current position
328  code = strchr(code + 1, '"');
329  if (!out) {
330  free(ret);
331  return NULL;
332  }
333  *out = '\0';
 // repeat for as long as the cursor is valid and another '"' was found
334  while (out && code) {
335  *out = '\0';
337  code = strchr(code + 1, '"');
338  }
339  return ret;
340 }
341 
342 RZ_API char *rz_hex_from_code(const char *code) {
343  if (!strchr(code, '=')) {
344  return rz_hex_no_code(code);
345  }
346  /* C language */
347  if (strstr(code, "char") || strstr(code, "int")) {
348  return rz_hex_from_c(code);
349  }
350  /* JavaScript */
351  if (strstr(code, "var")) {
352  return rz_hex_from_js(code);
353  }
354  /* Python */
355  return rz_hex_from_py(code);
356 }
357 
358 /* int byte = hexpair2bin("A0"); */
359 // (0A) => 10 || -1 (on error)
360 RZ_API int rz_hex_pair2bin(const char *arg) {
361  ut8 *ptr, c = 0, d = 0;
362  ut32 j = 0;
363 
364  for (ptr = (ut8 *)arg;; ptr = ptr + 1) {
365  if (!*ptr || *ptr == ' ' || j == 2) {
366  break;
367  }
368  d = c;
369  if (*ptr != '.' && rz_hex_to_byte(&c, *ptr)) {
370  eprintf("Invalid hexa string at char '%c' (%s).\n",
371  *ptr, arg);
372  return -1;
373  }
374  c |= d;
375  if (j++ == 0) {
376  c <<= 4;
377  }
378  }
379  return (int)c;
380 }
381 
382 RZ_API int rz_hex_bin2str(const ut8 *in, int len, char *out) {
383  int i, idx;
384  char tmp[8];
385  if (len < 0) {
386  return 0;
387  }
388  for (idx = i = 0; i < len; i++, idx += 2) {
389  snprintf(tmp, sizeof(tmp), "%02x", in[i]);
390  memcpy(out + idx, tmp, 2);
391  }
392  out[idx] = 0;
393  return len;
394 }
395 
405 RZ_API void rz_hex_ut2st_str(const ut32 in, RZ_INOUT char *out, const int len) {
406  char tmp[12];
407  if (len < sizeof(tmp)) {
408  RZ_LOG_FATAL("Output buffer too small for negative 32bit value.\n");
409  }
410  snprintf(tmp, sizeof(tmp), "-0x%" PFMT32x, ~in + 1);
411  memcpy(out, tmp, sizeof(tmp));
412  return;
413 }
414 
415 RZ_API char *rz_hex_bin2strdup(const ut8 *in, int len) {
416  int i, idx;
417  char tmp[5], *out;
418 
419  if ((len + 1) * 2 < len) {
420  return NULL;
421  }
422  out = malloc((len + 1) * 2);
423  if (!out) {
424  return NULL;
425  }
426  for (i = idx = 0; i < len; i++, idx += 2) {
427  snprintf(tmp, sizeof(tmp), "%02x", in[i]);
428  memcpy(out + idx, tmp, 2);
429  }
430  out[idx] = 0;
431  return out;
432 }
433 
/*
 * Convert the hex text `in` into binary form in `out`.
 *
 * "0x" prefixes, whitespace, '#' and '//' line comments and C block comments
 * are skipped. When `out` is NULL the digits are only counted (each one is
 * shifted into a scratch byte instead).
 *
 * Returns the number of bytes for an even digit count, the negated number of
 * bytes (counting the padded last one) for an odd digit count, and 0 when any
 * other character is met.
 */
444 RZ_API int rz_hex_str2bin(const char *in, ut8 *out) {
445  long nibbles = 0;
446 
447  while (in && *in) {
448  ut8 tmp;
449  /* skip hex prefix */
450  if (*in == '0' && in[1] == 'x') {
451  in += 2;
452  }
453  /* read hex digits */
 // each digit is shifted into byte nibbles / 2; two consecutive digits complete it
454  while (!rz_hex_to_byte(out ? &out[nibbles / 2] : &tmp, *in)) {
455  nibbles++;
456  in++;
457  }
458  if (*in == '\0') {
459  break;
460  }
461  /* comments */
462  if (*in == '#' || (*in == '/' && in[1] == '/')) {
 // an unterminated comment leaves `in` NULL, which ends the outer loop
463  if ((in = strchr(in, '\n'))) {
464  in++;
465  }
466  continue;
467  } else if (*in == '/' && in[1] == '*') {
468  if ((in = strstr(in, "*/"))) {
469  in += 2;
470  }
471  continue;
472  } else if (!IS_WHITESPACE(*in) && *in != '\n') {
473  /* this is not a valid string */
474  return 0;
475  }
476  /* ignore character */
477  in++;
478  }
479 
480  if (nibbles % 2) {
 // odd digit count: shift in a '0' so the last digit becomes the high nibble
481  if (out) {
482  rz_hex_to_byte(&out[nibbles / 2], '0');
483  }
484  return -(nibbles + 1) / 2;
485  }
486 
487  return nibbles / 2;
488 }
489 
/*
 * Convert a hex pattern that may contain '.' wildcards into bytes plus a mask.
 *
 * `in` is copied into `out`, every '.' becomes '0', and the text is converted
 * in place with rz_hex_str2bin(). The same text is then copied into `mask`,
 * every hex digit becomes 'f' and every '.' becomes '0', and it is converted
 * in place as well.
 *
 * Both `out` and `mask` must hold at least strlen(in) + 1 bytes, since the
 * text including its terminator is copied into them first.
 *
 * Returns the result of the mask conversion, incremented by one when that
 * result is negative.
 */
490 RZ_API int rz_hex_str2binmask(const char *in, ut8 *out, ut8 *mask) {
491  ut8 *ptr;
492  int len, ilen = strlen(in) + 1;
493  int has_nibble = 0;
494  memcpy(out, in, ilen);
495  for (ptr = out; *ptr; ptr++) {
496  if (*ptr == '.') {
497  *ptr = '0';
498  }
499  }
500  len = rz_hex_str2bin((char *)out, out);
 // a negative result signals an odd number of digits
501  if (len < 0) {
502  has_nibble = 1;
503  len = -(len + 1);
504  }
505  if (len != -1) {
506  memcpy(mask, in, ilen);
 // NOTE(review): ilen counts the terminator, so "f0" lands behind the NUL
 // copied above; the scan below stops at that NUL and never sees it, and
 // three more bytes of `mask` are written - confirm the intended offset.
507  if (has_nibble) {
508  memcpy(mask + ilen, "f0", 3);
509  }
510  for (ptr = mask; *ptr; ptr++) {
511  if (IS_HEXCHAR(*ptr)) {
512  *ptr = 'f';
513  } else if (*ptr == '.') {
514  *ptr = '0';
515  }
516  }
517  len = rz_hex_str2bin((char *)mask, mask);
518  if (len < 0) {
519  len++;
520  }
521  }
522  return len;
523 }
524 
526  switch (n) {
527  case 1:
528  if ((in & UT8_GT0)) {
529  return UT64_8U | in;
530  }
531  return in & UT8_MAX;
532  case 2:
533  if ((in & UT16_GT0)) {
534  return UT64_16U | in;
535  }
536  return in & UT16_MAX;
537  case 4:
538  if ((in & UT32_GT0)) {
539  return UT64_32U | in;
540  }
541  return in & UT32_MAX;
542  case 8:
543  return in & UT64_MAX;
544  }
545  return in;
546 }
547 
548 // Check if str contains only hexadecimal characters and return length of bytes
549 RZ_API int rz_hex_str_is_valid(const char *str) {
550  int i;
551  int len = 0;
552  if (!strncmp(str, "0x", 2)) {
553  str += 2;
554  }
555  for (i = 0; str[i] != '\0'; i++) {
556  if (IS_HEXCHAR(str[i])) {
557  len++;
558  }
559  if (IS_HEXCHAR(str[i]) || IS_WHITESPACE(str[i])) {
560  continue;
561  }
562  return -1; // if we're here, then str isn't valid
563  }
564  return len;
565 }
size_t len
Definition: 6502dis.c:15
#define mask()
#define PFMT32x
lzma_index ** i
Definition: index.h:629
ut16 val
Definition: armass64_const.h:6
lsl lsr asr ror lsl lsr asr ror lsl lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror c1
lsl lsr asr ror lsl lsr asr ror lsl lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror c2
const lzma_allocator const uint8_t * in
Definition: block.h:527
const lzma_allocator const uint8_t size_t uint8_t * out
Definition: block.h:528
#define RZ_API
#define NULL
Definition: cris-opc.c:27
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void static offset struct stat static buf void long static basep static whence static length const void static len static semflg const void static shmflg const struct timespec struct timespec static rem const char static group const void start
Definition: sflib.h:133
#define ut8
Definition: dcpu16.h:8
uint32_t ut32
RZ_API char * rz_hex_from_py_array(char *out, const char *code)
Definition: hex.c:42
RZ_API char * rz_hex_from_js(const char *code)
Definition: hex.c:254
RZ_API char * rz_hex_from_c_array(char *out, const char *code)
Definition: hex.c:177
RZ_API char * rz_hex_no_code(const char *code)
Definition: hex.c:317
RZ_API int rz_hex_str2bin(const char *in, ut8 *out)
Convert an input string in into the binary form in out.
Definition: hex.c:444
RZ_API bool rz_hex_to_byte(ut8 *val, ut8 c)
Definition: hex.c:10
RZ_API int rz_hex_str_is_valid(const char *str)
Definition: hex.c:549
RZ_API st64 rz_hex_bin_truncate(ut64 in, int n)
Definition: hex.c:525
RZ_API char * rz_hex_from_code(const char *code)
Definition: hex.c:342
static const char * skip_comment_py(const char *code)
Definition: hex.c:31
RZ_API int rz_hex_pair2bin(const char *arg)
Definition: hex.c:360
RZ_API int rz_hex_bin2str(const ut8 *in, int len, char *out)
Definition: hex.c:382
RZ_API char * rz_hex_from_c(const char *code)
Definition: hex.c:214
const char * skip_comment_c(const char *code)
Definition: hex.c:160
RZ_API int rz_hex_str2binmask(const char *in, ut8 *out, ut8 *mask)
Definition: hex.c:490
RZ_API char * rz_hex_from_py(const char *code)
Definition: hex.c:76
RZ_API char * rz_hex_from_c_str(char *out, const char **code)
Definition: hex.c:106
RZ_API char * rz_hex_bin2strdup(const ut8 *in, int len)
Definition: hex.c:415
RZ_API void rz_hex_ut2st_str(const ut32 in, RZ_INOUT char *out, const int len)
Takes an unsigned 32bit integer with MSB set to 1 and returns the signed integer in hex format as str...
Definition: hex.c:405
RZ_API char * rz_hex_from_py_str(char *out, const char *code)
Definition: hex.c:23
RZ_API void Ht_() free(HtName_(Ht) *ht)
Definition: ht_inc.c:130
snprintf
Definition: kernel.h:364
sprintf
Definition: kernel.h:365
uint8_t ut8
Definition: lh5801.h:11
memcpy(mem, inblock.get(), min(CONTAINING_RECORD(inblock.get(), MEMBLOCK, data) ->size, size))
void * malloc(size_t size)
Definition: malloc.c:123
void * calloc(size_t number, size_t size)
Definition: malloc.c:102
int n
Definition: mipsasm.c:19
int idx
Definition: setup.py:197
const char * code
Definition: pal.c:98
#define eprintf(x, y...)
Definition: rlcc.c:7
static RzSocket * s
Definition: rtr.c:28
RZ_API int rz_base64_decode(ut8 *bout, const char *bin, int len)
Definition: ubase64.c:48
#define RZ_LOG_FATAL(fmtstr,...)
Definition: rz_log.h:60
RZ_API ut64 rz_num_math(RzNum *num, const char *str)
Definition: unum.c:456
RZ_API char * rz_str_ndup(RZ_NULLABLE const char *ptr, int len)
Create new copy of string ptr limited to size len.
Definition: str.c:1006
#define IS_WHITESPACE(x)
Definition: rz_str_util.h:13
#define IS_HEXCHAR(x)
Definition: rz_str_util.h:9
#define IS_DIGIT(x)
Definition: rz_str_util.h:11
#define RZ_INOUT
Definition: rz_types.h:52
#define UT32_GT0
Definition: rz_types_base.h:95
#define UT64_8U
Definition: rz_types_base.h:92
#define UT8_GT0
#define st64
Definition: rz_types_base.h:10
#define UT32_MAX
Definition: rz_types_base.h:99
#define UT64_16U
Definition: rz_types_base.h:91
#define UT64_32U
Definition: rz_types_base.h:90
#define UT64_MAX
Definition: rz_types_base.h:86
#define UT8_MAX
#define UT16_GT0
#define UT16_MAX
#define tolower(c)
Definition: safe-ctype.h:149
#define d(i)
Definition: sha256.c:44
#define c(i)
Definition: sha256.c:43
#define s1(x)
Definition: sha256.c:60
Definition: inftree9.h:24
ut64(WINAPI *w32_GetEnabledXStateFeatures)()
static int comma
Definition: z80asm.c:76