Rizin
unix-like reverse engineering framework and cli tools
json_parser.c
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: 2020 thestr4ng3r <info@florianmaerkl.de>
2 // SPDX-FileCopyrightText: 2020 Yaroslav Stavnichiy <yarosla@gmail.com>
3 // SPDX-License-Identifier: LGPL-3.0-only
4 
5 #include <errno.h>
6 
7 #include <rz_util/rz_utf8.h>
8 #include <rz_util/rz_hex.h>
9 #include <rz_util/rz_json.h>
10 #include <rz_util/rz_assert.h>
11 #include <rz_util/rz_pj.h>
12 
// Parse-error reporting hook used throughout this file.
// Flip the condition below to 1 to print every parse error to stderr; by
// default the hook only evaluates its arguments and discards them.
#if 0
#define RZ_JSON_REPORT_ERROR(msg, p) \
	do { \
		fprintf(stderr, "RZ_JSON PARSE ERROR (%d): " msg " at %s\n", __LINE__, p); \
	} while (0)
#else
#define RZ_JSON_REPORT_ERROR(msg, p) \
	do { \
		(void)(msg); \
		(void)(p); \
	} while (0)
#endif
23 
24 static RzJson *json_new(void) {
25  return RZ_NEW0(RzJson);
26 }
27 
28 static RzJson *create_json(RzJsonType type, const char *key, RzJson *parent) {
29  RzJson *js = json_new();
30  if (!js) {
31  return NULL;
32  }
33  js->type = type;
34  js->key = key;
35  if (!parent->children.last) {
36  parent->children.first = parent->children.last = js;
37  } else {
38  parent->children.last->next = js;
39  parent->children.last = js;
40  }
41  parent->children.count++;
42  return js;
43 }
44 
46  if (!js) {
47  return;
48  }
49  if (js->type == RZ_JSON_OBJECT || js->type == RZ_JSON_ARRAY) {
50  RzJson *p = js->children.first;
51  RzJson *p1;
52  while (p) {
53  p1 = p->next;
54  rz_json_free(p);
55  p = p1;
56  }
57  }
58  free(js);
59 }
60 
61 static char *unescape_string(char *s, char **end) {
62  char *p = s;
63  char *d = s;
64  char c;
65  while ((c = *p++)) {
66  if (c == '"') {
67  *d = '\0';
68  *end = p;
69  return s;
70  }
71  if (c == '\\') {
72  switch (*p) {
73  case '\\':
74  case '/':
75  case '"':
76  *d++ = *p++;
77  break;
78  case 'b':
79  *d++ = '\b';
80  p++;
81  break;
82  case 'f':
83  *d++ = '\f';
84  p++;
85  break;
86  case 'n':
87  *d++ = '\n';
88  p++;
89  break;
90  case 'r':
91  *d++ = '\r';
92  p++;
93  break;
94  case 't':
95  *d++ = '\t';
96  p++;
97  break;
98  case 'u': { // unicode
99  char *ps = p - 1;
100  ut8 high = 0, low = 0;
101  if (rz_hex_to_byte(&high, p[1]) || rz_hex_to_byte(&high, p[2]) || rz_hex_to_byte(&low, p[3]) || rz_hex_to_byte(&low, p[4])) {
102  RZ_JSON_REPORT_ERROR("invalid unicode escape", p - 1);
103  return NULL;
104  }
105  RzRune codepoint = (RzRune)high << 8 | (RzRune)low;
106  if ((codepoint & 0xfc00) == 0xd800) { // high surrogate; need one more unicode to succeed
107  p += 6;
108  high = low = 0;
109  if (p[-1] != '\\' || *p != 'u' || rz_hex_to_byte(&high, p[1]) || rz_hex_to_byte(&high, p[2]) || rz_hex_to_byte(&low, p[3]) || rz_hex_to_byte(&low, p[4])) {
110  RZ_JSON_REPORT_ERROR("invalid unicode surrogate", ps);
111  return NULL;
112  }
113  RzRune codepoint2 = (RzRune)high << 8 | (RzRune)low;
114  if ((codepoint2 & 0xfc00) != 0xdc00) {
115  RZ_JSON_REPORT_ERROR("invalid unicode surrogate", ps);
116  return NULL;
117  }
118  codepoint = 0x10000 + ((codepoint - 0xd800) << 10) + (codepoint2 - 0xdc00);
119  }
120  int sz = rz_utf8_encode((ut8 *)d, codepoint);
121  if (!s) {
122  RZ_JSON_REPORT_ERROR("invalid codepoint", ps);
123  return NULL;
124  }
125  d += sz;
126  p += 5;
127  break;
128  }
129  default:
130  // leave untouched
131  *d++ = c;
132  break;
133  }
134  } else {
135  *d++ = c;
136  }
137  }
138  RZ_JSON_REPORT_ERROR("no closing quote for string", s);
139  return NULL;
140 }
141 
/**
 * \brief Skip over a block comment.
 *
 * \param ps position of the opening "/" of "/" "*"; the caller must ensure
 *        that ps[0], ps[1] and ps[2] are valid
 * \return position right after the closing "*" "/", or NULL if the comment is
 *         never closed
 */
static char *skip_block_comment(char *ps) {
	char *cur = ps + 2;
	if (*cur == '\0') {
		RZ_JSON_REPORT_ERROR("endless comment", ps);
		return NULL;
	}
	// Look for a '/' that is directly preceded by '*'. The search starts one
	// character past `cur`, so the '*' of the opener is not reused as the
	// closing one for the first character after it.
	do {
		cur = strchr(cur + 1, '/');
		if (!cur) {
			RZ_JSON_REPORT_ERROR("endless comment", ps);
			return NULL;
		}
	} while (cur[-1] != '*');
	return cur + 1;
}
161 
/**
 * \brief Skip whitespace and comments.
 *
 * Both line comments and block comments are accepted. A line comment must be
 * terminated by a newline; one that runs to the end of the input is reported
 * as an error.
 *
 * \param p current position in the NUL-terminated input
 * \return position of the first character that is neither whitespace nor part
 *         of a comment (possibly the terminating NUL), or NULL on an
 *         unterminated comment or a stray '/'
 */
static char *skip_whitespace(char *p) {
	while (*p) {
		if (*p == '/') {
			if (p[1] == '/') { // line comment
				char *ps = p;
				p = strchr(p + 2, '\n');
				if (!p) {
					RZ_JSON_REPORT_ERROR("endless comment", ps);
					return NULL; // error
				}
				p++;
			} else if (p[1] == '*') { // block comment
				// p[2] is at worst the terminating NUL, which satisfies the
				// precondition of skip_block_comment().
				p = skip_block_comment(p);
				if (!p) {
					return NULL;
				}
			} else {
				RZ_JSON_REPORT_ERROR("unexpected chars", p);
				return NULL; // error
			}
			continue;
		}
		if (!IS_WHITECHAR(*p)) {
			break;
		}
		p++;
	}
	return p;
}
191 
/**
 * \brief Parse an object key followed by ':'.
 *
 * \param key out: receives the unescaped key (a pointer into the input) when a
 *        key is found
 * \param p current position inside an object
 * \return position right after the ':' when a key was parsed; the position of
 *         '}' itself when the object ends here (\p key is not written); NULL
 *         on any error
 */
static char *parse_key(const char **key, char *p) {
	p = skip_whitespace(p);
	if (!p) {
		return NULL;
	}
	switch (*p) {
	case '"': {
		char *after = p + 1;
		*key = unescape_string(after, &after);
		if (!*key) {
			return NULL; // propagate error
		}
		after = skip_whitespace(after);
		if (!after) {
			return NULL;
		}
		if (*after != ':') {
			RZ_JSON_REPORT_ERROR("unexpected chars", after);
			return NULL;
		}
		return after + 1;
	}
	case '}':
		// leave the brace for the caller to consume
		return p;
	default:
		// anything else, including the end of the input
		RZ_JSON_REPORT_ERROR("unexpected chars", p);
		return NULL; // error
	}
}
224 
225 static char *parse_value(RzJson *parent, const char *key, char *p) {
226  RzJson *js;
227  p = skip_whitespace(p);
228  if (!p) {
229  return NULL;
230  }
231  switch (*p) {
232  case '\0':
233  RZ_JSON_REPORT_ERROR("unexpected end of text", p);
234  return NULL; // error
235  case '{':
236  js = create_json(RZ_JSON_OBJECT, key, parent);
237  p++;
238  while (1) {
239  const char *new_key = NULL;
240  p = parse_key(&new_key, p);
241  if (!p) {
242  return NULL; // error
243  }
244  if (*p != '}') {
245  p = parse_value(js, new_key, p);
246  if (!p) {
247  return NULL; // error
248  }
249  }
250  p = skip_whitespace(p);
251  if (!p) {
252  return NULL;
253  }
254  if (*p == ',') {
255  char *commapos = p;
256  p++;
257  p = skip_whitespace(p);
258  if (!p) {
259  return NULL;
260  }
261  if (*p == '}') {
262  RZ_JSON_REPORT_ERROR("trailing comma", commapos);
263  return NULL;
264  }
265  } else if (*p == '}') {
266  return p + 1; // end of object
267  } else {
268  RZ_JSON_REPORT_ERROR("unexpected chars", p);
269  return NULL;
270  }
271  }
272  case '[':
273  js = create_json(RZ_JSON_ARRAY, key, parent);
274  p++;
275  while (1) {
276  p = parse_value(js, 0, p);
277  if (!p) {
278  return NULL; // error
279  }
280  p = skip_whitespace(p);
281  if (!p) {
282  return NULL;
283  }
284  if (*p == ',') {
285  char *commapos = p;
286  p++;
287  p = skip_whitespace(p);
288  if (!p) {
289  return NULL;
290  }
291  if (*p == ']') {
292  RZ_JSON_REPORT_ERROR("trailing comma", commapos);
293  return NULL;
294  }
295  } else if (*p == ']') {
296  return p + 1; // end of array
297  } else {
298  RZ_JSON_REPORT_ERROR("unexpected chars", p);
299  return NULL;
300  }
301  }
302  case ']':
303  return p;
304  case '"':
305  p++;
306  js = create_json(RZ_JSON_STRING, key, parent);
307  js->str_value = unescape_string(p, &p);
308  if (!js->str_value) {
309  return NULL; // propagate error
310  }
311  return p;
312  case '-':
313  case '0':
314  case '1':
315  case '2':
316  case '3':
317  case '4':
318  case '5':
319  case '6':
320  case '7':
321  case '8':
322  case '9': {
323  js = create_json(RZ_JSON_INTEGER, key, parent);
324  errno = 0;
325  char *pe;
326  if (*p == '-') {
327  js->num.s_value = (st64)strtoll(p, &pe, 10);
328  } else {
329  js->num.u_value = (ut64)strtoull(p, &pe, 10);
330  }
331  if (pe == p || errno == ERANGE) {
332  RZ_JSON_REPORT_ERROR("invalid number", p);
333  return NULL; // error
334  }
335  if (*pe == '.' || *pe == 'e' || *pe == 'E') { // double value
336  js->type = RZ_JSON_DOUBLE;
337  errno = 0;
338  js->num.dbl_value = strtod(p, &pe);
339  if (pe == p || errno == ERANGE) {
340  RZ_JSON_REPORT_ERROR("invalid fractional number", p);
341  return NULL; // error
342  }
343  } else {
344  if (*p == '-') {
345  js->num.dbl_value = (double)js->num.s_value;
346  } else {
347  js->num.dbl_value = (double)js->num.u_value;
348  }
349  }
350  return pe;
351  }
352  case 't':
353  if (!strncmp(p, "true", 4)) {
354  js = create_json(RZ_JSON_BOOLEAN, key, parent);
355  js->num.u_value = 1;
356  return p + 4;
357  }
358  RZ_JSON_REPORT_ERROR("unexpected chars", p);
359  return NULL; // error
360  case 'f':
361  if (!strncmp(p, "false", 5)) {
362  js = create_json(RZ_JSON_BOOLEAN, key, parent);
363  js->num.u_value = 0;
364  return p + 5;
365  }
366  RZ_JSON_REPORT_ERROR("unexpected chars", p);
367  return NULL; // error
368  case 'n':
369  if (!strncmp(p, "null", 4)) {
370  create_json(RZ_JSON_NULL, key, parent);
371  return p + 4;
372  }
373  RZ_JSON_REPORT_ERROR("unexpected chars", p);
374  return NULL; // error
375  default:
376  RZ_JSON_REPORT_ERROR("unexpected chars", p);
377  return NULL; // error
378  }
379  return NULL;
380 }
381 
383  RzJson js = { 0 };
384  if (!parse_value(&js, 0, text)) {
385  if (js.children.first) {
386  rz_json_free(js.children.first);
387  }
388  return 0;
389  }
390  return js.children.first;
391 }
392 
393 // getter with explicit size parameter, since in rz_json_get_path our key is
394 // not zero-terminated.
395 static const RzJson *rz_json_get_len(const RzJson *json, const char *key, size_t keysize) {
396  RzJson *js;
397  for (js = json->children.first; js; js = js->next) {
398  if (js->key && !strncmp(js->key, key, keysize)) {
399  return js;
400  }
401  }
402  return NULL;
403 }
404 
405 RZ_API const RzJson *rz_json_get(const RzJson *json, const char *key) {
406  return rz_json_get_len(json, key, strlen(key));
407 }
408 
409 RZ_API const RzJson *rz_json_item(const RzJson *json, size_t idx) {
410  RzJson *js;
411  for (js = json->children.first; js; js = js->next) {
412  if (!idx--) {
413  return js;
414  }
415  }
416  return NULL;
417 }
418 
419 RZ_API const RzJson *rz_json_get_path(const RzJson *json, const char *path) {
420  const RzJson *js = json;
421  const char *key;
422  size_t keysize;
423  ut64 index;
424 
425  while (*path) {
426  switch (*path++) {
427  case '\0':
428  break;
429  case '[':
430  // we could check if js->type != RZ_JSON_ARRAY but rz_json_item will
431  // fail in that case anyway
432  key = path;
433  index = (ut64)strtoull(key, (char **)&path, 10);
434  if (key == path || *path != ']') {
435  RZ_JSON_REPORT_ERROR("JSON path: expected ]", path - 1);
436  return NULL;
437  }
438  ++path;
439  js = rz_json_item(js, index);
440  if (!js) {
441  return NULL;
442  }
443  break;
444  case '.':
445  key = path;
446  for (keysize = 0; key[keysize]; ++keysize) {
447  if (strchr(".[", key[keysize])) {
448  break;
449  }
450  }
451  if (keysize == 0) {
452  RZ_JSON_REPORT_ERROR("JSON path: expected key", path - 1);
453  return NULL;
454  }
455  js = rz_json_get_len(js, key, keysize);
456  if (!js) {
457  return NULL;
458  }
459  path = key + keysize;
460  break;
461  default:
462  RZ_JSON_REPORT_ERROR("JSON path: unexpected char", path - 1);
463  return NULL;
464  }
465  }
466  // js == json means we've not done any access at all
467  return (js == json) ? NULL : js;
468 }
469 
470 static void json_pj_recurse(const RzJson *json, PJ *pj, bool with_key) {
471  rz_return_if_fail(json && pj);
472  switch (json->type) {
473  case RZ_JSON_NULL: {
474  if (with_key && json->key) {
475  pj_knull(pj, json->key);
476  } else {
477  pj_null(pj);
478  }
479  break;
480  }
481  case RZ_JSON_OBJECT: {
482  if (with_key && json->key) {
483  pj_ko(pj, json->key);
484  } else {
485  pj_o(pj);
486  }
487  RzJson *baby;
488  for (baby = json->children.first; baby; baby = baby->next) {
489  // Always print keys for children
490  json_pj_recurse(baby, pj, true);
491  }
492  pj_end(pj);
493  break;
494  }
495  case RZ_JSON_ARRAY: {
496  if (with_key && json->key) {
497  pj_ka(pj, json->key);
498  } else {
499  pj_a(pj);
500  }
501  RzJson *baby;
502  for (baby = json->children.first; baby; baby = baby->next) {
503  // Always print keys for children
504  json_pj_recurse(baby, pj, true);
505  }
506  pj_end(pj);
507  break;
508  }
509  case RZ_JSON_STRING: {
510  if (with_key && json->key) {
511  pj_ks(pj, json->key, json->str_value);
512  } else {
513  pj_s(pj, json->str_value);
514  }
515  break;
516  }
517  case RZ_JSON_INTEGER: {
518  if (with_key && json->key) {
519  pj_kN(pj, json->key, json->num.u_value);
520  } else {
521  pj_N(pj, json->num.u_value);
522  }
523  break;
524  }
525  case RZ_JSON_DOUBLE: {
526  if (with_key && json->key) {
527  pj_kd(pj, json->key, json->num.dbl_value);
528  } else {
529  pj_d(pj, json->num.dbl_value);
530  }
531  break;
532  }
533  case RZ_JSON_BOOLEAN: {
534  if (with_key && json->key) {
535  pj_kb(pj, json->key, (bool)json->num.u_value);
536  } else {
537  pj_b(pj, (bool)json->num.u_value);
538  }
539  }
540  }
541 }
542 
543 /* \brief returns the string representation of RzJson object
544  * \param with_key choose if include the object key name in the output
545  */
546 RZ_API RZ_OWN char *rz_json_as_string(const RzJson *json, bool with_key) {
548  PJ *pj = pj_new();
549  if (json->type == RZ_JSON_STRING) {
550  if (with_key && json->key) {
551  pj_ks(pj, json->key, json->str_value);
552  } else {
553  // Printing string without surrounding quotes
554  pj_S(pj, json->str_value);
555  }
556  } else {
557  json_pj_recurse(json, pj, with_key);
558  }
559  char *str = pj_drain(pj);
560  return str;
561 }
#define RZ_API
#define NULL
Definition: cris-opc.c:27
static static fork const void static count static fd const char const char static newpath const char static path const char path
Definition: sflib.h:35
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void static offset struct stat static buf void long static basep static whence static length const void static len key
Definition: sflib.h:118
RZ_API void Ht_() free(HtName_(Ht) *ht)
Definition: ht_inc.c:130
static char * parse_key(const char **key, char *p)
Definition: json_parser.c:192
static RzJson * json_new(void)
Definition: json_parser.c:24
static char * skip_block_comment(char *ps)
Definition: json_parser.c:142
#define RZ_JSON_REPORT_ERROR(msg, p)
Definition: json_parser.c:17
RZ_API const RzJson * rz_json_get_path(const RzJson *json, const char *path)
Definition: json_parser.c:419
RZ_API const RzJson * rz_json_get(const RzJson *json, const char *key)
Definition: json_parser.c:405
static void json_pj_recurse(const RzJson *json, PJ *pj, bool with_key)
Definition: json_parser.c:470
static char * parse_value(RzJson *parent, const char *key, char *p)
Definition: json_parser.c:225
RZ_API RzJson * rz_json_parse(char *text)
Definition: json_parser.c:382
static RzJson * create_json(RzJsonType type, const char *key, RzJson *parent)
Definition: json_parser.c:28
RZ_API void rz_json_free(RzJson *js)
Definition: json_parser.c:45
static char * skip_whitespace(char *p)
Definition: json_parser.c:162
static const RzJson * rz_json_get_len(const RzJson *json, const char *key, size_t keysize)
Definition: json_parser.c:395
RZ_API RZ_OWN char * rz_json_as_string(const RzJson *json, bool with_key)
Definition: json_parser.c:546
RZ_API const RzJson * rz_json_item(const RzJson *json, size_t idx)
Definition: json_parser.c:409
static char * unescape_string(char *s, char **end)
Definition: json_parser.c:61
uint8_t ut8
Definition: lh5801.h:11
void * p
Definition: libc.cpp:67
int type
Definition: mipsasm.c:17
int idx
Definition: setup.py:197
static RzSocket * s
Definition: rtr.c:28
#define rz_return_if_fail(expr)
Definition: rz_assert.h:100
#define rz_return_val_if_fail(expr, val)
Definition: rz_assert.h:108
RZ_API bool rz_hex_to_byte(ut8 *val, ut8 c)
Definition: hex.c:10
#define REPEAT(c, b)
@ RZ_JSON_INTEGER
Definition: rz_json.h:33
@ RZ_JSON_ARRAY
Definition: rz_json.h:31
@ RZ_JSON_DOUBLE
Definition: rz_json.h:34
@ RZ_JSON_OBJECT
Definition: rz_json.h:30
@ RZ_JSON_BOOLEAN
Definition: rz_json.h:35
@ RZ_JSON_STRING
Definition: rz_json.h:32
@ RZ_JSON_NULL
Definition: rz_json.h:29
enum rz_json_type_t RzJsonType
RZ_API PJ * pj_S(PJ *j, const char *k)
Definition: pj.c:212
RZ_API PJ * pj_ko(PJ *j, const char *k)
Definition: pj.c:156
RZ_API PJ * pj_ka(PJ *j, const char *k)
Definition: pj.c:163
RZ_API PJ * pj_new(void)
Definition: pj.c:25
RZ_API PJ * pj_kb(PJ *j, const char *k, bool v)
Definition: pj.c:177
RZ_API char * pj_drain(PJ *j)
Definition: pj.c:50
RZ_API PJ * pj_b(PJ *j, bool v)
Definition: pj.c:190
RZ_API PJ * pj_end(PJ *j)
Definition: pj.c:87
RZ_API PJ * pj_knull(PJ *j, const char *k)
Definition: pj.c:114
RZ_API PJ * pj_o(PJ *j)
Definition: pj.c:75
RZ_API PJ * pj_d(PJ *j, double d)
Definition: pj.c:276
RZ_API PJ * pj_null(PJ *j)
Definition: pj.c:184
RZ_API PJ * pj_s(PJ *j, const char *k)
Definition: pj.c:197
RZ_API PJ * pj_ks(PJ *j, const char *k, const char *v)
Definition: pj.c:170
RZ_API PJ * pj_N(PJ *j, st64 n)
Definition: pj.c:260
RZ_API PJ * pj_kd(PJ *j, const char *k, double d)
Definition: pj.c:136
RZ_API PJ * pj_a(PJ *j)
Definition: pj.c:81
RZ_API PJ * pj_kN(PJ *j, const char *k, st64 n)
Definition: pj.c:128
#define IS_WHITECHAR(x)
Definition: rz_str_util.h:5
#define RZ_OWN
Definition: rz_types.h:62
#define RZ_NEW0(x)
Definition: rz_types.h:284
#define st64
Definition: rz_types_base.h:10
ut32 RzRune
Definition: rz_utf8.h:13
RZ_API int rz_utf8_encode(ut8 *ptr, const RzRune ch)
Definition: utf8.c:535
#define ERANGE
Definition: sftypes.h:144
#define d(i)
Definition: sha256.c:44
#define c(i)
Definition: sha256.c:43
Definition: rz_pj.h:12
struct rz_json_t::@304::@307 children
const char * str_value
Definition: rz_json.h:42
const char * key
Definition: rz_json.h:40
struct rz_json_t * next
Definition: rz_json.h:56
RzJsonType type
Definition: rz_json.h:39
struct rz_json_t::@304::@306 num
ut64(WINAPI *w32_GetEnabledXStateFeatures)()