Rizin
unix-like reverse engineering framework and cli tools
marshal.c
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: 2016-2020 c0riolis
2 // SPDX-FileCopyrightText: 2016-2020 Tardy
3 // SPDX-FileCopyrightText: 2016-2020 l0stb1t
4 // SPDX-License-Identifier: LGPL-3.0-only
5 
6 #include <rz_io.h>
7 #include <rz_bin.h>
8 #include "marshal.h"
9 #include "pyc_magic.h"
10 
// avoid using rizin's internal assert macros
12 #define if_true_return(cond, ret) \
13  if (cond) { \
14  return (ret); \
15  }
16 
18 static pyc_object *copy_object(pyc_object *object);
19 static void free_object(pyc_object *object);
20 
21 static ut8 get_ut8(RzBuffer *buffer, bool *error) {
22  ut8 ret = 0;
23  int size = rz_buf_read(buffer, &ret, sizeof(ret));
24  if (size < sizeof(ret)) {
25  *error = true;
26  }
27  return ret;
28 }
29 
30 static ut16 get_ut16(RzBuffer *buffer, bool *error) {
31  ut16 ret = 0;
32 
33  if (!rz_buf_read_le16(buffer, &ret)) {
34  *error = true;
35  }
36  return ret;
37 }
38 
39 static ut32 get_ut32(RzBuffer *buffer, bool *error) {
40  ut32 ret = 0;
41  if (!rz_buf_read_le32(buffer, &ret)) {
42  *error = true;
43  }
44  return ret;
45 }
46 
47 static st32 get_st32(RzBuffer *buffer, bool *error) {
48  st32 ret = 0;
49  if (!rz_buf_read_le32(buffer, (ut32 *)&ret)) {
50  *error = true;
51  }
52  return ret;
53 }
54 
55 static st64 get_st64(RzBuffer *buffer, bool *error) {
56  st64 ret = 0;
57  if (!rz_buf_read_le64(buffer, (ut64 *)&ret)) {
58  *error = true;
59  }
60  return ret;
61 }
62 
63 static double get_float64(RzBuffer *buffer, bool *error) {
64  double ret = 0;
65  if (!rz_buf_read_le64(buffer, (ut64 *)&ret)) {
66  *error = true;
67  }
68  return ret;
69 }
70 
72  ut8 *ret = RZ_NEWS0(ut8, size + 1);
73  if (!ret) {
74  return NULL;
75  }
76  if (rz_buf_read(buffer, ret, size) < size) {
77  free(ret);
78  return NULL;
79  }
80  return ret;
81 }
82 
83 static pyc_object *get_none_object(void) {
84  pyc_object *ret;
85 
86  ret = RZ_NEW0(pyc_object);
87  if (!ret) {
88  return NULL;
89  }
90  ret->type = TYPE_NONE;
91  ret->data = strdup("None");
92  if (!ret->data) {
93  RZ_FREE(ret);
94  }
95  return ret;
96 }
97 
100  if (!ret) {
101  return NULL;
102  }
103  ret->type = TYPE_FALSE;
104  ret->data = strdup("False");
105  if (!ret->data) {
106  RZ_FREE(ret);
107  }
108  return ret;
109 }
110 
112  pyc_object *ret = RZ_NEW0(pyc_object);
113  if (!ret) {
114  return NULL;
115  }
116  ret->type = TYPE_TRUE;
117  ret->data = strdup("True");
118  if (!ret->data) {
119  RZ_FREE(ret);
120  }
121  return ret;
122 }
123 
125  bool error = false;
126  pyc_object *ret = NULL;
127 
128  st32 i = get_st32(buffer, &error);
129  if (error) {
130  return NULL;
131  }
132  ret = RZ_NEW0(pyc_object);
133  if (!ret) {
134  return NULL;
135  }
136  ret->type = TYPE_INT;
137  ret->data = rz_str_newf("%d", i);
138  if (!ret->data) {
139  RZ_FREE(ret);
140  }
141  return ret;
142 }
143 
145  pyc_object *ret = NULL;
146  bool error = false;
147  st64 i;
148 
149  i = get_st64(buffer, &error);
150 
151  if (error) {
152  return NULL;
153  }
154  ret = RZ_NEW0(pyc_object);
155  if (!ret) {
156  return NULL;
157  }
158  ret->type = TYPE_INT64;
159  ret->data = rz_str_newf("%lld", i);
160  if (!ret->data) {
161  RZ_FREE(ret);
162  }
163  return ret;
164 }
165 
/* a long is used when the number is too large for an int64 (> INT64_MAX) */
168  pyc_object *ret = NULL;
169  bool error = false;
170  bool neg = false;
171  ut32 tmp = 0;
172  size_t size;
173  size_t i, j = 0, left = 0;
174  ut32 n;
175  char *hexstr;
176  char digist2hex[] = "0123456789abcdef";
177 
178  st32 ndigits = get_st32(buffer, &error);
179  if (error) {
180  return NULL;
181  }
182  ret = RZ_NEW0(pyc_object);
183  if (!ret) {
184  return NULL;
185  }
186  ret->type = TYPE_LONG;
187  if (ndigits < 0) {
188  ndigits = -ndigits;
189  neg = true;
190  }
191  if (ndigits == 0) {
192  ret->data = strdup("0x0");
193  } else {
194  // the explicit cast is safe since ndigits is positive
195  size = (size_t)ndigits * 15;
196  size = (size - 1) / 4 + 1;
197  size += 4 + (neg ? 1 : 0);
198  hexstr = malloc(size);
199  if (!hexstr) {
200  free(ret);
201  return NULL;
202  }
203  memset(hexstr, 0x20, size);
204  j = size - 1;
205  hexstr[j] = 0;
206  for (i = 0; i < ndigits; i++) {
207  n = get_ut16(buffer, &error);
208  tmp |= n << left;
209  left += 15;
210 
211  while (left >= 4) {
212  hexstr[--j] = digist2hex[tmp & 0xf];
213  tmp >>= 4;
214  left -= 4;
215  }
216  }
217 
218  if (tmp) {
219  hexstr[--j] = digist2hex[tmp & 0xf];
220  }
221 
222  hexstr[--j] = 'x';
223  hexstr[--j] = '0';
224  if (neg) {
225  hexstr[--j] = '-';
226  }
227 
228  rz_str_trim(hexstr);
229  ret->data = hexstr;
230  }
231  return ret;
232 }
233 
235  pyc_object *ret = NULL;
236  bool error = false;
237  ut32 n = 0;
238 
239  n = get_st32(buffer, &error);
240  if (n >= rz_list_length(pyc->interned_table)) {
241  RZ_LOG_ERROR("bad marshal data (string ref out of range)");
242  return NULL;
243  }
244  if (error) {
245  return NULL;
246  }
247  ret = RZ_NEW0(pyc_object);
248  if (!ret) {
249  return NULL;
250  }
251  ret->type = TYPE_STRINGREF;
252  ret->data = rz_list_get_n(pyc->interned_table, n);
253  if (!ret->data) {
254  RZ_FREE(ret);
255  }
256  return ret;
257 }
258 
260  pyc_object *ret = NULL;
261  bool error = false;
262  ut32 size = 0;
263  ut8 n = 0;
264 
265  n = get_ut8(buffer, &error);
266  if (error) {
267  return NULL;
268  }
269  ret = RZ_NEW0(pyc_object);
270  if (!ret) {
271  return NULL;
272  }
273  ut8 *s = malloc(n + 1);
274  if (!s) {
275  free(ret);
276  return NULL;
277  }
	/* the object contains the string representation of the number */
279  size = rz_buf_read(buffer, s, n);
280  if (size != n) {
281  RZ_FREE(s);
282  RZ_FREE(ret);
283  return NULL;
284  }
285  s[n] = '\0';
286  ret->type = TYPE_FLOAT;
287  ret->data = s;
288  return ret;
289 }
290 
292  pyc_object *ret = NULL;
293  bool error = false;
294  double f;
295 
296  f = get_float64(buffer, &error);
297  if (error) {
298  return NULL;
299  }
300  ret = RZ_NEW0(pyc_object);
301  if (!ret) {
302  return NULL;
303  }
304  ret->type = TYPE_FLOAT;
305  ret->data = rz_str_newf("%.15g", f);
306  if (!ret->data) {
307  RZ_FREE(ret);
308  return NULL;
309  }
310  return ret;
311 }
312 
314  pyc_object *ret = NULL;
315  bool error = false;
316  ut32 size = 0;
317  ut32 n1 = 0;
318  ut32 n2 = 0;
319 
320  ret = RZ_NEW0(pyc_object);
321  if (!ret) {
322  return NULL;
323  }
324 
325  if ((pyc->magic_int & 0xffff) <= 62061) {
326  n1 = get_ut8(buffer, &error);
327  } else {
328  n1 = get_st32(buffer, &error);
329  }
330  if (error) {
331  free(ret);
332  return NULL;
333  }
334  ut8 *s1 = malloc(n1 + 1);
335  if (!s1) {
336  return NULL;
337  }
338  /* object contain string representation of the number */
339  size = rz_buf_read(buffer, s1, n1);
340  if (size != n1) {
341  RZ_FREE(s1);
342  RZ_FREE(ret);
343  return NULL;
344  }
345  s1[n1] = '\0';
346 
347  if ((pyc->magic_int & 0xffff) <= 62061) {
348  n2 = get_ut8(buffer, &error);
349  } else
350  n2 = get_st32(buffer, &error);
351  if (error) {
352  return NULL;
353  }
354  ut8 *s2 = malloc(n2 + 1);
355  if (!s2) {
356  return NULL;
357  }
358  /* object contain string representation of the number */
359  size = rz_buf_read(buffer, s2, n2);
360  if (size != n2) {
361  RZ_FREE(s1);
362  RZ_FREE(s2);
363  RZ_FREE(ret);
364  return NULL;
365  }
366  s2[n2] = '\0';
367 
368  ret->type = TYPE_COMPLEX;
369  ret->data = rz_str_newf("%s+%sj", s1, s2);
370  RZ_FREE(s1);
371  RZ_FREE(s2);
372  if (!ret->data) {
373  RZ_FREE(ret);
374  return NULL;
375  }
376  return ret;
377 }
378 
380  pyc_object *ret = NULL;
381  bool error = false;
382  double a, b;
383 
384  // a + bj
385  a = get_float64(buffer, &error);
386  b = get_float64(buffer, &error);
387  if (error) {
388  return NULL;
389  }
390  ret = RZ_NEW0(pyc_object);
391  if (!ret) {
392  return NULL;
393  }
394  ret->type = TYPE_BINARY_COMPLEX;
395  ret->data = rz_str_newf("%.15g+%.15gj", a, b);
396  if (!ret->data) {
397  RZ_FREE(ret);
398  return NULL;
399  }
400  return ret;
401 }
402 
404  pyc_object *ret = NULL;
405  bool error = false;
406  ut32 n = 0;
407 
408  n = get_ut32(buffer, &error);
409  if (n > ST32_MAX) {
410  RZ_LOG_ERROR("bad marshal data (string size out of range)");
411  return NULL;
412  }
413  if (error) {
414  return NULL;
415  }
416  ret = RZ_NEW0(pyc_object);
417  if (!ret) {
418  return NULL;
419  }
420  ret->type = TYPE_STRING;
421  ret->data = get_bytes(buffer, n);
422  if (!ret->data) {
423  RZ_FREE(ret);
424  return NULL;
425  }
426  return ret;
427 }
428 
430  pyc_object *ret = NULL;
431  bool error = false;
432  ut32 n = 0;
433 
434  n = get_ut32(buffer, &error);
435  if (n > ST32_MAX) {
436  RZ_LOG_ERROR("bad marshal data (unicode size out of range)");
437  return NULL;
438  }
439  if (error) {
440  return NULL;
441  }
442  ret = RZ_NEW0(pyc_object);
443  ret->type = TYPE_UNICODE;
444  ret->data = get_bytes(buffer, n);
445  if (!ret->data) {
446  RZ_FREE(ret);
447  return NULL;
448  }
449  return ret;
450 }
451 
453  pyc_object *ret = NULL;
454  bool error = false;
455  ut32 n = 0;
456 
457  n = get_ut32(buffer, &error);
458  if (n > ST32_MAX) {
459  RZ_LOG_ERROR("bad marshal data (string size out of range)");
460  return NULL;
461  }
462  if (error) {
463  return NULL;
464  }
465  ret = RZ_NEW0(pyc_object);
466  if (!ret) {
467  return NULL;
468  }
469  ret->type = TYPE_INTERNED;
470  ret->data = get_bytes(buffer, n);
471  /* add data pointer to interned table */
472  rz_list_append(pyc->interned_table, ret->data);
473  if (!ret->data) {
474  RZ_FREE(ret);
475  }
476  return ret;
477 }
478 
480  pyc_object *tmp = NULL;
481  pyc_object *ret = NULL;
482  ut32 i = 0;
483 
484  ret = RZ_NEW0(pyc_object);
485  if (!ret) {
486  return NULL;
487  }
489  if (!ret->data) {
490  free(ret);
491  return NULL;
492  }
493  for (i = 0; i < size; i++) {
494  tmp = get_object(pyc, buffer);
495  if (!tmp) {
496  rz_list_free(ret->data);
497  RZ_FREE(ret);
498  return NULL;
499  }
500  if (!rz_list_append(ret->data, tmp)) {
501  free_object(tmp);
502  rz_list_free(ret->data);
503  free(ret);
504  return NULL;
505  }
506  }
507  return ret;
508 }
509 
/* TYPE_SMALL_TUPLE doesn't exist in Python 2 */
513  pyc_object *ret = NULL;
514  bool error = false;
515  ut8 n = 0;
516 
517  n = get_ut8(buffer, &error);
518  if (error) {
519  return NULL;
520  }
521  ret = get_array_object_generic(pyc, buffer, n);
522  if (ret) {
523  ret->type = TYPE_SMALL_TUPLE;
524  return ret;
525  }
526  return NULL;
527 }
528 
530  pyc_object *ret = NULL;
531  bool error = false;
532  ut32 n = 0;
533 
534  n = get_ut32(buffer, &error);
535  if (n > ST32_MAX) {
536  RZ_LOG_ERROR("bad marshal data (tuple size out of range)\n");
537  return NULL;
538  }
539  if (error) {
540  return NULL;
541  }
542  ret = get_array_object_generic(pyc, buffer, n);
543  if (ret) {
544  ret->type = TYPE_TUPLE;
545  return ret;
546  }
547  return NULL;
548 }
549 
551  pyc_object *ret = NULL;
552  bool error = false;
553  ut32 n = 0;
554 
555  n = get_ut32(buffer, &error);
556  if (n > ST32_MAX) {
557  RZ_LOG_ERROR("bad marshal data (list size out of range)\n");
558  return NULL;
559  }
560  if (error) {
561  return NULL;
562  }
563  ret = get_array_object_generic(pyc, buffer, n);
564  if (ret) {
565  ret->type = TYPE_LIST;
566  return ret;
567  }
568  return NULL;
569 }
570 
572  pyc_object *ret = NULL,
573  *key = NULL,
574  *val = NULL;
575 
576  ret = RZ_NEW0(pyc_object);
577  if (!ret) {
578  return NULL;
579  }
581  if (!ret->data) {
582  RZ_FREE(ret);
583  return NULL;
584  }
585  for (;;) {
586  key = get_object(pyc, buffer);
587  if (!key) {
588  break;
589  }
590  if (!rz_list_append(ret->data, key)) {
591  rz_list_free(ret->data);
592  RZ_FREE(ret);
593  free_object(key);
594  return NULL;
595  }
596  val = get_object(pyc, buffer);
597  if (!val) {
598  break;
599  }
600  if (!rz_list_append(ret->data, val)) {
601  rz_list_free(ret->data);
602  RZ_FREE(ret);
603  free_object(val);
604  return NULL;
605  }
606  }
607  ret->type = TYPE_DICT;
608  return ret;
609 }
610 
612  pyc_object *ret = NULL;
613  bool error = false;
614  ut32 n = 0;
615 
616  n = get_ut32(buffer, &error);
617  if (n > ST32_MAX) {
618  RZ_LOG_ERROR("bad marshal data (set size out of range)\n");
619  return NULL;
620  }
621  if (error) {
622  return NULL;
623  }
624  ret = get_array_object_generic(pyc, buffer, n);
625  if (!ret) {
626  return NULL;
627  }
628  ret->type = TYPE_SET;
629  return ret;
630 }
631 
633  pyc_object *ret = NULL;
634 
635  ret = RZ_NEW0(pyc_object);
636  if (!ret) {
637  return NULL;
638  }
639  ret->type = TYPE_ASCII;
640  ret->data = get_bytes(buffer, size);
641  if (!ret->data) {
642  RZ_FREE(ret);
643  }
644  return ret;
645 }
646 
648  bool error = false;
649  ut32 n = 0;
650 
651  n = get_ut32(buffer, &error);
652  if (error) {
653  return NULL;
654  }
655  return get_ascii_object_generic(buffer, n, true);
656 }
657 
659  bool error = false;
660  ut32 n;
661 
662  n = get_ut32(buffer, &error);
663  if (error) {
664  return NULL;
665  }
666  return get_ascii_object_generic(buffer, n, true);
667 }
668 
670  bool error = false;
671  ut8 n;
672 
673  n = get_ut8(buffer, &error);
674  if (error) {
675  return NULL;
676  }
677  return get_ascii_object_generic(buffer, n, false);
678 }
679 
681  bool error = false;
682  ut8 n;
683 
684  n = get_ut8(buffer, &error);
685  if (error) {
686  return NULL;
687  }
688  return get_ascii_object_generic(buffer, n, true);
689 }
690 
692  bool error = false;
693  pyc_object *ret;
694  pyc_object *obj;
695  ut32 index;
696 
697  index = get_ut32(buffer, &error);
698  if (error) {
699  return NULL;
700  }
701  if (index >= rz_list_length(pyc->refs)) {
702  return NULL;
703  }
704  obj = rz_list_get_n(pyc->refs, index);
705  if (!obj) {
706  return NULL;
707  }
708  ret = copy_object(obj);
709  return ret;
710 }
711 
712 static void free_object(pyc_object *object) {
713  if (!object) {
714  return;
715  }
716  switch (object->type) {
717  case TYPE_SMALL_TUPLE:
718  case TYPE_TUPLE:
719  rz_list_free(object->data);
720  break;
721  case TYPE_STRING:
722  case TYPE_TRUE:
723  case TYPE_FALSE:
724  case TYPE_INT:
725  case TYPE_NONE:
726  case TYPE_NULL:
727  case TYPE_ASCII_INTERNED:
728  case TYPE_SHORT_ASCII:
729  case TYPE_ASCII:
731  free(object->data);
732  break;
733  case TYPE_CODE_v0:
734  case TYPE_CODE_v1: {
735  pyc_code_object *cobj = object->data;
736  free_object(cobj->code);
737  free_object(cobj->consts);
738  free_object(cobj->names);
739  free_object(cobj->varnames);
740  free_object(cobj->freevars);
741  free_object(cobj->cellvars);
742  free_object(cobj->filename);
743  free_object(cobj->name);
744  free_object(cobj->lnotab);
745  free(object->data);
746  } break;
747  case TYPE_REF:
748  free_object(object->data);
749  break;
750  case TYPE_SET:
751  case TYPE_FROZENSET:
752  case TYPE_ELLIPSIS:
753  case TYPE_STOPITER:
754  case TYPE_BINARY_COMPLEX:
755  case TYPE_BINARY_FLOAT:
756  case TYPE_COMPLEX:
757  case TYPE_STRINGREF:
758  case TYPE_DICT:
759  case TYPE_FLOAT:
760  case TYPE_INT64:
761  case TYPE_INTERNED:
762  case TYPE_LIST:
763  case TYPE_LONG:
764  case TYPE_UNICODE:
765  case TYPE_UNKNOWN:
766  RZ_LOG_ERROR("Free not implemented for type %x\n", object->type);
767  break;
768  default:
769  RZ_LOG_ERROR("Undefined type in free_object (%x)\n", object->type);
770  break;
771  }
772  free(object);
773 }
774 
776  pyc_object *copy = RZ_NEW0(pyc_object);
777  if (!copy || !object) {
778  free(copy);
779  return NULL;
780  }
781  copy->type = object->type;
782  switch (object->type) {
783  case TYPE_NULL:
784  break;
785  case TYPE_TUPLE:
786  case TYPE_SMALL_TUPLE:
787  copy->data = rz_list_clone(object->data);
788  break;
789  case TYPE_INT:
790  case TYPE_INT64:
791  case TYPE_NONE:
792  case TYPE_TRUE:
793  case TYPE_FALSE:
794  case TYPE_STRING:
795  case TYPE_ASCII:
796  case TYPE_SHORT_ASCII:
797  case TYPE_ASCII_INTERNED:
799  copy->data = strdup(object->data);
800  break;
801  case TYPE_CODE_v0:
802  case TYPE_CODE_v1: {
803  pyc_code_object *src = object->data;
805  if (!dst) {
806  break;
807  }
808  memcpy(dst, src, sizeof(*dst));
809  dst->code = copy_object(src->code);
810  dst->consts = copy_object(src->consts);
811  dst->names = copy_object(src->names);
812  dst->varnames = copy_object(src->varnames);
813  dst->freevars = copy_object(src->freevars);
814  dst->cellvars = copy_object(src->cellvars);
815  dst->filename = copy_object(src->filename);
816  dst->name = copy_object(src->name);
817  dst->lnotab = copy_object(src->lnotab);
818  copy->data = dst;
819  } break;
820  case TYPE_REF:
821  copy->data = copy_object(object->data);
822  break;
823  case TYPE_ELLIPSIS:
824  case TYPE_STOPITER:
825  case TYPE_BINARY_COMPLEX:
826  case TYPE_BINARY_FLOAT:
827  case TYPE_COMPLEX:
828  case TYPE_STRINGREF:
829  case TYPE_DICT:
830  case TYPE_FLOAT:
831  case TYPE_FROZENSET:
832  case TYPE_INTERNED:
833  case TYPE_LIST:
834  case TYPE_LONG:
835  case TYPE_SET:
836  case TYPE_UNICODE:
837  case TYPE_UNKNOWN:
838  RZ_LOG_ERROR("Copy not implemented for type %x\n", object->type);
839  break;
840  default:
841  RZ_LOG_ERROR("Undefined type in copy_object (%x)\n", object->type);
842  break;
843  }
844  if (!copy->data) {
845  RZ_FREE(copy);
846  }
847  return copy;
848 }
849 
851  bool error = false;
852 
853  pyc_object *ret = RZ_NEW0(pyc_object);
855  if (!ret || !cobj) {
856  free(ret);
857  free(cobj);
858  return NULL;
859  }
860 
861  // ret->type = TYPE_CODE_v1;
862  // support start from v1.0
863  ret->data = cobj;
864 
865  bool v10_to_12 = magic_int_within(pyc->magic_int, 39170, 16679, &error); // 1.0.1 - 1.2
866  bool v13_to_22 = magic_int_within(pyc->magic_int, 11913, 60718, &error); // 1.3b1 - 2.2a1
867  bool v11_to_14 = magic_int_within(pyc->magic_int, 39170, 20117, &error); // 1.0.1 - 1.4
868  bool v15_to_22 = magic_int_within(pyc->magic_int, 20121, 60718, &error); // 1.5a1 - 2.2a1
869  bool v13_to_20 = magic_int_within(pyc->magic_int, 11913, 50824, &error); // 1.3b1 - 2.0b1
870  // bool v21_to_27 = (!v13_to_20) && magic_int_within (magic_int, 60124, 62212, &error);
871  bool has_posonlyargcount = magic_int_within(pyc->magic_int, 3410, 3424, &error); // v3.8.0a4 - latest
872  if (error) {
873  free(ret);
874  free(cobj);
875  return NULL;
876  }
877 
878  if (v13_to_22) {
879  cobj->argcount = get_ut16(buffer, &error);
880  } else if (v10_to_12) {
881  cobj->argcount = 0;
882  } else {
883  cobj->argcount = get_ut32(buffer, &error);
884  }
885 
886  if (has_posonlyargcount) {
887  cobj->posonlyargcount = get_ut32(buffer, &error); // Included in argcount
888  } else {
889  cobj->posonlyargcount = 0; // None
890  }
891 
892  if (((3020 < (pyc->magic_int & 0xffff)) && ((pyc->magic_int & 0xffff) < 20121)) && (!v11_to_14)) {
893  cobj->kwonlyargcount = get_ut32(buffer, &error); // Not included in argcount
894  } else {
895  cobj->kwonlyargcount = 0;
896  }
897 
898  if (v13_to_22) {
899  cobj->nlocals = get_ut16(buffer, &error);
900  } else if (v10_to_12) {
901  cobj->nlocals = 0;
902  } else {
903  cobj->nlocals = get_ut32(buffer, &error);
904  }
905 
906  if (v15_to_22) {
907  cobj->stacksize = get_ut16(buffer, &error);
908  } else if (v11_to_14 || v10_to_12) {
909  cobj->stacksize = 0;
910  } else {
911  cobj->stacksize = get_ut32(buffer, &error);
912  }
913 
914  if (v13_to_22) {
915  cobj->flags = get_ut16(buffer, &error);
916  } else if (v10_to_12) {
917  cobj->flags = 0;
918  } else {
919  cobj->flags = get_ut32(buffer, &error);
920  }
921 
922  // to help disassemble the code
923  cobj->start_offset = rz_buf_tell(buffer) + 5; // 1 from get_object() and 4 from get_string_object()
924  if (!pyc->refs) {
925  return ret; // return for entried part to get the root object of this file
926  }
927  cobj->code = get_object(pyc, buffer);
928  cobj->end_offset = rz_buf_tell(buffer);
929 
930  cobj->consts = get_object(pyc, buffer);
931  cobj->names = get_object(pyc, buffer);
932 
933  if (v10_to_12) {
934  cobj->varnames = NULL;
935  } else {
936  cobj->varnames = get_object(pyc, buffer);
937  }
938 
939  if (!(v10_to_12 || v13_to_20)) {
940  cobj->freevars = get_object(pyc, buffer);
941  cobj->cellvars = get_object(pyc, buffer);
942  } else {
943  cobj->freevars = NULL;
944  cobj->cellvars = NULL;
945  }
946 
947  cobj->filename = get_object(pyc, buffer);
948  cobj->name = get_object(pyc, buffer);
949 
950  if (v15_to_22) {
951  cobj->firstlineno = get_ut16(buffer, &error);
952  } else if (v11_to_14) {
953  cobj->firstlineno = 0;
954  } else {
955  cobj->firstlineno = get_ut32(buffer, &error);
956  }
957 
958  if (v11_to_14) {
959  cobj->lnotab = NULL;
960  } else {
961  cobj->lnotab = get_object(pyc, buffer);
962  }
963 
964  if (error) {
965  free_object(cobj->code);
966  free_object(cobj->consts);
967  free_object(cobj->names);
968  free_object(cobj->varnames);
969  free_object(cobj->freevars);
970  free_object(cobj->cellvars);
971  free_object(cobj->filename);
972  free_object(cobj->name);
973  free_object(cobj->lnotab);
974  free(cobj);
975  RZ_FREE(ret);
976  return NULL;
977  }
978  return ret;
979 }
980 
982  pyc->magic_int = magic;
983  pyc_object *co = get_code_object(pyc, buffer);
984  ut64 result = 0;
985  if (!co) {
986  return 0;
987  }
988 
989  pyc_code_object *cobj = co->data;
990  result = cobj->start_offset;
991  free_object(co);
992 
993  return result;
994 }
995 
997  bool error = false;
998  pyc_object *ret = NULL;
999  ut8 code = get_ut8(buffer, &error);
1000  ut8 flag = code & FLAG_REF;
1001  RzListIter *ref_idx = NULL;
1002  ut8 type = code & ~FLAG_REF;
1003 
1004  if (error) {
1005  return NULL;
1006  }
1007 
1008  if (flag) {
1009  ret = get_none_object();
1010  if (!ret) {
1011  return NULL;
1012  }
1013  ref_idx = rz_list_append(pyc->refs, ret);
1014  if (!ref_idx) {
1015  free_object(ret);
1016  return NULL;
1017  }
1018  }
1019 
1020  switch (type) {
1021  case TYPE_NULL:
1022  return NULL;
1023  case TYPE_TRUE:
1024  return get_true_object();
1025  case TYPE_FALSE:
1026  return get_false_object();
1027  case TYPE_NONE:
1028  return get_none_object();
1029  case TYPE_REF:
1030  return get_ref_object(pyc, buffer);
1031  case TYPE_SMALL_TUPLE:
1032  ret = get_small_tuple_object(pyc, buffer);
1033  break;
1034  case TYPE_TUPLE:
1035  ret = get_tuple_object(pyc, buffer);
1036  break;
1037  case TYPE_STRING:
1038  ret = get_string_object(buffer);
1039  break;
1040  case TYPE_CODE_v0:
1041  ret = get_code_object(pyc, buffer);
1042  if (ret) {
1043  ret->type = TYPE_CODE_v0;
1044  }
1045  break;
1046  case TYPE_CODE_v1:
1047  ret = get_code_object(pyc, buffer);
1048  if (ret) {
1049  ret->type = TYPE_CODE_v1;
1050  }
1051  break;
1052  case TYPE_INT:
1053  ret = get_int_object(buffer);
1054  break;
1055  case TYPE_ASCII_INTERNED:
1057  break;
1058  case TYPE_SHORT_ASCII:
1060  break;
1061  case TYPE_ASCII:
1062  ret = get_ascii_object(buffer);
1063  break;
1066  break;
1067  case TYPE_INT64:
1068  ret = get_int64_object(buffer);
1069  break;
1070  case TYPE_INTERNED:
1071  ret = get_interned_object(pyc, buffer);
1072  break;
1073  case TYPE_STRINGREF:
1074  ret = get_stringref_object(pyc, buffer);
1075  break;
1076  case TYPE_FLOAT:
1077  ret = get_float_object(buffer);
1078  break;
1079  case TYPE_BINARY_FLOAT:
1081  break;
1082  case TYPE_COMPLEX:
1083  ret = get_complex_object(pyc, buffer); // behaviour depends on Python version
1084  break;
1085  case TYPE_BINARY_COMPLEX:
1087  break;
1088  case TYPE_LIST:
1089  ret = get_list_object(pyc, buffer);
1090  break;
1091  case TYPE_LONG:
1092  ret = get_long_object(buffer);
1093  break;
1094  case TYPE_UNICODE:
1095  ret = get_unicode_object(buffer);
1096  break;
1097  case TYPE_DICT:
1098  ret = get_dict_object(pyc, buffer);
1099  break;
1100  case TYPE_FROZENSET:
1101  case TYPE_SET:
1102  ret = get_set_object(pyc, buffer);
1103  break;
1104  case TYPE_STOPITER:
1105  ret = RZ_NEW0(pyc_object);
1106  break;
1107  case TYPE_ELLIPSIS:
1108  ret = RZ_NEW0(pyc_object);
1109  break;
1110  case TYPE_UNKNOWN:
1111  RZ_LOG_ERROR("Get not implemented for type 0x%x\n", type);
1112  return NULL;
1113  default:
1114  RZ_LOG_ERROR("Undefined type in get_object (0x%x)\n", type);
1115  return NULL;
1116  }
1117 
1118  if (flag && ref_idx) {
1119  free_object(ref_idx->data);
1120  ref_idx->data = copy_object(ret);
1121  }
1122  return ret;
1123 }
1124 
1126  pyc_code_object *cobj = NULL;
1128  RzBinSymbol *symbol = NULL;
1129  RzListIter *i = NULL;
1130 
1131  // each code object is a section
1132  if_true_return(!obj || (obj->type != TYPE_CODE_v1 && obj->type != TYPE_CODE_v0), false);
1133 
1134  cobj = obj->data;
1135 
1136  if_true_return(!cobj || !cobj->name, false);
1137  if_true_return(cobj->name->type != TYPE_ASCII && cobj->name->type != TYPE_STRING && cobj->name->type != TYPE_INTERNED, false);
1138  if_true_return(!cobj->name->data, false);
1139  if_true_return(!cobj->consts, false);
1140 
1141  // add the cobj to objs list
1142  if (!rz_list_append(cobjs, cobj)) {
1143  goto fail;
1144  }
1146  symbol = RZ_NEW0(RzBinSymbol);
1147  prefix = rz_str_newf("%s%s%s", prefix ? prefix : "",
1148  prefix ? "." : "", (const char *)cobj->name->data);
1149  if (!prefix || !section || !symbol) {
1150  goto fail;
1151  }
1152  section->name = strdup(prefix);
1153  if (!section->name) {
1154  goto fail;
1155  }
1156  section->paddr = cobj->start_offset;
1157  section->vaddr = cobj->start_offset;
1158  section->size = cobj->end_offset - cobj->start_offset;
1159  section->vsize = cobj->end_offset - cobj->start_offset;
1160  if (!rz_list_append(sections, section)) {
1161  goto fail;
1162  }
1163  section = NULL;
1164  // start building symbol
1165  symbol->name = strdup(prefix);
1166  // symbol->bind;
1167  symbol->type = RZ_BIN_TYPE_FUNC_STR;
1168  symbol->size = cobj->end_offset - cobj->start_offset;
1169  symbol->vaddr = cobj->start_offset;
1170  symbol->paddr = cobj->start_offset;
1171  symbol->ordinal = pyc->symbols_ordinal++;
1172  if (cobj->consts->type != TYPE_TUPLE && cobj->consts->type != TYPE_SMALL_TUPLE) {
1173  goto fail;
1174  }
1175  if (!rz_list_append(symbols, symbol)) {
1176  goto fail;
1177  }
1178  rz_list_foreach (((RzList *)(cobj->consts->data)), i, obj)
1179  extract_sections_symbols(pyc, obj, sections, symbols, cobjs, prefix);
1180  free(prefix);
1181  return true;
1182 fail:
1183 
1184  free(section);
1185  free(prefix);
1186  free(symbol);
1187  return false;
1188 }
1189 
1191  bool ret;
1192  pyc->magic_int = magic;
1194  if (!pyc->refs) {
1195  return false;
1196  }
1197  ret = extract_sections_symbols(pyc, get_object(pyc, buffer), sections, symbols, cobjs, NULL);
1198  rz_list_free(pyc->refs);
1199  return ret;
1200 }
lzma_index ** i
Definition: index.h:629
lzma_index * src
Definition: index.h:567
static ut32 neg(ArmOp *op)
Definition: armass64.c:981
ut16 val
Definition: armass64_const.h:6
RzList * symbols(RzBinFile *bf)
Definition: bin_ne.c:102
RzList * sections(RzBinFile *bf)
Definition: bin_ne.c:110
#define NULL
Definition: cris-opc.c:27
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void static offset struct stat static buf void long static basep static whence static length const void static len key
Definition: sflib.h:118
uint16_t ut16
uint32_t ut32
unsigned short prefix[65536]
Definition: gun.c:163
RZ_API void Ht_() free(HtName_(Ht) *ht)
Definition: ht_inc.c:130
voidpf void uLong size
Definition: ioapi.h:138
uint8_t ut8
Definition: lh5801.h:11
return memset(p, 0, total)
memcpy(mem, inblock.get(), min(CONTAINING_RECORD(inblock.get(), MEMBLOCK, data) ->size, size))
RZ_API RZ_OWN RzList * rz_list_newf(RzListFree f)
Returns a new initialized RzList pointer and sets the free method.
Definition: list.c:248
RZ_API RZ_OWN RzList * rz_list_clone(RZ_NONNULL const RzList *list)
Shallow copies of the list (but doesn't free its elements)
Definition: list.c:496
RZ_API RZ_BORROW void * rz_list_get_n(RZ_NONNULL const RzList *list, ut32 n)
Returns the N-th element of the list.
Definition: list.c:574
RZ_API ut32 rz_list_length(RZ_NONNULL const RzList *list)
Returns the length of the list.
Definition: list.c:109
RZ_API RZ_BORROW RzListIter * rz_list_append(RZ_NONNULL RzList *list, void *data)
Appends at the end of the list a new element.
Definition: list.c:288
RZ_API void rz_list_free(RZ_NONNULL RzList *list)
Empties the list and frees the list pointer.
Definition: list.c:137
void * malloc(size_t size)
Definition: malloc.c:123
return strdup("=SP r13\n" "=LR r14\n" "=PC r15\n" "=A0 r0\n" "=A1 r1\n" "=A2 r2\n" "=A3 r3\n" "=ZF zf\n" "=SF nf\n" "=OF vf\n" "=CF cf\n" "=SN or0\n" "gpr lr .32 56 0\n" "gpr pc .32 60 0\n" "gpr cpsr .32 64 0 ____tfiae_________________qvczn\n" "gpr or0 .32 68 0\n" "gpr tf .1 64.5 0 thumb\n" "gpr ef .1 64.9 0 endian\n" "gpr jf .1 64.24 0 java\n" "gpr qf .1 64.27 0 sticky_overflow\n" "gpr vf .1 64.28 0 overflow\n" "gpr cf .1 64.29 0 carry\n" "gpr zf .1 64.30 0 zero\n" "gpr nf .1 64.31 0 negative\n" "gpr itc .4 64.10 0 if_then_count\n" "gpr gef .4 64.16 0 great_or_equal\n" "gpr r0 .32 0 0\n" "gpr r1 .32 4 0\n" "gpr r2 .32 8 0\n" "gpr r3 .32 12 0\n" "gpr r4 .32 16 0\n" "gpr r5 .32 20 0\n" "gpr r6 .32 24 0\n" "gpr r7 .32 28 0\n" "gpr r8 .32 32 0\n" "gpr r9 .32 36 0\n" "gpr r10 .32 40 0\n" "gpr r11 .32 44 0\n" "gpr r12 .32 48 0\n" "gpr r13 .32 52 0\n" "gpr r14 .32 56 0\n" "gpr r15 .32 60 0\n" "gpr r16 .32 64 0\n" "gpr r17 .32 68 0\n")
char * dst
Definition: lz4.h:724
static pyc_object * get_ascii_interned_object(RzBuffer *buffer)
Definition: marshal.c:658
static pyc_object * get_float_object(RzBuffer *buffer)
Definition: marshal.c:259
static pyc_object * get_int64_object(RzBuffer *buffer)
Definition: marshal.c:144
static ut8 get_ut8(RzBuffer *buffer, bool *error)
Definition: marshal.c:21
static double get_float64(RzBuffer *buffer, bool *error)
Definition: marshal.c:63
static st64 get_st64(RzBuffer *buffer, bool *error)
Definition: marshal.c:55
ut64 get_code_object_addr(RzBinPycObj *pyc, RzBuffer *buffer, ut32 magic)
Definition: marshal.c:981
static void free_object(pyc_object *object)
Definition: marshal.c:712
static pyc_object * get_unicode_object(RzBuffer *buffer)
Definition: marshal.c:429
static pyc_object * get_ascii_object(RzBuffer *buffer)
Definition: marshal.c:647
static pyc_object * get_list_object(RzBinPycObj *pyc, RzBuffer *buffer)
Definition: marshal.c:550
static ut16 get_ut16(RzBuffer *buffer, bool *error)
Definition: marshal.c:30
static st32 get_st32(RzBuffer *buffer, bool *error)
Definition: marshal.c:47
static pyc_object * get_long_object(RzBuffer *buffer)
Definition: marshal.c:167
static pyc_object * get_small_tuple_object(RzBinPycObj *pyc, RzBuffer *buffer)
Definition: marshal.c:512
static pyc_object * get_stringref_object(RzBinPycObj *pyc, RzBuffer *buffer)
Definition: marshal.c:234
static pyc_object * get_string_object(RzBuffer *buffer)
Definition: marshal.c:403
static pyc_object * get_short_ascii_interned_object(RzBuffer *buffer)
Definition: marshal.c:680
static pyc_object * get_none_object(void)
Definition: marshal.c:83
static pyc_object * get_ref_object(RzBinPycObj *pyc, RzBuffer *buffer)
Definition: marshal.c:691
static ut8 * get_bytes(RzBuffer *buffer, ut32 size)
Definition: marshal.c:71
static pyc_object * get_false_object(void)
Definition: marshal.c:98
static pyc_object * get_binary_float_object(RzBuffer *buffer)
Definition: marshal.c:291
static pyc_object * get_complex_object(RzBinPycObj *pyc, RzBuffer *buffer)
Definition: marshal.c:313
static bool extract_sections_symbols(RzBinPycObj *pyc, pyc_object *obj, RzList *sections, RzList *symbols, RzList *cobjs, char *prefix)
Definition: marshal.c:1125
static pyc_object * get_set_object(RzBinPycObj *pyc, RzBuffer *buffer)
Definition: marshal.c:611
static pyc_object * get_int_object(RzBuffer *buffer)
Definition: marshal.c:124
static pyc_object * get_dict_object(RzBinPycObj *pyc, RzBuffer *buffer)
Definition: marshal.c:571
static pyc_object * get_true_object(void)
Definition: marshal.c:111
#define if_true_return(cond, ret)
Definition: marshal.c:12
static pyc_object * get_object(RzBinPycObj *pyc, RzBuffer *buffer)
Definition: marshal.c:996
static pyc_object * get_code_object(RzBinPycObj *pyc, RzBuffer *buffer)
Definition: marshal.c:850
static pyc_object * get_binary_complex_object(RzBuffer *buffer)
Definition: marshal.c:379
static pyc_object * get_interned_object(RzBinPycObj *pyc, RzBuffer *buffer)
Definition: marshal.c:452
bool get_sections_symbols_from_code_objects(RzBinPycObj *pyc, RzBuffer *buffer, RzList *sections, RzList *symbols, RzList *cobjs, ut32 magic)
Definition: marshal.c:1190
static pyc_object * get_array_object_generic(RzBinPycObj *pyc, RzBuffer *buffer, ut32 size)
Definition: marshal.c:479
static pyc_object * get_tuple_object(RzBinPycObj *pyc, RzBuffer *buffer)
Definition: marshal.c:529
static pyc_object * copy_object(pyc_object *object)
Definition: marshal.c:775
static pyc_object * get_short_ascii_object(RzBuffer *buffer)
Definition: marshal.c:669
static ut32 get_ut32(RzBuffer *buffer, bool *error)
Definition: marshal.c:39
static pyc_object * get_ascii_object_generic(RzBuffer *buffer, ut32 size, bool interned)
Definition: marshal.c:632
@ FLAG_REF
Definition: marshal.h:47
@ TYPE_NONE
Definition: marshal.h:30
@ TYPE_STRINGREF
Definition: marshal.h:38
@ TYPE_INT64
Definition: marshal.h:25
@ TYPE_SHORT_ASCII
Definition: marshal.h:35
@ TYPE_NULL
Definition: marshal.h:31
@ TYPE_BINARY_FLOAT
Definition: marshal.h:16
@ TYPE_FLOAT
Definition: marshal.h:23
@ TYPE_LIST
Definition: marshal.h:28
@ TYPE_BINARY_COMPLEX
Definition: marshal.h:15
@ TYPE_TUPLE
Definition: marshal.h:41
@ TYPE_TRUE
Definition: marshal.h:40
@ TYPE_LONG
Definition: marshal.h:29
@ TYPE_INT
Definition: marshal.h:27
@ TYPE_UNKNOWN
Definition: marshal.h:43
@ TYPE_DICT
Definition: marshal.h:20
@ TYPE_UNICODE
Definition: marshal.h:42
@ TYPE_SMALL_TUPLE
Definition: marshal.h:36
@ TYPE_INTERNED
Definition: marshal.h:26
@ TYPE_ELLIPSIS
Definition: marshal.h:21
@ TYPE_COMPLEX
Definition: marshal.h:19
@ TYPE_CODE_v1
Definition: marshal.h:18
@ TYPE_ASCII_INTERNED
Definition: marshal.h:14
@ TYPE_REF
Definition: marshal.h:32
@ TYPE_FALSE
Definition: marshal.h:22
@ TYPE_SET
Definition: marshal.h:33
@ TYPE_CODE_v0
Definition: marshal.h:17
@ TYPE_STRING
Definition: marshal.h:39
@ TYPE_FROZENSET
Definition: marshal.h:24
@ TYPE_ASCII
Definition: marshal.h:13
@ TYPE_STOPITER
Definition: marshal.h:37
@ TYPE_SHORT_ASCII_INTERNED
Definition: marshal.h:34
int n
Definition: mipsasm.c:19
int type
Definition: mipsasm.c:17
bool magic_int_within(ut32 target_magic, ut32 lower, ut32 upper, bool *error)
Definition: pyc_magic.c:283
static RzSocket * s
Definition: rtr.c:28
#define RZ_BIN_TYPE_FUNC_STR
Definition: rz_bin.h:119
RZ_API ut64 rz_buf_tell(RZ_NONNULL RzBuffer *b)
Return the current cursor position.
Definition: buf.c:1238
#define rz_buf_read_le16(b, result)
Read a big endian or little endian (ut16, ut32, ut64) at the specified offset in the buffer and shift...
Definition: rz_buf.h:266
#define rz_buf_read_le32(b, result)
Definition: rz_buf.h:267
#define rz_buf_read_le64(b, result)
Definition: rz_buf.h:268
RZ_API st64 rz_buf_read(RZ_NONNULL RzBuffer *b, RZ_NONNULL RZ_OUT ut8 *buf, ut64 len)
void(* RzListFree)(void *ptr)
Definition: rz_list.h:11
#define RZ_LOG_ERROR(fmtstr,...)
Definition: rz_log.h:58
RZ_API char * rz_str_newf(const char *fmt,...) RZ_PRINTF_CHECK(1
RZ_API void rz_str_trim(RZ_NONNULL RZ_INOUT char *str)
Removes whitespace characters (space, tab, newline etc.) from the beginning and end of a string.
Definition: str_trim.c:190
#define RZ_NEW0(x)
Definition: rz_types.h:284
#define RZ_NEWS0(x, y)
Definition: rz_types.h:282
#define RZ_FREE(x)
Definition: rz_types.h:369
#define st64
Definition: rz_types_base.h:10
#define ST32_MAX
Definition: rz_types_base.h:97
#define st32
Definition: rz_types_base.h:12
int size_t
Definition: sftypes.h:40
#define b(i)
Definition: sha256.c:42
#define f(i)
Definition: sha256.c:46
#define a(i)
Definition: sha256.c:41
#define s1(x)
Definition: sha256.c:60
Definition: buffer.h:15
Definition: inftree9.h:24
ut32 kwonlyargcount
Definition: marshal.h:58
pyc_object * names
Definition: marshal.h:64
ut32 stacksize
Definition: marshal.h:60
ut32 posonlyargcount
Definition: marshal.h:57
st64 end_offset
Definition: marshal.h:73
ut32 firstlineno
Definition: marshal.h:70
pyc_object * varnames
Definition: marshal.h:65
pyc_object * freevars
Definition: marshal.h:66
pyc_object * code
Definition: marshal.h:62
ut32 nlocals
Definition: marshal.h:59
st64 start_offset
Definition: marshal.h:72
pyc_object * filename
Definition: marshal.h:68
pyc_object * cellvars
Definition: marshal.h:67
pyc_object * lnotab
Definition: marshal.h:71
pyc_object * name
Definition: marshal.h:69
pyc_object * consts
Definition: marshal.h:63
ut32 argcount
Definition: marshal.h:56
ut32 symbols_ordinal
Definition: marshal.h:85
RzList * refs
Definition: marshal.h:83
RzList * interned_table
Definition: marshal.h:80
ut32 magic_int
Definition: marshal.h:84
void * data
Definition: marshal.h:52
pyc_marshal_type type
Definition: marshal.h:51
const char * type
Definition: rz_bin.h:682
char * name
Definition: rz_bin.h:675
ut32 ordinal
Definition: rz_bin.h:692
void * data
Definition: rz_list.h:14
uint32_t size
#define fail(test)
Definition: tests.h:29
void error(const char *msg)
Definition: untgz.c:593
ut64(WINAPI *w32_GetEnabledXStateFeatures)()