Rizin
unix-like reverse engineering framework and cli tools
microsoft_demangle.c
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: 2015-2016 inisider <inisider@gmail.com>
2 // SPDX-License-Identifier: LGPL-3.0-only
3 
4 #include "microsoft_demangle.h"
5 #include <ctype.h>
6 
7 #define MICROSOFT_NAME_LEN (256)
8 #define MICROSOFR_CLASS_NAMESPACE_LEN (256)
9 #define IMPOSSIBLE_LEN (MICROSOFT_NAME_LEN + MICROSOFR_CLASS_NAMESPACE_LEN)
10 
11 // TODO: it would be better to replace this with some kind of map data structure
14 
15 typedef enum EObjectType {
18  eObjectTypeMax = 99
20 
22 // State machine for parsing type codes data types
24 typedef enum ETCStateMachineErr {
31 
32 typedef enum ETCState { // TC - type code
63 
64 typedef struct STypeCodeStr {
65  char *type_str;
67  size_t type_str_len;
68  size_t curr_pos;
70 
71 struct SStateInfo;
72 typedef void (*state_func)(struct SStateInfo *, STypeCodeStr *type_code_str);
73 
74 typedef struct SStateInfo {
76  const char *buff_for_parsing;
80 
81 typedef struct SStrInfo {
82  char *str_ptr;
83  size_t len;
85 
86 typedef struct SDataType {
87  char *left;
88  char *right;
90 
91 static inline void sdatatype_fini(SDataType *data_type) {
92  if (!data_type) {
93  return;
94  }
95  RZ_FREE(data_type->left);
96  RZ_FREE(data_type->right);
97 }
98 
99 static void sstrinfo_free(SStrInfo *sstrinfo) {
100  if (!sstrinfo) {
101  return;
102  }
103  free(sstrinfo->str_ptr);
104  free(sstrinfo);
105 }
106 
107 #define DECL_STATE_ACTION(action) static void tc_state_##action(SStateInfo *state, STypeCodeStr *type_code_str);
136 #undef DECL_STATE_ACTION
137 
138 #define NAME(action) tc_state_##action
140  NAME(start),
141  NAME(end),
142  NAME(A),
143  NAME(B),
144  NAME(C),
145  NAME(D),
146  NAME(E),
147  NAME(F),
148  NAME(G),
149  NAME(H),
150  NAME(I),
151  NAME(J),
152  NAME(K),
153  NAME(M),
154  NAME(N),
155  NAME(O),
156  NAME(P),
157  NAME(Q),
158  NAME(R),
159  NAME(S),
160  NAME(T),
161  NAME(U),
162  NAME(V),
163  NAME(W),
164  NAME(X),
165  NAME(Z),
166  NAME(_),
167  NAME($),
168 };
169 #undef NAME
171 // End of data types for state machine which parse type codes
173 
175 // State machine for parsing type codes functions
177 
178 static void init_state_struct(SStateInfo *state, const char *buff_for_parsing);
179 static EDemanglerErr get_type_code_string(const char *sym, size_t *amount_of_read_chars, char **str_type_code);
180 static bool init_type_code_str_struct(STypeCodeStr *type_code_str);
181 static void free_type_code_str_struct(STypeCodeStr *type_code_str);
182 static char *type_code_str_get(STypeCodeStr *type_code_str);
183 static size_t get_template(const char *buf, SStrInfo *str_info, bool memorize);
184 static char *get_num(SStateInfo *state);
185 static EDemanglerErr parse_data_type(const char *sym, SDataType *demangled_type, size_t *len);
186 static size_t get_namespace_and_name(const char *buf, STypeCodeStr *type_code_str, size_t *amount_of_names, bool memorize);
187 static inline EDemanglerErr get_storage_class(const char encoded, const char **storage_class);
188 static inline size_t get_ptr_modifier(const char *encoded, SDataType *ptr_modifier);
189 static EDemanglerErr parse_function(const char *sym, STypeCodeStr *type_code_str, char **demangled_function, size_t *chars_read);
190 static EDemanglerErr parse_microsoft_mangled_name(const char *sym, char **demangled_name, size_t *chars_read);
191 static EDemanglerErr parse_microsoft_rtti_mangled_name(const char *sym, char **demangled_name, size_t *chars_read);
192 
193 static void run_state(SStateInfo *state_info, STypeCodeStr *type_code_str) {
194  state_table[state_info->state](state_info, type_code_str);
195 }
196 
197 #define copy_string(type_code_str, str_for_copy) copy_string_n(type_code_str, str_for_copy, str_for_copy ? strlen(str_for_copy) : 0)
198 
199 static bool copy_string_n(STypeCodeStr *type_code_str, const char *str_for_copy, size_t copy_len) {
200  if (!copy_len) {
201  return true;
202  }
203  size_t free_space = type_code_str->type_str_len - type_code_str->curr_pos - 1;
204 
205  if (free_space < copy_len) {
206  size_t newlen = type_code_str->type_str_len + copy_len;
207  if (newlen < type_code_str->type_str_len) {
208  return false;
209  }
210  newlen *= 2;
211  if (newlen < type_code_str->type_str_len) {
212  return false;
213  }
214  type_code_str->type_str_len = newlen;
215  char *type_str;
216  if (type_code_str->type_str != type_code_str->type_str_buf) {
217  type_str = realloc(type_code_str->type_str, newlen);
218  } else {
219  type_str = malloc(newlen);
220  if (!type_str) {
221  return false;
222  }
223  memcpy(type_str, type_code_str->type_str, type_code_str->curr_pos);
224  }
225  if (!type_str) {
226  return false;
227  }
228  type_code_str->type_str = type_str;
229  if (!type_code_str->type_str) {
230  return false;
231  }
232  }
233 
234  char *dst = type_code_str->type_str + type_code_str->curr_pos;
235  if (str_for_copy) {
236  memcpy(dst, str_for_copy, copy_len);
237  } else {
238  memset(dst, 0, copy_len);
239  }
240  type_code_str->curr_pos += copy_len;
241  if (type_code_str->type_str) {
242  type_code_str->type_str[type_code_str->curr_pos] = '\0';
243  }
244  return true;
245 }
246 
247 static int get_template_params(const char *sym, size_t *amount_of_read_chars, char **str_type_code) {
249  init_state_struct(&state, sym);
250  const char template_param[] = "template-parameter-";
251  char *tmp, *res = NULL;
252  const char *const start_sym = sym;
253  if (!strncmp(sym, "?", 1)) {
254  // anonymous template param
255  state.amount_of_read_chars += 1;
256  state.buff_for_parsing += 1;
257  res = get_num(&state);
258  if (res) {
259  tmp = dem_str_newf("%s%s", template_param, res);
260  free(res);
261  res = tmp;
262  }
263  } else {
264  if (strncmp(sym, "$", 1)) {
266  }
267  sym++;
268  state.amount_of_read_chars += 2;
269  state.buff_for_parsing += 2;
270  char *a, *b, *c;
271  switch (*sym) {
272  case '0':
273  // Signed integer
274  a = get_num(&state);
275  if (a) {
276  int signed_a = atoi(a);
277  res = dem_str_newf("%d", signed_a);
278  free(a);
279  }
280  break;
281  case '1': {
284  if (!*(++sym) || !(*sym == '?')) {
286  }
289  }
290  size_t ret = get_namespace_and_name(++sym, &str, NULL, true);
291  if (!ret) {
294  }
295  sym += ret;
296  if (!*sym) {
299  }
300  SDataType data_type = { 0 };
301  if (isdigit((int)*++sym)) {
302  err = parse_data_type(sym, &data_type, &ret);
303  *str_type_code = dem_str_newf("&%s %s%s", data_type.left, str.type_str, data_type.right);
304  sdatatype_fini(&data_type);
305  } else {
306  char *tmp = NULL;
307  err = parse_function(sym, &str, &tmp, &ret);
308  *str_type_code = dem_str_newf("&%s", tmp);
309  free(tmp);
310  }
311  sym += ret;
312  *amount_of_read_chars = sym - start_sym;
314  return err;
315  }
316  case '2':
317  // real value a ^ b
318  a = get_num(&state);
319  b = get_num(&state);
320  if (a && b) {
321  int signed_b = atoi(b);
322  res = dem_str_newf("%sE%d", a, signed_b);
323  }
324  free(a);
325  free(b);
326  break;
327  case 'D':
328  // anonymous template param
329  res = get_num(&state);
330  if (res) {
331  tmp = dem_str_newf("%s%s", template_param, res);
332  free(res);
333  res = tmp;
334  }
335  break;
336  case 'F':
337  // Signed {a, b}
338  a = get_num(&state);
339  b = get_num(&state);
340  if (a && b) {
341  int signed_a = atoi(a);
342  int signed_b = atoi(b);
343  res = dem_str_newf("{%d, %d}", signed_a, signed_b);
344  }
345  free(a);
346  free(b);
347  break;
348  case 'G':
349  // Signed {a, b, c}
350  a = get_num(&state);
351  b = get_num(&state);
352  c = get_num(&state);
353  if (a && b && c) {
354  int signed_a = atoi(a);
355  int signed_b = atoi(b);
356  int signed_c = atoi(c);
357  res = dem_str_newf("{%d, %d, %d}", signed_a, signed_b, signed_c);
358  }
359  free(a);
360  free(b);
361  free(c);
362  break;
363  case 'H':
364  // Unsigned integer
365  res = get_num(&state);
366  break;
367  case 'I':
368  // Unsigned {x, y}
369  a = get_num(&state);
370  b = get_num(&state);
371  if (a && b) {
372  res = dem_str_newf("{%s, %s}", a, b);
373  }
374  free(a);
375  free(b);
376  break;
377  case 'J':
378  // Unsigned {x, y, z}
379  a = get_num(&state);
380  b = get_num(&state);
381  c = get_num(&state);
382  if (a && b && c) {
383  res = dem_str_newf("{%s, %s, %s}", a, b, c);
384  }
385  free(a);
386  free(b);
387  free(c);
388  break;
389  case 'Q':
390  // anonymous non-type template parameter
391  res = get_num(&state);
392  if (res) {
393  tmp = dem_str_newf("non-type-%s%s", template_param, res);
394  free(res);
395  res = tmp;
396  }
397  break;
398  case 'S':
399  // empty non-type parameter pack
400  res = strdup("");
401  break;
402  default:
403  break;
404  }
405  }
406 
407  if (!res) {
409  }
410 
411  *str_type_code = res;
412  *amount_of_read_chars = state.amount_of_read_chars;
413 
414  return eDemanglerErrOK;
415 }
416 
417 static inline ut16 read_be16(const void *src) {
418  const ut8 *s = (const ut8 *)src;
419  return (((ut16)s[0]) << 8) | (((ut16)s[1]) << 0);
420 }
421 
422 static ssize_t utf16be_to_utf8_impl(const char *utf16be, size_t utf16be_len, char *utf8, size_t utf8_len) {
423  char *const outstart = utf8;
424  char *const outend = utf8 + utf8_len;
425  const ut16 *in = (const ut16 *)utf16be;
426  ut32 c, d;
427  int bits;
428 
429  const size_t inlen = utf16be_len / 2;
430  const ut16 *const inend = in + inlen;
431  while ((in < inend) && (utf8 - outstart + 5 < utf8_len)) {
432  c = read_be16(in++);
433  if ((c & 0xFC00) == 0xD800) { /* surrogates */
434  if (in >= inend) { /* (in > inend) shouldn't happen */
435  break;
436  }
437  d = read_be16(in++);
438  if ((d & 0xFC00) == 0xDC00) {
439  c &= 0x03FF;
440  c <<= 10;
441  c |= d & 0x03FF;
442  c += 0x10000;
443  } else {
444  return -1;
445  }
446  }
447 
448  /* assertion: c is a single UTF-4 value */
449  if (utf8 >= outend) {
450  break;
451  }
452  if (c < 0x80) {
453  *utf8++ = c;
454  bits = -6;
455  } else if (c < 0x800) {
456  *utf8++ = ((c >> 6) & 0x1F) | 0xC0;
457  bits = 0;
458  } else if (c < 0x10000) {
459  *utf8++ = ((c >> 12) & 0x0F) | 0xE0;
460  bits = 6;
461  } else {
462  *utf8++ = ((c >> 18) & 0x07) | 0xF0;
463  bits = 12;
464  }
465 
466  for (; bits >= 0; bits -= 6) {
467  if (utf8 >= outend) {
468  break;
469  }
470  *utf8++ = ((c >> bits) & 0x3F) | 0x80;
471  }
472  }
473  return utf8 - outstart;
474 }
475 
// Converts a big-endian UTF-16 byte string into a freshly malloc'ed UTF-8 buffer.
//
// utf16be      input bytes
// utf16be_len  input length in bytes
// utf8         receives the malloc'ed output buffer (utf16be_len * 4 bytes are requested)
// utf8_len     receives the number of bytes produced by utf16be_to_utf8_impl
//
// Returns eDemanglerErrOK when the conversion succeeds.
// NOTE(review): the statements inside both failure branches are not visible in
// this listing; presumably each returns an error code - confirm against the full file.
476 static EDemanglerErr utf16be_to_utf8(const char *utf16be, size_t utf16be_len, char **utf8, size_t *utf8_len) {
477  const size_t utf8_len_tmp = utf16be_len * 4;
478  *utf8 = malloc(utf8_len_tmp);
479  if (!*utf8) {
481  }
482  ssize_t res = utf16be_to_utf8_impl(utf16be, utf16be_len, *utf8, utf8_len_tmp);
483  if (res < 0) {
// A negative result from the converter means the input was rejected; the buffer is released here.
484  free(*utf8);
486  }
487  *utf8_len = res;
488  return eDemanglerErrOK;
489 }
490 
491 static size_t get_operator_code(const char *buf, DemList *names_l, bool memorize) {
492  // C++ operator code (one character, or two if the first is '_')
493 #define SET_OPERATOR_CODE(str) \
494  { \
495  str_info = malloc(sizeof(SStrInfo)); \
496  if (!str_info) \
497  break; \
498  str_info->len = strlen(str); \
499  str_info->str_ptr = strdup(str); \
500  dem_list_append(names_l, str_info); \
501  }
502  SStrInfo *str_info;
503  size_t read_len = 1;
504  switch (*++buf) {
505  case '0': SET_OPERATOR_CODE("constructor"); break;
506  case '1': SET_OPERATOR_CODE("~destructor"); break;
507  case '2': SET_OPERATOR_CODE("operator new"); break;
508  case '3': SET_OPERATOR_CODE("operator delete"); break;
509  case '4': SET_OPERATOR_CODE("operator="); break;
510  case '5': SET_OPERATOR_CODE("operator>>"); break;
511  case '6': SET_OPERATOR_CODE("operator<<"); break;
512  case '7': SET_OPERATOR_CODE("operator!"); break;
513  case '8': SET_OPERATOR_CODE("operator=="); break;
514  case '9': SET_OPERATOR_CODE("operator!="); break;
515  case 'A': SET_OPERATOR_CODE("operator[]"); break;
516  case 'B': SET_OPERATOR_CODE("operator #{return_type}"); break;
517  case 'C': SET_OPERATOR_CODE("operator->"); break;
518  case 'D': SET_OPERATOR_CODE("operator*"); break;
519  case 'E': SET_OPERATOR_CODE("operator++"); break;
520  case 'F': SET_OPERATOR_CODE("operator--"); break;
521  case 'G': SET_OPERATOR_CODE("operator-"); break;
522  case 'H': SET_OPERATOR_CODE("operator+"); break;
523  case 'I': SET_OPERATOR_CODE("operator&"); break;
524  case 'J': SET_OPERATOR_CODE("operator->*"); break;
525  case 'K': SET_OPERATOR_CODE("operator/"); break;
526  case 'L': SET_OPERATOR_CODE("operator%"); break;
527  case 'M': SET_OPERATOR_CODE("operator<"); break;
528  case 'N': SET_OPERATOR_CODE("operator<="); break;
529  case 'O': SET_OPERATOR_CODE("operator>"); break;
530  case 'P': SET_OPERATOR_CODE("operator>="); break;
531  case 'Q': SET_OPERATOR_CODE("operator,"); break;
532  case 'R': SET_OPERATOR_CODE("operator()"); break;
533  case 'S': SET_OPERATOR_CODE("operator~"); break;
534  case 'T': SET_OPERATOR_CODE("operator^"); break;
535  case 'U': SET_OPERATOR_CODE("operator|"); break;
536  case 'V': SET_OPERATOR_CODE("operator&"); break;
537  case 'W': SET_OPERATOR_CODE("operator||"); break;
538  case 'X': SET_OPERATOR_CODE("operator*="); break;
539  case 'Y': SET_OPERATOR_CODE("operator+="); break;
540  case 'Z': SET_OPERATOR_CODE("operator-="); break;
541  case '$': {
542  str_info = malloc(sizeof(SStrInfo));
543  if (!str_info) {
544  goto fail;
545  }
546  size_t i = get_template(buf + 1, str_info, memorize);
547  if (!i) {
548  RZ_FREE(str_info);
549  goto fail;
550  }
551  dem_list_append(names_l, str_info);
552  read_len += i;
553  break;
554  }
555  case '_':
556  switch (*++buf) {
557  case '0': SET_OPERATOR_CODE("operator/="); break;
558  case '1': SET_OPERATOR_CODE("operator%="); break;
559  case '2': SET_OPERATOR_CODE("operator>>="); break;
560  case '3': SET_OPERATOR_CODE("operator<<="); break;
561  case '4': SET_OPERATOR_CODE("operator&="); break;
562  case '5': SET_OPERATOR_CODE("operator|="); break;
563  case '6': SET_OPERATOR_CODE("operator^="); break;
564  case '7': SET_OPERATOR_CODE("vftable"); break;
565  case '8': SET_OPERATOR_CODE("vbtable"); break;
566  case '9': SET_OPERATOR_CODE("vcall"); break;
567  case 'A': SET_OPERATOR_CODE("typeof"); break;
568  case 'B': SET_OPERATOR_CODE("local_static_guard"); break;
569  case 'C':
570  if (*++buf != '@') {
571  goto fail;
572  }
573  if (*++buf != '_') {
574  goto fail;
575  }
576  buf++;
577  read_len += 3;
578  bool is_double_byte;
579  if (*buf == '0') {
580  is_double_byte = false;
581  } else if (*buf == '1') {
582  is_double_byte = true;
583  } else {
584  goto fail;
585  }
586  buf++;
587  read_len++;
588  const char *const str_buf_start = buf;
589  SStateInfo state_info;
590  init_state_struct(&state_info, buf);
591  char *len = get_num(&state_info);
592  if (!len) {
593  goto fail;
594  }
595  free(len);
596  buf += state_info.amount_of_read_chars;
597  init_state_struct(&state_info, buf);
598  char *checksum = get_num(&state_info);
599  buf += state_info.amount_of_read_chars;
601  if (!s) {
602  free(checksum);
603  goto fail;
604  }
605  dem_string_append(s, "`string'::");
606  if (checksum) {
607  dem_string_appendf(s, "%s::\"", checksum);
608  free(checksum);
609  } else {
610  dem_string_append(s, "\"");
611  }
612  DemString *unicode = NULL;
613  if (is_double_byte) {
614  unicode = dem_string_new();
615  if (!unicode) {
617  goto fail;
618  }
619  }
620  char c[2];
621  int high = 0;
622  const char *encoded = ",/\\:. \v\n'-";
623  while (*buf && *buf != '@') {
624  if (*buf == '?') {
625  buf++;
626  if (*buf == '$') {
627  buf++;
628  if (buf[0] < 'A' || buf[0] > 'P' ||
629  buf[1] < 'A' || buf[1] > 'P') {
631  dem_string_free(unicode);
632  goto fail;
633  }
634  const char nibble_high = (*buf++ - 'A');
635  const char nibble_low = (*buf - 'A');
636  c[high] = nibble_high | nibble_low;
637  } else if (isdigit((int)*buf)) {
638  c[high] = encoded[*buf - '0'];
639  } else if ((*buf > 'a' && *buf < 'p') || (*buf > 'A' && *buf < 'P')) {
640  c[high] = *buf + 0x80;
641  } else {
642  dem_string_free(unicode);
644  goto fail;
645  }
646  } else {
647  c[high] = *buf;
648  }
649  buf++;
650  if (is_double_byte) {
651  high++;
652  if (high > 1) {
653  if (!c[0] && !c[1]) {
654  break;
655  }
656  if (!dem_string_append_n(unicode, c, 2)) {
657  dem_string_free(unicode);
659  goto fail;
660  }
661  high = 0;
662  }
663  } else {
664  if (!c[0]) {
665  break;
666  }
667  if (!dem_string_append_n(s, c, 1)) {
669  goto fail;
670  }
671  }
672  }
673  if (is_double_byte) {
674  size_t utf16_len = unicode->len;
675  char *utf16 = dem_string_drain(unicode);
676  char *utf8_buf = NULL;
677  size_t utf8_len = 0;
678  if (utf16be_to_utf8(utf16, utf16_len, &utf8_buf, &utf8_len) != eDemanglerErrOK) {
679  free(utf16);
681  goto fail;
682  }
683  free(utf16);
684  if (!dem_string_append_n(s, utf8_buf, utf8_len)) {
685  free(utf8_buf);
687  goto fail;
688  }
689  free(utf8_buf);
690  }
691  dem_string_append_n(s, "\"", 1);
692  if (*buf == '@' && buf[1]) {
693  buf++;
694  init_state_struct(&state_info, buf);
695  char *unk = get_num(&state_info);
696  if (unk) {
697  buf += state_info.amount_of_read_chars - 1;
698  dem_string_appendf(s, "::%s", unk);
699  free(unk);
700  }
701  }
702  char *str = dem_string_drain(s);
703  if (!str) {
704  goto fail;
705  }
707  free(str);
708  read_len += buf - str_buf_start;
709  break;
710  case 'D': SET_OPERATOR_CODE("vbase_dtor"); break;
711  case 'E': SET_OPERATOR_CODE("vector_dtor"); break;
712  case 'F': SET_OPERATOR_CODE("default_ctor_closure"); break;
713  case 'G': SET_OPERATOR_CODE("scalar_dtor"); break;
714  case 'H': SET_OPERATOR_CODE("vector_ctor_iter"); break;
715  case 'I': SET_OPERATOR_CODE("vector_dtor_iter"); break;
716  case 'J': SET_OPERATOR_CODE("vector_vbase_ctor_iter"); break;
717  case 'K': SET_OPERATOR_CODE("virtual_displacement_map"); break;
718  case 'L': SET_OPERATOR_CODE("eh_vector_ctor_iter"); break;
719  case 'M': SET_OPERATOR_CODE("eh_vector_dtor_iter"); break;
720  case 'N': SET_OPERATOR_CODE("eh_vector_vbase_ctor_iter"); break;
721  case 'O': SET_OPERATOR_CODE("copy_ctor_closure"); break;
722  case 'R':
723  buf++;
724  read_len++;
725  switch (*buf++) {
726  case '0': {
727  size_t len;
728  char *str = NULL;
729  if (!*buf++) {
730  goto fail;
731  }
733  goto fail;
734  }
735  read_len += len + 1;
736  str = dem_str_append(str, " `RTTI Type Descriptor'");
738  free(str);
739  break;
740  }
741  case '1': {
744  char *a = get_num(&state);
745  char *b = get_num(&state);
746  char *c = get_num(&state);
747  char *d = get_num(&state);
748  if (!a || !b || !c || !d) {
749  free(a);
750  free(b);
751  free(c);
752  free(d);
753  goto fail;
754  }
755  read_len += state.amount_of_read_chars;
756  char *tmp = dem_str_newf("`RTTI Base Class Descriptor at (%s,%s,%s,%s)'", a, b, c, d);
758  free(tmp);
759  free(a);
760  free(b);
761  free(c);
762  free(d);
763  break;
764  }
765  case '2': SET_OPERATOR_CODE("`RTTI Base Class Array'"); break;
766  case '3': SET_OPERATOR_CODE("`RTTI Class Hierarchy Descriptor'"); break;
767  case '4': SET_OPERATOR_CODE("`RTTI Complete Object Locator'"); break;
768  default: goto fail;
769  }
770  break;
771  case 'S': SET_OPERATOR_CODE("local_vftable"); break;
772  case 'T': SET_OPERATOR_CODE("local_vftable_ctor_closure"); break;
773  case 'U': SET_OPERATOR_CODE("operator new[]"); break;
774  case 'V': SET_OPERATOR_CODE("operator delete[]"); break;
775  case 'X': SET_OPERATOR_CODE("placement_new_closure"); break;
776  case 'Y': SET_OPERATOR_CODE("placement_delete_closure"); break;
777  case '_':
778  buf++;
779  read_len++;
780  switch (*buf++) {
781  case 'A': SET_OPERATOR_CODE("managed_vector_ctor_iter"); break;
782  case 'B': SET_OPERATOR_CODE("managed_vector_dtor_iter"); break;
783  case 'C': SET_OPERATOR_CODE("eh_vector_copy_ctor_iter"); break;
784  case 'D': SET_OPERATOR_CODE("eh_vector_vbase_copy_ctor_iter"); break;
785  case 'E': {
786  const char *end;
787  const char *op = "dynamic initializer";
788  char *name;
789  size_t name_len;
790  goto get_name;
791  case 'F':
792  op = "dynamic atexit destructor";
793  get_name:
794  end = strchr(buf, '@');
795  if (!end) {
796  goto fail;
797  }
798  name_len = end - buf;
799  read_len += name_len + 1;
800  name = malloc(name_len + 1);
801  if (!name) {
802  goto fail;
803  }
804  memcpy(name, buf, name_len);
805  name[name_len] = '\0';
806  char *tmp = dem_str_newf("`%s for '%s''", op, name);
807  free(name);
809  free(tmp);
810  break;
811  }
812  case 'G': SET_OPERATOR_CODE("vector_copy_ctor_iter"); break;
813  case 'H': SET_OPERATOR_CODE("vector_vbase_copy_ctor_iter"); break;
814  case 'I': SET_OPERATOR_CODE("managed_vector_copy_ctor_iter"); break;
815  case 'J': SET_OPERATOR_CODE("local_static_thread_guard"); break;
816  case 'K': SET_OPERATOR_CODE("user_defined_literal_op"); break;
817  default: goto fail;
818  }
819  break;
820  default: goto fail;
821  }
822  read_len++;
823  break;
824  default: goto fail;
825  }
826  if (*buf) {
827  read_len++;
828  }
829  return read_len;
830 #undef SET_OPERATOR_CODE
831 fail:
832  dem_list_free(names_l);
833  return 0;
834 }
835 
837 static size_t get_template(const char *buf, SStrInfo *str_info, bool memorize) {
838  size_t len = 0;
839  char *str_type_code = NULL;
840  STypeCodeStr type_code_str;
841  // DemListIter *it = NULL;
842  DemList *saved_abbr_names = abbr_names; // save current abbr names, this
843  DemList *new_abbr_names = dem_list_newf(free);
844  memset(str_info, 0, sizeof(*str_info));
845  if (!init_type_code_str_struct(&type_code_str)) {
846  goto get_template_err;
847  }
848 
849  if (*buf == '?') {
851  if (!names_l) {
852  goto get_template_err;
853  }
854  size_t i = get_operator_code(buf, names_l, memorize);
855  if (!i) {
856  goto get_template_err;
857  }
858  len += i;
859  buf += i;
860  SStrInfo *name = dem_list_head(names_l)->data;
861  copy_string_n(&type_code_str, name->str_ptr, name->len);
862  dem_list_free(names_l);
863  } else {
864  char *tmp = strchr(buf, '@');
865  if (!tmp) {
866  goto get_template_err;
867  }
868 
869  // get/copy template len/name
870  len += (tmp - buf + 1);
871  copy_string_n(&type_code_str, buf, len - 1);
872  dem_list_append(new_abbr_names, dem_str_ndup(buf, len - 1));
873  buf += len;
874  }
875 
876  copy_string(&type_code_str, "<");
877 
878  abbr_names = new_abbr_names;
879  bool first = true;
880  // get identifier
881  size_t i = 0;
882  while (*buf != '@') {
883  if (get_type_code_string(buf, &i, &str_type_code) != eDemanglerErrOK) {
884  if (get_template_params(buf, &i, &str_type_code) != eDemanglerErrOK) {
885  RZ_FREE(str_type_code);
886  goto get_template_err;
887  }
888  }
889  if (!RZ_STR_ISEMPTY(str_type_code)) {
890  if (!first) {
891  copy_string(&type_code_str, ", ");
892  }
893  copy_string(&type_code_str, str_type_code);
894  first = false;
895  }
896  buf += i;
897  len += i;
898  RZ_FREE(str_type_code);
899  }
900  if (*buf != '@') {
901  goto get_template_err;
902  }
903  len++;
904 
905 get_template_err:
906  if (type_code_str.curr_pos) {
907  copy_string(&type_code_str, ">");
908  str_info->str_ptr = type_code_str_get(&type_code_str);
909  str_info->len = type_code_str.curr_pos;
910  }
911 
912  dem_list_free(new_abbr_names);
913  abbr_names = saved_abbr_names; // restore global list with name abbr.
914 
915  if (memorize && str_info->str_ptr) {
917  }
918  return len;
919 }
920 
928 static size_t get_namespace_and_name(const char *buf, STypeCodeStr *type_code_str, size_t *amount_of_names, bool memorize) {
929  const char *curr_pos = NULL, *prev_pos = NULL, *tmp = NULL;
930  DemList /* <SStrInfo *> */ *names_l = NULL;
931  DemListIter *it = NULL;
932 
933  if (RZ_STR_ISEMPTY(buf)) {
934  return 0;
935  }
936 
937  size_t len = 0, read_len = 0, tmp_len = 0;
938 
940 
941  if (*buf == '?') {
942  size_t res = get_operator_code(buf, names_l, memorize);
943  if (!res) {
944  return 0;
945  }
946  memorize = true;
947  buf += res;
948  read_len += res;
949  }
950 
951  prev_pos = buf;
952  curr_pos = strchr(buf, '@');
953 
954  // hack for nested templates
955  // think about how better to fix this...
956  len = curr_pos - prev_pos;
957  if (len == 0) {
958  goto get_namespace_and_name_err;
959  }
960 
961  while (curr_pos != NULL) {
962  len = curr_pos - prev_pos;
963  tmp = prev_pos;
964 
965  if ((len == 0) && (*(curr_pos) == '@')) {
966  break;
967  }
968 
969  // TODO:maybe add check of name correctness? like name can not start
970  // with number
971  if ((len <= 0) || (len >= MICROSOFT_NAME_LEN)) {
972  goto get_namespace_and_name_err;
973  }
974 
975  // check if it is a template
976  if ((*tmp == '?') && (*(tmp + 1) == '$')) {
977  size_t i = 0;
978  SStrInfo *str_info = RZ_NEW0(SStrInfo);
979  if (!str_info) {
980  break;
981  }
982  i = get_template(tmp + 2, str_info, memorize);
983  if (!i) {
984  RZ_FREE(str_info);
985  goto get_namespace_and_name_err;
986  }
987  dem_list_append(names_l, str_info);
988 
989  prev_pos = tmp + i + 2;
990  curr_pos = strchr(prev_pos, '@');
991  read_len += i + 2;
992  continue;
993  }
994 
995  if ((*tmp == '?') && (*(tmp + 1) == 'Q')) {
998  break;
999  }
1000  size_t i = get_namespace_and_name(tmp + 2, &str, NULL, true);
1001  read_len += i + 2;
1002  if (!i || *(tmp + i + 2) != '@') {
1004  break;
1005  }
1006  prev_pos = tmp + i + 3;
1007  curr_pos = strchr(prev_pos, '@');
1008  read_len++;
1009  SStrInfo *str_info = RZ_NEW0(SStrInfo);
1010  if (!str_info) {
1011  break;
1012  }
1013  str_info->str_ptr = dem_str_newf("[%s]", str.type_str);
1014  str_info->len = strlen(str_info->str_ptr);
1015  dem_list_append(names_l, str_info);
1017  continue;
1018  }
1019 
1020  // Nested name or numbered namespace
1021  if (*tmp == '?') {
1022  tmp++;
1023  read_len++;
1024  // Optional sequence number or numbered namespace
1025  bool nested_name = false;
1026  char *num = NULL;
1027  if (*tmp != '?') {
1028  SStateInfo state;
1030  num = get_num(&state);
1031  if (!num) {
1032  break;
1033  }
1034  tmp += state.amount_of_read_chars;
1035  read_len += state.amount_of_read_chars;
1036  if (*tmp == '?' && tmp[1] == '?') {
1037  tmp += 2;
1038  read_len += 2;
1039  nested_name = true;
1040  }
1041  } else {
1042  tmp++;
1043  read_len++;
1044  nested_name = true;
1045  }
1046  char *demangled = NULL;
1047  if (nested_name) {
1048  parse_microsoft_mangled_name(tmp, &demangled, &len);
1049  tmp += len;
1050  read_len += len;
1051  }
1052  SStrInfo *str_info = RZ_NEW0(SStrInfo);
1053  if (!str_info) {
1054  free(num);
1055  break;
1056  }
1057  if (num && demangled) {
1058  str_info->str_ptr = dem_str_newf("`%s'::`%s'", demangled, num);
1059  } else if (demangled) {
1060  str_info->str_ptr = dem_str_newf("`%s'", demangled);
1061  } else if (num) {
1062  str_info->str_ptr = dem_str_newf("`%s'", num);
1063  } else {
1064  str_info->str_ptr = strdup("");
1065  }
1066  if (!str_info->str_ptr) {
1067  RZ_FREE(str_info);
1068  free(num);
1069  break;
1070  }
1071  str_info->len = strlen(str_info->str_ptr);
1072  dem_list_append(names_l, str_info);
1073  if (demangled) {
1075  }
1076  free(demangled);
1077  free(num);
1078  prev_pos = tmp;
1079  curr_pos = strchr(tmp, '@');
1080  continue;
1081  }
1082 
1083  bool abbreviation = false;
1084  if (isdigit((ut8)*tmp)) {
1085  abbreviation = true;
1086  tmp = dem_list_get_n(abbr_names, *tmp - '0');
1087  if (!tmp) {
1088  break;
1089  }
1090  len = 1;
1091  } else {
1092  char *tmpname = malloc(len + 1);
1093  if (!tmpname) {
1094  break;
1095  }
1096  memset(tmpname, 0, len + 1);
1097  memcpy(tmpname, prev_pos, len);
1098  dem_list_append(abbr_names, tmpname);
1099  tmp = tmpname;
1100  }
1101  SStrInfo *str_info = RZ_NEW0(SStrInfo);
1102  if (!str_info) {
1103  break;
1104  }
1105  str_info->str_ptr = strdup(tmp);
1106  str_info->len = strlen(tmp);
1107 
1108  dem_list_append(names_l, str_info);
1109 
1110  memorize = true;
1111 
1112  read_len += len;
1113  if (abbreviation) {
1114  if (*(prev_pos + 1) == '@') {
1115  prev_pos = curr_pos;
1116  } else {
1117  prev_pos++;
1118  }
1119  } else {
1120  prev_pos = curr_pos + 1;
1121  curr_pos = strchr(curr_pos + 1, '@');
1122  if (curr_pos) {
1123  read_len++;
1124  }
1125  }
1126  }
1127 
1128 get_namespace_and_name_err:
1129  tmp_len = dem_list_length(names_l);
1130  if (amount_of_names) {
1131  *amount_of_names = tmp_len;
1132  }
1133  SStrInfo *str_info;
1134  dem_list_foreach_prev(names_l, it, str_info) {
1135  copy_string_n(type_code_str, str_info->str_ptr, str_info->len);
1136 
1137  if (--tmp_len) {
1138  copy_string(type_code_str, "::");
1139  }
1140  }
1141  dem_list_free(names_l);
1142 
1143  return read_len;
1144 }
1145 
1146 #define SINGLEQUOTED_A 'A'
1147 #define SINGLEQUOTED_B 'B'
1148 #define SINGLEQUOTED_C 'C'
1149 #define SINGLEQUOTED_D 'D'
1150 #define SINGLEQUOTED_E 'E'
1151 #define SINGLEQUOTED_F 'F'
1152 #define SINGLEQUOTED_G 'G'
1153 #define SINGLEQUOTED_H 'H'
1154 #define SINGLEQUOTED_I 'I'
1155 #define SINGLEQUOTED_J 'J'
1156 #define SINGLEQUOTED_K 'K'
1157 #define SINGLEQUOTED_L 'L'
1158 #define SINGLEQUOTED_M 'M'
1159 #define SINGLEQUOTED_N 'N'
1160 #define SINGLEQUOTED_O 'O'
1161 #define SINGLEQUOTED_P 'P'
1162 #define SINGLEQUOTED_Q 'Q'
1163 #define SINGLEQUOTED_R 'R'
1164 #define SINGLEQUOTED_S 'S'
1165 #define SINGLEQUOTED_T 'T'
1166 #define SINGLEQUOTED_U 'U'
1167 #define SINGLEQUOTED_V 'V'
1168 #define SINGLEQUOTED_W 'W'
1169 #define SINGLEQUOTED_X 'X'
1170 #define SINGLEQUOTED_Y 'Y'
1171 #define SINGLEQUOTED_Z 'Z'
1172 #define SINGLEQUOTED__ '_'
1173 #define SINGLEQUOTED_$ '$'
1174 #define CHAR_WITH_QUOTES(letter) (SINGLEQUOTED_##letter)
1175 
1176 #define DEF_STATE_ACTION(action) static void tc_state_##action(SStateInfo *state, STypeCodeStr *type_code_str)
1177 #define GO_TO_NEXT_STATE(state, new_state) \
1178  { \
1179  (state)->amount_of_read_chars++; \
1180  (state)->buff_for_parsing++; \
1181  (state)->state = eTCStateEnd; \
1182  }
// Defines the handler tc_state_<action> for a type code that maps to a fixed
// type name: it appends `type` to the output string, records
// eTCStateMachineErrAlloc if the append fails, and moves to eTCStateEnd.
#define ONE_LETTER_ACTION(action, type) \
	static void tc_state_##action(SStateInfo *state, STypeCodeStr *type_code_str) { \
		state->state = eTCStateEnd; \
		if (!copy_string(type_code_str, type)) { \
			state->err = eTCStateMachineErrAlloc; \
		} \
	}
1190 
1191 ONE_LETTER_ACTION(C, "signed char")
1192 ONE_LETTER_ACTION(D, "char")
1193 ONE_LETTER_ACTION(E, "unsigned char")
1194 ONE_LETTER_ACTION(F, "short int")
1195 ONE_LETTER_ACTION(G, "unsigned short int")
1196 ONE_LETTER_ACTION(H, "int")
1197 ONE_LETTER_ACTION(I, "unsigned int")
1198 ONE_LETTER_ACTION(J, "long int")
1199 ONE_LETTER_ACTION(K, "unsigned long int")
1200 ONE_LETTER_ACTION(M, "float")
1201 ONE_LETTER_ACTION(N, "double")
1202 ONE_LETTER_ACTION(O, "long double")
1203 ONE_LETTER_ACTION(X, "void")
1204 ONE_LETTER_ACTION(Z, "varargs ...")
1205 
1207 #define PROCESS_CASE(letter, type_str) \
1208  case CHAR_WITH_QUOTES(letter): \
1209  copy_string(type_code_str, type_str); \
1210  break;
1211 
1212  switch (*(state->buff_for_parsing)) {
1213  PROCESS_CASE(D, "__int8")
1214  PROCESS_CASE(E, "unsigned __int8")
1215  PROCESS_CASE(F, "__int16")
1216  PROCESS_CASE(G, "unsigned __int16")
1217  PROCESS_CASE(H, "__int32")
1218  PROCESS_CASE(I, "unsigned __int32")
1219  PROCESS_CASE(J, "__int64")
1220  PROCESS_CASE(K, "unsigned __int64")
1221  PROCESS_CASE(L, "__int128")
1222  PROCESS_CASE(M, "unsigned __int128")
1223  PROCESS_CASE(N, "bool")
1224  PROCESS_CASE(Q, "char8_t")
1225  PROCESS_CASE(S, "char16_t")
1226  PROCESS_CASE(T, "long double(80 bit precision)")
1227  PROCESS_CASE(U, "char32_t")
1228  PROCESS_CASE(W, "wchar_t")
1229  PROCESS_CASE(Z, "long double(64 bit precision)")
1230  default:
1232  break;
1233  }
1234 
1235  state->amount_of_read_chars++;
1236  state->buff_for_parsing++;
1237  state->state = eTCStateEnd;
1238 #undef PROCESS_CASE
1239 }
1240 
1242 // isdigit is needed to check whether names have to be de-abbreviated
1243 // +2 -> skip "@@" (the end of a class, union, ...)
1244 // or +2 -> skip abbreviated_num + '@'
1245 #define GET_USER_DEF_TYPE_NAME(data_struct_str) \
1246  { \
1247  copy_string(type_code_str, data_struct_str); \
1248 \
1249  check_len = get_namespace_and_name(state->buff_for_parsing, type_code_str, NULL, true); \
1250  if (check_len) { \
1251  state->amount_of_read_chars += check_len; \
1252  state->buff_for_parsing += check_len; \
1253  if (*state->buff_for_parsing) { \
1254  state->buff_for_parsing++; \
1255  state->amount_of_read_chars++; \
1256  } \
1257  } else { \
1258  state->err = eTCStateMachineErrUncorrectTypeCode; \
1259  } \
1260  }
1261 
1263 #define PROCESS_CASE(case_string, type_str) \
1264  { \
1265  check_len = strlen(case_string); \
1266  if (strncmp(state->buff_for_parsing, case_string, check_len) == 0) { \
1267  copy_string(type_code_str, type_str); \
1268  state->buff_for_parsing += check_len; \
1269  state->amount_of_read_chars += check_len; \
1270  return; \
1271  } \
1272  }
1273 
1274  size_t check_len = 0;
1275 
1276  state->state = eTCStateEnd;
1277 
1278  PROCESS_CASE("__m64@@", "__m64");
1279  PROCESS_CASE("__m128@@", "__m128");
1280  PROCESS_CASE("__m128i@@", "__m128i");
1281  PROCESS_CASE("__m256@@", "__m256");
1282  PROCESS_CASE("__m256i@@", "__m256i");
1283  PROCESS_CASE("__m512@@", "__m512");
1284  PROCESS_CASE("__m512i@@", "__m512i");
1285 
1286  GET_USER_DEF_TYPE_NAME("union ");
1287 #undef PROCESS_CASE
1288 }
1289 
1291 #define PROCESS_CASE(case_string, type_str) \
1292  { \
1293  check_len = strlen(case_string); \
1294  if (strncmp(state->buff_for_parsing, case_string, check_len) == 0) { \
1295  copy_string(type_code_str, type_str); \
1296  state->amount_of_read_chars += check_len; \
1297  state->buff_for_parsing += check_len; \
1298  return; \
1299  } \
1300  }
1301 
1302  size_t check_len = 0;
1303 
1304  state->state = eTCStateEnd;
1305 
1306  PROCESS_CASE("__m128d@@", "__m128d");
1307  PROCESS_CASE("__m256d@@", "__m256d");
1308  PROCESS_CASE("__m512d@@", "__m512d");
1309 
1310  GET_USER_DEF_TYPE_NAME("struct ");
1311 #undef PROCESS_CASE
1312 }
1313 
1315  // W4X@@ -> enum X, W4X@Y@@ -> enum Y::X
1316  size_t check_len = 0;
1317  state->state = eTCStateEnd;
1318 
1319  if (*state->buff_for_parsing != '4') {
1321  return;
1322  }
1323 
1324  state->buff_for_parsing++;
1325  state->amount_of_read_chars++;
1326 
1327  GET_USER_DEF_TYPE_NAME("enum ");
1328 }
1329 
1331  // VX@@ -> class X
1332  size_t check_len = 0;
1333  state->state = eTCStateEnd;
1334 
1335  GET_USER_DEF_TYPE_NAME("class ");
1336 }
1337 
1338 #undef GET_USER_DEF_TYPE_NAME
1339 
1340 static char *get_num(SStateInfo *state) {
1341  char *ptr = NULL;
1342  bool negative = false;
1343  if (*state->buff_for_parsing == '?') {
1344  negative = true;
1345  state->buff_for_parsing++;
1346  state->amount_of_read_chars++;
1347  }
1348  if (*state->buff_for_parsing == '@') {
1349  state->buff_for_parsing++;
1350  state->amount_of_read_chars++;
1351  return strdup("0");
1352  }
1353  if (*state->buff_for_parsing >= '0' && *state->buff_for_parsing <= '8') {
1354  ptr = malloc(2);
1355  if (!ptr) {
1356  return NULL;
1357  }
1358  ptr[0] = *state->buff_for_parsing + 1;
1359  ptr[1] = '\0';
1360  state->buff_for_parsing++;
1361  state->amount_of_read_chars++;
1362  } else if (*state->buff_for_parsing == '9') {
1363  ptr = strdup("10");
1364  state->buff_for_parsing++;
1365  state->amount_of_read_chars++;
1366  } else if (*state->buff_for_parsing >= 'A' && *state->buff_for_parsing <= 'P') {
1367  ut32 ret = 0;
1368 
1369  if (state->buff_for_parsing[1] == '0' && state->buff_for_parsing[2] == 'x' && isxdigit(state->buff_for_parsing[3])) {
1370  size_t chars = 0;
1371  state->buff_for_parsing += 3;
1372  state->amount_of_read_chars += 3;
1373  while (isxdigit(*state->buff_for_parsing)) {
1374  state->buff_for_parsing++;
1375  state->amount_of_read_chars++;
1376  chars++;
1377  }
1378  ret = strtoul(state->buff_for_parsing - chars, NULL, 16);
1379  } else {
1380  while (*state->buff_for_parsing >= 'A' && *state->buff_for_parsing <= 'P') {
1381  ret *= 16;
1382  ret += *state->buff_for_parsing - 'A';
1383  state->buff_for_parsing++;
1384  state->amount_of_read_chars++;
1385  }
1386  }
1387 
1388  ptr = dem_str_newf("%u", ret);
1389  if (*state->buff_for_parsing && *state->buff_for_parsing == '@') {
1390  state->buff_for_parsing++;
1391  state->amount_of_read_chars++;
1392  }
1393  }
1394  if (negative && ptr) {
1395  char *tmp = ptr;
1396  ptr = dem_str_newf("-%s", tmp);
1397  free(tmp);
1398  }
1399  return ptr;
1400 }
1401 
// Parses a modified type (pointer, reference, ...) and appends it to type_code_str.
// The sequence read from state->buff_for_parsing is: pointer modifiers, an optional
// "$A"/"$B"/"$C" marker ('^', cli::pin_ptr, '%'), further pointer modifiers with a
// storage class, an optional 'Y' array dimension list, an optional "__Z", and finally
// the underlying type via get_type_code_string(). The output is the underlying type
// followed by the collected modifiers; modifier_str is the textual modifier ("*", "&", ...).
// Always sets state->state to eTCStateEnd.
// NOTE(review): the declaration of `err` and the statements on the failure paths are
// not visible in this listing (the listing numbers skip there).
1402 static inline void parse_type_modifier(SStateInfo *state, STypeCodeStr *type_code_str, const char *modifier_str) {
1403  size_t i = 0;
1405  char *tmp = NULL;
1406  STypeCodeStr tmp_str;
1407  STypeCodeStr storage_class;
1408  bool is_pin_ptr = false;
1409  char clr_type = '\0';
1410 
1411  state->state = eTCStateEnd;
1412 
1413  if (!init_type_code_str_struct(&tmp_str)) {
1415  return;
1416  }
1417  if (!init_type_code_str_struct(&storage_class)) {
1418  free_type_code_str_struct(&tmp_str);
1420  return;
1421  }
1422 
1423  STypeCodeStr mod_left;
1424  STypeCodeStr mod_right;
1425  if (!init_type_code_str_struct(&mod_left) ||
1426  !init_type_code_str_struct(&mod_right)) {
// NOTE(review): this jump skips the initialization of `mod` below, yet the cleanup
// label passes &mod to sdatatype_fini() - confirm this path cannot run with an
// indeterminate `mod` (and an uninitialized mod_right).
1428  goto MODIFIER_err;
1429  }
1430 
1431  SDataType mod = { 0 };
1432  i = get_ptr_modifier(state->buff_for_parsing, &mod);
1433  copy_string(&mod_left, mod.left);
1434  copy_string(&mod_right, mod.right);
1435  sdatatype_fini(&mod);
1436 
1437  state->buff_for_parsing += i;
1438  state->amount_of_read_chars += i;
1439 
// Optional two-character "$<letter>" marker.
1440  if (*state->buff_for_parsing == '$') {
1441  state->buff_for_parsing++;
1442  switch (*state->buff_for_parsing++) {
1443  case 'A':
1444  clr_type = '^';
1445  break;
1446  case 'B': // cli::pin_ptr<T>
1447  is_pin_ptr = true;
1448  break;
1449  case 'C':
1450  clr_type = '%';
1451  break;
1452  case '\0':
1454  goto MODIFIER_err;
1455  default:
1457  break;
1458  }
1459  state->amount_of_read_chars += 2;
1460  }
1461 
// Keep reading modifier letters followed by a storage class character until a
// pass consumes no modifier letters (i == 0).
1462  do {
1463  i = get_ptr_modifier(state->buff_for_parsing, &mod);
1464  state->buff_for_parsing += i;
1465  state->amount_of_read_chars += i;
1466 
1467  const char *storage_class_str;
1468  err = get_storage_class(*state->buff_for_parsing, &storage_class_str);
1469  if (storage_class_str) {
1470  copy_string(&storage_class, storage_class_str);
1471  }
1472  if (err == eDemanglerErrOK) {
1473  state->buff_for_parsing++;
1474  state->amount_of_read_chars++;
1475  }
1476  copy_string(&mod_left, mod.left);
1477  copy_string(&mod_right, mod.right);
1478  sdatatype_fini(&mod);
1479  } while (i);
1480 
// 'Y' introduces array dimensions: the first number is the dimension count,
// followed by one number per dimension, each printed as "[n]".
1481  if (*state->buff_for_parsing == 'Y') {
1482  char *n1;
1483  int num;
1484 
1485  state->buff_for_parsing++;
1486  state->amount_of_read_chars++;
1487  if (!(n1 = get_num(state))) {
1488  goto MODIFIER_err;
1489  }
1490  num = atoi(n1);
1491  RZ_FREE(n1);
1492 
1493  if (*storage_class.type_str || *mod_left.type_str || *mod_right.type_str || *modifier_str) {
1494  copy_string(&tmp_str, "(");
1495  copy_string_n(&tmp_str, storage_class.type_str, storage_class.curr_pos);
1496  copy_string_n(&tmp_str, mod_left.type_str, mod_left.curr_pos);
1497  const char last_char = tmp_str.type_str[tmp_str.curr_pos - 1];
1498  if (last_char != ' ' && last_char != '(') {
1499  copy_string(&tmp_str, " ");
1500  }
1501  copy_string(&tmp_str, modifier_str);
1502  copy_string_n(&tmp_str, mod_right.type_str, mod_right.curr_pos);
1503  copy_string(&tmp_str, ")");
1504  }
1505 
1506  while (num--) {
1507  n1 = get_num(state);
1508  if (!n1) {
1509  break;
1510  }
1511  copy_string(&tmp_str, "[");
1512  copy_string(&tmp_str, n1);
1513  copy_string(&tmp_str, "]");
1514  RZ_FREE(n1);
1515  }
1516  }
1517 
// Nothing was produced by the array branch: build "<storage class><left mods> <modifier><right mods>".
1518  if (tmp_str.curr_pos == 0) {
1519  copy_string_n(&tmp_str, storage_class.type_str, storage_class.curr_pos);
1520  copy_string_n(&tmp_str, mod_left.type_str, mod_left.curr_pos);
1521  if (tmp_str.curr_pos && tmp_str.type_str[tmp_str.curr_pos - 1] != ' ') {
1522  copy_string(&tmp_str, " ");
1523  }
1524  if (clr_type && *modifier_str != '\0') {
// The first character of modifier_str (two characters for "&&") is replaced by clr_type.
1525  copy_string_n(&tmp_str, &clr_type, 1);
1526  const char *rest_of_mod_str = modifier_str + (*(modifier_str + 1) == '&' ? 2 : 1);
1527  copy_string(&tmp_str, rest_of_mod_str);
1528  } else {
1529  if (is_pin_ptr && *modifier_str) {
// For pin_ptr the first modifier character and any spaces after it are dropped.
1530  while (*++modifier_str == ' ') {
1531  };
1532  }
1533  copy_string(&tmp_str, modifier_str);
1534  }
1535  copy_string_n(&tmp_str, mod_right.type_str, mod_right.curr_pos);
1536  }
1537 
1538  if (!strncmp(state->buff_for_parsing, "__Z", 3)) {
1539  // TODO: no idea what this means
1540  state->buff_for_parsing += 3;
1541  state->amount_of_read_chars += 3;
1542  }
1543 
// Underlying type; `i` receives the number of characters it consumed.
1544  err = get_type_code_string(state->buff_for_parsing, &i, &tmp);
1545  if (err != eDemanglerErrOK) {
1547  goto MODIFIER_err;
1548  }
1549 
1550  state->amount_of_read_chars += i;
1551  state->buff_for_parsing += i;
1552  if (is_pin_ptr) {
1553  copy_string(type_code_str, "cli::pin_ptr<");
1554  }
1555  copy_string(type_code_str, tmp);
1556  if (type_code_str->curr_pos && type_code_str->type_str[type_code_str->curr_pos - 1] != ' ' && tmp_str.type_str[0] != ' ') {
1557  copy_string(type_code_str, " ");
1558  }
1559  copy_string_n(type_code_str, tmp_str.type_str, tmp_str.curr_pos);
1560  if (is_pin_ptr) {
1561  copy_string(type_code_str, ">");
1562  }
1563 
// Shared cleanup for the success path and the error jumps.
1564 MODIFIER_err:
1565  RZ_FREE(tmp);
1566  sdatatype_fini(&mod);
1567  free_type_code_str_struct(&tmp_str);
1568  free_type_code_str_struct(&storage_class);
1569  free_type_code_str_struct(&mod_left);
1570  free_type_code_str_struct(&mod_right);
1571 }
1572 
1574  parse_type_modifier(state, type_code_str, "* const volatile");
1575 }
1576 
/**
 * \brief Translates a calling convention code into its printable name.
 *
 * \param calling_convention Code letter in the range 'A'..'W'
 * \return Constant string (not to be freed), or NULL for any other character
 */
static inline const char *get_calling_convention(char calling_convention) {
	// Indexed by (code - 'A'); odd entries are the dllexport variant of the
	// preceding convention, 'W' has no such variant.
	static const char *const convention_names[] = {
		"__cdecl", /* A */
		"__cdecl __declspec(dllexport)", /* B */
		"__pascal", /* C */
		"__pascal __declspec(dllexport)", /* D */
		"__thiscall", /* E */
		"__thiscall __declspec(dllexport)", /* F */
		"__stdcall", /* G */
		"__stdcall __declspec(dllexport)", /* H */
		"__fastcall", /* I */
		"__fastcall __declspec(dllexport)", /* J */
		"default (none given)", /* K */
		"default (none given) __declspec(dllexport)", /* L */
		"__clrcall", /* M */
		"__clrcall __declspec(dllexport)", /* N */
		"__eabi", /* O */
		"__eabi __declspec(dllexport)", /* P */
		"__vectorcall", /* Q */
		"__vectorcall __declspec(dllexport)", /* R */
		"__swift_1", /* S */
		"__swift_1 __declspec(dllexport)", /* T */
		"__swift_2", /* U */
		"__swift_2 __declspec(dllexport)", /* V */
		"__swift_3", /* W */
	};

	if (calling_convention < 'A' || calling_convention > 'W') {
		return NULL;
	}
	return convention_names[calling_convention - 'A'];
}
1605 
// Demangles a function argument list starting at `sym` into the text "(arg, arg, ...)".
// Arguments are read with get_type_code_string() until 'Z' or the end of the string;
// '@' characters are skipped. When a type fails to parse and the character is a digit,
// the digit is used as an index into abbr_types. A "void" argument ends the list.
// demangled_args (optional) receives a strdup()'ed copy of the text, read_chars
// (optional) the number of characters consumed.
// NOTE(review): the declaration of `err` and several error assignments are not
// visible in this listing (the listing numbers skip there).
1606 static EDemanglerErr parse_function_args(const char *sym, char **demangled_args, size_t *read_chars) {
1608  const char *curr_pos = sym;
1609  size_t len = 0;
1610 
1611  STypeCodeStr func_str;
1612  if (!init_type_code_str_struct(&func_str)) {
1614  }
1615 
1616  copy_string(&func_str, "(");
1617 
1618  // Function arguments
1619  while (*curr_pos && *curr_pos != 'Z') {
1620  if (*curr_pos != '@') {
1621  char *tmp;
1622  bool is_abbr_type = false;
// `len` still holds the length of the previous argument; non-zero means this is not the first one.
1623  if (len) {
1624  copy_string(&func_str, ", ");
1625  }
1626  err = get_type_code_string(curr_pos, &len, &tmp);
1627  if (err != eDemanglerErrOK) {
1628  // abbreviation of type processing
1629  if ((*curr_pos >= '0') && (*curr_pos <= '9')) {
1630  free(tmp);
1631  tmp = dem_list_get_n(abbr_types, (ut32)(*curr_pos - '0'));
1632  if (!tmp) {
1634  break;
1635  }
1636  err = eDemanglerErrOK;
1637  len = 1;
1638  is_abbr_type = true;
1639  } else {
1641  free(tmp);
1642  break;
1643  }
1644  }
1645  curr_pos += len;
1646 
// NOTE(review): the body of this branch is missing from the listing.
1647  if (len > 1) {
1649  }
1650 
1651  copy_string(&func_str, tmp);
1652 
1653  if (tmp && strncmp(tmp, "void", 4) == 0 && strlen(tmp) == 4) {
1654  // arguments list is void
// Strings taken from abbr_types are not freed here; only freshly parsed ones are.
1655  if (!is_abbr_type) {
1656  free(tmp);
1657  }
1658  break;
1659  }
1660  if (!is_abbr_type) {
1661  free(tmp);
1662  }
1663  } else {
1664  curr_pos++;
1665  }
1666  }
1667 
1668  while (*curr_pos == '@') {
1669  curr_pos++;
1670  }
1671 
1672  if (*curr_pos != 'Z') {
1674  }
1675 
1676  copy_string(&func_str, ")");
1677 
1678  if (demangled_args) {
1679  *demangled_args = strdup(func_str.type_str);
1680  }
1681 
1682  free_type_code_str_struct(&func_str);
1683 
// The +1 also counts the character the scan stopped on.
1684  if (read_chars) {
1685  *read_chars = curr_pos - sym + 1;
1686  }
1687 
1688  return err;
1689 }
1690 
1691 // TODO: use parse_function() instead
// Parses a function pointer type and appends
// "<return type>[ <storage>] (<calling convention> <pointer_str>)(<args>)" to type_code_str.
// Reads, in order: the calling convention letter, an optional '?' plus storage class
// character, the return type and the argument list. Always sets state->state to eTCStateEnd.
// NOTE(review): the declaration of `err` and the statements before each early
// `return` are not visible in this listing (the listing numbers skip there).
1692 static void parse_function_pointer(SStateInfo *state, STypeCodeStr *type_code_str, const char *pointer_str) {
1693  const char *call_conv = NULL;
1694  const char *storage = NULL;
1695  char *ret_type = NULL;
1696  size_t i = 0;
1698 
1699  state->state = eTCStateEnd;
1700 
1701  // Calling convention
1702  call_conv = get_calling_convention(*state->buff_for_parsing++);
1703  if (!call_conv) {
1704  // XXX unify error messages into a single enum
1706  return;
1707  }
1708 
1709  state->amount_of_read_chars += 2; // '6' + call_conv
1710 
1711  // return type
1712  if (*state->buff_for_parsing == '?') {
1713  err = get_storage_class(*++state->buff_for_parsing, &storage);
1714  if (err != eDemanglerErrOK) {
1716  return;
1717  }
1718  state->buff_for_parsing++;
1719  state->amount_of_read_chars += 2;
1720  }
1721  err = get_type_code_string(state->buff_for_parsing, &i, &ret_type);
1722  if (err != eDemanglerErrOK) {
1724  return;
1725  }
1726 
1727  copy_string(type_code_str, ret_type);
1728  if (storage) {
1729  copy_string(type_code_str, " ");
1730  copy_string(type_code_str, storage);
1731  }
1732  copy_string(type_code_str, " (");
1733  RZ_FREE(ret_type);
1734 
1735  copy_string(type_code_str, call_conv);
1736  copy_string(type_code_str, " ");
1737  copy_string(type_code_str, pointer_str);
1738  copy_string(type_code_str, ")");
1739 
// `i` is the length of the return type here; it is reused below for the argument list.
1740  state->amount_of_read_chars += i;
1741  state->buff_for_parsing += i;
1742 
1743  char *demangled_args = NULL;
1744  if (parse_function_args(state->buff_for_parsing, &demangled_args, &i) != eDemanglerErrOK) {
1745  free(demangled_args);
1747  return;
1748  }
1749  state->amount_of_read_chars += i;
1750  state->buff_for_parsing += i;
1751  copy_string(type_code_str, demangled_args);
1752  free(demangled_args);
1753  return;
1754 }
1755 
// Parses the type following a pointer/reference code. A leading digit is consumed and selects:
//   '6'/'7' - function pointer, handled by parse_function_pointer()
//   '8'/'9' - pointer to a member function: the class name is read with
//             get_namespace_and_name() and must be followed by '@'; the pointer is printed
//             as "<class>::<pointer_str>", followed by the storage class and the right
//             pointer modifier (the left modifier goes first)
// Anything else is handed to parse_type_modifier().
// NOTE(review): the statements before the early returns are not visible in this listing.
// NOTE(review): for digits other than '6'..'9' the cursor has moved past the digit while
// amount_of_read_chars has not been increased - confirm this is intended.
1756 static void parse_pointer(SStateInfo *state, STypeCodeStr *type_code_str, const char *pointer_str) {
1757  if (isdigit((ut8)*state->buff_for_parsing)) {
1758  ut8 digit = *state->buff_for_parsing++;
1759  if (digit == '6' || digit == '7') {
1760  parse_function_pointer(state, type_code_str, pointer_str);
1761  return;
1762  } else if (digit == '8' || digit == '9') {
1763  STypeCodeStr func_str;
1764  if (!init_type_code_str_struct(&func_str)) {
1766  return;
1767  };
1768  size_t read = get_namespace_and_name(state->buff_for_parsing, &func_str, NULL, true);
1769  if (!read || !*(state->buff_for_parsing + read) || *(state->buff_for_parsing + read) != '@') {
1771  free_type_code_str_struct(&func_str);
1772  return;
1773  }
// Count the '@' that terminates the class name as well.
1774  read++;
1775  state->amount_of_read_chars += read;
1776  state->buff_for_parsing += read;
1777  copy_string(&func_str, "::");
1778  copy_string(&func_str, pointer_str);
1779  SDataType data_type = { 0 };
1780  size_t ptr_mod_cnt = get_ptr_modifier(state->buff_for_parsing, &data_type);
1781  state->buff_for_parsing += ptr_mod_cnt;
1782  state->amount_of_read_chars += ptr_mod_cnt;
1783  const char *storage;
1784  if (get_storage_class(*state->buff_for_parsing++, &storage) != eDemanglerErrOK) {
1786  free_type_code_str_struct(&func_str);
1787  sdatatype_fini(&data_type);
1788  return;
1789  }
1790  state->amount_of_read_chars++;
1791  copy_string(type_code_str, data_type.left);
1792  parse_function_pointer(state, type_code_str, func_str.type_str);
1793  free_type_code_str_struct(&func_str);
1794  if (storage) {
1795  copy_string(type_code_str, " ");
1796  copy_string(type_code_str, storage);
1797  }
1798  copy_string(type_code_str, data_type.right);
1799  sdatatype_fini(&data_type);
1800  state->state = eTCStateEnd;
1801  return;
1802  }
1803  }
1804  parse_type_modifier(state, type_code_str, pointer_str);
1805 }
1806 
// Shorthand for state handlers: forwards the handler's own `state` and `type_code_str`
// to parse_pointer() together with the textual form of the pointer/reference.
1807 #define PARSE_POINTER(pointer_str) parse_pointer(state, type_code_str, pointer_str)
1808 
1810  PARSE_POINTER("*");
1811 }
1812 
1814  PARSE_POINTER("* volatile");
1815 }
1816 
1818  PARSE_POINTER("* const");
1819 }
1820 
1822  PARSE_POINTER("&");
1823 }
1824 
1826  PARSE_POINTER("& volatile");
1827 }
1828 
1830  if (*(state->buff_for_parsing++) != '$') {
1832  return;
1833  }
1834  state->amount_of_read_chars += 2;
1835  switch (*state->buff_for_parsing++) {
1836  case 'A':
1837  state->state = eTCStateP;
1838  break;
1839  case 'B':
1840  parse_type_modifier(state, type_code_str, "");
1841  break;
1842  case 'C':
1843  parse_type_modifier(state, type_code_str, "");
1844  break;
1845  case 'F':
1846  state->state = eTCStateP;
1847  break;
1848  case 'Q':
1849  parse_type_modifier(state, type_code_str, "&&");
1850  state->state = eTCStateEnd;
1851  break;
1852  case 'R':
1853  parse_type_modifier(state, type_code_str, "&& volatile");
1854  state->state = eTCStateEnd;
1855  break;
1856  case 'T':
1857  copy_string(type_code_str, "std::nullptr_t");
1858  state->state = eTCStateEnd;
1859  break;
1860  case 'V':
1861  state->state = eTCStateEnd;
1862  break;
1863  case 'Z':
1864  state->state = eTCStateEnd;
1865  break;
1866  default:
1868  break;
1869  }
1870 }
1871 
1872 #undef ONE_LETTER_ACTION
1873 #undef GO_TO_NEXT_STATE
1874 #undef DEF_STATE_ACTION
1875 
// Initial state of the type code state machine: looks at the current character and
// selects the matching eTCState<letter> state, or ends the machine for an unknown
// character. The character is consumed in every case.
// NOTE(review): most ONE_LETTER_STATE(...) entries and one statement of the default
// branch are missing from this listing (the listing numbers skip there).
1876 static void tc_state_start(SStateInfo *state, STypeCodeStr *type_code_str) {
1877 #define ONE_LETTER_STATE(letter) \
1878  case CHAR_WITH_QUOTES(letter): \
1879  state->state = eTCState##letter; \
1880  break;
1881 
1882  switch (*(state->buff_for_parsing)) {
1893  ONE_LETTER_STATE(K)
1902  ONE_LETTER_STATE(U)
1908  ONE_LETTER_STATE($)
1909  default:
1910  // eprintf("[uncorrect type] error while parsing type\n");
1911 
1912  state->state = eTCStateEnd;
1914  break;
1915  }
1916 
1917  state->amount_of_read_chars++;
1918  state->buff_for_parsing++;
1919 #undef ONE_LETTER_STATE
1920 }
1921 
1922 static void tc_state_end(SStateInfo *state, STypeCodeStr *type_code_str) {
1923  return;
1924 }
1925 
1926 static void init_state_struct(SStateInfo *state, const char *buff_for_parsing) {
1927  state->state = eTCStateStart;
1928  state->buff_for_parsing = buff_for_parsing;
1929  state->amount_of_read_chars = 0;
1930  state->err = eTCStateMachineErrOK;
1931 }
1932 
1933 static bool init_type_code_str_struct(STypeCodeStr *type_code_str) {
1934  type_code_str->type_str_len = MICROSOFT_NAME_LEN;
1935  type_code_str->type_str = type_code_str->type_str_buf;
1936  *type_code_str->type_str = '\0';
1937  type_code_str->curr_pos = 0;
1938  return true;
1939 }
1940 
1941 static void free_type_code_str_struct(STypeCodeStr *type_code_str) {
1942  if (type_code_str->type_str && type_code_str->type_str != type_code_str->type_str_buf) {
1943  RZ_FREE(type_code_str->type_str);
1944  }
1945  type_code_str->type_str_len = 0;
1946 }
1947 
1948 static char *type_code_str_get(STypeCodeStr *type_code_str) {
1949  char *ret;
1950  if (type_code_str->type_str == type_code_str->type_str_buf) {
1951  ret = malloc(type_code_str->curr_pos + 1);
1952  if (!ret) {
1953  return NULL;
1954  }
1955  memcpy(ret, type_code_str->type_str, type_code_str->curr_pos);
1956  ret[type_code_str->curr_pos] = '\0';
1957  } else {
1958  ret = type_code_str->type_str;
1959  }
1960  type_code_str->type_str = NULL;
1961  return ret;
1962 }
1963 
1965 // End of machine functions for parsing type codes
1967 
// Demangles one type code starting at `sym` by running the type code state machine
// until it reaches eTCStateEnd. On success *str_type_code receives the string from
// type_code_str_get() and *amount_of_read_chars the number of consumed characters;
// when the machine reports an error they are set to NULL and 0.
// NOTE(review): the declaration of `err` and the statements that translate state.err
// into the returned error are not visible in this listing (the listing numbers skip there).
1968 static EDemanglerErr get_type_code_string(const char *sym, size_t *amount_of_read_chars, char **str_type_code) {
1970  STypeCodeStr type_code_str;
1971  SStateInfo state;
1972 
1973  if (!init_type_code_str_struct(&type_code_str)) {
1975  goto get_type_code_string_err;
1976  }
1977 
1978  init_state_struct(&state, sym);
1979 
1980  while (state.state != eTCStateEnd) {
1981  run_state(&state, &type_code_str);
1982  if (state.err != eTCStateMachineErrOK) {
1983  *str_type_code = NULL;
1984  *amount_of_read_chars = 0;
1985  switch (state.err) {
1988  break;
1991  default:
1992  break;
1993  }
1994  goto get_type_code_string_err;
1995  }
1996  }
1997 
1998  *str_type_code = type_code_str_get(&type_code_str);
1999  *amount_of_read_chars = state.amount_of_read_chars;
2000 
// Reached on success as well; the builder is released in both cases.
2001 get_type_code_string_err:
2002  free_type_code_str_struct(&type_code_str);
2003  return err;
2004 }
2005 
2006 static inline size_t get_ptr_modifier(const char *encoded, SDataType *ptr_modifier) {
2007  const char *tmp = encoded;
2008  if (!ptr_modifier->left) {
2009  ptr_modifier->left = strdup("");
2010  }
2011  if (!ptr_modifier->right) {
2012  ptr_modifier->right = strdup("");
2013  }
2014 #define SET_PTR_MODIFIER(letter, modifier_left, modifier_right) \
2015  case letter: \
2016  ptr_modifier->left = dem_str_append(ptr_modifier->left, modifier_left); \
2017  ptr_modifier->right = dem_str_append(ptr_modifier->right, modifier_right); \
2018  break;
2019 
2020  bool ref = false;
2021  bool refref = false;
2022  while (*tmp) {
2023  switch (*tmp++) {
2024  SET_PTR_MODIFIER('E', "", " __ptr64")
2025  SET_PTR_MODIFIER('F', " __unaligned", "")
2026  SET_PTR_MODIFIER('I', "", " __restrict")
2027  case 'G':
2028  ref = true;
2029  break;
2030  case 'H':
2031  refref = true;
2032  break;
2033  default:
2034  if (ref) {
2035  ptr_modifier->right = dem_str_append(ptr_modifier->right, "&");
2036  if (refref) {
2037  ptr_modifier->right = dem_str_append(ptr_modifier->right, " ");
2038  }
2039  }
2040  if (refref) {
2041  ptr_modifier->right = dem_str_append(ptr_modifier->right, "&&");
2042  }
2043  return tmp - encoded - 1;
2044  }
2045  }
2046  return tmp - encoded;
2047 #undef SET_PTR_MODIFIER
2048 }
2049 
// Maps a storage class code to its qualifier text, stored in *storage_class:
//   'A'           -> NULL (no qualifier)
//   'B', 'J'      -> "const"
//   'C', 'G', 'K' -> "volatile"
//   'D', 'H', 'L' -> "const volatile"
// Any other character stores NULL.
// NOTE(review): one line of the default branch is missing from this listing;
// presumably it returns an error - confirm against the original file.
2050 static inline EDemanglerErr get_storage_class(const char encoded, const char **storage_class) {
2051  switch (encoded) {
2052  case 'A':
2053  *storage_class = NULL;
2054  break; // non-const
2055  case 'B':
2056  case 'J':
2057  *storage_class = "const";
2058  break;
2059  case 'C':
2060  case 'G':
2061  case 'K':
2062  *storage_class = "volatile";
2063  break;
2064  case 'D':
2065  case 'H':
2066  case 'L':
2067  *storage_class = "const volatile";
2068  break;
2069  default:
2070  *storage_class = NULL;
2072  }
2073  return eDemanglerErrOK;
2074 }
2075 
// Parses the data (non-function) part of a mangled symbol, selected by its first character:
//   '0'..'5' - variable: an access/static prefix for '0'..'2', then the type, pointer
//              modifiers and storage class; the text goes to data_type->left
//   '6', '7' - compiler generated static: pointer modifiers and storage class go to
//              data_type->left, an optional "{for `X'}" / "{for `X's `Y'}" suffix to
//              data_type->right
//   '8', '9' - only the character itself is consumed
// `len` (optional) receives the number of consumed characters and is 0 on early error
// returns. Returns eDemanglerErrInternal when data_type is NULL.
// NOTE(review): the declaration of `err` and most error-path statements are not visible
// in this listing (the listing numbers skip there).
2076 static EDemanglerErr parse_data_type(const char *sym, SDataType *data_type, size_t *len) {
2078  size_t i;
2079  const char *curr_pos = sym;
2080  SDataType modifier = { 0 };
2081  const char *storage_class = NULL;
2082  char *tmp;
2083  if (len) {
2084  *len = 0;
2085  }
2086  if (!data_type) {
2087  return eDemanglerErrInternal;
2088  }
2089  data_type->left = data_type->right = NULL;
2090  // Data type and access level
2091  switch (*curr_pos) {
2092  // Data
2093  case '0': // Private static member
2094  case '1': // Protected static member
2095  case '2': // Public static member
2096  case '3': // Normal variable
2097  case '4': // Normal variable
2098  case '5': // Normal variable
2099  switch (*curr_pos) {
2100  case '0':
2101  modifier.left = strdup("private: static ");
2102  break;
2103  case '1':
2104  modifier.left = strdup("protected: static ");
2105  break;
2106  case '2':
2107  modifier.left = strdup("public: static ");
2108  break;
2109  default:
2110  break;
2111  }
2112  curr_pos++;
2113  i = 0;
2114  err = get_type_code_string(curr_pos, &i, &tmp);
2115  if (err != eDemanglerErrOK) {
2116  sdatatype_fini(&modifier);
2117  return err;
2118  }
2119 
2120  curr_pos += i;
// get_ptr_modifier() appends to modifier.left/right and fills NULL members with "".
2121  curr_pos += get_ptr_modifier(curr_pos, &modifier);
2122  if (get_storage_class(*curr_pos, &storage_class) != eDemanglerErrOK) {
2123  sdatatype_fini(&modifier);
2124  free(tmp);
2126  }
2127  curr_pos++;
2128 
2129  data_type->right = strdup("");
2130  if (storage_class) {
2131  data_type->left = dem_str_newf("%s%s %s%s", modifier.left, tmp, storage_class, modifier.right);
2132  } else {
2133  data_type->left = dem_str_newf("%s%s%s", modifier.left, tmp, modifier.right);
2134  }
2135  free(tmp);
2136  sdatatype_fini(&modifier);
2137  break;
2138  case '6': // compiler generated static
2139  case '7': // compiler generated static
2140  curr_pos++;
2141  curr_pos += get_ptr_modifier(curr_pos, &modifier);
2142  if (get_storage_class(*curr_pos, &storage_class) != eDemanglerErrOK) {
2143  sdatatype_fini(&modifier);
2145  }
2146  curr_pos++;
2147 
2148  if (storage_class) {
2149  data_type->left = dem_str_newf("%s%s%s", storage_class, modifier.left, modifier.right);
2150  } else {
2151  data_type->left = dem_str_newf("%s%s", modifier.left, modifier.right);
2152  }
2153  sdatatype_fini(&modifier);
// Anything other than '@' here starts the name(s) used for the "{for ...}" suffix.
2154  if (*curr_pos != '@') {
2155  STypeCodeStr str;
2156  if (!init_type_code_str_struct(&str)) {
2158  }
2159  size_t i = get_namespace_and_name(curr_pos, &str, NULL, true);
2160  if (!i) {
2163  }
2164  curr_pos += i;
2165  if (*curr_pos && *(curr_pos + 1) != '@') {
2166  STypeCodeStr str2;
2167  if (!init_type_code_str_struct(&str2)) {
2170  }
2171  i = get_namespace_and_name(curr_pos + 1, &str2, NULL, true);
2172  if (!i) {
2176  }
2177  curr_pos += i + 1;
2178  data_type->right = dem_str_newf("{for `%s's `%s'}", str.type_str, str2.type_str);
2180  } else {
2181  data_type->right = dem_str_newf("{for `%s'}", str.type_str);
2182  }
2184  } else {
2185  data_type->right = strdup("");
2186  }
2187  if (*curr_pos == '@') {
2188  curr_pos++;
2189  }
2190  break;
2191  case '8':
2192  case '9':
2193  curr_pos++;
2194  break;
2195  default:
2197  }
2198  if (len) {
2199  *len = curr_pos - sym;
2200  }
2201  return eDemanglerErrOK;
2202 }
2203 
// Decodes the function type letter at `sym` ('A'..'Z'):
//   - access letters set data_type->left to the access text and raise either
//     *is_implicit_this_pointer (member functions) or *is_static (static members)
//   - thunk letters (G, H, O, P, W, X) additionally read an adjustor number with get_num()
//     and store "`adjustor{N}'" in data_type->right
//   - 'Y' and 'Z' set nothing
// `len` (optional) receives the number of consumed characters. On success both
// data_type->left and data_type->right are non-NULL unless strdup() failed.
// NOTE(review): the statement of the default branch is missing from this listing.
// NOTE(review): only SET_ACCESS_MODIFIER is #undef'ed in the visible code;
// SET_THUNK_MODIFIER stays defined after this function.
2204 static EDemanglerErr parse_function_type(const char *sym, SDataType *data_type,
2205  size_t *len, bool *is_static, bool *is_implicit_this_pointer) {
2206  const char *curr_pos = sym;
2207  *is_static = *is_implicit_this_pointer = false;
2208 #define SET_THUNK_MODIFIER(letter, modifier_str) \
2209  case letter: { \
2210  SStateInfo state; \
2211  init_state_struct(&state, curr_pos); \
2212  char *num = get_num(&state); \
2213  if (!num) { \
2214  return eDemanglerErrUncorrectMangledSymbol; \
2215  } \
2216  data_type->left = strdup(modifier_str); \
2217  data_type->right = dem_str_newf("`adjustor{%s}'", num); \
2218  free(num); \
2219  *is_implicit_this_pointer = true; \
2220  curr_pos += state.amount_of_read_chars; \
2221  break; \
2222  }
2223 
2224 #define SET_ACCESS_MODIFIER(letter, flag_set, modifier_str) \
2225  case letter: \
2226  data_type->left = strdup(modifier_str); \
2227  *flag_set = true; \
2228  break;
2229 
2230  /* Functions */
2231  switch (*curr_pos++) {
2232  SET_ACCESS_MODIFIER('A', is_implicit_this_pointer, "private");
2233  SET_ACCESS_MODIFIER('B', is_implicit_this_pointer, "private");
2234  SET_ACCESS_MODIFIER('C', is_static, "private: static");
2235  SET_ACCESS_MODIFIER('D', is_static, "private: static");
2236  SET_ACCESS_MODIFIER('E', is_implicit_this_pointer, "private virtual");
2237  SET_ACCESS_MODIFIER('F', is_implicit_this_pointer, "private virtual");
2238  SET_THUNK_MODIFIER('G', "[thunk]:private virtual");
2239  SET_THUNK_MODIFIER('H', "[thunk]:private virtual");
2240  SET_ACCESS_MODIFIER('I', is_implicit_this_pointer, "protected");
2241  SET_ACCESS_MODIFIER('J', is_implicit_this_pointer, "protected");
2242  SET_ACCESS_MODIFIER('K', is_static, "protected: static");
2243  SET_ACCESS_MODIFIER('L', is_static, "protected: static");
2244  SET_ACCESS_MODIFIER('M', is_implicit_this_pointer, "protected virtual");
2245  SET_ACCESS_MODIFIER('N', is_implicit_this_pointer, "protected virtual");
2246  SET_THUNK_MODIFIER('O', "[thunk]:protected virtual");
2247  SET_THUNK_MODIFIER('P', "[thunk]:protected virtual");
2248  SET_ACCESS_MODIFIER('Q', is_implicit_this_pointer, "public");
2249  SET_ACCESS_MODIFIER('R', is_implicit_this_pointer, "public");
2250  SET_ACCESS_MODIFIER('S', is_static, "public: static");
2251  SET_ACCESS_MODIFIER('T', is_static, "public: static");
2252  SET_ACCESS_MODIFIER('U', is_implicit_this_pointer, "public virtual");
2253  SET_ACCESS_MODIFIER('V', is_implicit_this_pointer, "public virtual");
2254  SET_THUNK_MODIFIER('W', "[thunk]:public virtual");
2255  SET_THUNK_MODIFIER('X', "[thunk]:public virtual");
2256  case 'Y': // near
2257  case 'Z': // far
2258  break;
2259  default:
2261  }
2262 #undef SET_ACCESS_MODIFIER
2263  if (len) {
2264  *len = curr_pos - sym;
2265  }
2266  if (!data_type->left) {
2267  data_type->left = strdup("");
2268  }
2269  if (!data_type->right) {
2270  data_type->right = strdup("");
2271  }
2272  return eDemanglerErrOK;
2273 }
2274 
// Demangles the function part of a symbol starting at `sym`; the already decoded
// qualified name is taken from type_code_str.
// Parsing order: optional "$$F", function type letter (parse_function_type), optional
// 'E' (__ptr64), optional '$' plus one character (skipped), for member functions the
// this-pointer modifiers and storage class, the calling convention, optional '?' plus
// storage class of the return value, the return type ('@' means void), the arguments.
// The result is assembled as: access text, return type, calling convention, name,
// data_type.right, arguments, storage class text, this-pointer modifiers; a
// "#{return_type}" placeholder in the text is replaced by the return type.
// *demangled_function receives a heap string the caller must free; chars_read
// (optional) receives the number of consumed characters, also on error.
// NOTE(review): the declaration of `err` and some error assignments are not visible
// in this listing (the listing numbers skip there).
2275 static EDemanglerErr parse_function(const char *sym, STypeCodeStr *type_code_str, char **demangled_function, size_t *chars_read) {
2277  bool is_implicit_this_pointer;
2278  bool is_static;
2279  const char *memb_func_access_code = NULL;
2280  const char *call_conv = NULL;
2281  char *demangled_args = NULL;
2282  char *ret_type = NULL;
2283  SDataType data_type = { 0 };
2284  SDataType this_pointer_modifier = { 0 };
2285  const char *curr_pos = sym;
2286  bool __64ptr = false;
2287  size_t len;
2288 
2289  STypeCodeStr func_str;
2290  if (!init_type_code_str_struct(&func_str)) {
2292  goto parse_function_err;
2293  }
2294 
2295  if (!strncmp(curr_pos, "$$F", 3)) {
2296  // Managed function (Managed C++ or C++/CLI)
2297  curr_pos += 3;
2298  }
2299 
2300  err = parse_function_type(curr_pos, &data_type, &len, &is_static, &is_implicit_this_pointer);
2301  if (err != eDemanglerErrOK) {
2302  goto parse_function_err;
2303  }
2304 
2305  curr_pos += len;
2306 
2307  if (*curr_pos == 'E') {
2308  __64ptr = true;
2309  curr_pos++;
2310  }
2311 
// A '$' and the character after it (if any) are skipped without being interpreted.
2312  if (*curr_pos == '$') {
2313  curr_pos++;
2314  if (*curr_pos) {
2315  curr_pos++;
2316  }
2317  }
2318 
// Nothing follows the function type: print what has been collected so far.
2319  if (!*curr_pos) {
2320  goto print_function;
2321  }
2322 
2323  // member function access code
2324  if (is_implicit_this_pointer) {
2325  curr_pos += get_ptr_modifier(curr_pos, &this_pointer_modifier);
2326  err = get_storage_class(*curr_pos, &memb_func_access_code);
2327  if (err != eDemanglerErrOK) {
2328  goto parse_function_err;
2329  }
2330  curr_pos++;
2331  }
2332 
2333  // currently does not use because I can not find real example of
2334  // where to use this
2335  // just read in http://www.agner.org/optimize/calling_conventions.pdf
2336  // that this is possible
2337  // when some find the case where it is used please remove this (void)*
2338  // lines
2339  (void)is_static;
2340 
2341  // Calling convention
2342  call_conv = get_calling_convention(*curr_pos);
2343  if (!call_conv) {
2345  goto parse_function_err;
2346  }
2347  curr_pos++;
2348 
2349  // get storage class code for return
// NOTE(review): this stores into memb_func_access_code, overwriting the value read
// for the this pointer above - confirm that sharing the variable is intended.
2350  if (*curr_pos == '?') {
2351  err = get_storage_class(*++curr_pos, &memb_func_access_code);
2352  if (err != eDemanglerErrOK) {
2353  goto parse_function_err;
2354  }
2355  curr_pos++;
2356  }
2357 
2358  // Return type, or @ if 'void'
2359  if (*curr_pos == '@') {
2360  ret_type = strdup("void");
2361  curr_pos++;
2362  } else {
2363  err = get_type_code_string(curr_pos, &len, &ret_type);
2364  if (err != eDemanglerErrOK) {
2366  goto parse_function_err;
2367  }
2368 
2369  curr_pos += len;
2370  }
2371  err = parse_function_args(curr_pos, &demangled_args, &len);
2372  if (err != eDemanglerErrOK) {
2373  goto parse_function_err;
2374  }
2375 
2376  curr_pos += len;
2377 
2378 print_function:
2379 
// Access text is followed by ": ", or by a single space when it contains "static".
2380  if (!RZ_STR_ISEMPTY(data_type.left)) {
2381  copy_string(&func_str, data_type.left);
2382  if (!strstr(data_type.left, "static")) {
2383  copy_string(&func_str, ": ");
2384  } else {
2385  copy_string(&func_str, " ");
2386  }
2387  }
2388 
2389  if (ret_type) {
2390  copy_string(&func_str, ret_type);
2391  copy_string(&func_str, " ");
2392  }
2393 
2394  if (call_conv) {
2395  copy_string(&func_str, call_conv);
2396  copy_string(&func_str, " ");
2397  }
2398 
2399  if (type_code_str->type_str) {
2400  copy_string_n(&func_str, type_code_str->type_str, type_code_str->curr_pos);
2401  }
2402 
2403  if (!RZ_STR_ISEMPTY(data_type.right)) {
2404  copy_string(&func_str, data_type.right);
2405  }
2406 
2407  copy_string(&func_str, demangled_args);
2408  RZ_FREE(demangled_args);
2409 
2410  if (memb_func_access_code) {
2411  copy_string(&func_str, memb_func_access_code);
2412  }
2413 
2414  copy_string(&func_str, this_pointer_modifier.left);
2415 
2416  if (__64ptr) {
2417  copy_string(&func_str, " __ptr64");
2418  }
2419 
2420  copy_string(&func_str, this_pointer_modifier.right);
2421 
2422  if (ret_type) {
2423  if (strstr(func_str.type_str, "#{return_type}")) {
// The builder's string is detached first so that dem_str_replace() works on a
// string that is no longer the embedded buffer.
// NOTE(review): curr_pos is adjusted for a single replacement only - confirm the
// placeholder cannot occur more than once.
2424  func_str.type_str = type_code_str_get(&func_str);
2425  func_str.type_str = dem_str_replace(func_str.type_str, "#{return_type}", ret_type, 0);
2426  func_str.curr_pos -= strlen("#{return_type}") - strlen(ret_type);
2427  }
2428  }
2429 
2430  // need to be free by user
2431  if (func_str.type_str) {
2432  *demangled_function = type_code_str_get(&func_str);
2433  }
2434 
// Shared cleanup for success and failure.
2435 parse_function_err:
2436  if (chars_read) {
2437  *chars_read = curr_pos - sym;
2438  }
2439  sdatatype_fini(&data_type);
2440  sdatatype_fini(&this_pointer_modifier);
2441  free_type_code_str_struct(&func_str);
2442  free(ret_type);
2443  free(demangled_args);
2444  return err;
2445 }
2446 
2453 static EDemanglerErr parse_microsoft_mangled_name(const char *sym, char **demangled_name, size_t *chars_read) {
2454  STypeCodeStr type_code_str;
2456 
2457  const char *curr_pos = sym;
2458 
2459  if (!init_type_code_str_struct(&type_code_str)) {
2461  goto parse_microsoft_mangled_name_err;
2462  }
2463  size_t i;
2464  size_t len = get_namespace_and_name(curr_pos, &type_code_str, &i, false);
2465  if (!len) {
2467  goto parse_microsoft_mangled_name_err;
2468  }
2469 
2470  curr_pos += len;
2471 
2472  if (!*curr_pos) {
2473  *demangled_name = type_code_str_get(&type_code_str);
2474  goto parse_microsoft_mangled_name_err;
2475  }
2476 
2477  curr_pos++;
2478 
2479  if (!strncmp(curr_pos, "$$F", 3)) {
2480  // Managed function (Managed C++ or C++/CLI)
2481  curr_pos += 3;
2482  }
2483 
2484  if (curr_pos[0] == '_') {
2485  // TODO: __based()
2486  curr_pos++;
2487  }
2488 
2489  if (isdigit(*curr_pos)) {
2490  SDataType data_type = { 0 };
2491  err = parse_data_type(curr_pos, &data_type, &len);
2492  if (err != eDemanglerErrOK) {
2493  sdatatype_fini(&data_type);
2494  goto parse_microsoft_mangled_name_err;
2495  }
2496  curr_pos += len;
2497  *demangled_name = NULL;
2498  if (data_type.left) {
2499  *demangled_name = dem_str_newf("%s ", data_type.left);
2500  }
2501  *demangled_name = dem_str_append(*demangled_name, type_code_str.type_str);
2502  *demangled_name = dem_str_append(*demangled_name, data_type.right);
2503  sdatatype_fini(&data_type);
2504  } else if (isalpha(*curr_pos)) {
2505  err = parse_function(curr_pos, &type_code_str, demangled_name, &len);
2506  curr_pos += len;
2507  } else {
2509  }
2510 
2511 parse_microsoft_mangled_name_err:
2512  free_type_code_str_struct(&type_code_str);
2513  if (chars_read) {
2514  *chars_read = curr_pos - sym;
2515  }
2516  return err;
2517 }
2518 
2519 static EDemanglerErr parse_microsoft_rtti_mangled_name(const char *sym, char **demangled_name, size_t *chars_read) {
2521  char *type = NULL;
2522  const char *storage = NULL;
2523  if (chars_read) {
2524  *chars_read = 0;
2525  }
2526  if (RZ_STR_ISEMPTY(sym)) {
2528  }
2529  err = get_storage_class(*sym++, &storage);
2530  if (err != eDemanglerErrOK) {
2531  return err;
2532  }
2533  size_t len;
2534  err = get_type_code_string(sym, &len, &type);
2535  if (err != eDemanglerErrOK) {
2536  return err;
2537  }
2538  if (storage) {
2539  *demangled_name = dem_str_newf("%s %s", type, storage);
2540  } else {
2541  *demangled_name = dem_str_newf("%s", type);
2542  }
2543  if (chars_read) {
2544  *chars_read = len + 1;
2545  }
2546  free(type);
2547  return err;
2548 }
2549 
2551 EDemanglerErr microsoft_demangle(SDemangler *demangler, char **demangled_name) {
2553  // DemListIter *it = NULL;
2554  // char *tmp = NULL;
2555 
2556  // TODO: need refactor... maybe remove the static variable somewhere?
2559 
2560  if (!demangler || !demangled_name) {
2562  goto microsoft_demangle_err;
2563  }
2564 
2565  if (!strncmp(demangler->symbol, ".?", 2)) {
2566  err = parse_microsoft_rtti_mangled_name(demangler->symbol + 2, demangled_name, NULL);
2567  } else {
2568  err = parse_microsoft_mangled_name(demangler->symbol + 1, demangled_name, NULL);
2569  }
2570 
2571 microsoft_demangle_err:
2574  return err;
2575 }
size_t len
Definition: 6502dis.c:15
#define T(op)
lzma_index ** i
Definition: index.h:629
lzma_index * src
Definition: index.h:567
#define X(x, b, m)
#define A(x)
Definition: arc.h:165
#define I(x)
Definition: arc.h:164
#define R(x, b, m)
Definition: arc.h:168
#define B(x)
Definition: arc.h:166
#define C(x)
Definition: arc.h:167
static bool err
Definition: armass.c:435
#define W(x, y, z)
#define H(x)
#define Q(x)
int bits(struct state *s, int need)
Definition: blast.c:72
#define D
Definition: block.c:38
const lzma_allocator const uint8_t * in
Definition: block.h:527
#define P
#define NULL
Definition: cris-opc.c:27
int mod(int a, int b)
Definition: crypto_rot.c:8
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void static offset struct stat static buf void long static basep static whence static length const void static len static semflg const void static shmflg const struct timespec struct timespec static rem const char static group const void start
Definition: sflib.h:133
EDemanglerErr
Enum of possible errors while demangler working.
@ eDemanglerErrUncorrectMangledSymbol
uncorrect mangled symbol
@ eDemanglerErrUnsupportedMangling
unsupported mangling scheme yet
@ eDemanglerErrOK
if all is OK
@ eDemanglerErrMemoryAllocation
some memory allocation problem
@ eDemanglerErrInternal
when something very wrong happens
void dem_list_free(DemList *list)
char * dem_str_ndup(const char *ptr, int len)
DemListIter * dem_list_append(DemList *list, void *data)
char * dem_string_drain(DemString *ds)
char * dem_str_replace(char *str, const char *key, const char *val, int g)
ut32 dem_list_length(const DemList *list)
void * dem_list_get_n(const DemList *list, ut32 n)
void dem_string_free(DemString *ds)
DemString * dem_string_new()
char * dem_str_newf(const char *fmt,...)
char * dem_str_append(char *ptr, const char *string)
bool dem_string_append(DemString *ds, const char *string)
bool dem_string_appendf(DemString *ds, const char *fmt,...)
bool dem_string_append_n(DemString *ds, const char *string, size_t size)
DemList * dem_list_newf(DemListFree f)
uint16_t ut16
#define dem_list_head(x)
#define dem_list_foreach_prev(list, it, pos)
void(* DemListFree)(void *ptr)
uint32_t ut32
RZ_API void Ht_() free(HtName_(Ht) *ht)
Definition: ht_inc.c:130
voidpf void * buf
Definition: ioapi.h:138
uint8_t ut8
Definition: lh5801.h:11
return memset(p, 0, total)
memcpy(mem, inblock.get(), min(CONTAINING_RECORD(inblock.get(), MEMBLOCK, data) ->size, size))
#define M
Definition: common.h:37
void * realloc(void *ptr, size_t size)
Definition: malloc.c:144
void * malloc(size_t size)
Definition: malloc.c:123
static static fork const void static count static fd const char const char static newpath char char char static envp time_t static t const char static mode static whence const char static dir time_t static t unsigned static seconds const char struct utimbuf static buf static inc static sig const char static mode static oldfd struct tms static buf static getgid static geteuid const char static filename static arg static mask struct ustat static ubuf static getppid static setsid static egid sigset_t static set struct timeval struct timezone static tz fd_set fd_set fd_set struct timeval static timeout const char char static bufsiz const char static swapflags void static offset const char static length static mode static who const char struct statfs static buf unsigned unsigned num
Definition: sflib.h:126
return strdup("=SP r13\n" "=LR r14\n" "=PC r15\n" "=A0 r0\n" "=A1 r1\n" "=A2 r2\n" "=A3 r3\n" "=ZF zf\n" "=SF nf\n" "=OF vf\n" "=CF cf\n" "=SN or0\n" "gpr lr .32 56 0\n" "gpr pc .32 60 0\n" "gpr cpsr .32 64 0 ____tfiae_________________qvczn\n" "gpr or0 .32 68 0\n" "gpr tf .1 64.5 0 thumb\n" "gpr ef .1 64.9 0 endian\n" "gpr jf .1 64.24 0 java\n" "gpr qf .1 64.27 0 sticky_overflow\n" "gpr vf .1 64.28 0 overflow\n" "gpr cf .1 64.29 0 carry\n" "gpr zf .1 64.30 0 zero\n" "gpr nf .1 64.31 0 negative\n" "gpr itc .4 64.10 0 if_then_count\n" "gpr gef .4 64.16 0 great_or_equal\n" "gpr r0 .32 0 0\n" "gpr r1 .32 4 0\n" "gpr r2 .32 8 0\n" "gpr r3 .32 12 0\n" "gpr r4 .32 16 0\n" "gpr r5 .32 20 0\n" "gpr r6 .32 24 0\n" "gpr r7 .32 28 0\n" "gpr r8 .32 32 0\n" "gpr r9 .32 36 0\n" "gpr r10 .32 40 0\n" "gpr r11 .32 44 0\n" "gpr r12 .32 48 0\n" "gpr r13 .32 52 0\n" "gpr r14 .32 56 0\n" "gpr r15 .32 60 0\n" "gpr r16 .32 64 0\n" "gpr r17 .32 68 0\n")
char * dst
Definition: lz4.h:724
RZ_API RZ_OWN char *MACH0_() get_name(struct MACH0_(obj_t) *mo, ut32 stridx, bool filter)
Get a string from the string table referenced by the LC_SYMTAB command.
Definition: mach0.c:2563
static size_t get_operator_code(const char *buf, DemList *names_l, bool memorize)
static void sdatatype_fini(SDataType *data_type)
struct STypeCodeStr STypeCodeStr
static size_t get_namespace_and_name(const char *buf, STypeCodeStr *type_code_str, size_t *amount_of_names, bool memorize)
get_namespace_and_name
static EDemanglerErr parse_function(const char *sym, STypeCodeStr *type_code_str, char **demangled_function, size_t *chars_read)
ETCStateMachineErr
@ eTCStateMachineErrUnsupportedTypeCode
@ eTCStateMachineErrUncorrectTypeCode
@ eTCStateMachineErrMax
@ eTCStateMachineErrOK
@ eTCStateMachineErrAlloc
#define PARSE_POINTER(pointer_str)
static void tc_state_start(SStateInfo *state, STypeCodeStr *type_code_str)
static EDemanglerErr parse_function_args(const char *sym, char **demangled_args, size_t *read_chars)
struct SDataType SDataType
EObjectType
@ eObjectTypeGlobal
@ eObjectTypeStaticClassMember
@ eObjectTypeMax
static EDemanglerErr parse_microsoft_mangled_name(const char *sym, char **demangled_name, size_t *chars_read)
static bool copy_string_n(STypeCodeStr *type_code_str, const char *str_for_copy, size_t copy_len)
#define DECL_STATE_ACTION(action)
static void sstrinfo_free(SStrInfo *sstrinfo)
static void tc_state_end(SStateInfo *state, STypeCodeStr *type_code_str)
static void parse_type_modifier(SStateInfo *state, STypeCodeStr *type_code_str, const char *modifier_str)
#define SET_OPERATOR_CODE(str)
static DemList * abbr_types
static void parse_pointer(SStateInfo *state, STypeCodeStr *type_code_str, const char *pointer_str)
static void free_type_code_str_struct(STypeCodeStr *type_code_str)
static int get_template_params(const char *sym, size_t *amount_of_read_chars, char **str_type_code)
static char * type_code_str_get(STypeCodeStr *type_code_str)
static char * get_num(SStateInfo *state)
#define PROCESS_CASE(letter, type_str)
struct SStrInfo SStrInfo
#define SET_ACCESS_MODIFIER(letter, flag_set, modifier_str)
#define SET_THUNK_MODIFIER(letter, modifier_str)
static EDemanglerErr get_type_code_string(const char *sym, size_t *amount_of_read_chars, char **str_type_code)
static EDemanglerErr utf16be_to_utf8(const char *utf16be, size_t utf16be_len, char **utf8, size_t *utf8_len)
#define DEF_STATE_ACTION(action)
#define copy_string(type_code_str, str_for_copy)
static EDemanglerErr parse_function_type(const char *sym, SDataType *data_type, size_t *len, bool *is_static, bool *is_implicit_this_pointer)
#define SET_PTR_MODIFIER(letter, modifier_left, modifier_right)
static state_func const state_table[eTCStateMax]
static bool init_type_code_str_struct(STypeCodeStr *type_code_str)
#define ONE_LETTER_STATE(letter)
EDemanglerErr microsoft_demangle(SDemangler *demangler, char **demangled_name)
Do demangle for microsoft mangling scheme. Demangled name need to be free by user.
static EDemanglerErr parse_microsoft_rtti_mangled_name(const char *sym, char **demangled_name, size_t *chars_read)
#define GET_USER_DEF_TYPE_NAME(data_struct_str)
static DemList * abbr_names
static const char * get_calling_convention(char calling_convention)
static void run_state(SStateInfo *state_info, STypeCodeStr *type_code_str)
#define ONE_LETTER_ACTION(action, type)
static size_t get_template(const char *buf, SStrInfo *str_info, bool memorize)
static ssize_t utf16be_to_utf8_impl(const char *utf16be, size_t utf16be_len, char *utf8, size_t utf8_len)
static EDemanglerErr parse_data_type(const char *sym, SDataType *demangled_type, size_t *len)
#define MICROSOFT_NAME_LEN
static size_t get_ptr_modifier(const char *encoded, SDataType *ptr_modifier)
static EDemanglerErr get_storage_class(const char encoded, const char **storage_class)
void(* state_func)(struct SStateInfo *, STypeCodeStr *type_code_str)
static void parse_function_pointer(SStateInfo *state, STypeCodeStr *type_code_str, const char *pointer_str)
static ut16 read_be16(const void *src)
@ eTCStateV
@ eTCStateT
@ eTCStateStart
@ eTCStateE
@ eTCStateS
@ eTCStateM
@ eTCStateN
@ eTCStateG
@ eTCState_
@ eTCStateMax
@ eTCStateQ
@ eTCStateF
@ eTCStateK
@ eTCStateO
@ eTCStateZ
@ eTCStateI
@ eTCState
@ eTCStateB
@ eTCStateR
@ eTCStateA
@ eTCStateP
@ eTCStateW
@ eTCStateC
@ eTCStateX
@ eTCStateD
@ eTCStateEnd
@ eTCStateH
@ eTCStateU
@ eTCStateJ
#define NAME(action)
static void init_state_struct(SStateInfo *state, const char *buff_for_parsing)
struct SStateInfo SStateInfo
int type
Definition: mipsasm.c:17
const char * name
Definition: op.c:541
#define _(String)
Definition: opintl.h:53
#define O
Definition: rcond.c:14
#define J
Definition: rsp_idec.c:366
static RzSocket * s
Definition: rtr.c:28
#define RZ_STR_ISEMPTY(x)
Definition: rz_str.h:67
#define RZ_NEW0(x)
Definition: rz_types.h:284
#define RZ_FREE(x)
Definition: rz_types.h:369
#define isalpha(c)
Definition: safe-ctype.h:125
#define isdigit(c)
Definition: safe-ctype.h:131
#define isxdigit(c)
Definition: safe-ctype.h:145
int ssize_t
Definition: sftypes.h:39
#define d(i)
Definition: sha256.c:44
#define b(i)
Definition: sha256.c:42
#define c(i)
Definition: sha256.c:43
#define a(i)
Definition: sha256.c:41
size_t len
Demangler object.
char * symbol
symbol that need to be demangled
const char * buff_for_parsing
size_t amount_of_read_chars
ETCStateMachineErr err
char type_str_buf[MICROSOFT_NAME_LEN]
Definition: z80asm.h:102
Definition: dis.h:43
#define V(handle, symbol)
#define fail(test)
Definition: tests.h:29
#define F(x)
Definition: tricore.h:111
Definition: dis.c:32
#define N
Definition: zip_err_str.c:8
#define E
Definition: zip_err_str.c:12
#define Z
Definition: zip_err_str.c:10
#define L
Definition: zip_err_str.c:7
#define G
Definition: zip_err_str.c:13
#define S
Definition: zip_err_str.c:9
int read(izstream &zs, T *x, Items items)
Definition: zstream.h:115