Rizin
unix-like reverse engineering framework and cli tools
c_cpp_parser.c File Reference
#include <stdio.h>
#include <rz_types.h>
#include <rz_list.h>
#include <rz_util/rz_file.h>
#include <rz_type.h>
#include <tree_sitter/api.h>
#include <types_parser.h>

Go to the source code of this file.

Classes

struct  rz_type_parser_t
 

Macros

#define TS_START_END(node, start, end)
 

Functions

static char * ts_node_sub_string (TSNode node, const char *cstr)
 
TSLanguage * tree_sitter_c ()
 
CParserState * c_parser_state_new (HtPP *base_types, HtPP *callable_types)
 
void c_parser_state_free (CParserState *state)
 
void c_parser_state_free_keep_ht (CParserState *state)
 
void c_parser_state_reset_keep_ht (CParserState *state)
 
RZ_API RZ_OWN RzTypeParser * rz_type_parser_new ()
 Creates a new instance of the C type parser. More...
 
RZ_API RZ_OWN RzTypeParser * rz_type_parser_init (HtPP *types, HtPP *callables)
 Creates a new instance of the C type parser. More...
 
RZ_API void rz_type_parser_free (RZ_NONNULL RzTypeParser *parser)
 Frees the instance of the C type parser without destroying hashtables. More...
 
RZ_API void rz_type_parser_free_purge (RZ_NONNULL RzTypeParser *parser)
 Frees the instance of the C type parser and destroys the hashtables. More...
 
static int type_parse_string (CParserState *state, const char *code, char **error_msg)
 
RZ_API int rz_type_parse_string_stateless (RzTypeParser *parser, const char *code, char **error_msg)
 Parses the C type string reusing the existing parser state. More...
 
RZ_API int rz_type_parse_file_stateless (RzTypeParser *parser, const char *path, const char *dir, char **error_msg)
 Parses the C types file reusing the existing parser state. More...
 
RZ_API int rz_type_parse_file (RzTypeDB *typedb, const char *path, const char *dir, char **error_msg)
 Parses the C types file creating the new parser state. More...
 
RZ_API int rz_type_parse_string (RzTypeDB *typedb, const char *code, char **error_msg)
 Parses the C type string creating the new parser state. More...
 
RZ_API void rz_type_parse_reset (RzTypeDB *typedb)
 Reset the C parser state. More...
 
RZ_API RZ_OWN RzType * rz_type_parse_string_single (RzTypeParser *parser, const char *code, char **error_msg)
 Parses the single C type definition. More...
 
RZ_API RZ_OWN RzType * rz_type_parse_string_declaration_single (RzTypeParser *parser, const char *code, char **error_msg)
 Parses the single C type declaration. More...
 

Macro Definition Documentation

◆ TS_START_END

#define TS_START_END (   node,
  start,
  end 
)
Value:
do { \
start = ts_node_start_byte(node); \
end = ts_node_end_byte(node); \
} while (0)
uint32_t ts_node_start_byte(TSNode)
Definition: node.c:36
uint32_t ts_node_end_byte(TSNode)
Definition: node.c:406

Definition at line 13 of file c_cpp_parser.c.

Function Documentation

◆ c_parser_state_free()

void c_parser_state_free ( CParserState *  state)

Definition at line 55 of file c_cpp_parser.c.

55  {
56  ht_pp_free(state->forward);
57  ht_pp_free(state->types);
58  ht_pp_free(state->callables);
59  rz_strbuf_free(state->debug);
60  rz_strbuf_free(state->warnings);
61  rz_strbuf_free(state->errors);
62  free(state);
63  return;
64 }
RZ_API void Ht_() free(HtName_(Ht) *ht)
Definition: ht_inc.c:130
RZ_API void rz_strbuf_free(RzStrBuf *sb)
Definition: strbuf.c:358
Definition: dis.h:43

References free(), and rz_strbuf_free().

Referenced by rz_type_parser_free_purge().

◆ c_parser_state_free_keep_ht()

void c_parser_state_free_keep_ht ( CParserState *  state)

Definition at line 66 of file c_cpp_parser.c.

66  {
67  ht_pp_free(state->forward);
68  rz_strbuf_free(state->debug);
69  rz_strbuf_free(state->warnings);
70  rz_strbuf_free(state->errors);
71  free(state);
72  return;
73 }

References free(), and rz_strbuf_free().

Referenced by rz_type_parser_free().

◆ c_parser_state_new()

CParserState* c_parser_state_new ( HtPP *  base_types,
HtPP *  callable_types 
)

Definition at line 33 of file c_cpp_parser.c.

33  {
35  if (!base_types) {
36  state->types = ht_pp_new0();
37  } else {
38  state->types = base_types;
39  }
40  if (!callable_types) {
41  state->callables = ht_pp_new0();
42  } else {
43  state->callables = callable_types;
44  }
45  // Forward definitions require to have a special hashtable
46  state->forward = ht_pp_new0();
47  // Initializing error/warning/debug messages buffers
48  state->errors = rz_strbuf_new("");
49  state->warnings = rz_strbuf_new("");
50  state->debug = rz_strbuf_new("");
51  state->verbose = false;
52  return state;
53 }
RZ_API RzStrBuf * rz_strbuf_new(const char *s)
Definition: strbuf.c:8
#define RZ_NEW0(x)
Definition: rz_types.h:284

References RZ_NEW0, and rz_strbuf_new().

Referenced by rz_type_parse_string(), rz_type_parser_init(), and rz_type_parser_new().

◆ c_parser_state_reset_keep_ht()

void c_parser_state_reset_keep_ht ( CParserState *  state)

Definition at line 75 of file c_cpp_parser.c.

75  {
76  rz_strbuf_free(state->debug);
77  rz_strbuf_free(state->warnings);
78  rz_strbuf_free(state->errors);
79  // Initializing error/warning/debug messages buffers
80  state->errors = rz_strbuf_new("");
81  state->warnings = rz_strbuf_new("");
82  state->debug = rz_strbuf_new("");
83  return;
84 }

References rz_strbuf_free(), and rz_strbuf_new().

Referenced by rz_type_parse_string_declaration_single(), and rz_type_parse_string_single().

◆ rz_type_parse_file()

RZ_API int rz_type_parse_file ( RzTypeDB *  typedb,
const char *  path,
const char *  dir,
char **  error_msg 
)

Parses the C types file creating the new parser state.

Parameters
typedb - RzTypeDB instance
path - The path to the C file to parse
dir - The directory where the C file is located
error_msg - A pointer where all error messages will be stored

Definition at line 260 of file c_cpp_parser.c.

260  {
261  size_t read_bytes = 0;
262  char *source_code = rz_file_slurp(path, &read_bytes);
263  if (!source_code || !read_bytes) {
264  free(source_code);
265  return -1;
266  }
267  RZ_LOG_DEBUG("File size is %" PFMT64d " bytes, read %zu bytes\n", rz_file_size(path), read_bytes);
268  int result = rz_type_parse_string(typedb, source_code, error_msg);
269  free(source_code);
270  return result;
271 }
RZ_API int rz_type_parse_string(RzTypeDB *typedb, const char *code, char **error_msg)
Parses the C type string creating the new parser state.
Definition: c_cpp_parser.c:280
static static fork const void static count static fd const char const char static newpath const char static path const char path
Definition: sflib.h:35
RZ_API RZ_OWN char * rz_file_slurp(const char *str, RZ_NULLABLE size_t *usz)
Definition: file.c:454
RZ_API ut64 rz_file_size(const char *str)
Definition: file.c:205
#define RZ_LOG_DEBUG(fmtstr,...)
Definition: rz_log.h:49
#define PFMT64d
Definition: rz_types.h:394

References free(), path, PFMT64d, rz_file_size(), rz_file_slurp(), RZ_LOG_DEBUG, and rz_type_parse_string().

Referenced by cmd_print_format(), and rz_core_run_script().

◆ rz_type_parse_file_stateless()

RZ_API int rz_type_parse_file_stateless ( RzTypeParser *  parser,
const char *  path,
const char *  dir,
char **  error_msg 
)

Parses the C types file reusing the existing parser state.

Parameters
parser - RzTypeParser instance
path - The path to the C file to parse
dir - The directory where the C file is located
error_msg - A pointer where all error messages will be stored

Definition at line 239 of file c_cpp_parser.c.

239  {
240  size_t read_bytes = 0;
241  char *source_code = rz_file_slurp(path, &read_bytes);
242  if (!source_code || !read_bytes) {
243  free(source_code);
244  return -1;
245  }
246  RZ_LOG_DEBUG("File size is %" PFMT64d " bytes, read %zu bytes\n", rz_file_size(path), read_bytes);
247  int result = rz_type_parse_string_stateless(parser, source_code, error_msg);
248  free(source_code);
249  return result;
250 }
RZ_API int rz_type_parse_string_stateless(RzTypeParser *parser, const char *code, char **error_msg)
Parses the C type string reusing the existing parser state.
Definition: c_cpp_parser.c:227

References free(), cmd_descs_generate::parser, path, PFMT64d, rz_file_size(), rz_file_slurp(), RZ_LOG_DEBUG, and rz_type_parse_string_stateless().

Referenced by rz_types_open_file().

◆ rz_type_parse_reset()

RZ_API void rz_type_parse_reset ( RzTypeDB *  typedb)

Reset the C parser state.

Parameters
typedb - RzTypeDB instance

Definition at line 297 of file c_cpp_parser.c.

297  {
298  rz_type_parser_free(typedb->parser);
299  typedb->parser = rz_type_parser_new();
300 }
RZ_API RZ_OWN RzTypeParser * rz_type_parser_new()
Creates a new instance of the C type parser.
Definition: c_cpp_parser.c:96
RZ_API void rz_type_parser_free(RZ_NONNULL RzTypeParser *parser)
Frees the instance of the C type parser without destroying hashtables.
Definition: c_cpp_parser.c:127
RzTypeParser * parser
Definition: rz_type.h:37

References rz_type_db_t::parser, rz_type_parser_free(), and rz_type_parser_new().

Referenced by rz_type_del_all_handler().

◆ rz_type_parse_string()

RZ_API int rz_type_parse_string ( RzTypeDB *  typedb,
const char *  code,
char **  error_msg 
)

Parses the C type string creating the new parser state.

Parameters
typedb - RzTypeDB instance
code - The C type itself
error_msg - A pointer where all error messages will be stored

Definition at line 280 of file c_cpp_parser.c.

280  {
281  bool verbose = true;
282  // Create new C parser state
283  CParserState *state = c_parser_state_new(typedb->types, typedb->callables);
284  if (!state) {
285  eprintf("CParserState initialization error!\n");
286  return -1;
287  }
288  state->verbose = verbose;
289  return type_parse_string(state, code, error_msg);
290 }
static int type_parse_string(CParserState *state, const char *code, char **error_msg)
Definition: c_cpp_parser.c:141
CParserState * c_parser_state_new(HtPP *base_types, HtPP *callable_types)
Definition: c_cpp_parser.c:33
#define eprintf(x, y...)
Definition: rlcc.c:7
Definition: inftree9.h:24
HtPP * callables
Definition: rz_type.h:35
HtPP * types
Definition: rz_type.h:33
static int verbose
Definition: z80asm.c:73

References c_parser_state_new(), rz_type_db_t::callables, eprintf, type_parse_string(), rz_type_db_t::types, and verbose.

Referenced by rz_type_parse_file().

◆ rz_type_parse_string_declaration_single()

RZ_API RZ_OWN RzType* rz_type_parse_string_declaration_single ( RzTypeParser *  parser,
const char *  code,
char **  error_msg 
)

Parses the single C type declaration.

Parameters
parser - RzTypeParser parser instance
code - The C type itself
error_msg - A pointer where all error messages will be stored

Definition at line 411 of file c_cpp_parser.c.

411  {
412  if (error_msg) {
413  *error_msg = NULL;
414  }
415  // Create a parser.
416  TSParser *tsparser = ts_parser_new();
417  // Set the parser's language (C in this case)
419 
420  TSTree *tree = ts_parser_parse_string(tsparser, NULL, code, strlen(code));
421 
422  // Get the root node of the syntax tree.
423  TSNode root_node = ts_tree_root_node(tree);
424  int root_node_child_count = ts_node_named_child_count(root_node);
425  if (!root_node_child_count) {
426  parser_warning(parser->state, "Root node is empty!\n");
427  ts_tree_delete(tree);
428  ts_parser_delete(tsparser);
429  return NULL;
430  }
431 
432  // Some debugging
433  if (parser->state->verbose) {
434  parser_debug(parser->state, "code: \"%s\"\n", code);
435  parser_debug(parser->state, "root_node (%d children): %s\n", root_node_child_count, ts_node_type(root_node));
436  // Print the syntax tree as an S-expression.
437  char *string = ts_node_string(root_node);
438  parser_debug(parser->state, "Syntax tree: %s\n", string);
439  free(string);
440  }
441 
442  // At first step we should handle defines
443  // #define
444  // #if / #ifdef
445  // #else
446  // #endif
447  // After that, we should process include files and #error/#warning/#pragma
448  // Temporarily we could just run preprocessing step using tccpp code
449  //
450  // And only after that - run the normal C/C++ syntax parsing
451 
452  // Filter types function prototypes and start parsing
453  int i = 0, result = 0;
454  ParserTypePair *tpair = NULL;
455  for (i = 0; i < root_node_child_count; i++) {
456  parser_debug(parser->state, "Processing %d child...\n", i);
457  TSNode child = ts_node_named_child(root_node, i);
458  if (!parse_declaration_node(parser->state, child, code, &tpair)) {
459  break;
460  }
461  }
462 
463  // If there were errors during the parser then the result is different from 0
464  if (result || !tpair) {
465  char *error_msgs = rz_strbuf_drain_nofree(parser->state->errors);
466  RZ_LOG_DEBUG("Errors:\n");
467  RZ_LOG_DEBUG("%s", error_msgs);
468  char *warning_msgs = rz_strbuf_drain_nofree(parser->state->warnings);
469  RZ_LOG_DEBUG("Warnings:\n");
470  RZ_LOG_DEBUG("%s", warning_msgs);
471  if (error_msg) {
472  *error_msg = strdup(error_msgs);
473  }
474  free(error_msgs);
475  free(warning_msgs);
476  }
477  if (parser->state->verbose) {
478  char *debug_msgs = rz_strbuf_drain_nofree(parser->state->debug);
479  RZ_LOG_DEBUG("%s", debug_msgs);
480  free(debug_msgs);
481  }
482 
483  // After everything parsed, we should preserve the base type database
484  // Also we don't free the parser state, just reset the buffers for new use
486  ts_tree_delete(tree);
487  ts_parser_delete(tsparser);
488  return tpair ? tpair->type : NULL;
489 }
lzma_index ** i
Definition: index.h:629
const char * ts_node_type(TSNode)
Definition: node.c:420
void ts_parser_delete(TSParser *parser)
Definition: parser.c:1725
TSNode ts_node_named_child(TSNode, uint32_t)
Definition: node.c:496
char * ts_node_string(TSNode)
Definition: node.c:426
uint32_t ts_node_named_child_count(TSNode)
Definition: node.c:611
void ts_tree_delete(TSTree *self)
Definition: tree.c:26
TSNode ts_tree_root_node(const TSTree *self)
Definition: tree.c:36
bool ts_parser_set_language(TSParser *self, const TSLanguage *language)
Definition: parser.c:1754
TSParser * ts_parser_new(void)
Definition: parser.c:1704
TSTree * ts_parser_parse_string(TSParser *self, const TSTree *old_tree, const char *string, uint32_t length)
Definition: parser.c:1945
TSLanguage * tree_sitter_c()
Definition: parser.c:79645
void c_parser_state_reset_keep_ht(CParserState *state)
Definition: c_cpp_parser.c:75
#define NULL
Definition: cris-opc.c:27
return strdup("=SP r13\n" "=LR r14\n" "=PC r15\n" "=A0 r0\n" "=A1 r1\n" "=A2 r2\n" "=A3 r3\n" "=ZF zf\n" "=SF nf\n" "=OF vf\n" "=CF cf\n" "=SN or0\n" "gpr lr .32 56 0\n" "gpr pc .32 60 0\n" "gpr cpsr .32 64 0 ____tfiae_________________qvczn\n" "gpr or0 .32 68 0\n" "gpr tf .1 64.5 0 thumb\n" "gpr ef .1 64.9 0 endian\n" "gpr jf .1 64.24 0 java\n" "gpr qf .1 64.27 0 sticky_overflow\n" "gpr vf .1 64.28 0 overflow\n" "gpr cf .1 64.29 0 carry\n" "gpr zf .1 64.30 0 zero\n" "gpr nf .1 64.31 0 negative\n" "gpr itc .4 64.10 0 if_then_count\n" "gpr gef .4 64.16 0 great_or_equal\n" "gpr r0 .32 0 0\n" "gpr r1 .32 4 0\n" "gpr r2 .32 8 0\n" "gpr r3 .32 12 0\n" "gpr r4 .32 16 0\n" "gpr r5 .32 20 0\n" "gpr r6 .32 24 0\n" "gpr r7 .32 28 0\n" "gpr r8 .32 32 0\n" "gpr r9 .32 36 0\n" "gpr r10 .32 40 0\n" "gpr r11 .32 44 0\n" "gpr r12 .32 48 0\n" "gpr r13 .32 52 0\n" "gpr r14 .32 56 0\n" "gpr r15 .32 60 0\n" "gpr r16 .32 64 0\n" "gpr r17 .32 68 0\n")
RZ_API RZ_OWN char * rz_strbuf_drain_nofree(RzStrBuf *sb)
Definition: strbuf.c:349
RzType * type
Definition: types_parser.h:24
Definition: api.h:92
Definition: tree.h:15
int parse_declaration_node(CParserState *state, TSNode node, const char *text, ParserTypePair **tpair)
void parser_debug(CParserState *state, const char *fmt,...)
Definition: types_parser.c:37
void parser_warning(CParserState *state, const char *fmt,...)
Definition: types_parser.c:55

References c_parser_state_reset_keep_ht(), free(), i, NULL, parse_declaration_node(), cmd_descs_generate::parser, parser_debug(), parser_warning(), RZ_LOG_DEBUG, rz_strbuf_drain_nofree(), strdup(), tree_sitter_c(), ts_node_named_child(), ts_node_named_child_count(), ts_node_string(), ts_node_type(), ts_parser_delete(), ts_parser_new(), ts_parser_parse_string(), ts_parser_set_language(), ts_tree_delete(), ts_tree_root_node(), and ParserTypePair::type.

Referenced by rz_analysis_function_set_type_str().

◆ rz_type_parse_string_single()

RZ_API RZ_OWN RzType* rz_type_parse_string_single ( RzTypeParser *  parser,
const char *  code,
char **  error_msg 
)

Parses the single C type definition.

Parameters
parser - RzTypeParser parser instance
code - The C type itself
error_msg - A pointer where all error messages will be stored

Definition at line 309 of file c_cpp_parser.c.

309  {
311  if (error_msg) {
312  *error_msg = NULL;
313  }
314  // Create a parser.
315  TSParser *tsparser = ts_parser_new();
316  // Set the parser's language (C in this case)
318 
319  // Note, that the original C grammar doesn't have support for alternate roots,
320  // see:
321  // - https://github.com/tree-sitter/tree-sitter-c/issues/65
322  // - https://github.com/tree-sitter/tree-sitter/issues/1105
323  // Thus, we use our own patched C grammar that has an additional rule
324  // for type descriptor, but we use the `__TYPE_EXPRESSION` prefix for every
325  // such type descriptor expression.
326  char *patched_code = rz_str_newf("__TYPE_EXPRESSION %s", code);
327 
328  TSTree *tree = ts_parser_parse_string(tsparser, NULL, patched_code, strlen(patched_code));
329 
330  // Get the root node of the syntax tree.
331  TSNode root_node = ts_tree_root_node(tree);
332  int root_node_child_count = ts_node_named_child_count(root_node);
333  if (!root_node_child_count) {
334  parser_warning(parser->state, "Root node is empty!\n");
335  ts_tree_delete(tree);
336  ts_parser_delete(tsparser);
337  free(patched_code);
338  return NULL;
339  }
340 
341  // Some debugging
342  if (parser->state->verbose) {
343  parser_debug(parser->state, "code: \"%s\"\n", code);
344  parser_debug(parser->state, "patched code: \"%s\"\n", patched_code);
345  parser_debug(parser->state, "root_node (%d children): %s\n", root_node_child_count, ts_node_type(root_node));
346  // Print the syntax tree as an S-expression.
347  char *string = ts_node_string(root_node);
348  parser_debug(parser->state, "Syntax tree: %s\n", string);
349  free(string);
350  }
351 
352  // At first step we should handle defines
353  // #define
354  // #if / #ifdef
355  // #else
356  // #endif
357  // After that, we should process include files and #error/#warning/#pragma
358  // Temporarily we could just run preprocessing step using tccpp code
359  //
360  // And only after that - run the normal C/C++ syntax parsing
361 
362  // Filter types function prototypes and start parsing
363  int i = 0, result = 0;
364  ParserTypePair *tpair = NULL;
365  for (i = 0; i < root_node_child_count; i++) {
366  parser_debug(parser->state, "Processing %d child...\n", i);
367  TSNode child = ts_node_named_child(root_node, i);
368  if (!parse_type_descriptor_single(parser->state, child, patched_code, &tpair)) {
369  break;
370  }
371  }
372 
373  // If there were errors during the parser then the result is different from 0
374  if (result || !tpair) {
375  char *error_msgs = rz_strbuf_drain_nofree(parser->state->errors);
376  RZ_LOG_DEBUG("Errors:\n");
377  RZ_LOG_DEBUG("%s", error_msgs);
378  char *warning_msgs = rz_strbuf_drain_nofree(parser->state->warnings);
379  RZ_LOG_DEBUG("Warnings:\n");
380  RZ_LOG_DEBUG("%s", warning_msgs);
381  if (error_msg) {
382  *error_msg = strdup(error_msgs);
383  }
384  free(error_msgs);
385  free(warning_msgs);
386  }
387  if (parser->state->verbose) {
388  char *debug_msgs = rz_strbuf_drain_nofree(parser->state->debug);
389  RZ_LOG_DEBUG("%s", debug_msgs);
390  free(debug_msgs);
391  }
392 
393  // After everything parsed, we should preserve the base type database
394  // Also we don't free the parser state, just reset the buffers for new use
396  ts_tree_delete(tree);
397  ts_parser_delete(tsparser);
398  free(patched_code);
399  RzType *ret = tpair ? tpair->type : NULL;
400  free(tpair);
401  return ret;
402 }
#define rz_return_val_if_fail(expr, val)
Definition: rz_assert.h:108
RZ_API char * rz_str_newf(const char *fmt,...) RZ_PRINTF_CHECK(1
int parse_type_descriptor_single(CParserState *state, TSNode node, const char *text, ParserTypePair **tpair)

References c_parser_state_reset_keep_ht(), free(), i, NULL, parse_type_descriptor_single(), cmd_descs_generate::parser, parser_debug(), parser_warning(), RZ_LOG_DEBUG, rz_return_val_if_fail, rz_str_newf(), rz_strbuf_drain_nofree(), strdup(), tree_sitter_c(), ts_node_named_child(), ts_node_named_child_count(), ts_node_string(), ts_node_type(), ts_parser_delete(), ts_parser_new(), ts_parser_parse_string(), ts_parser_set_language(), ts_tree_delete(), ts_tree_root_node(), and ParserTypePair::type.

Referenced by get_struct_type(), get_typedef_type(), get_union_type(), parse_enum_type(), parse_struct_member(), parse_type(), parse_type_string_cached(), parse_typedef(), rz_analysis_dwarf_integrate_functions(), rz_analysis_function_signature_type_handler(), rz_analysis_function_vars_bp_handler(), rz_analysis_function_vars_regs_handler(), rz_analysis_function_vars_sp_handler(), rz_analysis_function_vars_type_handler(), rz_analysis_global_variable_add_handler(), rz_analysis_global_variable_retype_handler(), rz_core_types_link(), type_match(), typelinks_load_sdb(), types_xrefs(), var_type_clone_or_default_type(), var_type_set_str(), and variable_set_type().

◆ rz_type_parse_string_stateless()

RZ_API int rz_type_parse_string_stateless ( RzTypeParser *  parser,
const char *  code,
char **  error_msg 
)

Parses the C type string reusing the existing parser state.

Parameters
parser - RzTypeParser instance
code - The C type itself
error_msg - A pointer where all error messages will be stored

Definition at line 227 of file c_cpp_parser.c.

227  {
228  return type_parse_string(parser->state, code, error_msg);
229 }

References cmd_descs_generate::parser, and type_parse_string().

Referenced by rz_core_bin_export_info(), rz_type_db_edit_base_type(), rz_type_parse_file_stateless(), rz_types_define(), and rz_types_open_file().

◆ rz_type_parser_free()

RZ_API void rz_type_parser_free ( RZ_NONNULL RzTypeParser *  parser)

Frees the instance of the C type parser without destroying hashtables.

Definition at line 127 of file c_cpp_parser.c.

127  {
128  // We do not destroy HT by default since it might be used after
130  free(parser);
131 }
void c_parser_state_free_keep_ht(CParserState *state)
Definition: c_cpp_parser.c:66

References c_parser_state_free_keep_ht(), free(), and cmd_descs_generate::parser.

Referenced by rz_type_db_free(), rz_type_db_purge(), and rz_type_parse_reset().

◆ rz_type_parser_free_purge()

RZ_API void rz_type_parser_free_purge ( RZ_NONNULL RzTypeParser *  parser)

Frees the instance of the C type parser and destroys the hashtables.

Definition at line 136 of file c_cpp_parser.c.

136  {
137  c_parser_state_free(parser->state);
138  free(parser);
139 }
void c_parser_state_free(CParserState *state)
Definition: c_cpp_parser.c:55

References c_parser_state_free(), free(), and cmd_descs_generate::parser.

◆ rz_type_parser_init()

RZ_API RZ_OWN RzTypeParser* rz_type_parser_init ( HtPP *  types,
HtPP *  callables 
)

Creates a new instance of the C type parser.

Creates a new instance of the C types parser preloaded with the given hashtables for RzBaseTypes and RzCallable types. It will use the provided hashtables for storing the parsed types as well.

Parameters
types - RzBaseTypes hashtable to preload into the parser state
callables - RzCallable hashtable to preload into the parser state

Definition at line 115 of file c_cpp_parser.c.

115  {
117  if (!parser) {
118  return NULL;
119  }
120  parser->state = c_parser_state_new(types, callables);
121  return parser;
122 }
insn_type_descr_t types[]
Definition: or1k_disas.c:7

References c_parser_state_new(), NULL, cmd_descs_generate::parser, RZ_NEW0, and types.

Referenced by rz_type_db_new(), and rz_type_db_purge().

◆ rz_type_parser_new()

RZ_API RZ_OWN RzTypeParser* rz_type_parser_new ( )

Creates a new instance of the C type parser.

Creates a new instance of the C types parser with empty hashtables for RzBaseTypes and RzCallable types.

Definition at line 96 of file c_cpp_parser.c.

96  {
98  if (!parser) {
99  return NULL;
100  }
101  parser->state = c_parser_state_new(NULL, NULL);
102  return parser;
103 }

References c_parser_state_new(), NULL, cmd_descs_generate::parser, and RZ_NEW0.

Referenced by rz_type_parse_reset().

◆ tree_sitter_c()

TSLanguage* tree_sitter_c ( )

Definition at line 79645 of file parser.c.

79645  {
79646  static const TSLanguage language = {
79648  .symbol_count = SYMBOL_COUNT,
79649  .alias_count = ALIAS_COUNT,
79650  .token_count = TOKEN_COUNT,
79651  .external_token_count = EXTERNAL_TOKEN_COUNT,
79652  .state_count = STATE_COUNT,
79653  .large_state_count = LARGE_STATE_COUNT,
79654  .production_id_count = PRODUCTION_ID_COUNT,
79655  .field_count = FIELD_COUNT,
79656  .max_alias_sequence_length = MAX_ALIAS_SEQUENCE_LENGTH,
79657  .parse_table = &ts_parse_table[0][0],
79658  .small_parse_table = ts_small_parse_table,
79659  .small_parse_table_map = ts_small_parse_table_map,
79660  .parse_actions = ts_parse_actions,
79661  .symbol_names = ts_symbol_names,
79662  .field_names = ts_field_names,
79663  .field_map_slices = ts_field_map_slices,
79664  .field_map_entries = ts_field_map_entries,
79665  .symbol_metadata = ts_symbol_metadata,
79666  .public_symbol_map = ts_symbol_map,
79667  .alias_map = ts_non_terminal_alias_map,
79668  .alias_sequences = &ts_alias_sequences[0][0],
79669  .lex_modes = ts_lex_modes,
79670  .lex_fn = ts_lex,
79671  .keyword_lex_fn = ts_lex_keywords,
79672  .keyword_capture_token = sym_identifier,
79673  };
79674  return &language;
79675 }
uint32_t version
Definition: parser.h:91
static const TSFieldMapEntry ts_field_map_entries[]
Definition: parser.c:2086
static bool ts_lex_keywords(TSLexer *lexer, TSStateId state)
Definition: parser.c:4305
static const uint16_t ts_non_terminal_alias_map[]
Definition: parser.c:2343
static const uint32_t ts_small_parse_table_map[]
Definition: parser.c:76844
static const TSSymbol ts_symbol_map[]
Definition: parser.c:570
static bool ts_lex(TSLexer *lexer, TSStateId state)
Definition: parser.c:2347
#define FIELD_COUNT
Definition: parser.c:15
static const TSParseActionEntry ts_parse_actions[]
Definition: parser.c:78007
static const uint16_t ts_small_parse_table[]
Definition: parser.c:41251
@ sym_identifier
Definition: parser.c:20
#define STATE_COUNT
Definition: parser.c:9
static const TSLexMode ts_lex_modes[STATE_COUNT]
Definition: parser.c:5480
#define LARGE_STATE_COUNT
Definition: parser.c:10
#define SYMBOL_COUNT
Definition: parser.c:11
#define PRODUCTION_ID_COUNT
Definition: parser.c:17
static const TSFieldMapSlice ts_field_map_slices[PRODUCTION_ID_COUNT]
Definition: parser.c:2004
static const uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT]
Definition: parser.c:7048
#define TOKEN_COUNT
Definition: parser.c:13
static const TSSymbolMetadata ts_symbol_metadata[]
Definition: parser.c:846
#define ALIAS_COUNT
Definition: parser.c:12
static const char *const ts_symbol_names[]
Definition: parser.c:294
static const TSSymbol ts_alias_sequences[PRODUCTION_ID_COUNT][MAX_ALIAS_SEQUENCE_LENGTH]
Definition: parser.c:2306
#define MAX_ALIAS_SEQUENCE_LENGTH
Definition: parser.c:16
static const char *const ts_field_names[]
Definition: parser.c:1975
#define EXTERNAL_TOKEN_COUNT
Definition: parser.c:14
#define LANGUAGE_VERSION
Definition: parser.c:8

References ALIAS_COUNT, EXTERNAL_TOKEN_COUNT, FIELD_COUNT, LANGUAGE_VERSION, LARGE_STATE_COUNT, MAX_ALIAS_SEQUENCE_LENGTH, PRODUCTION_ID_COUNT, STATE_COUNT, sym_identifier, SYMBOL_COUNT, TOKEN_COUNT, ts_alias_sequences, ts_field_map_entries, ts_field_map_slices, ts_field_names, ts_lex(), ts_lex_keywords(), ts_lex_modes, ts_non_terminal_alias_map, ts_parse_actions, ts_parse_table, ts_small_parse_table, ts_small_parse_table_map, ts_symbol_map, ts_symbol_metadata, ts_symbol_names, and TSLanguage::version.

Referenced by rz_type_parse_string_declaration_single(), rz_type_parse_string_single(), and type_parse_string().

◆ ts_node_sub_string()

static char* ts_node_sub_string ( TSNode  node,
const char *  cstr 
)
static

Definition at line 19 of file c_cpp_parser.c.

19  {
20  ut32 start, end;
21  TS_START_END(node, start, end);
22  return rz_str_newf("%.*s", end - start, cstr + start);
23 }
#define TS_START_END(node, start, end)
Definition: c_cpp_parser.c:13
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void static offset struct stat static buf void long static basep static whence static length const void static len static semflg const void static shmflg const struct timespec struct timespec static rem const char static group const void start
Definition: sflib.h:133
uint32_t ut32

References test_evm::end, rz_str_newf(), start, and TS_START_END.

Referenced by type_parse_string().

◆ type_parse_string()

static int type_parse_string ( CParserState *  state,
const char *  code,
char **  error_msg 
)
static

Definition at line 141 of file c_cpp_parser.c.

141  {
142  // Create a parser.
144  // Set the parser's language (C in this case)
146 
147  TSTree *tree = ts_parser_parse_string(parser, NULL, code, strlen(code));
148 
149  // Get the root node of the syntax tree.
150  TSNode root_node = ts_tree_root_node(tree);
151  int root_node_child_count = ts_node_named_child_count(root_node);
152  if (!root_node_child_count) {
153  parser_warning(state, "Root node is empty!\n");
154  ts_tree_delete(tree);
156  return 0;
157  }
158 
159  // Some debugging
160  if (state->verbose) {
161  parser_debug(state, "root_node (%d children): %s\n", root_node_child_count, ts_node_type(root_node));
162  // Print the syntax tree as an S-expression.
163  char *string = ts_node_string(root_node);
164  parser_debug(state, "Syntax tree: %s\n", string);
165  free(string);
166  }
167 
168  // At first step we should handle defines
169  // #define
170  // #if / #ifdef
171  // #else
172  // #endif
173  // After that, we should process include files and #error/#warning/#pragma
174  // Temporarily we could just run preprocessing step using tccpp code
175  //
176  // And only after that - run the normal C/C++ syntax parsing
177 
178  // Filter types function prototypes and start parsing
179  int i = 0, result = 0;
180  for (i = 0; i < root_node_child_count; i++) {
181  TSNode child = ts_node_named_child(root_node, i);
182  // We skip ";" or "," - empty expressions
183  char *node_code = ts_node_sub_string(child, code);
184  if (!strcmp(node_code, ";") || !strcmp(node_code, ",")) {
185  free(node_code);
186  continue;
187  }
188  free(node_code);
189  parser_debug(state, "Processing %d child...\n", i);
190  result += parse_type_nodes_save(state, child, code);
191  }
192 
193  // If there were errors during the parser then the result is different from 0
194  if (result) {
195  char *error_msgs = rz_strbuf_drain_nofree(state->errors);
196  RZ_LOG_DEBUG("Errors:\n");
197  RZ_LOG_DEBUG("%s", error_msgs);
198  char *warning_msgs = rz_strbuf_drain_nofree(state->warnings);
199  RZ_LOG_DEBUG("Warnings:\n");
200  RZ_LOG_DEBUG("%s", warning_msgs);
201  if (error_msg) {
202  *error_msg = strdup(error_msgs);
203  }
204  free(error_msgs);
205  free(warning_msgs);
206  }
207  if (state->verbose) {
208  char *debug_msgs = rz_strbuf_drain_nofree(state->debug);
209  RZ_LOG_DEBUG("%s", debug_msgs);
210  free(debug_msgs);
211  }
212 
213  // After everything parsed, we should preserve the base type database
214  // And the state of the parser - anonymous structs, forward declarations, etc
215  ts_tree_delete(tree);
217  return result;
218 }
static char * ts_node_sub_string(TSNode node, const char *cstr)
Definition: c_cpp_parser.c:19
int parse_type_nodes_save(CParserState *state, TSNode node, const char *text)

References free(), i, NULL, parse_type_nodes_save(), cmd_descs_generate::parser, parser_debug(), parser_warning(), RZ_LOG_DEBUG, rz_strbuf_drain_nofree(), strdup(), tree_sitter_c(), ts_node_named_child(), ts_node_named_child_count(), ts_node_string(), ts_node_sub_string(), ts_node_type(), ts_parser_delete(), ts_parser_new(), ts_parser_parse_string(), ts_parser_set_language(), ts_tree_delete(), and ts_tree_root_node().

Referenced by rz_type_parse_string(), and rz_type_parse_string_stateless().