Rizin
unix-like reverse engineering framework and cli tools
subtree.h
Go to the documentation of this file.
1 #ifndef TREE_SITTER_SUBTREE_H_
2 #define TREE_SITTER_SUBTREE_H_
3 
4 #ifdef __cplusplus
5 extern "C" {
6 #endif
7 
8 #include <limits.h>
9 #include <stdbool.h>
10 #include <stdio.h>
11 #include "./length.h"
12 #include "./array.h"
13 #include "./error_costs.h"
14 #include "./host.h"
15 #include "tree_sitter/api.h"
16 #include "tree_sitter/parser.h"
17 
18 #define TS_TREE_STATE_NONE USHRT_MAX
19 #define NULL_SUBTREE ((Subtree) {.ptr = NULL})
20 
21 // The serialized state of an external scanner.
22 //
23 // Every time an external token subtree is created after a call to an
24 // external scanner, the scanner's `serialize` function is called to
25 // retrieve a serialized copy of its state. The bytes are then copied
26 // onto the subtree itself so that the scanner's state can later be
27 // restored using its `deserialize` function.
28 //
29 // Small byte arrays are stored inline, and long ones are allocated
30 // separately on the heap.
31 typedef struct {
32  union {
33  char *long_data;
34  char short_data[24];
35  };
38 
39 // A compact representation of a subtree.
40 //
41 // This representation is used for small leaf nodes that are not
42 // errors, and were not created by an external scanner.
43 //
44 // The idea behind the layout of this struct is that the `is_inline`
45 // bit will fall exactly into the same location as the least significant
46 // bit of the pointer in `Subtree` or `MutableSubtree`, respectively.
47 // Because of alignment, for any valid pointer this will be 0, giving
48 // us the opportunity to make use of this bit to signify whether to use
49 // the pointer or the inline struct.
51 
52 #define SUBTREE_BITS \
53  bool visible : 1; \
54  bool named : 1; \
55  bool extra : 1; \
56  bool has_changes : 1; \
57  bool is_missing : 1; \
58  bool is_keyword : 1;
59 
60 #define SUBTREE_SIZE \
61  uint8_t padding_columns; \
62  uint8_t padding_rows : 4; \
63  uint8_t lookahead_bytes : 4; \
64  uint8_t padding_bytes; \
65  uint8_t size_bytes;
66 
67 #if TS_BIG_ENDIAN
68 #if TS_PTR_SIZE == 32
69 
70 struct SubtreeInlineData {
74  bool unused : 1;
75  bool is_inline : 1;
77 };
78 
79 #else
80 
81 struct SubtreeInlineData {
86  bool unused : 1;
87  bool is_inline : 1;
88 };
89 
90 #endif
91 #else
92 
94  bool is_inline : 1;
99 };
100 
101 #endif
102 
103 #undef SUBTREE_BITS
104 #undef SUBTREE_SIZE
105 
106 // A heap-allocated representation of a subtree.
107 //
108 // This representation is used for parent nodes, external tokens,
109 // errors, and other leaf nodes whose data is too large to fit into
110 // the inline representation.
111 typedef struct {
112  volatile uint32_t ref_count;
120 
121  bool visible : 1;
122  bool named : 1;
123  bool extra : 1;
124  bool fragile_left : 1;
125  bool fragile_right : 1;
126  bool has_changes : 1;
129  bool is_missing : 1;
130  bool is_keyword : 1;
131 
132  union {
133  // Non-terminal subtrees (`child_count > 0`)
134  struct {
141  struct {
142  TSSymbol symbol;
143  TSStateId parse_state;
144  } first_leaf;
145  };
146 
147  // External terminal subtrees (`child_count == 0 && has_external_tokens`)
149 
150  // Error terminal subtrees (`child_count == 0 && symbol == ts_builtin_sym_error`)
152  };
154 
155 // The fundamental building block of a syntax tree.
156 typedef union {
159 } Subtree;
160 
161 // Like Subtree, but mutable.
162 typedef union {
166 
167 typedef Array(Subtree) SubtreeArray;
168 typedef Array(MutableSubtree) MutableSubtreeArray;
169 
170 typedef struct {
171  MutableSubtreeArray free_trees;
172  MutableSubtreeArray tree_stack;
173 } SubtreePool;
174 
175 void ts_external_scanner_state_init(ExternalScannerState *, const char *, unsigned);
177 
178 void ts_subtree_array_copy(SubtreeArray, SubtreeArray *);
179 void ts_subtree_array_clear(SubtreePool *, SubtreeArray *);
180 void ts_subtree_array_delete(SubtreePool *, SubtreeArray *);
181 void ts_subtree_array_remove_trailing_extras(SubtreeArray *, SubtreeArray *);
182 void ts_subtree_array_reverse(SubtreeArray *);
183 
186 
189  TSStateId, bool, bool, bool, const TSLanguage *
190 );
193 );
194 MutableSubtree ts_subtree_new_node(TSSymbol, SubtreeArray *, unsigned, const TSLanguage *);
195 Subtree ts_subtree_new_error_node(SubtreeArray *, bool, const TSLanguage *);
206 char *ts_subtree_string(Subtree, const TSLanguage *, bool include_all);
210 
211 #define SUBTREE_GET(self, name) (self.data.is_inline ? self.data.name : self.ptr->name)
212 
213 static inline TSSymbol ts_subtree_symbol(Subtree self) { return SUBTREE_GET(self, symbol); }
214 static inline bool ts_subtree_visible(Subtree self) { return SUBTREE_GET(self, visible); }
215 static inline bool ts_subtree_named(Subtree self) { return SUBTREE_GET(self, named); }
216 static inline bool ts_subtree_extra(Subtree self) { return SUBTREE_GET(self, extra); }
217 static inline bool ts_subtree_has_changes(Subtree self) { return SUBTREE_GET(self, has_changes); }
218 static inline bool ts_subtree_missing(Subtree self) { return SUBTREE_GET(self, is_missing); }
219 static inline bool ts_subtree_is_keyword(Subtree self) { return SUBTREE_GET(self, is_keyword); }
220 static inline TSStateId ts_subtree_parse_state(Subtree self) { return SUBTREE_GET(self, parse_state); }
221 static inline uint32_t ts_subtree_lookahead_bytes(Subtree self) { return SUBTREE_GET(self, lookahead_bytes); }
222 
223 #undef SUBTREE_GET
224 
225 // Get the size needed to store a heap-allocated subtree with the given
226 // number of children.
227 static inline size_t ts_subtree_alloc_size(uint32_t child_count) {
228  return child_count * sizeof(Subtree) + sizeof(SubtreeHeapData);
229 }
230 
231 // Get a subtree's children, which are allocated immediately before the
232 // tree's own heap data.
233 #define ts_subtree_children(self) \
234  ((self).data.is_inline ? NULL : (Subtree *)((self).ptr) - (self).ptr->child_count)
235 
236 static inline void ts_subtree_set_extra(MutableSubtree *self, bool is_extra) {
237  if (self->data.is_inline) {
238  self->data.extra = is_extra;
239  } else {
240  self->ptr->extra = is_extra;
241  }
242 }
243 
245  if (self.data.is_inline) return self.data.symbol;
246  if (self.ptr->child_count == 0) return self.ptr->symbol;
247  return self.ptr->first_leaf.symbol;
248 }
249 
251  if (self.data.is_inline) return self.data.parse_state;
252  if (self.ptr->child_count == 0) return self.ptr->parse_state;
253  return self.ptr->first_leaf.parse_state;
254 }
255 
256 static inline Length ts_subtree_padding(Subtree self) {
257  if (self.data.is_inline) {
258  Length result = {self.data.padding_bytes, {self.data.padding_rows, self.data.padding_columns}};
259  return result;
260  } else {
261  return self.ptr->padding;
262  }
263 }
264 
265 static inline Length ts_subtree_size(Subtree self) {
266  if (self.data.is_inline) {
267  Length result = {self.data.size_bytes, {0, self.data.size_bytes}};
268  return result;
269  } else {
270  return self.ptr->size;
271  }
272 }
273 
274 static inline Length ts_subtree_total_size(Subtree self) {
275  return length_add(ts_subtree_padding(self), ts_subtree_size(self));
276 }
277 
279  return ts_subtree_total_size(self).bytes;
280 }
281 
283  return self.data.is_inline ? 0 : self.ptr->child_count;
284 }
285 
287  return self.data.is_inline ? 0 : self.ptr->repeat_depth;
288 }
289 
291  return (self.data.is_inline || self.ptr->child_count == 0) ? 1 : self.ptr->node_count;
292 }
293 
295  if (ts_subtree_child_count(self) > 0) {
296  return self.ptr->visible_child_count;
297  } else {
298  return 0;
299  }
300 }
301 
303  if (ts_subtree_missing(self)) {
305  } else {
306  return self.data.is_inline ? 0 : self.ptr->error_cost;
307  }
308 }
309 
311  return (self.data.is_inline || self.ptr->child_count == 0) ? 0 : self.ptr->dynamic_precedence;
312 }
313 
315  if (ts_subtree_child_count(self) > 0) {
316  return self.ptr->production_id;
317  } else {
318  return 0;
319  }
320 }
321 
322 static inline bool ts_subtree_fragile_left(Subtree self) {
323  return self.data.is_inline ? false : self.ptr->fragile_left;
324 }
325 
326 static inline bool ts_subtree_fragile_right(Subtree self) {
327  return self.data.is_inline ? false : self.ptr->fragile_right;
328 }
329 
330 static inline bool ts_subtree_has_external_tokens(Subtree self) {
331  return self.data.is_inline ? false : self.ptr->has_external_tokens;
332 }
333 
334 static inline bool ts_subtree_depends_on_column(Subtree self) {
335  return self.data.is_inline ? false : self.ptr->depends_on_column;
336 }
337 
338 static inline bool ts_subtree_is_fragile(Subtree self) {
339  return self.data.is_inline ? false : (self.ptr->fragile_left || self.ptr->fragile_right);
340 }
341 
342 static inline bool ts_subtree_is_error(Subtree self) {
343  return ts_subtree_symbol(self) == ts_builtin_sym_error;
344 }
345 
346 static inline bool ts_subtree_is_eof(Subtree self) {
347  return ts_subtree_symbol(self) == ts_builtin_sym_end;
348 }
349 
351  Subtree result;
352  result.data = self.data;
353  return result;
354 }
355 
357  MutableSubtree result;
358  result.data = self.data;
359  return result;
360 }
361 
362 #ifdef __cplusplus
363 }
364 #endif
365 
366 #endif // TREE_SITTER_SUBTREE_H_
#define false
#define ERROR_COST_PER_RECOVERY
Definition: error_costs.h:5
#define ERROR_COST_PER_MISSING_TREE
Definition: error_costs.h:6
static Length length_add(Length len1, Length len2)
Definition: length.h:25
string FILE
Definition: benchmark.py:21
uint16_t TSStateId
Definition: parser.h:16
#define ts_builtin_sym_end
Definition: parser.h:13
uint16_t TSSymbol
Definition: parser.h:19
#define ts_builtin_sym_error
Definition: parser.h:12
unsigned short uint16_t
Definition: sftypes.h:30
int int32_t
Definition: sftypes.h:33
unsigned int uint32_t
Definition: sftypes.h:29
unsigned char uint8_t
Definition: sftypes.h:31
uint32_t length
Definition: subtree.h:36
Definition: length.h:9
uint32_t bytes
Definition: length.h:10
uint32_t visible_child_count
Definition: subtree.h:135
uint32_t named_child_count
Definition: subtree.h:136
int32_t dynamic_precedence
Definition: subtree.h:139
TSSymbol symbol
Definition: subtree.h:118
uint32_t repeat_depth
Definition: subtree.h:138
bool depends_on_column
Definition: subtree.h:128
bool fragile_right
Definition: subtree.h:125
bool fragile_left
Definition: subtree.h:124
bool is_keyword
Definition: subtree.h:130
uint32_t node_count
Definition: subtree.h:137
bool has_changes
Definition: subtree.h:126
int32_t lookahead_char
Definition: subtree.h:151
volatile uint32_t ref_count
Definition: subtree.h:112
Length size
Definition: subtree.h:114
uint32_t lookahead_bytes
Definition: subtree.h:115
bool is_missing
Definition: subtree.h:129
TSStateId parse_state
Definition: subtree.h:119
ExternalScannerState external_scanner_state
Definition: subtree.h:148
Length padding
Definition: subtree.h:113
uint32_t child_count
Definition: subtree.h:117
bool has_external_tokens
Definition: subtree.h:127
uint32_t error_cost
Definition: subtree.h:116
uint16_t production_id
Definition: subtree.h:140
uint16_t parse_state
Definition: subtree.h:97
SUBTREE_BITS uint8_t symbol
Definition: subtree.h:96
MutableSubtreeArray tree_stack
Definition: subtree.h:172
MutableSubtreeArray free_trees
Definition: subtree.h:171
char * ts_subtree_string(Subtree, const TSLanguage *, bool include_all)
Definition: subtree.c:947
static size_t ts_subtree_alloc_size(uint32_t child_count)
Definition: subtree.h:227
Subtree ts_subtree_new_error_node(SubtreeArray *, bool, const TSLanguage *)
Definition: subtree.c:542
static bool ts_subtree_fragile_left(Subtree self)
Definition: subtree.h:322
void ts_external_scanner_state_init(ExternalScannerState *, const char *, unsigned)
Definition: subtree.c:28
const char * ts_external_scanner_state_data(const ExternalScannerState *)
Definition: subtree.c:53
static bool ts_subtree_visible(Subtree self)
Definition: subtree.h:214
MutableSubtree ts_subtree_make_mut(SubtreePool *, Subtree)
Definition: subtree.c:284
static Subtree ts_subtree_from_mut(MutableSubtree self)
Definition: subtree.h:350
void ts_subtree_retain(Subtree)
Definition: subtree.c:577
void ts_subtree_balance(Subtree, SubtreePool *, const TSLanguage *)
Definition: subtree.c:338
Subtree ts_subtree_last_external_token(Subtree)
Definition: subtree.c:802
static bool ts_subtree_has_external_tokens(Subtree self)
Definition: subtree.h:330
static Length ts_subtree_total_size(Subtree self)
Definition: subtree.h:274
static MutableSubtree ts_subtree_to_mut_unsafe(Subtree self)
Definition: subtree.h:356
static uint32_t ts_subtree_total_bytes(Subtree self)
Definition: subtree.h:278
void ts_subtree_array_copy(SubtreeArray, SubtreeArray *)
Definition: subtree.c:70
static uint32_t ts_subtree_error_cost(Subtree self)
Definition: subtree.h:302
void ts_subtree_summarize(MutableSubtree, const Subtree *, uint32_t, const TSLanguage *)
void ts_subtree_print_dot_graph(Subtree, const TSLanguage *, FILE *)
Definition: subtree.c:1020
void ts_subtree_summarize_children(MutableSubtree, const TSLanguage *)
Definition: subtree.c:371
typedef Array(Subtree) SubtreeArray
bool ts_subtree_external_scanner_state_eq(Subtree, Subtree)
Definition: subtree.c:1027
static int32_t ts_subtree_dynamic_precedence(Subtree self)
Definition: subtree.h:310
static bool ts_subtree_extra(Subtree self)
Definition: subtree.h:216
Subtree ts_subtree_new_error(SubtreePool *, int32_t, Length, Length, uint32_t, TSStateId, const TSLanguage *)
Definition: subtree.c:244
static bool ts_subtree_is_keyword(Subtree self)
Definition: subtree.h:219
#define SUBTREE_BITS
Definition: subtree.h:52
void ts_subtree_array_clear(SubtreePool *, SubtreeArray *)
Definition: subtree.c:83
static bool ts_subtree_is_error(Subtree self)
Definition: subtree.h:342
static bool ts_subtree_has_changes(Subtree self)
Definition: subtree.h:217
static bool ts_subtree_missing(Subtree self)
Definition: subtree.h:218
static uint16_t ts_subtree_production_id(Subtree self)
Definition: subtree.h:314
static Length ts_subtree_size(Subtree self)
Definition: subtree.h:265
static uint32_t ts_subtree_repeat_depth(Subtree self)
Definition: subtree.h:286
static TSSymbol ts_subtree_leaf_symbol(Subtree self)
Definition: subtree.h:244
static bool ts_subtree_depends_on_column(Subtree self)
Definition: subtree.h:334
Subtree ts_subtree_new_leaf(SubtreePool *, TSSymbol, Length, Length, uint32_t, TSStateId, bool, bool, bool, const TSLanguage *)
Definition: subtree.c:167
static bool ts_subtree_named(Subtree self)
Definition: subtree.h:215
static bool ts_subtree_is_fragile(Subtree self)
Definition: subtree.h:338
#define SUBTREE_SIZE
Definition: subtree.h:60
void ts_subtree_array_reverse(SubtreeArray *)
Definition: subtree.c:112
void ts_subtree_array_delete(SubtreePool *, SubtreeArray *)
Definition: subtree.c:90
static TSStateId ts_subtree_parse_state(Subtree self)
Definition: subtree.h:220
static uint32_t ts_subtree_lookahead_bytes(Subtree self)
Definition: subtree.h:221
SubtreePool ts_subtree_pool_new(uint32_t capacity)
Definition: subtree.c:123
void ts_subtree_array_remove_trailing_extras(SubtreeArray *, SubtreeArray *)
Definition: subtree.c:95
int ts_subtree_compare(Subtree, Subtree)
Definition: subtree.c:615
void ts_subtree_set_symbol(MutableSubtree *, TSSymbol, const TSLanguage *)
Definition: subtree.c:226
static uint32_t ts_subtree_visible_child_count(Subtree self)
Definition: subtree.h:294
static uint32_t ts_subtree_child_count(Subtree self)
Definition: subtree.h:282
static void ts_subtree_set_extra(MutableSubtree *self, bool is_extra)
Definition: subtree.h:236
void ts_subtree_release(SubtreePool *, Subtree)
Definition: subtree.c:584
static bool ts_subtree_is_eof(Subtree self)
Definition: subtree.h:346
#define SUBTREE_GET(self, name)
Definition: subtree.h:211
void ts_subtree_pool_delete(SubtreePool *)
Definition: subtree.c:129
static TSStateId ts_subtree_leaf_parse_state(Subtree self)
Definition: subtree.h:250
Subtree ts_subtree_new_missing_leaf(SubtreePool *, TSSymbol, Length, uint32_t, const TSLanguage *)
Definition: subtree.c:558
static Length ts_subtree_padding(Subtree self)
Definition: subtree.h:256
static uint32_t ts_subtree_node_count(Subtree self)
Definition: subtree.h:290
Subtree ts_subtree_edit(Subtree, const TSInputEdit *edit, SubtreePool *)
Definition: subtree.c:640
static bool ts_subtree_fragile_right(Subtree self)
Definition: subtree.h:326
static TSSymbol ts_subtree_symbol(Subtree self)
Definition: subtree.h:213
MutableSubtree ts_subtree_new_node(TSSymbol, SubtreeArray *, unsigned, const TSLanguage *)
Definition: subtree.c:500
SubtreeHeapData * ptr
Definition: subtree.h:164
SubtreeInlineData data
Definition: subtree.h:163
const SubtreeHeapData * ptr
Definition: subtree.h:158
SubtreeInlineData data
Definition: subtree.h:157