Rizin
unix-like reverse engineering framework and cli tools
language.h
Go to the documentation of this file.
1 #ifndef TREE_SITTER_LANGUAGE_H_
2 #define TREE_SITTER_LANGUAGE_H_
3 
4 #ifdef __cplusplus
5 extern "C" {
6 #endif
7 
8 #include "./subtree.h"
9 #include "tree_sitter/parser.h"
10 
11 #define ts_builtin_sym_error_repeat (ts_builtin_sym_error - 1)
12 
13 typedef struct {
17 } TableEntry;
18 
19 typedef struct {
21  const uint16_t *data;
28 
34 
36 
38 
40 
41 static inline bool ts_language_is_symbol_external(const TSLanguage *self, TSSymbol symbol) {
42  return 0 < symbol && symbol < self->external_token_count + 1;
43 }
44 
45 static inline const TSParseAction *ts_language_actions(
46  const TSLanguage *self,
48  TSSymbol symbol,
50 ) {
52  ts_language_table_entry(self, state, symbol, &entry);
53  *count = entry.action_count;
54  return entry.actions;
55 }
56 
57 static inline bool ts_language_has_reduce_action(
58  const TSLanguage *self,
60  TSSymbol symbol
61 ) {
63  ts_language_table_entry(self, state, symbol, &entry);
64  return entry.action_count > 0 && entry.actions[0].type == TSParseActionTypeReduce;
65 }
66 
67 // Lookup the table value for a given symbol and state.
68 //
69 // For non-terminal symbols, the table value represents a successor state.
70 // For terminal symbols, it represents an index in the actions table.
71 // For 'large' parse states, this is a direct lookup. For 'small' parse
72 // states, this requires searching through the symbol groups to find
73 // the given symbol.
75  const TSLanguage *self,
77  TSSymbol symbol
78 ) {
79  if (state >= self->large_state_count) {
80  uint32_t index = self->small_parse_table_map[state - self->large_state_count];
81  const uint16_t *data = &self->small_parse_table[index];
82  uint16_t group_count = *(data++);
83  for (unsigned i = 0; i < group_count; i++) {
84  uint16_t section_value = *(data++);
85  uint16_t symbol_count = *(data++);
86  for (unsigned i = 0; i < symbol_count; i++) {
87  if (*(data++) == symbol) return section_value;
88  }
89  }
90  return 0;
91  } else {
92  return self->parse_table[state * self->symbol_count + symbol];
93  }
94 }
95 
96 static inline bool ts_language_has_actions(
97  const TSLanguage *self,
99  TSSymbol symbol
100 ) {
101  return ts_language_lookup(self, state, symbol) != 0;
102 }
103 
104 // Iterate over all of the symbols that are valid in the given state.
105 //
106 // For 'large' parse states, this just requires iterating through
107 // all possible symbols and checking the parse table for each one.
108 // For 'small' parse states, this exploits the structure of the
109 // table to only visit the valid symbols.
111  const TSLanguage *self,
113 ) {
114  bool is_small_state = state >= self->large_state_count;
115  const uint16_t *data;
116  const uint16_t *group_end = NULL;
117  uint16_t group_count = 0;
118  if (is_small_state) {
119  uint32_t index = self->small_parse_table_map[state - self->large_state_count];
120  data = &self->small_parse_table[index];
121  group_end = data + 1;
122  group_count = *data;
123  } else {
124  data = &self->parse_table[state * self->symbol_count] - 1;
125  }
126  return (LookaheadIterator) {
127  .language = self,
128  .data = data,
129  .group_end = group_end,
130  .group_count = group_count,
131  .is_small_state = is_small_state,
132  .symbol = UINT16_MAX,
133  .next_state = 0,
134  };
135 }
136 
138  // For small parse states, valid symbols are listed explicitly,
139  // grouped by their value. There's no need to look up the actions
140  // again until moving to the next group.
141  if (self->is_small_state) {
142  self->data++;
143  if (self->data == self->group_end) {
144  if (self->group_count == 0) return false;
145  self->group_count--;
146  self->table_value = *(self->data++);
147  unsigned symbol_count = *(self->data++);
148  self->group_end = self->data + symbol_count;
149  self->symbol = *self->data;
150  } else {
151  self->symbol = *self->data;
152  return true;
153  }
154  }
155 
156  // For large parse states, iterate through every symbol until one
157  // is found that has valid actions.
158  else {
159  do {
160  self->data++;
161  self->symbol++;
162  if (self->symbol >= self->language->symbol_count) return false;
163  self->table_value = *self->data;
164  } while (!self->table_value);
165  }
166 
167  // Depending on if the symbols is terminal or non-terminal, the table value either
168  // represents a list of actions or a successor state.
169  if (self->symbol < self->language->token_count) {
170  const TSParseActionEntry *entry = &self->language->parse_actions[self->table_value];
171  self->action_count = entry->entry.count;
172  self->actions = (const TSParseAction *)(entry + 1);
173  self->next_state = 0;
174  } else {
175  self->action_count = 0;
176  self->next_state = self->table_value;
177  }
178  return true;
179 }
180 
182  const TSLanguage *self,
184  TSSymbol symbol
185 ) {
186  if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) {
187  return 0;
188  } else if (symbol < self->token_count) {
189  uint32_t count;
190  const TSParseAction *actions = ts_language_actions(self, state, symbol, &count);
191  if (count > 0) {
192  TSParseAction action = actions[count - 1];
193  if (action.type == TSParseActionTypeShift) {
194  return action.shift.extra ? state : action.shift.state;
195  }
196  }
197  return 0;
198  } else {
199  return ts_language_lookup(self, state, symbol);
200  }
201 }
202 
203 // Whether the state is a "primary state". If this returns false, it indicates that there exists
204 // another state that behaves identically to this one with respect to query analysis.
205 static inline bool ts_language_state_is_primary(
206  const TSLanguage *self,
208 ) {
209  if (self->version >= 14) {
210  return state == self->primary_state_ids[state];
211  } else {
212  return true;
213  }
214 }
215 
216 static inline const bool *ts_language_enabled_external_tokens(
217  const TSLanguage *self,
218  unsigned external_scanner_state
219 ) {
220  if (external_scanner_state == 0) {
221  return NULL;
222  } else {
223  return self->external_scanner.states + self->external_token_count * external_scanner_state;
224  }
225 }
226 
228  const TSLanguage *self,
229  uint32_t production_id
230 ) {
231  return production_id ?
232  &self->alias_sequences[production_id * self->max_alias_sequence_length] :
233  NULL;
234 }
235 
237  const TSLanguage *self,
238  uint32_t production_id,
239  uint32_t child_index
240 ) {
241  return production_id ?
242  self->alias_sequences[production_id * self->max_alias_sequence_length + child_index] :
243  0;
244 }
245 
246 static inline void ts_language_field_map(
247  const TSLanguage *self,
248  uint32_t production_id,
249  const TSFieldMapEntry **start,
250  const TSFieldMapEntry **end
251 ) {
252  if (self->field_count == 0) {
253  *start = NULL;
254  *end = NULL;
255  return;
256  }
257 
258  TSFieldMapSlice slice = self->field_map_slices[production_id];
259  *start = &self->field_map_entries[slice.index];
260  *end = &self->field_map_entries[slice.index] + slice.length;
261 }
262 
264  const TSLanguage *self,
265  TSSymbol original_symbol,
266  const TSSymbol **start,
267  const TSSymbol **end
268 ) {
269  *start = &self->public_symbol_map[original_symbol];
270  *end = *start + 1;
271 
272  unsigned i = 0;
273  for (;;) {
274  TSSymbol symbol = self->alias_map[i++];
275  if (symbol == 0 || symbol > original_symbol) break;
276  uint16_t count = self->alias_map[i++];
277  if (symbol == original_symbol) {
278  *start = &self->alias_map[i];
279  *end = &self->alias_map[i + count];
280  break;
281  }
282  i += count;
283  }
284 }
285 
286 
287 #ifdef __cplusplus
288 }
289 #endif
290 
291 #endif // TREE_SITTER_LANGUAGE_H_
lzma_index ** i
Definition: index.h:629
#define NULL
Definition: cris-opc.c:27
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void count
Definition: sflib.h:98
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void static offset struct stat static buf void long static basep static whence static length const void static len static semflg const void static shmflg const struct timespec struct timespec static rem const char static group const void start
Definition: sflib.h:133
static const TSParseAction * ts_language_actions(const TSLanguage *self, TSStateId state, TSSymbol symbol, uint32_t *count)
Definition: language.h:45
static bool ts_language_has_reduce_action(const TSLanguage *self, TSStateId state, TSSymbol symbol)
Definition: language.h:57
static bool ts_language_is_symbol_external(const TSLanguage *self, TSSymbol symbol)
Definition: language.h:41
TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *, TSSymbol)
Definition: language.c:38
static uint16_t ts_language_lookup(const TSLanguage *self, TSStateId state, TSSymbol symbol)
Definition: language.h:74
static void ts_language_field_map(const TSLanguage *self, uint32_t production_id, const TSFieldMapEntry **start, const TSFieldMapEntry **end)
Definition: language.h:246
static bool ts_language_state_is_primary(const TSLanguage *self, TSStateId state)
Definition: language.h:205
static TSStateId ts_language_next_state(const TSLanguage *self, TSStateId state, TSSymbol symbol)
Definition: language.h:181
static bool ts_lookahead_iterator_next(LookaheadIterator *self)
Definition: language.h:137
void ts_language_table_entry(const TSLanguage *, TSStateId, TSSymbol, TableEntry *)
Definition: language.c:18
static bool ts_language_has_actions(const TSLanguage *self, TSStateId state, TSSymbol symbol)
Definition: language.h:96
#define ts_builtin_sym_error_repeat
Definition: language.h:11
static void ts_language_aliases_for_symbol(const TSLanguage *self, TSSymbol original_symbol, const TSSymbol **start, const TSSymbol **end)
Definition: language.h:263
TSSymbol ts_language_public_symbol(const TSLanguage *, TSSymbol)
Definition: language.c:51
static const TSSymbol * ts_language_alias_sequence(const TSLanguage *self, uint32_t production_id)
Definition: language.h:227
static const bool * ts_language_enabled_external_tokens(const TSLanguage *self, unsigned external_scanner_state)
Definition: language.h:216
static TSSymbol ts_language_alias_at(const TSLanguage *self, uint32_t production_id, uint32_t child_index)
Definition: language.h:236
static LookaheadIterator ts_language_lookaheads(const TSLanguage *self, TSStateId state)
Definition: language.h:110
uint16_t TSStateId
Definition: parser.h:16
@ TSParseActionTypeShift
Definition: parser.h:54
@ TSParseActionTypeReduce
Definition: parser.h:55
uint16_t TSSymbol
Definition: parser.h:19
#define ts_builtin_sym_error
Definition: parser.h:12
unsigned short uint16_t
Definition: sftypes.h:30
unsigned int uint32_t
Definition: sftypes.h:29
#define UINT16_MAX
TSStateId state
Definition: language.h:23
const uint16_t * group_end
Definition: language.h:22
TSSymbol symbol
Definition: language.h:30
bool is_small_state
Definition: language.h:27
uint16_t group_count
Definition: language.h:26
const TSLanguage * language
Definition: language.h:20
const TSParseAction * actions
Definition: language.h:29
TSStateId next_state
Definition: language.h:31
uint16_t table_value
Definition: language.h:24
uint16_t action_count
Definition: language.h:32
const uint16_t * data
Definition: language.h:21
uint16_t section_index
Definition: language.h:25
uint16_t length
Definition: parser.h:32
uint16_t index
Definition: parser.h:31
bool is_reusable
Definition: language.h:16
uint32_t action_count
Definition: language.h:15
const TSParseAction * actions
Definition: language.h:14
Definition: zipcmp.c:77
Definition: dis.h:43