Rizin
unix-like reverse engineering framework and cli tools
parser.c
Go to the documentation of this file.
1 #include <time.h>
2 #include <assert.h>
3 #include <stdio.h>
4 #include <limits.h>
5 #include <stdbool.h>
6 #include "tree_sitter/api.h"
7 #include "./alloc.h"
8 #include "./array.h"
9 #include "./atomic.h"
10 #include "./clock.h"
11 #include "./error_costs.h"
12 #include "./get_changed_ranges.h"
13 #include "./language.h"
14 #include "./length.h"
15 #include "./lexer.h"
16 #include "./reduce_action.h"
17 #include "./reusable_node.h"
18 #include "./stack.h"
19 #include "./subtree.h"
20 #include "./tree.h"
21 
22 #define LOG(...) \
23  if (self->lexer.logger.log || self->dot_graph_file) { \
24  snprintf(self->lexer.debug_buffer, TREE_SITTER_SERIALIZATION_BUFFER_SIZE, __VA_ARGS__); \
25  ts_parser__log(self); \
26  }
27 
28 #define LOG_LOOKAHEAD(symbol_name, size) \
29  if (self->lexer.logger.log || self->dot_graph_file) { \
30  char *buf = self->lexer.debug_buffer; \
31  const char *symbol = symbol_name; \
32  int off = sprintf(buf, "lexed_lookahead sym:"); \
33  for ( \
34  int i = 0; \
35  symbol[i] != '\0' \
36  && off < TREE_SITTER_SERIALIZATION_BUFFER_SIZE; \
37  i++ \
38  ) { \
39  switch (symbol[i]) { \
40  case '\t': buf[off++] = '\\'; buf[off++] = 't'; break; \
41  case '\n': buf[off++] = '\\'; buf[off++] = 'n'; break; \
42  case '\v': buf[off++] = '\\'; buf[off++] = 'v'; break; \
43  case '\f': buf[off++] = '\\'; buf[off++] = 'f'; break; \
44  case '\r': buf[off++] = '\\'; buf[off++] = 'r'; break; \
45  case '\\': buf[off++] = '\\'; buf[off++] = '\\'; break; \
46  default: buf[off++] = symbol[i]; break; \
47  } \
48  } \
49  snprintf( \
50  buf + off, \
51  TREE_SITTER_SERIALIZATION_BUFFER_SIZE - off, \
52  ", size:%u", \
53  size \
54  ); \
55  ts_parser__log(self); \
56  }
57 
58 #define LOG_STACK() \
59  if (self->dot_graph_file) { \
60  ts_stack_print_dot_graph(self->stack, self->language, self->dot_graph_file); \
61  fputs("\n\n", self->dot_graph_file); \
62  }
63 
64 #define LOG_TREE(tree) \
65  if (self->dot_graph_file) { \
66  ts_subtree_print_dot_graph(tree, self->language, self->dot_graph_file); \
67  fputs("\n", self->dot_graph_file); \
68  }
69 
70 #define SYM_NAME(symbol) ts_language_symbol_name(self->language, symbol)
71 
72 #define TREE_NAME(tree) SYM_NAME(ts_subtree_symbol(tree))
73 
74 static const unsigned MAX_VERSION_COUNT = 6;
75 static const unsigned MAX_VERSION_COUNT_OVERFLOW = 4;
76 static const unsigned MAX_SUMMARY_DEPTH = 16;
77 static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE;
78 static const unsigned OP_COUNT_PER_TIMEOUT_CHECK = 100;
79 
80 typedef struct {
84 } TokenCache;
85 
86 struct TSParser {
91  ReduceActionSet reduce_actions;
93  SubtreeArray trailing_extras;
94  SubtreeArray trailing_extras2;
95  SubtreeArray scratch_trees;
102  unsigned accept_count;
103  unsigned operation_count;
104  const volatile size_t *cancellation_flag;
108 };
109 
110 typedef struct {
111  unsigned cost;
112  unsigned node_count;
115 } ErrorStatus;
116 
117 typedef enum {
124 
125 typedef struct {
126  const char *string;
128 } TSStringInput;
129 
130 // StringInput
131 
132 static const char *ts_string_input_read(
133  void *_self,
134  uint32_t byte,
135  TSPoint pt,
137 ) {
138  (void)pt;
139  TSStringInput *self = (TSStringInput *)_self;
140  if (byte >= self->length) {
141  *length = 0;
142  return "";
143  } else {
144  *length = self->length - byte;
145  return self->string + byte;
146  }
147 }
148 
149 // Parser - Private
150 
151 static void ts_parser__log(TSParser *self) {
152  if (self->lexer.logger.log) {
153  self->lexer.logger.log(
154  self->lexer.logger.payload,
156  self->lexer.debug_buffer
157  );
158  }
159 
160  if (self->dot_graph_file) {
161  fprintf(self->dot_graph_file, "graph {\nlabel=\"");
162  for (char *c = &self->lexer.debug_buffer[0]; *c != 0; c++) {
163  if (*c == '"') fputc('\\', self->dot_graph_file);
164  fputc(*c, self->dot_graph_file);
165  }
166  fprintf(self->dot_graph_file, "\"\n}\n\n");
167  }
168 }
169 
171  TSParser *self,
173 ) {
174  bool did_break_down = false;
175  bool pending = false;
176 
177  do {
178  StackSliceArray pop = ts_stack_pop_pending(self->stack, version);
179  if (!pop.size) break;
180 
181  did_break_down = true;
182  pending = false;
183  for (uint32_t i = 0; i < pop.size; i++) {
184  StackSlice slice = pop.contents[i];
185  TSStateId state = ts_stack_state(self->stack, slice.version);
186  Subtree parent = *array_front(&slice.subtrees);
187 
188  for (uint32_t j = 0, n = ts_subtree_child_count(parent); j < n; j++) {
189  Subtree child = ts_subtree_children(parent)[j];
190  pending = ts_subtree_child_count(child) > 0;
191 
192  if (ts_subtree_is_error(child)) {
193  state = ERROR_STATE;
194  } else if (!ts_subtree_extra(child)) {
195  state = ts_language_next_state(self->language, state, ts_subtree_symbol(child));
196  }
197 
198  ts_subtree_retain(child);
199  ts_stack_push(self->stack, slice.version, child, pending, state);
200  }
201 
202  for (uint32_t j = 1; j < slice.subtrees.size; j++) {
203  Subtree tree = slice.subtrees.contents[j];
204  ts_stack_push(self->stack, slice.version, tree, false, state);
205  }
206 
207  ts_subtree_release(&self->tree_pool, parent);
208  array_delete(&slice.subtrees);
209 
210  LOG("breakdown_top_of_stack tree:%s", TREE_NAME(parent));
211  LOG_STACK();
212  }
213  } while (pending);
214 
215  return did_break_down;
216 }
217 
219  TSParser *self,
220  Subtree *lookahead,
222  ReusableNode *reusable_node
223 ) {
224  bool did_descend = false;
225  Subtree tree = reusable_node_tree(reusable_node);
226  while (ts_subtree_child_count(tree) > 0 && ts_subtree_parse_state(tree) != state) {
227  LOG("state_mismatch sym:%s", TREE_NAME(tree));
228  reusable_node_descend(reusable_node);
229  tree = reusable_node_tree(reusable_node);
230  did_descend = true;
231  }
232 
233  if (did_descend) {
234  ts_subtree_release(&self->tree_pool, *lookahead);
235  *lookahead = tree;
236  ts_subtree_retain(*lookahead);
237  }
238 }
239 
241  TSParser *self,
242  ErrorStatus a,
243  ErrorStatus b
244 ) {
245  (void)self;
246  if (!a.is_in_error && b.is_in_error) {
247  if (a.cost < b.cost) {
249  } else {
251  }
252  }
253 
254  if (a.is_in_error && !b.is_in_error) {
255  if (b.cost < a.cost) {
257  } else {
259  }
260  }
261 
262  if (a.cost < b.cost) {
263  if ((b.cost - a.cost) * (1 + a.node_count) > MAX_COST_DIFFERENCE) {
265  } else {
267  }
268  }
269 
270  if (b.cost < a.cost) {
271  if ((a.cost - b.cost) * (1 + b.node_count) > MAX_COST_DIFFERENCE) {
273  } else {
275  }
276  }
277 
278  if (a.dynamic_precedence > b.dynamic_precedence) return ErrorComparisonPreferLeft;
279  if (b.dynamic_precedence > a.dynamic_precedence) return ErrorComparisonPreferRight;
280  return ErrorComparisonNone;
281 }
282 
284  TSParser *self,
286 ) {
287  unsigned cost = ts_stack_error_cost(self->stack, version);
288  bool is_paused = ts_stack_is_paused(self->stack, version);
289  if (is_paused) cost += ERROR_COST_PER_SKIPPED_TREE;
290  return (ErrorStatus) {
291  .cost = cost,
292  .node_count = ts_stack_node_count_since_error(self->stack, version),
293  .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version),
294  .is_in_error = is_paused || ts_stack_state(self->stack, version) == ERROR_STATE
295  };
296 }
297 
299  TSParser *self,
301  bool is_in_error,
302  unsigned cost
303 ) {
304  if (self->finished_tree.ptr && ts_subtree_error_cost(self->finished_tree) <= cost) {
305  return true;
306  }
307 
308  Length position = ts_stack_position(self->stack, version);
309  ErrorStatus status = {
310  .cost = cost,
311  .is_in_error = is_in_error,
312  .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version),
313  .node_count = ts_stack_node_count_since_error(self->stack, version),
314  };
315 
316  for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) {
317  if (i == version ||
318  !ts_stack_is_active(self->stack, i) ||
319  ts_stack_position(self->stack, i).bytes < position.bytes) continue;
320  ErrorStatus status_i = ts_parser__version_status(self, i);
321  switch (ts_parser__compare_versions(self, status, status_i)) {
323  return true;
325  if (ts_stack_can_merge(self->stack, i, version)) return true;
326  break;
327  default:
328  break;
329  }
330  }
331 
332  return false;
333 }
334 
336  TSParser *self,
337  Subtree external_token
338 ) {
339  if (external_token.ptr) {
340  self->language->external_scanner.deserialize(
341  self->external_scanner_payload,
343  external_token.ptr->external_scanner_state.length
344  );
345  } else {
346  self->language->external_scanner.deserialize(self->external_scanner_payload, NULL, 0);
347  }
348 }
349 
351  TSParser *self,
353  Subtree tree,
354  TableEntry *table_entry
355 ) {
356  TSLexMode current_lex_mode = self->language->lex_modes[state];
357  TSSymbol leaf_symbol = ts_subtree_leaf_symbol(tree);
358  TSStateId leaf_state = ts_subtree_leaf_parse_state(tree);
359  TSLexMode leaf_lex_mode = self->language->lex_modes[leaf_state];
360 
361  // At the end of a non-terminal extra node, the lexer normally returns
362  // NULL, which indicates that the parser should look for a reduce action
363  // at symbol `0`. Avoid reusing tokens in this situation to ensure that
364  // the same thing happens when incrementally reparsing.
365  if (current_lex_mode.lex_state == (uint16_t)(-1)) return false;
366 
367  // If the token was created in a state with the same set of lookaheads, it is reusable.
368  if (
369  table_entry->action_count > 0 &&
370  memcmp(&leaf_lex_mode, &current_lex_mode, sizeof(TSLexMode)) == 0 &&
371  (
372  leaf_symbol != self->language->keyword_capture_token ||
374  )
375  ) return true;
376 
377  // Empty tokens are not reusable in states with different lookaheads.
378  if (ts_subtree_size(tree).bytes == 0 && leaf_symbol != ts_builtin_sym_end) return false;
379 
380  // If the current state allows external tokens or other tokens that conflict with this
381  // token, this token is not reusable.
382  return current_lex_mode.external_lex_state == 0 && table_entry->is_reusable;
383 }
384 
386  TSParser *self,
388  TSStateId parse_state
389 ) {
390  TSLexMode lex_mode = self->language->lex_modes[parse_state];
391  if (lex_mode.lex_state == (uint16_t)-1) {
392  LOG("no_lookahead_after_non_terminal_extra");
393  return NULL_SUBTREE;
394  }
395 
396  Length start_position = ts_stack_position(self->stack, version);
397  Subtree external_token = ts_stack_last_external_token(self->stack, version);
398  const bool *valid_external_tokens = ts_language_enabled_external_tokens(
399  self->language,
400  lex_mode.external_lex_state
401  );
402 
403  bool found_external_token = false;
404  bool error_mode = parse_state == ERROR_STATE;
405  bool skipped_error = false;
406  bool called_get_column = false;
407  int32_t first_error_character = 0;
408  Length error_start_position = length_zero();
409  Length error_end_position = length_zero();
410  uint32_t lookahead_end_byte = 0;
411  ts_lexer_reset(&self->lexer, start_position);
412 
413  for (;;) {
414  Length current_position = self->lexer.current_position;
415 
416  if (valid_external_tokens) {
417  LOG(
418  "lex_external state:%d, row:%u, column:%u",
419  lex_mode.external_lex_state,
420  current_position.extent.row,
421  current_position.extent.column
422  );
423  ts_lexer_start(&self->lexer);
424  ts_parser__restore_external_scanner(self, external_token);
425  bool found_token = self->language->external_scanner.scan(
426  self->external_scanner_payload,
427  &self->lexer.data,
428  valid_external_tokens
429  );
430  ts_lexer_finish(&self->lexer, &lookahead_end_byte);
431 
432  // Zero-length external tokens are generally allowed, but they're not
433  // allowed right after a syntax error. This is for two reasons:
434  // 1. After a syntax error, the lexer is looking for any possible token,
435  // as opposed to the specific set of tokens that are valid in some
436  // parse state. In this situation, it's very easy for an external
437  // scanner to produce unwanted zero-length tokens.
438  // 2. The parser sometimes inserts *missing* tokens to recover from
439  // errors. These tokens are also zero-length. If we allow more
440  // zero-length tokens to be created after missing tokens, it
441  // can lead to infinite loops. Forbidding zero-length tokens
442  // right at the point of error recovery is a conservative strategy
443  // for preventing this kind of infinite loop.
444  if (found_token && (
445  self->lexer.token_end_position.bytes > current_position.bytes ||
446  (!error_mode && ts_stack_has_advanced_since_error(self->stack, version))
447  )) {
448  found_external_token = true;
449  called_get_column = self->lexer.did_get_column;
450  break;
451  }
452 
453  ts_lexer_reset(&self->lexer, current_position);
454  }
455 
456  LOG(
457  "lex_internal state:%d, row:%u, column:%u",
458  lex_mode.lex_state,
459  current_position.extent.row,
460  current_position.extent.column
461  );
462  ts_lexer_start(&self->lexer);
463  bool found_token = self->language->lex_fn(&self->lexer.data, lex_mode.lex_state);
464  ts_lexer_finish(&self->lexer, &lookahead_end_byte);
465  if (found_token) break;
466 
467  if (!error_mode) {
468  error_mode = true;
469  lex_mode = self->language->lex_modes[ERROR_STATE];
470  valid_external_tokens = ts_language_enabled_external_tokens(
471  self->language,
472  lex_mode.external_lex_state
473  );
474  ts_lexer_reset(&self->lexer, start_position);
475  continue;
476  }
477 
478  if (!skipped_error) {
479  LOG("skip_unrecognized_character");
480  skipped_error = true;
481  error_start_position = self->lexer.token_start_position;
482  error_end_position = self->lexer.token_start_position;
483  first_error_character = self->lexer.data.lookahead;
484  }
485 
486  if (self->lexer.current_position.bytes == error_end_position.bytes) {
487  if (self->lexer.data.eof(&self->lexer.data)) {
488  self->lexer.data.result_symbol = ts_builtin_sym_error;
489  break;
490  }
491  self->lexer.data.advance(&self->lexer.data, false);
492  }
493 
494  error_end_position = self->lexer.current_position;
495  }
496 
497  Subtree result;
498  if (skipped_error) {
499  Length padding = length_sub(error_start_position, start_position);
500  Length size = length_sub(error_end_position, error_start_position);
501  uint32_t lookahead_bytes = lookahead_end_byte - error_end_position.bytes;
502  result = ts_subtree_new_error(
503  &self->tree_pool,
504  first_error_character,
505  padding,
506  size,
507  lookahead_bytes,
508  parse_state,
509  self->language
510  );
511 
513  SYM_NAME(ts_subtree_symbol(result)),
515  );
516  } else {
517  if (self->lexer.token_end_position.bytes < self->lexer.token_start_position.bytes) {
518  self->lexer.token_start_position = self->lexer.token_end_position;
519  }
520 
521  bool is_keyword = false;
522  TSSymbol symbol = self->lexer.data.result_symbol;
523  Length padding = length_sub(self->lexer.token_start_position, start_position);
524  Length size = length_sub(self->lexer.token_end_position, self->lexer.token_start_position);
525  uint32_t lookahead_bytes = lookahead_end_byte - self->lexer.token_end_position.bytes;
526 
527  if (found_external_token) {
528  symbol = self->language->external_scanner.symbol_map[symbol];
529  } else if (symbol == self->language->keyword_capture_token && symbol != 0) {
530  uint32_t end_byte = self->lexer.token_end_position.bytes;
531  ts_lexer_reset(&self->lexer, self->lexer.token_start_position);
532  ts_lexer_start(&self->lexer);
533  if (
534  self->language->keyword_lex_fn(&self->lexer.data, 0) &&
535  self->lexer.token_end_position.bytes == end_byte &&
536  ts_language_has_actions(self->language, parse_state, self->lexer.data.result_symbol)
537  ) {
538  is_keyword = true;
539  symbol = self->lexer.data.result_symbol;
540  }
541  }
542 
543  result = ts_subtree_new_leaf(
544  &self->tree_pool,
545  symbol,
546  padding,
547  size,
548  lookahead_bytes,
549  parse_state,
550  found_external_token,
551  called_get_column,
552  is_keyword,
553  self->language
554  );
555 
556  if (found_external_token) {
557  unsigned length = self->language->external_scanner.serialize(
558  self->external_scanner_payload,
559  self->lexer.debug_buffer
560  );
562  &((SubtreeHeapData *)result.ptr)->external_scanner_state,
563  self->lexer.debug_buffer,
564  length
565  );
566  }
567 
569  SYM_NAME(ts_subtree_symbol(result)),
571  );
572  }
573 
574  return result;
575 }
576 
578  TSParser *self,
580  size_t position,
581  Subtree last_external_token,
582  TableEntry *table_entry
583 ) {
584  TokenCache *cache = &self->token_cache;
585  if (
586  cache->token.ptr && cache->byte_index == position &&
587  ts_subtree_external_scanner_state_eq(cache->last_external_token, last_external_token)
588  ) {
589  ts_language_table_entry(self->language, state, ts_subtree_symbol(cache->token), table_entry);
590  if (ts_parser__can_reuse_first_leaf(self, state, cache->token, table_entry)) {
591  ts_subtree_retain(cache->token);
592  return cache->token;
593  }
594  }
595  return NULL_SUBTREE;
596 }
597 
599  TSParser *self,
600  size_t byte_index,
601  Subtree last_external_token,
602  Subtree token
603 ) {
604  TokenCache *cache = &self->token_cache;
605  if (token.ptr) ts_subtree_retain(token);
606  if (last_external_token.ptr) ts_subtree_retain(last_external_token);
607  if (cache->token.ptr) ts_subtree_release(&self->tree_pool, cache->token);
608  if (cache->last_external_token.ptr) ts_subtree_release(&self->tree_pool, cache->last_external_token);
609  cache->token = token;
610  cache->byte_index = byte_index;
611  cache->last_external_token = last_external_token;
612 }
613 
615  const TSParser *self,
616  uint32_t start_position,
617  uint32_t end_position
618 ) {
620  &self->included_range_differences,
621  self->included_range_difference_index,
622  start_position,
623  end_position
624  );
625 }
626 
628  TSParser *self,
630  TSStateId *state,
631  uint32_t position,
632  Subtree last_external_token,
633  TableEntry *table_entry
634 ) {
635  Subtree result;
636  while ((result = reusable_node_tree(&self->reusable_node)).ptr) {
637  uint32_t byte_offset = reusable_node_byte_offset(&self->reusable_node);
638  uint32_t end_byte_offset = byte_offset + ts_subtree_total_bytes(result);
639 
640  // Do not reuse an EOF node if the included ranges array has changes
641  // later on in the file.
642  if (ts_subtree_is_eof(result)) end_byte_offset = UINT32_MAX;
643 
644  if (byte_offset > position) {
645  LOG("before_reusable_node symbol:%s", TREE_NAME(result));
646  break;
647  }
648 
649  if (byte_offset < position) {
650  LOG("past_reusable_node symbol:%s", TREE_NAME(result));
651  if (end_byte_offset <= position || !reusable_node_descend(&self->reusable_node)) {
652  reusable_node_advance(&self->reusable_node);
653  }
654  continue;
655  }
656 
657  if (!ts_subtree_external_scanner_state_eq(self->reusable_node.last_external_token, last_external_token)) {
658  LOG("reusable_node_has_different_external_scanner_state symbol:%s", TREE_NAME(result));
659  reusable_node_advance(&self->reusable_node);
660  continue;
661  }
662 
663  const char *reason = NULL;
664  if (ts_subtree_has_changes(result)) {
665  reason = "has_changes";
666  } else if (ts_subtree_is_error(result)) {
667  reason = "is_error";
668  } else if (ts_subtree_missing(result)) {
669  reason = "is_missing";
670  } else if (ts_subtree_is_fragile(result)) {
671  reason = "is_fragile";
672  } else if (ts_parser__has_included_range_difference(self, byte_offset, end_byte_offset)) {
673  reason = "contains_different_included_range";
674  }
675 
676  if (reason) {
677  LOG("cant_reuse_node_%s tree:%s", reason, TREE_NAME(result));
678  if (!reusable_node_descend(&self->reusable_node)) {
679  reusable_node_advance(&self->reusable_node);
681  *state = ts_stack_state(self->stack, version);
682  }
683  continue;
684  }
685 
686  TSSymbol leaf_symbol = ts_subtree_leaf_symbol(result);
687  ts_language_table_entry(self->language, *state, leaf_symbol, table_entry);
688  if (!ts_parser__can_reuse_first_leaf(self, *state, result, table_entry)) {
689  LOG(
690  "cant_reuse_node symbol:%s, first_leaf_symbol:%s",
691  TREE_NAME(result),
692  SYM_NAME(leaf_symbol)
693  );
694  reusable_node_advance_past_leaf(&self->reusable_node);
695  break;
696  }
697 
698  LOG("reuse_node symbol:%s", TREE_NAME(result));
699  ts_subtree_retain(result);
700  return result;
701  }
702 
703  return NULL_SUBTREE;
704 }
705 
706 // Determine if a given tree should be replaced by an alternative tree.
707 //
708 // The decision is based on the trees' error costs (if any), their dynamic precedence,
709 // and finally, as a default, by a recursive comparison of the trees' symbols.
710 static bool ts_parser__select_tree(TSParser *self, Subtree left, Subtree right) {
711  if (!left.ptr) return true;
712  if (!right.ptr) return false;
713 
714  if (ts_subtree_error_cost(right) < ts_subtree_error_cost(left)) {
715  LOG("select_smaller_error symbol:%s, over_symbol:%s", TREE_NAME(right), TREE_NAME(left));
716  return true;
717  }
718 
719  if (ts_subtree_error_cost(left) < ts_subtree_error_cost(right)) {
720  LOG("select_smaller_error symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right));
721  return false;
722  }
723 
725  LOG("select_higher_precedence symbol:%s, prec:%u, over_symbol:%s, other_prec:%u",
726  TREE_NAME(right), ts_subtree_dynamic_precedence(right), TREE_NAME(left),
728  return true;
729  }
730 
732  LOG("select_higher_precedence symbol:%s, prec:%u, over_symbol:%s, other_prec:%u",
735  return false;
736  }
737 
738  if (ts_subtree_error_cost(left) > 0) return true;
739 
740  int comparison = ts_subtree_compare(left, right);
741  switch (comparison) {
742  case -1:
743  LOG("select_earlier symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right));
744  return false;
745  break;
746  case 1:
747  LOG("select_earlier symbol:%s, over_symbol:%s", TREE_NAME(right), TREE_NAME(left));
748  return true;
749  default:
750  LOG("select_existing symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right));
751  return false;
752  }
753 }
754 
755 // Determine if a given tree's children should be replaced by an alternative
756 // array of children.
758  TSParser *self,
759  Subtree left,
760  const SubtreeArray *children
761 ) {
762  array_assign(&self->scratch_trees, children);
763 
764  // Create a temporary subtree using the scratch trees array. This node does
765  // not perform any allocation except for possibly growing the array to make
766  // room for its own heap data. The scratch tree is never explicitly released,
767  // so the same 'scratch trees' array can be reused again later.
768  MutableSubtree scratch_tree = ts_subtree_new_node(
769  ts_subtree_symbol(left),
770  &self->scratch_trees,
771  0,
772  self->language
773  );
774 
775  return ts_parser__select_tree(
776  self,
777  left,
778  ts_subtree_from_mut(scratch_tree)
779  );
780 }
781 
782 static void ts_parser__shift(
783  TSParser *self,
786  Subtree lookahead,
787  bool extra
788 ) {
789  bool is_leaf = ts_subtree_child_count(lookahead) == 0;
790  Subtree subtree_to_push = lookahead;
791  if (extra != ts_subtree_extra(lookahead) && is_leaf) {
792  MutableSubtree result = ts_subtree_make_mut(&self->tree_pool, lookahead);
793  ts_subtree_set_extra(&result, extra);
794  subtree_to_push = ts_subtree_from_mut(result);
795  }
796 
797  ts_stack_push(self->stack, version, subtree_to_push, !is_leaf, state);
798  if (ts_subtree_has_external_tokens(subtree_to_push)) {
800  self->stack, version, ts_subtree_last_external_token(subtree_to_push)
801  );
802  }
803 }
804 
806  TSParser *self,
808  TSSymbol symbol,
809  uint32_t count,
810  int dynamic_precedence,
811  uint16_t production_id,
812  bool is_fragile,
813  bool end_of_non_terminal_extra
814 ) {
815  uint32_t initial_version_count = ts_stack_version_count(self->stack);
816 
817  // Pop the given number of nodes from the given version of the parse stack.
818  // If stack versions have previously merged, then there may be more than one
819  // path back through the stack. For each path, create a new parent node to
820  // contain the popped children, and push it onto the stack in place of the
821  // children.
822  StackSliceArray pop = ts_stack_pop_count(self->stack, version, count);
823  uint32_t removed_version_count = 0;
824  for (uint32_t i = 0; i < pop.size; i++) {
825  StackSlice slice = pop.contents[i];
826  StackVersion slice_version = slice.version - removed_version_count;
827 
828  // This is where new versions are added to the parse stack. The versions
829  // will all be sorted and truncated at the end of the outer parsing loop.
830  // Allow the maximum version count to be temporarily exceeded, but only
831  // by a limited threshold.
832  if (slice_version > MAX_VERSION_COUNT + MAX_VERSION_COUNT_OVERFLOW) {
833  ts_stack_remove_version(self->stack, slice_version);
834  ts_subtree_array_delete(&self->tree_pool, &slice.subtrees);
835  removed_version_count++;
836  while (i + 1 < pop.size) {
837  StackSlice next_slice = pop.contents[i + 1];
838  if (next_slice.version != slice.version) break;
839  ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees);
840  i++;
841  }
842  continue;
843  }
844 
845  // Extra tokens on top of the stack should not be included in this new parent
846  // node. They will be re-pushed onto the stack after the parent node is
847  // created and pushed.
848  SubtreeArray children = slice.subtrees;
849  ts_subtree_array_remove_trailing_extras(&children, &self->trailing_extras);
850 
852  symbol, &children, production_id, self->language
853  );
854 
855  // This pop operation may have caused multiple stack versions to collapse
856  // into one, because they all diverged from a common state. In that case,
857  // choose one of the arrays of trees to be the parent node's children, and
858  // delete the rest of the tree arrays.
859  while (i + 1 < pop.size) {
860  StackSlice next_slice = pop.contents[i + 1];
861  if (next_slice.version != slice.version) break;
862  i++;
863 
864  SubtreeArray children = next_slice.subtrees;
865  ts_subtree_array_remove_trailing_extras(&children, &self->trailing_extras2);
866 
868  self,
869  ts_subtree_from_mut(parent),
870  &children
871  )) {
872  ts_subtree_array_clear(&self->tree_pool, &self->trailing_extras);
873  ts_subtree_release(&self->tree_pool, ts_subtree_from_mut(parent));
874  array_swap(&self->trailing_extras, &self->trailing_extras2);
875  parent = ts_subtree_new_node(
876  symbol, &children, production_id, self->language
877  );
878  } else {
879  array_clear(&self->trailing_extras2);
880  ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees);
881  }
882  }
883 
884  TSStateId state = ts_stack_state(self->stack, slice_version);
885  TSStateId next_state = ts_language_next_state(self->language, state, symbol);
886  if (end_of_non_terminal_extra && next_state == state) {
887  parent.ptr->extra = true;
888  }
889  if (is_fragile || pop.size > 1 || initial_version_count > 1) {
890  parent.ptr->fragile_left = true;
891  parent.ptr->fragile_right = true;
893  } else {
894  parent.ptr->parse_state = state;
895  }
896  parent.ptr->dynamic_precedence += dynamic_precedence;
897 
898  // Push the parent node onto the stack, along with any extra tokens that
899  // were previously on top of the stack.
900  ts_stack_push(self->stack, slice_version, ts_subtree_from_mut(parent), false, next_state);
901  for (uint32_t j = 0; j < self->trailing_extras.size; j++) {
902  ts_stack_push(self->stack, slice_version, self->trailing_extras.contents[j], false, next_state);
903  }
904 
905  for (StackVersion j = 0; j < slice_version; j++) {
906  if (j == version) continue;
907  if (ts_stack_merge(self->stack, j, slice_version)) {
908  removed_version_count++;
909  break;
910  }
911  }
912  }
913 
914  // Return the first new stack version that was created.
915  return ts_stack_version_count(self->stack) > initial_version_count
916  ? initial_version_count
918 }
919 
920 static void ts_parser__accept(
921  TSParser *self,
923  Subtree lookahead
924 ) {
925  assert(ts_subtree_is_eof(lookahead));
926  ts_stack_push(self->stack, version, lookahead, false, 1);
927 
928  StackSliceArray pop = ts_stack_pop_all(self->stack, version);
929  for (uint32_t i = 0; i < pop.size; i++) {
930  SubtreeArray trees = pop.contents[i].subtrees;
931 
933  for (uint32_t j = trees.size - 1; j + 1 > 0; j--) {
934  Subtree tree = trees.contents[j];
935  if (!ts_subtree_extra(tree)) {
936  assert(!tree.data.is_inline);
937  uint32_t child_count = ts_subtree_child_count(tree);
938  const Subtree *children = ts_subtree_children(tree);
939  for (uint32_t k = 0; k < child_count; k++) {
940  ts_subtree_retain(children[k]);
941  }
942  array_splice(&trees, j, 1, child_count, children);
944  ts_subtree_symbol(tree),
945  &trees,
946  tree.ptr->production_id,
947  self->language
948  ));
949  ts_subtree_release(&self->tree_pool, tree);
950  break;
951  }
952  }
953 
954  assert(root.ptr);
955  self->accept_count++;
956 
957  if (self->finished_tree.ptr) {
958  if (ts_parser__select_tree(self, self->finished_tree, root)) {
959  ts_subtree_release(&self->tree_pool, self->finished_tree);
960  self->finished_tree = root;
961  } else {
962  ts_subtree_release(&self->tree_pool, root);
963  }
964  } else {
965  self->finished_tree = root;
966  }
967  }
968 
969  ts_stack_remove_version(self->stack, pop.contents[0].version);
970  ts_stack_halt(self->stack, version);
971 }
972 
974  TSParser *self,
975  StackVersion starting_version,
976  TSSymbol lookahead_symbol
977 ) {
978  uint32_t initial_version_count = ts_stack_version_count(self->stack);
979 
980  bool can_shift_lookahead_symbol = false;
981  StackVersion version = starting_version;
982  for (unsigned i = 0; true; i++) {
983  uint32_t version_count = ts_stack_version_count(self->stack);
984  if (version >= version_count) break;
985 
986  bool merged = false;
987  for (StackVersion i = initial_version_count; i < version; i++) {
988  if (ts_stack_merge(self->stack, i, version)) {
989  merged = true;
990  break;
991  }
992  }
993  if (merged) continue;
994 
995  TSStateId state = ts_stack_state(self->stack, version);
996  bool has_shift_action = false;
997  array_clear(&self->reduce_actions);
998 
999  TSSymbol first_symbol, end_symbol;
1000  if (lookahead_symbol != 0) {
1001  first_symbol = lookahead_symbol;
1002  end_symbol = lookahead_symbol + 1;
1003  } else {
1004  first_symbol = 1;
1005  end_symbol = self->language->token_count;
1006  }
1007 
1008  for (TSSymbol symbol = first_symbol; symbol < end_symbol; symbol++) {
1009  TableEntry entry;
1010  ts_language_table_entry(self->language, state, symbol, &entry);
1011  for (uint32_t i = 0; i < entry.action_count; i++) {
1012  TSParseAction action = entry.actions[i];
1013  switch (action.type) {
1016  if (!action.shift.extra && !action.shift.repetition) has_shift_action = true;
1017  break;
1019  if (action.reduce.child_count > 0)
1020  ts_reduce_action_set_add(&self->reduce_actions, (ReduceAction) {
1021  .symbol = action.reduce.symbol,
1022  .count = action.reduce.child_count,
1023  .dynamic_precedence = action.reduce.dynamic_precedence,
1024  .production_id = action.reduce.production_id,
1025  });
1026  break;
1027  default:
1028  break;
1029  }
1030  }
1031  }
1032 
1033  StackVersion reduction_version = STACK_VERSION_NONE;
1034  for (uint32_t i = 0; i < self->reduce_actions.size; i++) {
1035  ReduceAction action = self->reduce_actions.contents[i];
1036 
1037  reduction_version = ts_parser__reduce(
1038  self, version, action.symbol, action.count,
1039  action.dynamic_precedence, action.production_id,
1040  true, false
1041  );
1042  }
1043 
1044  if (has_shift_action) {
1045  can_shift_lookahead_symbol = true;
1046  } else if (reduction_version != STACK_VERSION_NONE && i < MAX_VERSION_COUNT) {
1047  ts_stack_renumber_version(self->stack, reduction_version, version);
1048  continue;
1049  } else if (lookahead_symbol != 0) {
1050  ts_stack_remove_version(self->stack, version);
1051  }
1052 
1053  if (version == starting_version) {
1054  version = version_count;
1055  } else {
1056  version++;
1057  }
1058  }
1059 
1060  return can_shift_lookahead_symbol;
1061 }
1062 
1064  TSParser *self,
1066  unsigned depth,
1067  TSStateId goal_state
1068 ) {
1069  StackSliceArray pop = ts_stack_pop_count(self->stack, version, depth);
1070  StackVersion previous_version = STACK_VERSION_NONE;
1071 
1072  for (unsigned i = 0; i < pop.size; i++) {
1073  StackSlice slice = pop.contents[i];
1074 
1075  if (slice.version == previous_version) {
1076  ts_subtree_array_delete(&self->tree_pool, &slice.subtrees);
1077  array_erase(&pop, i--);
1078  continue;
1079  }
1080 
1081  if (ts_stack_state(self->stack, slice.version) != goal_state) {
1082  ts_stack_halt(self->stack, slice.version);
1083  ts_subtree_array_delete(&self->tree_pool, &slice.subtrees);
1084  array_erase(&pop, i--);
1085  continue;
1086  }
1087 
1088  SubtreeArray error_trees = ts_stack_pop_error(self->stack, slice.version);
1089  if (error_trees.size > 0) {
1090  assert(error_trees.size == 1);
1091  Subtree error_tree = error_trees.contents[0];
1092  uint32_t error_child_count = ts_subtree_child_count(error_tree);
1093  if (error_child_count > 0) {
1094  array_splice(&slice.subtrees, 0, 0, error_child_count, ts_subtree_children(error_tree));
1095  for (unsigned j = 0; j < error_child_count; j++) {
1096  ts_subtree_retain(slice.subtrees.contents[j]);
1097  }
1098  }
1099  ts_subtree_array_delete(&self->tree_pool, &error_trees);
1100  }
1101 
1102  ts_subtree_array_remove_trailing_extras(&slice.subtrees, &self->trailing_extras);
1103 
1104  if (slice.subtrees.size > 0) {
1105  Subtree error = ts_subtree_new_error_node(&slice.subtrees, true, self->language);
1106  ts_stack_push(self->stack, slice.version, error, false, goal_state);
1107  } else {
1108  array_delete(&slice.subtrees);
1109  }
1110 
1111  for (unsigned j = 0; j < self->trailing_extras.size; j++) {
1112  Subtree tree = self->trailing_extras.contents[j];
1113  ts_stack_push(self->stack, slice.version, tree, false, goal_state);
1114  }
1115 
1116  previous_version = slice.version;
1117  }
1118 
1119  return previous_version != STACK_VERSION_NONE;
1120 }
1121 
1123  TSParser *self,
1125  Subtree lookahead
1126 ) {
1127  bool did_recover = false;
1128  unsigned previous_version_count = ts_stack_version_count(self->stack);
1129  Length position = ts_stack_position(self->stack, version);
1130  StackSummary *summary = ts_stack_get_summary(self->stack, version);
1131  unsigned node_count_since_error = ts_stack_node_count_since_error(self->stack, version);
1132  unsigned current_error_cost = ts_stack_error_cost(self->stack, version);
1133 
1134  // When the parser is in the error state, there are two strategies for recovering with a
1135  // given lookahead token:
1136  // 1. Find a previous state on the stack in which that lookahead token would be valid. Then,
1137  // create a new stack version that is in that state again. This entails popping all of the
1138  // subtrees that have been pushed onto the stack since that previous state, and wrapping
1139  // them in an ERROR node.
1140  // 2. Wrap the lookahead token in an ERROR node, push that ERROR node onto the stack, and
1141  // move on to the next lookahead token, remaining in the error state.
1142  //
1143  // First, try the strategy 1. Upon entering the error state, the parser recorded a summary
1144  // of the previous parse states and their depths. Look at each state in the summary, to see
1145  // if the current lookahead token would be valid in that state.
1146  if (summary && !ts_subtree_is_error(lookahead)) {
1147  for (unsigned i = 0; i < summary->size; i++) {
1148  StackSummaryEntry entry = summary->contents[i];
1149 
1150  if (entry.state == ERROR_STATE) continue;
1151  if (entry.position.bytes == position.bytes) continue;
1152  unsigned depth = entry.depth;
1153  if (node_count_since_error > 0) depth++;
1154 
1155  // Do not recover in ways that create redundant stack versions.
1156  bool would_merge = false;
1157  for (unsigned j = 0; j < previous_version_count; j++) {
1158  if (
1159  ts_stack_state(self->stack, j) == entry.state &&
1160  ts_stack_position(self->stack, j).bytes == position.bytes
1161  ) {
1162  would_merge = true;
1163  break;
1164  }
1165  }
1166  if (would_merge) continue;
1167 
1168  // Do not recover if the result would clearly be worse than some existing stack version.
1169  unsigned new_cost =
1170  current_error_cost +
1172  (position.bytes - entry.position.bytes) * ERROR_COST_PER_SKIPPED_CHAR +
1173  (position.extent.row - entry.position.extent.row) * ERROR_COST_PER_SKIPPED_LINE;
1174  if (ts_parser__better_version_exists(self, version, false, new_cost)) break;
1175 
1176  // If the current lookahead token is valid in some previous state, recover to that state.
1177  // Then stop looking for further recoveries.
1178  if (ts_language_has_actions(self->language, entry.state, ts_subtree_symbol(lookahead))) {
1179  if (ts_parser__recover_to_state(self, version, depth, entry.state)) {
1180  did_recover = true;
1181  LOG("recover_to_previous state:%u, depth:%u", entry.state, depth);
1182  LOG_STACK();
1183  break;
1184  }
1185  }
1186  }
1187  }
1188 
1189  // In the process of attempting to recover, some stack versions may have been created
1190  // and subsequently halted. Remove those versions.
1191  for (unsigned i = previous_version_count; i < ts_stack_version_count(self->stack); i++) {
1192  if (!ts_stack_is_active(self->stack, i)) {
1193  ts_stack_remove_version(self->stack, i--);
1194  }
1195  }
1196 
1197  // If strategy 1 succeeded, a new stack version will have been created which is able to handle
1198  // the current lookahead token. Now, in addition, try strategy 2 described above: skip the
1199  // current lookahead token by wrapping it in an ERROR node.
1200 
1201  // Don't pursue this additional strategy if there are already too many stack versions.
1202  if (did_recover && ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) {
1203  ts_stack_halt(self->stack, version);
1204  ts_subtree_release(&self->tree_pool, lookahead);
1205  return;
1206  }
1207 
1208  // If the parser is still in the error state at the end of the file, just wrap everything
1209  // in an ERROR node and terminate.
1210  if (ts_subtree_is_eof(lookahead)) {
1211  LOG("recover_eof");
1212  SubtreeArray children = array_new();
1213  Subtree parent = ts_subtree_new_error_node(&children, false, self->language);
1214  ts_stack_push(self->stack, version, parent, false, 1);
1215  ts_parser__accept(self, version, lookahead);
1216  return;
1217  }
1218 
1219  // Do not recover if the result would clearly be worse than some existing stack version.
1220  unsigned new_cost =
1221  current_error_cost + ERROR_COST_PER_SKIPPED_TREE +
1224  if (ts_parser__better_version_exists(self, version, false, new_cost)) {
1225  ts_stack_halt(self->stack, version);
1226  ts_subtree_release(&self->tree_pool, lookahead);
1227  return;
1228  }
1229 
1230  // If the current lookahead token is an extra token, mark it as extra. This means it won't
1231  // be counted in error cost calculations.
1232  unsigned n;
1233  const TSParseAction *actions = ts_language_actions(self->language, 1, ts_subtree_symbol(lookahead), &n);
1234  if (n > 0 && actions[n - 1].type == TSParseActionTypeShift && actions[n - 1].shift.extra) {
1235  MutableSubtree mutable_lookahead = ts_subtree_make_mut(&self->tree_pool, lookahead);
1236  ts_subtree_set_extra(&mutable_lookahead, true);
1237  lookahead = ts_subtree_from_mut(mutable_lookahead);
1238  }
1239 
1240  // Wrap the lookahead token in an ERROR.
1241  LOG("skip_token symbol:%s", TREE_NAME(lookahead));
1242  SubtreeArray children = array_new();
1243  array_reserve(&children, 1);
1244  array_push(&children, lookahead);
1245  MutableSubtree error_repeat = ts_subtree_new_node(
1247  &children,
1248  0,
1249  self->language
1250  );
1251 
1252  // If other tokens have already been skipped, so there is already an ERROR at the top of the
1253  // stack, then pop that ERROR off the stack and wrap the two ERRORs together into one larger
1254  // ERROR.
1255  if (node_count_since_error > 0) {
1256  StackSliceArray pop = ts_stack_pop_count(self->stack, version, 1);
1257 
1258  // TODO: Figure out how to make this condition occur.
1259  // See https://github.com/atom/atom/issues/18450#issuecomment-439579778
1260  // If multiple stack versions have merged at this point, just pick one of the errors
1261  // arbitrarily and discard the rest.
1262  if (pop.size > 1) {
1263  for (unsigned i = 1; i < pop.size; i++) {
1264  ts_subtree_array_delete(&self->tree_pool, &pop.contents[i].subtrees);
1265  }
1266  while (ts_stack_version_count(self->stack) > pop.contents[0].version + 1) {
1267  ts_stack_remove_version(self->stack, pop.contents[0].version + 1);
1268  }
1269  }
1270 
1271  ts_stack_renumber_version(self->stack, pop.contents[0].version, version);
1272  array_push(&pop.contents[0].subtrees, ts_subtree_from_mut(error_repeat));
1273  error_repeat = ts_subtree_new_node(
1275  &pop.contents[0].subtrees,
1276  0,
1277  self->language
1278  );
1279  }
1280 
1281  // Push the new ERROR onto the stack.
1282  ts_stack_push(self->stack, version, ts_subtree_from_mut(error_repeat), false, ERROR_STATE);
1283  if (ts_subtree_has_external_tokens(lookahead)) {
1285  self->stack, version, ts_subtree_last_external_token(lookahead)
1286  );
1287  }
1288 }
1289 
1291  TSParser *self,
1293  Subtree lookahead
1294 ) {
1295  uint32_t previous_version_count = ts_stack_version_count(self->stack);
1296 
1297  // Perform any reductions that can happen in this state, regardless of the lookahead. After
1298  // skipping one or more invalid tokens, the parser might find a token that would have allowed
1299  // a reduction to take place.
1301  uint32_t version_count = ts_stack_version_count(self->stack);
1302  Length position = ts_stack_position(self->stack, version);
1303 
1304  // Push a discontinuity onto the stack. Merge all of the stack versions that
1305  // were created in the previous step.
1306  bool did_insert_missing_token = false;
1307  for (StackVersion v = version; v < version_count;) {
1308  if (!did_insert_missing_token) {
1309  TSStateId state = ts_stack_state(self->stack, v);
1310  for (
1311  TSSymbol missing_symbol = 1;
1312  missing_symbol < self->language->token_count;
1313  missing_symbol++
1314  ) {
1315  TSStateId state_after_missing_symbol = ts_language_next_state(
1316  self->language, state, missing_symbol
1317  );
1318  if (state_after_missing_symbol == 0 || state_after_missing_symbol == state) {
1319  continue;
1320  }
1321 
1323  self->language,
1324  state_after_missing_symbol,
1325  ts_subtree_leaf_symbol(lookahead)
1326  )) {
1327  // In case the parser is currently outside of any included range, the lexer will
1328  // snap to the beginning of the next included range. The missing token's padding
1329  // must be assigned to position it within the next included range.
1330  ts_lexer_reset(&self->lexer, position);
1331  ts_lexer_mark_end(&self->lexer);
1332  Length padding = length_sub(self->lexer.token_end_position, position);
1333  uint32_t lookahead_bytes = ts_subtree_total_bytes(lookahead) + ts_subtree_lookahead_bytes(lookahead);
1334 
1335  StackVersion version_with_missing_tree = ts_stack_copy_version(self->stack, v);
1336  Subtree missing_tree = ts_subtree_new_missing_leaf(
1337  &self->tree_pool, missing_symbol,
1338  padding, lookahead_bytes,
1339  self->language
1340  );
1341  ts_stack_push(
1342  self->stack, version_with_missing_tree,
1343  missing_tree, false,
1344  state_after_missing_symbol
1345  );
1346 
1348  self, version_with_missing_tree,
1349  ts_subtree_leaf_symbol(lookahead)
1350  )) {
1351  LOG(
1352  "recover_with_missing symbol:%s, state:%u",
1353  SYM_NAME(missing_symbol),
1354  ts_stack_state(self->stack, version_with_missing_tree)
1355  );
1356  did_insert_missing_token = true;
1357  break;
1358  }
1359  }
1360  }
1361  }
1362 
1363  ts_stack_push(self->stack, v, NULL_SUBTREE, false, ERROR_STATE);
1364  v = (v == version) ? previous_version_count : v + 1;
1365  }
1366 
1367  for (unsigned i = previous_version_count; i < version_count; i++) {
1368  bool did_merge = ts_stack_merge(self->stack, version, previous_version_count);
1369  assert(did_merge);
1370  }
1371 
1373 
1374  // Begin recovery with the current lookahead node, rather than waiting for the
1375  // next turn of the parse loop. This ensures that the tree accounts for the the
1376  // current lookahead token's "lookahead bytes" value, which describes how far
1377  // the lexer needed to look ahead beyond the content of the token in order to
1378  // recognize it.
1379  if (ts_subtree_child_count(lookahead) > 0) {
1380  ts_parser__breakdown_lookahead(self, &lookahead, ERROR_STATE, &self->reusable_node);
1381  }
1382  ts_parser__recover(self, version, lookahead);
1383 
1384  LOG_STACK();
1385 }
1386 
1388  TSParser *self,
1390  bool allow_node_reuse
1391 ) {
1392  TSStateId state = ts_stack_state(self->stack, version);
1393  uint32_t position = ts_stack_position(self->stack, version).bytes;
1394  Subtree last_external_token = ts_stack_last_external_token(self->stack, version);
1395 
1396  bool did_reuse = true;
1397  Subtree lookahead = NULL_SUBTREE;
1398  TableEntry table_entry = {.action_count = 0};
1399 
1400  // If possible, reuse a node from the previous syntax tree.
1401  if (allow_node_reuse) {
1402  lookahead = ts_parser__reuse_node(
1403  self, version, &state, position, last_external_token, &table_entry
1404  );
1405  }
1406 
1407  // If no node from the previous syntax tree could be reused, then try to
1408  // reuse the token previously returned by the lexer.
1409  if (!lookahead.ptr) {
1410  did_reuse = false;
1411  lookahead = ts_parser__get_cached_token(
1412  self, state, position, last_external_token, &table_entry
1413  );
1414  }
1415 
1416  bool needs_lex = !lookahead.ptr;
1417  for (;;) {
1418  // Otherwise, re-run the lexer.
1419  if (needs_lex) {
1420  needs_lex = false;
1421  lookahead = ts_parser__lex(self, version, state);
1422 
1423  if (lookahead.ptr) {
1424  ts_parser__set_cached_token(self, position, last_external_token, lookahead);
1425  ts_language_table_entry(self->language, state, ts_subtree_symbol(lookahead), &table_entry);
1426  }
1427 
1428  // When parsing a non-terminal extra, a null lookahead indicates the
1429  // end of the rule. The reduction is stored in the EOF table entry.
1430  // After the reduction, the lexer needs to be run again.
1431  else {
1432  ts_language_table_entry(self->language, state, ts_builtin_sym_end, &table_entry);
1433  }
1434  }
1435 
1436  // If a cancellation flag or a timeout was provided, then check every
1437  // time a fixed number of parse actions has been processed.
1438  if (++self->operation_count == OP_COUNT_PER_TIMEOUT_CHECK) {
1439  self->operation_count = 0;
1440  }
1441  if (
1442  self->operation_count == 0 &&
1443  ((self->cancellation_flag && atomic_load(self->cancellation_flag)) ||
1444  (!clock_is_null(self->end_clock) && clock_is_gt(clock_now(), self->end_clock)))
1445  ) {
1446  ts_subtree_release(&self->tree_pool, lookahead);
1447  return false;
1448  }
1449 
1450  // Process each parse action for the current lookahead token in
1451  // the current state. If there are multiple actions, then this is
1452  // an ambiguous state. REDUCE actions always create a new stack
1453  // version, whereas SHIFT actions update the existing stack version
1454  // and terminate this loop.
1455  StackVersion last_reduction_version = STACK_VERSION_NONE;
1456  for (uint32_t i = 0; i < table_entry.action_count; i++) {
1457  TSParseAction action = table_entry.actions[i];
1458 
1459  switch (action.type) {
1460  case TSParseActionTypeShift: {
1461  if (action.shift.repetition) break;
1462  TSStateId next_state;
1463  if (action.shift.extra) {
1464  next_state = state;
1465  LOG("shift_extra");
1466  } else {
1467  next_state = action.shift.state;
1468  LOG("shift state:%u", next_state);
1469  }
1470 
1471  if (ts_subtree_child_count(lookahead) > 0) {
1472  ts_parser__breakdown_lookahead(self, &lookahead, state, &self->reusable_node);
1473  next_state = ts_language_next_state(self->language, state, ts_subtree_symbol(lookahead));
1474  }
1475 
1476  ts_parser__shift(self, version, next_state, lookahead, action.shift.extra);
1477  if (did_reuse) reusable_node_advance(&self->reusable_node);
1478  return true;
1479  }
1480 
1481  case TSParseActionTypeReduce: {
1482  bool is_fragile = table_entry.action_count > 1;
1483  bool end_of_non_terminal_extra = lookahead.ptr == NULL;
1484  LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.reduce.symbol), action.reduce.child_count);
1485  StackVersion reduction_version = ts_parser__reduce(
1486  self, version, action.reduce.symbol, action.reduce.child_count,
1487  action.reduce.dynamic_precedence, action.reduce.production_id,
1488  is_fragile, end_of_non_terminal_extra
1489  );
1490  if (reduction_version != STACK_VERSION_NONE) {
1491  last_reduction_version = reduction_version;
1492  }
1493  break;
1494  }
1495 
1496  case TSParseActionTypeAccept: {
1497  LOG("accept");
1498  ts_parser__accept(self, version, lookahead);
1499  return true;
1500  }
1501 
1502  case TSParseActionTypeRecover: {
1503  if (ts_subtree_child_count(lookahead) > 0) {
1504  ts_parser__breakdown_lookahead(self, &lookahead, ERROR_STATE, &self->reusable_node);
1505  }
1506 
1507  ts_parser__recover(self, version, lookahead);
1508  if (did_reuse) reusable_node_advance(&self->reusable_node);
1509  return true;
1510  }
1511  }
1512  }
1513 
1514  // If a reduction was performed, then replace the current stack version
1515  // with one of the stack versions created by a reduction, and continue
1516  // processing this version of the stack with the same lookahead symbol.
1517  if (last_reduction_version != STACK_VERSION_NONE) {
1518  ts_stack_renumber_version(self->stack, last_reduction_version, version);
1519  LOG_STACK();
1520  state = ts_stack_state(self->stack, version);
1521 
1522  // At the end of a non-terminal extra rule, the lexer will return a
1523  // null subtree, because the parser needs to perform a fixed reduction
1524  // regardless of the lookahead node. After performing that reduction,
1525  // (and completing the non-terminal extra rule) run the lexer again based
1526  // on the current parse state.
1527  if (!lookahead.ptr) {
1528  needs_lex = true;
1529  } else {
1531  self->language,
1532  state,
1533  ts_subtree_leaf_symbol(lookahead),
1534  &table_entry
1535  );
1536  }
1537 
1538  continue;
1539  }
1540 
1541  // If there were no parse actions for the current lookahead token, then
1542  // it is not valid in this state. If the current lookahead token is a
1543  // keyword, then switch to treating it as the normal word token if that
1544  // token is valid in this state.
1545  if (
1546  ts_subtree_is_keyword(lookahead) &&
1547  ts_subtree_symbol(lookahead) != self->language->keyword_capture_token
1548  ) {
1549  ts_language_table_entry(self->language, state, self->language->keyword_capture_token, &table_entry);
1550  if (table_entry.action_count > 0) {
1551  LOG(
1552  "switch from_keyword:%s, to_word_token:%s",
1553  TREE_NAME(lookahead),
1554  SYM_NAME(self->language->keyword_capture_token)
1555  );
1556 
1557  MutableSubtree mutable_lookahead = ts_subtree_make_mut(&self->tree_pool, lookahead);
1558  ts_subtree_set_symbol(&mutable_lookahead, self->language->keyword_capture_token, self->language);
1559  lookahead = ts_subtree_from_mut(mutable_lookahead);
1560  continue;
1561  }
1562  }
1563 
1564  // If the current lookahead token is not valid and the parser is
1565  // already in the error state, restart the error recovery process.
1566  // TODO - can this be unified with the other `RECOVER` case above?
1567  if (state == ERROR_STATE) {
1568  ts_parser__recover(self, version, lookahead);
1569  return true;
1570  }
1571 
1572  // If the current lookahead token is not valid and the previous
1573  // subtree on the stack was reused from an old tree, it isn't actually
1574  // valid to reuse it. Remove it from the stack, and in its place,
1575  // push each of its children. Then try again to process the current
1576  // lookahead.
1578  state = ts_stack_state(self->stack, version);
1579  ts_subtree_release(&self->tree_pool, lookahead);
1580  needs_lex = true;
1581  continue;
1582  }
1583 
1584  // At this point, the current lookahead token is definitely not valid
1585  // for this parse stack version. Mark this version as paused and continue
1586  // processing any other stack versions that might exist. If some other
1587  // version advances successfully, then this version can simply be removed.
1588  // But if all versions end up paused, then error recovery is needed.
1589  LOG("detect_error");
1590  ts_stack_pause(self->stack, version, lookahead);
1591  return true;
1592  }
1593 }
1594 
1595 static unsigned ts_parser__condense_stack(TSParser *self) {
1596  bool made_changes = false;
1597  unsigned min_error_cost = UINT_MAX;
1598  for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) {
1599  // Prune any versions that have been marked for removal.
1600  if (ts_stack_is_halted(self->stack, i)) {
1601  ts_stack_remove_version(self->stack, i);
1602  i--;
1603  continue;
1604  }
1605 
1606  // Keep track of the minimum error cost of any stack version so
1607  // that it can be returned.
1608  ErrorStatus status_i = ts_parser__version_status(self, i);
1609  if (!status_i.is_in_error && status_i.cost < min_error_cost) {
1610  min_error_cost = status_i.cost;
1611  }
1612 
1613  // Examine each pair of stack versions, removing any versions that
1614  // are clearly worse than another version. Ensure that the versions
1615  // are ordered from most promising to least promising.
1616  for (StackVersion j = 0; j < i; j++) {
1617  ErrorStatus status_j = ts_parser__version_status(self, j);
1618 
1619  switch (ts_parser__compare_versions(self, status_j, status_i)) {
1621  made_changes = true;
1622  ts_stack_remove_version(self->stack, i);
1623  i--;
1624  j = i;
1625  break;
1626 
1628  case ErrorComparisonNone:
1629  if (ts_stack_merge(self->stack, j, i)) {
1630  made_changes = true;
1631  i--;
1632  j = i;
1633  }
1634  break;
1635 
1637  made_changes = true;
1638  if (ts_stack_merge(self->stack, j, i)) {
1639  i--;
1640  j = i;
1641  } else {
1642  ts_stack_swap_versions(self->stack, i, j);
1643  }
1644  break;
1645 
1647  made_changes = true;
1648  ts_stack_remove_version(self->stack, j);
1649  i--;
1650  j--;
1651  break;
1652  }
1653  }
1654  }
1655 
1656  // Enfore a hard upper bound on the number of stack versions by
1657  // discarding the least promising versions.
1658  while (ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) {
1660  made_changes = true;
1661  }
1662 
1663  // If the best-performing stack version is currently paused, or all
1664  // versions are paused, then resume the best paused version and begin
1665  // the error recovery process. Otherwise, remove the paused versions.
1666  if (ts_stack_version_count(self->stack) > 0) {
1667  bool has_unpaused_version = false;
1668  for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) {
1669  if (ts_stack_is_paused(self->stack, i)) {
1670  if (!has_unpaused_version && self->accept_count < MAX_VERSION_COUNT) {
1671  LOG("resume version:%u", i);
1672  min_error_cost = ts_stack_error_cost(self->stack, i);
1673  Subtree lookahead = ts_stack_resume(self->stack, i);
1674  ts_parser__handle_error(self, i, lookahead);
1675  has_unpaused_version = true;
1676  } else {
1677  ts_stack_remove_version(self->stack, i);
1678  i--;
1679  n--;
1680  }
1681  } else {
1682  has_unpaused_version = true;
1683  }
1684  }
1685  }
1686 
1687  if (made_changes) {
1688  LOG("condense");
1689  LOG_STACK();
1690  }
1691 
1692  return min_error_cost;
1693 }
1694 
1696  return (
1697  ts_stack_state(self->stack, 0) != 1 ||
1698  ts_stack_node_count_since_error(self->stack, 0) != 0
1699  );
1700 }
1701 
1702 // Parser - Public
1703 
1705  TSParser *self = ts_calloc(1, sizeof(TSParser));
1706  ts_lexer_init(&self->lexer);
1707  array_init(&self->reduce_actions);
1708  array_reserve(&self->reduce_actions, 4);
1709  self->tree_pool = ts_subtree_pool_new(32);
1710  self->stack = ts_stack_new(&self->tree_pool);
1711  self->finished_tree = NULL_SUBTREE;
1712  self->reusable_node = reusable_node_new();
1713  self->dot_graph_file = NULL;
1714  self->cancellation_flag = NULL;
1715  self->timeout_duration = 0;
1716  self->end_clock = clock_null();
1717  self->operation_count = 0;
1718  self->old_tree = NULL_SUBTREE;
1719  self->included_range_differences = (TSRangeArray) array_new();
1720  self->included_range_difference_index = 0;
1722  return self;
1723 }
1724 
1726  if (!self) return;
1727 
1729  ts_stack_delete(self->stack);
1730  if (self->reduce_actions.contents) {
1731  array_delete(&self->reduce_actions);
1732  }
1733  if (self->included_range_differences.contents) {
1734  array_delete(&self->included_range_differences);
1735  }
1736  if (self->old_tree.ptr) {
1737  ts_subtree_release(&self->tree_pool, self->old_tree);
1738  self->old_tree = NULL_SUBTREE;
1739  }
1740  ts_lexer_delete(&self->lexer);
1742  ts_subtree_pool_delete(&self->tree_pool);
1743  reusable_node_delete(&self->reusable_node);
1744  array_delete(&self->trailing_extras);
1745  array_delete(&self->trailing_extras2);
1746  array_delete(&self->scratch_trees);
1747  ts_free(self);
1748 }
1749 
1751  return self->language;
1752 }
1753 
1754 bool ts_parser_set_language(TSParser *self, const TSLanguage *language) {
1755  if (language) {
1756  if (language->version > TREE_SITTER_LANGUAGE_VERSION) return false;
1757  if (language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION) return false;
1758  }
1759 
1760  if (self->external_scanner_payload && self->language->external_scanner.destroy) {
1761  self->language->external_scanner.destroy(self->external_scanner_payload);
1762  }
1763 
1764  if (language && language->external_scanner.create) {
1765  self->external_scanner_payload = language->external_scanner.create();
1766  } else {
1767  self->external_scanner_payload = NULL;
1768  }
1769 
1770  self->language = language;
1771  ts_parser_reset(self);
1772  return true;
1773 }
1774 
1776  return self->lexer.logger;
1777 }
1778 
1780  self->lexer.logger = logger;
1781 }
1782 
1784  if (self->dot_graph_file) {
1785  fclose(self->dot_graph_file);
1786  }
1787 
1788  if (fd >= 0) {
1789  self->dot_graph_file = fdopen(fd, "a");
1790  } else {
1791  self->dot_graph_file = NULL;
1792  }
1793 }
1794 
1795 const size_t *ts_parser_cancellation_flag(const TSParser *self) {
1796  return (const size_t *)self->cancellation_flag;
1797 }
1798 
1799 void ts_parser_set_cancellation_flag(TSParser *self, const size_t *flag) {
1800  self->cancellation_flag = (const volatile size_t *)flag;
1801 }
1802 
1804  return duration_to_micros(self->timeout_duration);
1805 }
1806 
1807 void ts_parser_set_timeout_micros(TSParser *self, uint64_t timeout_micros) {
1808  self->timeout_duration = duration_from_micros(timeout_micros);
1809 }
1810 
1812  TSParser *self,
1813  const TSRange *ranges,
1814  uint32_t count
1815 ) {
1816  return ts_lexer_set_included_ranges(&self->lexer, ranges, count);
1817 }
1818 
1820  return ts_lexer_included_ranges(&self->lexer, count);
1821 }
1822 
1824  if (self->language && self->language->external_scanner.deserialize) {
1825  self->language->external_scanner.deserialize(self->external_scanner_payload, NULL, 0);
1826  }
1827 
1828  if (self->old_tree.ptr) {
1829  ts_subtree_release(&self->tree_pool, self->old_tree);
1830  self->old_tree = NULL_SUBTREE;
1831  }
1832 
1833  reusable_node_clear(&self->reusable_node);
1834  ts_lexer_reset(&self->lexer, length_zero());
1835  ts_stack_clear(self->stack);
1837  if (self->finished_tree.ptr) {
1838  ts_subtree_release(&self->tree_pool, self->finished_tree);
1839  self->finished_tree = NULL_SUBTREE;
1840  }
1841  self->accept_count = 0;
1842 }
1843 
1845  TSParser *self,
1846  const TSTree *old_tree,
1847  TSInput input
1848 ) {
1849  if (!self->language || !input.read) return NULL;
1850 
1851  ts_lexer_set_input(&self->lexer, input);
1852 
1853  array_clear(&self->included_range_differences);
1854  self->included_range_difference_index = 0;
1855 
1856  if (ts_parser_has_outstanding_parse(self)) {
1857  LOG("resume_parsing");
1858  } else if (old_tree) {
1859  ts_subtree_retain(old_tree->root);
1860  self->old_tree = old_tree->root;
1862  old_tree->included_ranges, old_tree->included_range_count,
1863  self->lexer.included_ranges, self->lexer.included_range_count,
1864  &self->included_range_differences
1865  );
1866  reusable_node_reset(&self->reusable_node, old_tree->root);
1867  LOG("parse_after_edit");
1868  LOG_TREE(self->old_tree);
1869  for (unsigned i = 0; i < self->included_range_differences.size; i++) {
1870  TSRange *range = &self->included_range_differences.contents[i];
1871  LOG("different_included_range %u - %u", range->start_byte, range->end_byte);
1872  }
1873  } else {
1874  reusable_node_clear(&self->reusable_node);
1875  LOG("new_parse");
1876  }
1877 
1878  self->operation_count = 0;
1879  if (self->timeout_duration) {
1880  self->end_clock = clock_after(clock_now(), self->timeout_duration);
1881  } else {
1882  self->end_clock = clock_null();
1883  }
1884 
1885  uint32_t position = 0, last_position = 0, version_count = 0;
1886  do {
1887  for (
1888  StackVersion version = 0;
1889  version_count = ts_stack_version_count(self->stack),
1890  version < version_count;
1891  version++
1892  ) {
1893  bool allow_node_reuse = version_count == 1;
1894  while (ts_stack_is_active(self->stack, version)) {
1895  LOG(
1896  "process version:%d, version_count:%u, state:%d, row:%u, col:%u",
1897  version,
1898  ts_stack_version_count(self->stack),
1899  ts_stack_state(self->stack, version),
1900  ts_stack_position(self->stack, version).extent.row,
1901  ts_stack_position(self->stack, version).extent.column
1902  );
1903 
1904  if (!ts_parser__advance(self, version, allow_node_reuse)) return NULL;
1905  LOG_STACK();
1906 
1907  position = ts_stack_position(self->stack, version).bytes;
1908  if (position > last_position || (version > 0 && position == last_position)) {
1909  last_position = position;
1910  break;
1911  }
1912  }
1913  }
1914 
1915  unsigned min_error_cost = ts_parser__condense_stack(self);
1916  if (self->finished_tree.ptr && ts_subtree_error_cost(self->finished_tree) < min_error_cost) {
1917  break;
1918  }
1919 
1920  while (self->included_range_difference_index < self->included_range_differences.size) {
1921  TSRange *range = &self->included_range_differences.contents[self->included_range_difference_index];
1922  if (range->end_byte <= position) {
1923  self->included_range_difference_index++;
1924  } else {
1925  break;
1926  }
1927  }
1928  } while (version_count != 0);
1929 
1930  ts_subtree_balance(self->finished_tree, &self->tree_pool, self->language);
1931  LOG("done");
1932  LOG_TREE(self->finished_tree);
1933 
1934  TSTree *result = ts_tree_new(
1935  self->finished_tree,
1936  self->language,
1937  self->lexer.included_ranges,
1938  self->lexer.included_range_count
1939  );
1940  self->finished_tree = NULL_SUBTREE;
1941  ts_parser_reset(self);
1942  return result;
1943 }
1944 
1946  TSParser *self,
1947  const TSTree *old_tree,
1948  const char *string,
1950 ) {
1951  return ts_parser_parse_string_encoding(self, old_tree, string, length, TSInputEncodingUTF8);
1952 }
1953 
1955  TSParser *self,
1956  const TSTree *old_tree,
1957  const char *string,
1958  uint32_t length,
1960 ) {
1961  TSStringInput input = {string, length};
1962  return ts_parser_parse(self, old_tree, (TSInput) {
1963  &input,
1965  encoding,
1966  });
1967 }
1968 
1969 #undef LOG
static char * version
Definition: acr.h:4
#define ts_free
Definition: alloc.h:30
#define ts_calloc
Definition: alloc.h:24
lzma_index ** i
Definition: index.h:629
#define TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
Definition: api.h:30
@ TSLogTypeParse
Definition: api.h:74
#define TREE_SITTER_LANGUAGE_VERSION
Definition: api.h:24
TSInputEncoding
Definition: api.h:44
@ TSInputEncodingUTF8
Definition: api.h:45
static RZ_NULLABLE RzILOpBitVector * shift(RzILOpBitVector *val, RZ_NULLABLE RzILOpBool **carry_out, arm_shifter type, RZ_OWN RzILOpBitVector *dist)
Definition: arm_il32.c:190
#define array_new()
Definition: array.h:25
#define array_init(self)
Definition: array.h:22
#define array_front(self)
Definition: array.h:31
#define array_erase(self, index)
Definition: array.h:79
#define array_assign(self, other)
Definition: array.h:84
#define array_push(self, element)
Definition: array.h:43
#define array_reserve(self, new_capacity)
Definition: array.h:37
#define array_swap(self, other)
Definition: array.h:87
#define array_delete(self)
Definition: array.h:41
#define array_clear(self)
Definition: array.h:35
#define array_splice(self, index, old_count, new_count, new_contents)
Definition: array.h:68
static ut8 bytes[32]
Definition: asm_arc.c:23
uint64_t TSClock
Definition: clock.h:109
static TSClock clock_null(void)
Definition: clock.h:119
uint64_t TSDuration
Definition: clock.h:6
static TSDuration duration_from_micros(uint64_t micros)
Definition: clock.h:111
static uint64_t duration_to_micros(TSDuration self)
Definition: clock.h:115
static bool clock_is_gt(TSClock self, TSClock other)
Definition: clock.h:135
static TSClock clock_after(TSClock base, TSDuration duration)
Definition: clock.h:127
static TSClock clock_now(void)
Definition: clock.h:123
static bool clock_is_null(TSClock self)
Definition: clock.h:131
#define NULL
Definition: cris-opc.c:27
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void count
Definition: sflib.h:98
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void static offset struct stat static buf void long static basep static whence static length const void static len static semflg const void static shmflg const struct timespec struct timespec static rem const char static group const void length
Definition: sflib.h:133
const char * k
Definition: dsignal.c:11
const char * v
Definition: dsignal.c:12
int root
Definition: enough.c:226
#define ERROR_COST_PER_SKIPPED_TREE
Definition: error_costs.h:7
#define ERROR_COST_PER_SKIPPED_LINE
Definition: error_costs.h:8
#define ERROR_STATE
Definition: error_costs.h:4
#define ERROR_COST_PER_SKIPPED_CHAR
Definition: error_costs.h:9
void ts_range_array_get_changed_ranges(const TSRange *old_ranges, unsigned old_range_count, const TSRange *new_ranges, unsigned new_range_count, TSRangeArray *differences)
bool ts_range_array_intersects(const TSRangeArray *self, unsigned start_index, uint32_t start_byte, uint32_t end_byte)
voidpf void uLong size
Definition: ioapi.h:138
void ts_language_table_entry(const TSLanguage *self, TSStateId state, TSSymbol symbol, TableEntry *result)
Definition: language.c:18
static const TSParseAction * ts_language_actions(const TSLanguage *self, TSStateId state, TSSymbol symbol, uint32_t *count)
Definition: language.h:45
static bool ts_language_has_reduce_action(const TSLanguage *self, TSStateId state, TSSymbol symbol)
Definition: language.h:57
static TSStateId ts_language_next_state(const TSLanguage *self, TSStateId state, TSSymbol symbol)
Definition: language.h:181
static bool ts_language_has_actions(const TSLanguage *self, TSStateId state, TSSymbol symbol)
Definition: language.h:96
#define ts_builtin_sym_error_repeat
Definition: language.h:11
static const bool * ts_language_enabled_external_tokens(const TSLanguage *self, unsigned external_scanner_state)
Definition: language.h:216
static Length length_zero(void)
Definition: length.h:39
static Length length_sub(Length len1, Length len2)
Definition: length.h:32
void ts_lexer_reset(Lexer *self, Length position)
Definition: lexer.c:316
void ts_lexer_finish(Lexer *self, uint32_t *lookahead_end_byte)
Definition: lexer.c:337
void ts_lexer_mark_end(Lexer *self)
Definition: lexer.c:363
void ts_lexer_set_input(Lexer *self, TSInput input)
Definition: lexer.c:308
void ts_lexer_delete(Lexer *self)
Definition: lexer.c:304
void ts_lexer_start(Lexer *self)
Definition: lexer.c:322
void ts_lexer_init(Lexer *self)
Definition: lexer.c:275
TSRange * ts_lexer_included_ranges(const Lexer *self, uint32_t *count)
Definition: lexer.c:395
bool ts_lexer_set_included_ranges(Lexer *self, const TSRange *ranges, uint32_t count)
Definition: lexer.c:367
static const char struct stat static buf struct stat static buf static vhangup int status
Definition: sflib.h:145
assert(limit<=UINT32_MAX/2)
int n
Definition: mipsasm.c:19
int type
Definition: mipsasm.c:17
string FILE
Definition: benchmark.py:21
static void ts_reduce_action_set_add(ReduceActionSet *self, ReduceAction new_action)
Definition: reduce_action.h:20
static void reusable_node_advance(ReusableNode *self)
Definition: reusable_node.h:39
static uint32_t reusable_node_byte_offset(ReusableNode *self)
Definition: reusable_node.h:29
static bool reusable_node_descend(ReusableNode *self)
Definition: reusable_node.h:62
static Subtree reusable_node_tree(ReusableNode *self)
Definition: reusable_node.h:23
static void reusable_node_advance_past_leaf(ReusableNode *self)
Definition: reusable_node.h:76
static void reusable_node_delete(ReusableNode *self)
Definition: reusable_node.h:35
static void reusable_node_reset(ReusableNode *self, Subtree tree)
Definition: reusable_node.h:81
static void reusable_node_clear(ReusableNode *self)
Definition: reusable_node.h:18
static ReusableNode reusable_node_new(void)
Definition: reusable_node.h:14
uint16_t TSStateId
Definition: parser.h:16
#define ts_builtin_sym_end
Definition: parser.h:13
@ TSParseActionTypeAccept
Definition: parser.h:56
@ TSParseActionTypeShift
Definition: parser.h:54
@ TSParseActionTypeRecover
Definition: parser.h:57
@ TSParseActionTypeReduce
Definition: parser.h:55
uint16_t TSSymbol
Definition: parser.h:19
#define ts_builtin_sym_error
Definition: parser.h:12
unsigned short uint16_t
Definition: sftypes.h:30
int int32_t
Definition: sftypes.h:33
unsigned int uint32_t
Definition: sftypes.h:29
unsigned long uint64_t
Definition: sftypes.h:28
#define b(i)
Definition: sha256.c:42
#define c(i)
Definition: sha256.c:43
#define a(i)
Definition: sha256.c:41
#define STACK_VERSION_NONE
Definition: stack.h:16
unsigned StackVersion
Definition: stack.h:15
#define UINT32_MAX
unsigned cost
Definition: parser.c:111
unsigned node_count
Definition: parser.c:112
bool is_in_error
Definition: parser.c:114
int dynamic_precedence
Definition: parser.c:113
uint32_t length
Definition: subtree.h:36
Definition: length.h:9
uint32_t bytes
Definition: length.h:10
TSPoint extent
Definition: length.h:11
Definition: lexer.h:13
SubtreeArray subtrees
Definition: stack.h:19
StackVersion version
Definition: stack.h:20
Definition: stack.c:63
int32_t dynamic_precedence
Definition: subtree.h:139
bool fragile_right
Definition: subtree.h:125
bool fragile_left
Definition: subtree.h:124
TSStateId parse_state
Definition: subtree.h:119
ExternalScannerState external_scanner_state
Definition: subtree.h:148
uint16_t production_id
Definition: subtree.h:140
Definition: api.h:67
void *(* create)(void)
Definition: parser.h:120
struct TSLanguage::@436 external_scanner
uint32_t version
Definition: parser.h:91
uint16_t external_lex_state
Definition: parser.h:79
uint16_t lex_state
Definition: parser.h:78
Definition: api.h:78
unsigned included_range_difference_index
Definition: parser.c:107
const TSLanguage * language
Definition: parser.c:90
Subtree old_tree
Definition: parser.c:105
SubtreeArray trailing_extras
Definition: parser.c:93
unsigned operation_count
Definition: parser.c:103
SubtreePool tree_pool
Definition: parser.c:89
const volatile size_t * cancellation_flag
Definition: parser.c:104
Subtree finished_tree
Definition: parser.c:92
FILE * dot_graph_file
Definition: parser.c:99
void * external_scanner_payload
Definition: parser.c:98
SubtreeArray scratch_trees
Definition: parser.c:95
TSDuration timeout_duration
Definition: parser.c:101
TSRangeArray included_range_differences
Definition: parser.c:106
ReusableNode reusable_node
Definition: parser.c:97
SubtreeArray trailing_extras2
Definition: parser.c:94
Stack * stack
Definition: parser.c:88
TokenCache token_cache
Definition: parser.c:96
ReduceActionSet reduce_actions
Definition: parser.c:91
unsigned accept_count
Definition: parser.c:102
Lexer lexer
Definition: parser.c:87
TSClock end_clock
Definition: parser.c:100
Definition: api.h:55
uint32_t row
Definition: api.h:56
uint32_t column
Definition: api.h:57
Definition: api.h:60
uint32_t length
Definition: parser.c:127
const char * string
Definition: parser.c:126
Definition: tree.h:15
Subtree root
Definition: tree.h:16
TSRange * included_ranges
Definition: tree.h:18
unsigned included_range_count
Definition: tree.h:19
bool is_reusable
Definition: language.h:16
uint32_t action_count
Definition: language.h:15
const TSParseAction * actions
Definition: language.h:14
Subtree last_external_token
Definition: parser.c:82
Subtree token
Definition: parser.c:81
uint32_t byte_index
Definition: parser.c:83
Definition: zipcmp.c:77
Definition: dis.h:43
#define UINT_MAX
Definition: md5.h:55
bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version2)
Definition: stack.c:693
void ts_stack_renumber_version(Stack *self, StackVersion v1, StackVersion v2)
Definition: stack.c:648
int ts_stack_dynamic_precedence(Stack *self, StackVersion version)
Definition: stack.c:615
void ts_stack_swap_versions(Stack *self, StackVersion v1, StackVersion v2)
Definition: stack.c:663
void ts_stack_record_summary(Stack *self, StackVersion version, unsigned max_depth)
Definition: stack.c:596
StackSummary * ts_stack_get_summary(Stack *self, StackVersion version)
Definition: stack.c:611
void ts_stack_halt(Stack *self, StackVersion version)
Definition: stack.c:705
void ts_stack_clear(Stack *self)
Definition: stack.c:737
bool ts_stack_has_advanced_since_error(const Stack *self, StackVersion version)
Definition: stack.c:619
void ts_stack_pause(Stack *self, StackVersion version, Subtree lookahead)
Definition: stack.c:709
Subtree ts_stack_resume(Stack *self, StackVersion version)
Definition: stack.c:728
uint32_t ts_stack_version_count(const Stack *self)
Definition: stack.c:443
TSStateId ts_stack_state(const Stack *self, StackVersion version)
Definition: stack.c:447
bool ts_stack_is_active(const Stack *self, StackVersion version)
Definition: stack.c:716
Length ts_stack_position(const Stack *self, StackVersion version)
Definition: stack.c:451
StackSliceArray ts_stack_pop_count(Stack *self, StackVersion version, uint32_t count)
Definition: stack.c:507
void ts_stack_push(Stack *self, StackVersion version, Subtree subtree, bool pending, TSStateId state)
Definition: stack.c:485
void ts_stack_remove_version(Stack *self, StackVersion version)
Definition: stack.c:643
unsigned ts_stack_node_count_since_error(const Stack *self, StackVersion version)
Definition: stack.c:477
bool ts_stack_merge(Stack *self, StackVersion version1, StackVersion version2)
Definition: stack.c:679
StackVersion ts_stack_copy_version(Stack *self, StackVersion version)
Definition: stack.c:669
StackSliceArray ts_stack_pop_pending(Stack *self, StackVersion version)
Definition: stack.c:524
StackSliceArray ts_stack_pop_all(Stack *self, StackVersion version)
Definition: stack.c:569
Stack * ts_stack_new(SubtreePool *subtree_pool)
Definition: stack.c:405
void ts_stack_delete(Stack *self)
Definition: stack.c:424
bool ts_stack_is_halted(const Stack *self, StackVersion version)
Definition: stack.c:720
unsigned ts_stack_error_cost(const Stack *self, StackVersion version)
Definition: stack.c:466
SubtreeArray ts_stack_pop_error(Stack *self, StackVersion version)
Definition: stack.c:547
Subtree ts_stack_last_external_token(const Stack *self, StackVersion version)
Definition: stack.c:455
void ts_stack_set_last_external_token(Stack *self, StackVersion version, Subtree token)
Definition: stack.c:459
bool ts_stack_is_paused(const Stack *self, StackVersion version)
Definition: stack.c:724
TSTree * ts_tree_new(Subtree root, const TSLanguage *language, const TSRange *included_ranges, unsigned included_range_count)
Definition: tree.c:8
bool ts_subtree_external_scanner_state_eq(Subtree self, Subtree other)
Definition: subtree.c:1027
MutableSubtree ts_subtree_new_node(TSSymbol symbol, SubtreeArray *children, unsigned production_id, const TSLanguage *language)
Definition: subtree.c:500
void ts_subtree_release(SubtreePool *pool, Subtree self)
Definition: subtree.c:584
Subtree ts_subtree_new_leaf(SubtreePool *pool, TSSymbol symbol, Length padding, Length size, uint32_t lookahead_bytes, TSStateId parse_state, bool has_external_tokens, bool depends_on_column, bool is_keyword, const TSLanguage *language)
Definition: subtree.c:167
Subtree ts_subtree_new_error_node(SubtreeArray *children, bool extra, const TSLanguage *language)
Definition: subtree.c:542
MutableSubtree ts_subtree_make_mut(SubtreePool *pool, Subtree self)
Definition: subtree.c:284
void ts_subtree_retain(Subtree self)
Definition: subtree.c:577
void ts_subtree_array_delete(SubtreePool *pool, SubtreeArray *self)
Definition: subtree.c:90
void ts_subtree_array_remove_trailing_extras(SubtreeArray *self, SubtreeArray *destination)
Definition: subtree.c:95
const char * ts_external_scanner_state_data(const ExternalScannerState *self)
Definition: subtree.c:53
void ts_subtree_set_symbol(MutableSubtree *self, TSSymbol symbol, const TSLanguage *language)
Definition: subtree.c:226
void ts_subtree_balance(Subtree self, SubtreePool *pool, const TSLanguage *language)
Definition: subtree.c:338
Subtree ts_subtree_last_external_token(Subtree tree)
Definition: subtree.c:802
Subtree ts_subtree_new_error(SubtreePool *pool, int32_t lookahead_char, Length padding, Length size, uint32_t bytes_scanned, TSStateId parse_state, const TSLanguage *language)
Definition: subtree.c:244
SubtreePool ts_subtree_pool_new(uint32_t capacity)
Definition: subtree.c:123
void ts_external_scanner_state_init(ExternalScannerState *self, const char *data, unsigned length)
Definition: subtree.c:28
int ts_subtree_compare(Subtree left, Subtree right)
Definition: subtree.c:615
Subtree ts_subtree_new_missing_leaf(SubtreePool *pool, TSSymbol symbol, Length padding, uint32_t lookahead_bytes, const TSLanguage *language)
Definition: subtree.c:558
void ts_subtree_array_clear(SubtreePool *pool, SubtreeArray *self)
Definition: subtree.c:83
void ts_subtree_pool_delete(SubtreePool *self)
Definition: subtree.c:129
#define ts_subtree_children(self)
Definition: subtree.h:233
static Subtree ts_subtree_from_mut(MutableSubtree self)
Definition: subtree.h:350
static bool ts_subtree_has_external_tokens(Subtree self)
Definition: subtree.h:330
static Length ts_subtree_total_size(Subtree self)
Definition: subtree.h:274
static uint32_t ts_subtree_total_bytes(Subtree self)
Definition: subtree.h:278
static uint32_t ts_subtree_error_cost(Subtree self)
Definition: subtree.h:302
#define TS_TREE_STATE_NONE
Definition: subtree.h:18
static int32_t ts_subtree_dynamic_precedence(Subtree self)
Definition: subtree.h:310
static bool ts_subtree_extra(Subtree self)
Definition: subtree.h:216
static bool ts_subtree_is_keyword(Subtree self)
Definition: subtree.h:219
static bool ts_subtree_is_error(Subtree self)
Definition: subtree.h:342
static bool ts_subtree_has_changes(Subtree self)
Definition: subtree.h:217
static bool ts_subtree_missing(Subtree self)
Definition: subtree.h:218
static Length ts_subtree_size(Subtree self)
Definition: subtree.h:265
static TSSymbol ts_subtree_leaf_symbol(Subtree self)
Definition: subtree.h:244
static bool ts_subtree_is_fragile(Subtree self)
Definition: subtree.h:338
static TSStateId ts_subtree_parse_state(Subtree self)
Definition: subtree.h:220
static uint32_t ts_subtree_lookahead_bytes(Subtree self)
Definition: subtree.h:221
static uint32_t ts_subtree_child_count(Subtree self)
Definition: subtree.h:282
static void ts_subtree_set_extra(MutableSubtree *self, bool is_extra)
Definition: subtree.h:236
#define NULL_SUBTREE
Definition: subtree.h:19
static bool ts_subtree_is_eof(Subtree self)
Definition: subtree.h:346
static TSStateId ts_subtree_leaf_parse_state(Subtree self)
Definition: subtree.h:250
static TSSymbol ts_subtree_symbol(Subtree self)
Definition: subtree.h:213
static size_t atomic_load(const volatile size_t *p)
Definition: atomic.h:40
static const unsigned MAX_VERSION_COUNT_OVERFLOW
Definition: parser.c:75
TSLogger ts_parser_logger(const TSParser *self)
Definition: parser.c:1775
static const unsigned MAX_COST_DIFFERENCE
Definition: parser.c:77
static const unsigned MAX_SUMMARY_DEPTH
Definition: parser.c:76
TSTree * ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input)
Definition: parser.c:1844
static Subtree ts_parser__lex(TSParser *self, StackVersion version, TSStateId parse_state)
Definition: parser.c:385
static Subtree ts_parser__get_cached_token(TSParser *self, TSStateId state, size_t position, Subtree last_external_token, TableEntry *table_entry)
Definition: parser.c:577
static void ts_parser__accept(TSParser *self, StackVersion version, Subtree lookahead)
Definition: parser.c:920
ErrorComparison
Definition: parser.c:117
@ ErrorComparisonPreferRight
Definition: parser.c:121
@ ErrorComparisonNone
Definition: parser.c:120
@ ErrorComparisonTakeRight
Definition: parser.c:122
@ ErrorComparisonTakeLeft
Definition: parser.c:118
@ ErrorComparisonPreferLeft
Definition: parser.c:119
static const unsigned OP_COUNT_PER_TIMEOUT_CHECK
Definition: parser.c:78
#define LOG(...)
Definition: parser.c:22
static unsigned ts_parser__condense_stack(TSParser *self)
Definition: parser.c:1595
const TSRange * ts_parser_included_ranges(const TSParser *self, uint32_t *count)
Definition: parser.c:1819
void ts_parser_reset(TSParser *self)
Definition: parser.c:1823
static bool ts_parser__breakdown_top_of_stack(TSParser *self, StackVersion version)
Definition: parser.c:170
static const unsigned MAX_VERSION_COUNT
Definition: parser.c:74
static void ts_parser__handle_error(TSParser *self, StackVersion version, Subtree lookahead)
Definition: parser.c:1290
static bool ts_parser__can_reuse_first_leaf(TSParser *self, TSStateId state, Subtree tree, TableEntry *table_entry)
Definition: parser.c:350
static void ts_parser__recover(TSParser *self, StackVersion version, Subtree lookahead)
Definition: parser.c:1122
uint64_t ts_parser_timeout_micros(const TSParser *self)
Definition: parser.c:1803
static bool ts_parser__select_tree(TSParser *self, Subtree left, Subtree right)
Definition: parser.c:710
static bool ts_parser__advance(TSParser *self, StackVersion version, bool allow_node_reuse)
Definition: parser.c:1387
const size_t * ts_parser_cancellation_flag(const TSParser *self)
Definition: parser.c:1795
void ts_parser_set_timeout_micros(TSParser *self, uint64_t timeout_micros)
Definition: parser.c:1807
static bool ts_parser__recover_to_state(TSParser *self, StackVersion version, unsigned depth, TSStateId goal_state)
Definition: parser.c:1063
static bool ts_parser__do_all_potential_reductions(TSParser *self, StackVersion starting_version, TSSymbol lookahead_symbol)
Definition: parser.c:973
static void ts_parser__shift(TSParser *self, StackVersion version, TSStateId state, Subtree lookahead, bool extra)
Definition: parser.c:782
#define LOG_STACK()
Definition: parser.c:58
static bool ts_parser__has_included_range_difference(const TSParser *self, uint32_t start_position, uint32_t end_position)
Definition: parser.c:614
static ErrorComparison ts_parser__compare_versions(TSParser *self, ErrorStatus a, ErrorStatus b)
Definition: parser.c:240
static void ts_parser__breakdown_lookahead(TSParser *self, Subtree *lookahead, TSStateId state, ReusableNode *reusable_node)
Definition: parser.c:218
static Subtree ts_parser__reuse_node(TSParser *self, StackVersion version, TSStateId *state, uint32_t position, Subtree last_external_token, TableEntry *table_entry)
Definition: parser.c:627
static void ts_parser__set_cached_token(TSParser *self, size_t byte_index, Subtree last_external_token, Subtree token)
Definition: parser.c:598
void ts_parser_set_logger(TSParser *self, TSLogger logger)
Definition: parser.c:1779
#define LOG_LOOKAHEAD(symbol_name, size)
Definition: parser.c:28
static const char * ts_string_input_read(void *_self, uint32_t byte, TSPoint pt, uint32_t *length)
Definition: parser.c:132
bool ts_parser_set_language(TSParser *self, const TSLanguage *language)
Definition: parser.c:1754
#define TREE_NAME(tree)
Definition: parser.c:72
TSTree * ts_parser_parse_string_encoding(TSParser *self, const TSTree *old_tree, const char *string, uint32_t length, TSInputEncoding encoding)
Definition: parser.c:1954
#define SYM_NAME(symbol)
Definition: parser.c:70
void ts_parser_set_cancellation_flag(TSParser *self, const size_t *flag)
Definition: parser.c:1799
static ErrorStatus ts_parser__version_status(TSParser *self, StackVersion version)
Definition: parser.c:283
bool ts_parser_set_included_ranges(TSParser *self, const TSRange *ranges, uint32_t count)
Definition: parser.c:1811
TSParser * ts_parser_new(void)
Definition: parser.c:1704
static void ts_parser__log(TSParser *self)
Definition: parser.c:151
TSTree * ts_parser_parse_string(TSParser *self, const TSTree *old_tree, const char *string, uint32_t length)
Definition: parser.c:1945
static bool ts_parser__better_version_exists(TSParser *self, StackVersion version, bool is_in_error, unsigned cost)
Definition: parser.c:298
static void ts_parser__restore_external_scanner(TSParser *self, Subtree external_token)
Definition: parser.c:335
static bool ts_parser__select_children(TSParser *self, Subtree left, const SubtreeArray *children)
Definition: parser.c:757
const TSLanguage * ts_parser_language(const TSParser *self)
Definition: parser.c:1750
void ts_parser_print_dot_graphs(TSParser *self, int fd)
Definition: parser.c:1783
void ts_parser_delete(TSParser *self)
Definition: parser.c:1725
#define LOG_TREE(tree)
Definition: parser.c:64
static bool ts_parser_has_outstanding_parse(TSParser *self)
Definition: parser.c:1695
static StackVersion ts_parser__reduce(TSParser *self, StackVersion version, TSSymbol symbol, uint32_t count, int dynamic_precedence, uint16_t production_id, bool is_fragile, bool end_of_non_terminal_extra)
Definition: parser.c:805
SubtreeHeapData * ptr
Definition: subtree.h:164
const SubtreeHeapData * ptr
Definition: subtree.h:158
SubtreeInlineData data
Definition: subtree.h:157
void error(const char *msg)
Definition: untgz.c:593
static const z80_opcode fd[]
Definition: z80_tab.h:997
static bool input(void *ud, zip_uint8_t *data, zip_uint64_t length)
int summary
Definition: zipcmp.c:234