Rizin
unix-like reverse engineering framework and cli tools
parser.c File Reference
#include <time.h>
#include <assert.h>
#include <stdio.h>
#include <limits.h>
#include <stdbool.h>
#include "tree_sitter/api.h"
#include "./alloc.h"
#include "./array.h"
#include "./atomic.h"
#include "./clock.h"
#include "./error_costs.h"
#include "./get_changed_ranges.h"
#include "./language.h"
#include "./length.h"
#include "./lexer.h"
#include "./reduce_action.h"
#include "./reusable_node.h"
#include "./stack.h"
#include "./subtree.h"
#include "./tree.h"

Go to the source code of this file.

Classes

struct  TokenCache
 
struct  TSParser
 
struct  ErrorStatus
 
struct  TSStringInput
 

Macros

#define LOG(...)
 
#define LOG_LOOKAHEAD(symbol_name, size)
 
#define LOG_STACK()
 
#define LOG_TREE(tree)
 
#define SYM_NAME(symbol)   ts_language_symbol_name(self->language, symbol)
 
#define TREE_NAME(tree)   SYM_NAME(ts_subtree_symbol(tree))
 

Enumerations

enum  ErrorComparison {
  ErrorComparisonTakeLeft , ErrorComparisonPreferLeft , ErrorComparisonNone , ErrorComparisonPreferRight ,
  ErrorComparisonTakeRight
}
 

Functions

static const char * ts_string_input_read (void *_self, uint32_t byte, TSPoint pt, uint32_t *length)
 
static void ts_parser__log (TSParser *self)
 
static bool ts_parser__breakdown_top_of_stack (TSParser *self, StackVersion version)
 
static void ts_parser__breakdown_lookahead (TSParser *self, Subtree *lookahead, TSStateId state, ReusableNode *reusable_node)
 
static ErrorComparison ts_parser__compare_versions (TSParser *self, ErrorStatus a, ErrorStatus b)
 
static ErrorStatus ts_parser__version_status (TSParser *self, StackVersion version)
 
static bool ts_parser__better_version_exists (TSParser *self, StackVersion version, bool is_in_error, unsigned cost)
 
static void ts_parser__restore_external_scanner (TSParser *self, Subtree external_token)
 
static bool ts_parser__can_reuse_first_leaf (TSParser *self, TSStateId state, Subtree tree, TableEntry *table_entry)
 
static Subtree ts_parser__lex (TSParser *self, StackVersion version, TSStateId parse_state)
 
static Subtree ts_parser__get_cached_token (TSParser *self, TSStateId state, size_t position, Subtree last_external_token, TableEntry *table_entry)
 
static void ts_parser__set_cached_token (TSParser *self, size_t byte_index, Subtree last_external_token, Subtree token)
 
static bool ts_parser__has_included_range_difference (const TSParser *self, uint32_t start_position, uint32_t end_position)
 
static Subtree ts_parser__reuse_node (TSParser *self, StackVersion version, TSStateId *state, uint32_t position, Subtree last_external_token, TableEntry *table_entry)
 
static bool ts_parser__select_tree (TSParser *self, Subtree left, Subtree right)
 
static bool ts_parser__select_children (TSParser *self, Subtree left, const SubtreeArray *children)
 
static void ts_parser__shift (TSParser *self, StackVersion version, TSStateId state, Subtree lookahead, bool extra)
 
static StackVersion ts_parser__reduce (TSParser *self, StackVersion version, TSSymbol symbol, uint32_t count, int dynamic_precedence, uint16_t production_id, bool is_fragile, bool end_of_non_terminal_extra)
 
static void ts_parser__accept (TSParser *self, StackVersion version, Subtree lookahead)
 
static bool ts_parser__do_all_potential_reductions (TSParser *self, StackVersion starting_version, TSSymbol lookahead_symbol)
 
static bool ts_parser__recover_to_state (TSParser *self, StackVersion version, unsigned depth, TSStateId goal_state)
 
static void ts_parser__recover (TSParser *self, StackVersion version, Subtree lookahead)
 
static void ts_parser__handle_error (TSParser *self, StackVersion version, Subtree lookahead)
 
static bool ts_parser__advance (TSParser *self, StackVersion version, bool allow_node_reuse)
 
static unsigned ts_parser__condense_stack (TSParser *self)
 
static bool ts_parser_has_outstanding_parse (TSParser *self)
 
TSParserts_parser_new (void)
 
void ts_parser_delete (TSParser *self)
 
const TSLanguagets_parser_language (const TSParser *self)
 
bool ts_parser_set_language (TSParser *self, const TSLanguage *language)
 
TSLogger ts_parser_logger (const TSParser *self)
 
void ts_parser_set_logger (TSParser *self, TSLogger logger)
 
void ts_parser_print_dot_graphs (TSParser *self, int fd)
 
const size_tts_parser_cancellation_flag (const TSParser *self)
 
void ts_parser_set_cancellation_flag (TSParser *self, const size_t *flag)
 
uint64_t ts_parser_timeout_micros (const TSParser *self)
 
void ts_parser_set_timeout_micros (TSParser *self, uint64_t timeout_micros)
 
bool ts_parser_set_included_ranges (TSParser *self, const TSRange *ranges, uint32_t count)
 
const TSRangets_parser_included_ranges (const TSParser *self, uint32_t *count)
 
void ts_parser_reset (TSParser *self)
 
TSTreets_parser_parse (TSParser *self, const TSTree *old_tree, TSInput input)
 
TSTreets_parser_parse_string (TSParser *self, const TSTree *old_tree, const char *string, uint32_t length)
 
TSTreets_parser_parse_string_encoding (TSParser *self, const TSTree *old_tree, const char *string, uint32_t length, TSInputEncoding encoding)
 

Variables

static const unsigned MAX_VERSION_COUNT = 6
 
static const unsigned MAX_VERSION_COUNT_OVERFLOW = 4
 
static const unsigned MAX_SUMMARY_DEPTH = 16
 
static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE
 
static const unsigned OP_COUNT_PER_TIMEOUT_CHECK = 100
 

Macro Definition Documentation

◆ LOG

#define LOG (   ...)
Value:
if (self->lexer.logger.log || self->dot_graph_file) { \
snprintf(self->lexer.debug_buffer, TREE_SITTER_SERIALIZATION_BUFFER_SIZE, __VA_ARGS__); \
ts_parser__log(self); \
}
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE
Definition: parser.h:14

Definition at line 22 of file parser.c.

◆ LOG_LOOKAHEAD

#define LOG_LOOKAHEAD (   symbol_name,
  size 
)
Value:
if (self->lexer.logger.log || self->dot_graph_file) { \
char *buf = self->lexer.debug_buffer; \
const char *symbol = symbol_name; \
int off = sprintf(buf, "lexed_lookahead sym:"); \
for ( \
int i = 0; \
symbol[i] != '\0' \
i++ \
) { \
switch (symbol[i]) { \
case '\t': buf[off++] = '\\'; buf[off++] = 't'; break; \
case '\n': buf[off++] = '\\'; buf[off++] = 'n'; break; \
case '\v': buf[off++] = '\\'; buf[off++] = 'v'; break; \
case '\f': buf[off++] = '\\'; buf[off++] = 'f'; break; \
case '\r': buf[off++] = '\\'; buf[off++] = 'r'; break; \
case '\\': buf[off++] = '\\'; buf[off++] = '\\'; break; \
default: buf[off++] = symbol[i]; break; \
} \
} \
snprintf( \
buf + off, \
", size:%u", \
size \
); \
ts_parser__log(self); \
}
lzma_index ** i
Definition: index.h:629
voidpf void uLong size
Definition: ioapi.h:138
voidpf void * buf
Definition: ioapi.h:138
sprintf
Definition: kernel.h:365
int off
Definition: pal.c:13

Definition at line 28 of file parser.c.

◆ LOG_STACK

#define LOG_STACK ( )
Value:
if (self->dot_graph_file) { \
ts_stack_print_dot_graph(self->stack, self->language, self->dot_graph_file); \
fputs("\n\n", self->dot_graph_file); \
}

Definition at line 58 of file parser.c.

◆ LOG_TREE

#define LOG_TREE (   tree)
Value:
if (self->dot_graph_file) { \
ts_subtree_print_dot_graph(tree, self->language, self->dot_graph_file); \
fputs("\n", self->dot_graph_file); \
}

Definition at line 64 of file parser.c.

◆ SYM_NAME

#define SYM_NAME (   symbol)    ts_language_symbol_name(self->language, symbol)

Definition at line 70 of file parser.c.

◆ TREE_NAME

#define TREE_NAME (   tree)    SYM_NAME(ts_subtree_symbol(tree))

Definition at line 72 of file parser.c.

Enumeration Type Documentation

◆ ErrorComparison

Enumerator
ErrorComparisonTakeLeft 
ErrorComparisonPreferLeft 
ErrorComparisonNone 
ErrorComparisonPreferRight 
ErrorComparisonTakeRight 

Definition at line 117 of file parser.c.

117  {
ErrorComparison
Definition: parser.c:117
@ ErrorComparisonPreferRight
Definition: parser.c:121
@ ErrorComparisonNone
Definition: parser.c:120
@ ErrorComparisonTakeRight
Definition: parser.c:122
@ ErrorComparisonTakeLeft
Definition: parser.c:118
@ ErrorComparisonPreferLeft
Definition: parser.c:119

Function Documentation

◆ ts_parser__accept()

static void ts_parser__accept ( TSParser self,
StackVersion  version,
Subtree  lookahead 
)
static

Definition at line 920 of file parser.c.

924  {
925  assert(ts_subtree_is_eof(lookahead));
926  ts_stack_push(self->stack, version, lookahead, false, 1);
927 
928  StackSliceArray pop = ts_stack_pop_all(self->stack, version);
929  for (uint32_t i = 0; i < pop.size; i++) {
930  SubtreeArray trees = pop.contents[i].subtrees;
931 
933  for (uint32_t j = trees.size - 1; j + 1 > 0; j--) {
934  Subtree tree = trees.contents[j];
935  if (!ts_subtree_extra(tree)) {
936  assert(!tree.data.is_inline);
937  uint32_t child_count = ts_subtree_child_count(tree);
938  const Subtree *children = ts_subtree_children(tree);
939  for (uint32_t k = 0; k < child_count; k++) {
940  ts_subtree_retain(children[k]);
941  }
942  array_splice(&trees, j, 1, child_count, children);
944  ts_subtree_symbol(tree),
945  &trees,
946  tree.ptr->production_id,
947  self->language
948  ));
949  ts_subtree_release(&self->tree_pool, tree);
950  break;
951  }
952  }
953 
954  assert(root.ptr);
955  self->accept_count++;
956 
957  if (self->finished_tree.ptr) {
958  if (ts_parser__select_tree(self, self->finished_tree, root)) {
960  self->finished_tree = root;
961  } else {
963  }
964  } else {
965  self->finished_tree = root;
966  }
967  }
968 
969  ts_stack_remove_version(self->stack, pop.contents[0].version);
970  ts_stack_halt(self->stack, version);
971 }
#define array_splice(self, index, old_count, new_count, new_contents)
Definition: array.h:68
const char * k
Definition: dsignal.c:11
int root
Definition: enough.c:226
assert(limit<=UINT32_MAX/2)
unsigned int uint32_t
Definition: sftypes.h:29
uint16_t production_id
Definition: subtree.h:140
const TSLanguage * language
Definition: parser.c:90
SubtreePool tree_pool
Definition: parser.c:89
Subtree finished_tree
Definition: parser.c:92
Stack * stack
Definition: parser.c:88
void ts_stack_halt(Stack *self, StackVersion version)
Definition: stack.c:705
void ts_stack_push(Stack *self, StackVersion version, Subtree subtree, bool pending, TSStateId state)
Definition: stack.c:485
void ts_stack_remove_version(Stack *self, StackVersion version)
Definition: stack.c:643
StackSliceArray ts_stack_pop_all(Stack *self, StackVersion version)
Definition: stack.c:569
MutableSubtree ts_subtree_new_node(TSSymbol symbol, SubtreeArray *children, unsigned production_id, const TSLanguage *language)
Definition: subtree.c:500
void ts_subtree_release(SubtreePool *pool, Subtree self)
Definition: subtree.c:584
void ts_subtree_retain(Subtree self)
Definition: subtree.c:577
#define ts_subtree_children(self)
Definition: subtree.h:233
static Subtree ts_subtree_from_mut(MutableSubtree self)
Definition: subtree.h:350
static bool ts_subtree_extra(Subtree self)
Definition: subtree.h:216
static uint32_t ts_subtree_child_count(Subtree self)
Definition: subtree.h:282
#define NULL_SUBTREE
Definition: subtree.h:19
static bool ts_subtree_is_eof(Subtree self)
Definition: subtree.h:346
static TSSymbol ts_subtree_symbol(Subtree self)
Definition: subtree.h:213
static bool ts_parser__select_tree(TSParser *self, Subtree left, Subtree right)
Definition: parser.c:710
const SubtreeHeapData * ptr
Definition: subtree.h:158
SubtreeInlineData data
Definition: subtree.h:157

References array_splice, assert(), Subtree::data, i, SubtreeInlineData::is_inline, k, NULL_SUBTREE, SubtreeHeapData::production_id, Subtree::ptr, root, ts_parser__select_tree(), ts_stack_halt(), ts_stack_pop_all(), ts_stack_push(), ts_stack_remove_version(), ts_subtree_child_count(), ts_subtree_children, ts_subtree_extra(), ts_subtree_from_mut(), ts_subtree_is_eof(), ts_subtree_new_node(), ts_subtree_release(), ts_subtree_retain(), and ts_subtree_symbol().

Referenced by ts_parser__advance(), and ts_parser__recover().

◆ ts_parser__advance()

static bool ts_parser__advance ( TSParser self,
StackVersion  version,
bool  allow_node_reuse 
)
static

Definition at line 1387 of file parser.c.

1391  {
1393  uint32_t position = ts_stack_position(self->stack, version).bytes;
1394  Subtree last_external_token = ts_stack_last_external_token(self->stack, version);
1395 
1396  bool did_reuse = true;
1397  Subtree lookahead = NULL_SUBTREE;
1398  TableEntry table_entry = {.action_count = 0};
1399 
1400  // If possible, reuse a node from the previous syntax tree.
1401  if (allow_node_reuse) {
1402  lookahead = ts_parser__reuse_node(
1403  self, version, &state, position, last_external_token, &table_entry
1404  );
1405  }
1406 
1407  // If no node from the previous syntax tree could be reused, then try to
1408  // reuse the token previously returned by the lexer.
1409  if (!lookahead.ptr) {
1410  did_reuse = false;
1411  lookahead = ts_parser__get_cached_token(
1412  self, state, position, last_external_token, &table_entry
1413  );
1414  }
1415 
1416  bool needs_lex = !lookahead.ptr;
1417  for (;;) {
1418  // Otherwise, re-run the lexer.
1419  if (needs_lex) {
1420  needs_lex = false;
1421  lookahead = ts_parser__lex(self, version, state);
1422 
1423  if (lookahead.ptr) {
1424  ts_parser__set_cached_token(self, position, last_external_token, lookahead);
1425  ts_language_table_entry(self->language, state, ts_subtree_symbol(lookahead), &table_entry);
1426  }
1427 
1428  // When parsing a non-terminal extra, a null lookahead indicates the
1429  // end of the rule. The reduction is stored in the EOF table entry.
1430  // After the reduction, the lexer needs to be run again.
1431  else {
1433  }
1434  }
1435 
1436  // If a cancellation flag or a timeout was provided, then check every
1437  // time a fixed number of parse actions has been processed.
1439  self->operation_count = 0;
1440  }
1441  if (
1442  self->operation_count == 0 &&
1443  ((self->cancellation_flag && atomic_load(self->cancellation_flag)) ||
1444  (!clock_is_null(self->end_clock) && clock_is_gt(clock_now(), self->end_clock)))
1445  ) {
1446  ts_subtree_release(&self->tree_pool, lookahead);
1447  return false;
1448  }
1449 
1450  // Process each parse action for the current lookahead token in
1451  // the current state. If there are multiple actions, then this is
1452  // an ambiguous state. REDUCE actions always create a new stack
1453  // version, whereas SHIFT actions update the existing stack version
1454  // and terminate this loop.
1455  StackVersion last_reduction_version = STACK_VERSION_NONE;
1456  for (uint32_t i = 0; i < table_entry.action_count; i++) {
1457  TSParseAction action = table_entry.actions[i];
1458 
1459  switch (action.type) {
1460  case TSParseActionTypeShift: {
1461  if (action.shift.repetition) break;
1462  TSStateId next_state;
1463  if (action.shift.extra) {
1464  next_state = state;
1465  LOG("shift_extra");
1466  } else {
1467  next_state = action.shift.state;
1468  LOG("shift state:%u", next_state);
1469  }
1470 
1471  if (ts_subtree_child_count(lookahead) > 0) {
1472  ts_parser__breakdown_lookahead(self, &lookahead, state, &self->reusable_node);
1473  next_state = ts_language_next_state(self->language, state, ts_subtree_symbol(lookahead));
1474  }
1475 
1476  ts_parser__shift(self, version, next_state, lookahead, action.shift.extra);
1477  if (did_reuse) reusable_node_advance(&self->reusable_node);
1478  return true;
1479  }
1480 
1481  case TSParseActionTypeReduce: {
1482  bool is_fragile = table_entry.action_count > 1;
1483  bool end_of_non_terminal_extra = lookahead.ptr == NULL;
1484  LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.reduce.symbol), action.reduce.child_count);
1485  StackVersion reduction_version = ts_parser__reduce(
1486  self, version, action.reduce.symbol, action.reduce.child_count,
1487  action.reduce.dynamic_precedence, action.reduce.production_id,
1488  is_fragile, end_of_non_terminal_extra
1489  );
1490  if (reduction_version != STACK_VERSION_NONE) {
1491  last_reduction_version = reduction_version;
1492  }
1493  break;
1494  }
1495 
1496  case TSParseActionTypeAccept: {
1497  LOG("accept");
1498  ts_parser__accept(self, version, lookahead);
1499  return true;
1500  }
1501 
1502  case TSParseActionTypeRecover: {
1503  if (ts_subtree_child_count(lookahead) > 0) {
1504  ts_parser__breakdown_lookahead(self, &lookahead, ERROR_STATE, &self->reusable_node);
1505  }
1506 
1507  ts_parser__recover(self, version, lookahead);
1508  if (did_reuse) reusable_node_advance(&self->reusable_node);
1509  return true;
1510  }
1511  }
1512  }
1513 
1514  // If a reduction was performed, then replace the current stack version
1515  // with one of the stack versions created by a reduction, and continue
1516  // processing this version of the stack with the same lookahead symbol.
1517  if (last_reduction_version != STACK_VERSION_NONE) {
1518  ts_stack_renumber_version(self->stack, last_reduction_version, version);
1519  LOG_STACK();
1520  state = ts_stack_state(self->stack, version);
1521 
1522  // At the end of a non-terminal extra rule, the lexer will return a
1523  // null subtree, because the parser needs to perform a fixed reduction
1524  // regardless of the lookahead node. After performing that reduction,
1525  // (and completing the non-terminal extra rule) run the lexer again based
1526  // on the current parse state.
1527  if (!lookahead.ptr) {
1528  needs_lex = true;
1529  } else {
1531  self->language,
1532  state,
1533  ts_subtree_leaf_symbol(lookahead),
1534  &table_entry
1535  );
1536  }
1537 
1538  continue;
1539  }
1540 
1541  // If there were no parse actions for the current lookahead token, then
1542  // it is not valid in this state. If the current lookahead token is a
1543  // keyword, then switch to treating it as the normal word token if that
1544  // token is valid in this state.
1545  if (
1546  ts_subtree_is_keyword(lookahead) &&
1547  ts_subtree_symbol(lookahead) != self->language->keyword_capture_token
1548  ) {
1550  if (table_entry.action_count > 0) {
1551  LOG(
1552  "switch from_keyword:%s, to_word_token:%s",
1553  TREE_NAME(lookahead),
1555  );
1556 
1557  MutableSubtree mutable_lookahead = ts_subtree_make_mut(&self->tree_pool, lookahead);
1558  ts_subtree_set_symbol(&mutable_lookahead, self->language->keyword_capture_token, self->language);
1559  lookahead = ts_subtree_from_mut(mutable_lookahead);
1560  continue;
1561  }
1562  }
1563 
1564  // If the current lookahead token is not valid and the parser is
1565  // already in the error state, restart the error recovery process.
1566  // TODO - can this be unified with the other `RECOVER` case above?
1567  if (state == ERROR_STATE) {
1568  ts_parser__recover(self, version, lookahead);
1569  return true;
1570  }
1571 
1572  // If the current lookahead token is not valid and the previous
1573  // subtree on the stack was reused from an old tree, it isn't actually
1574  // valid to reuse it. Remove it from the stack, and in its place,
1575  // push each of its children. Then try again to process the current
1576  // lookahead.
1578  state = ts_stack_state(self->stack, version);
1579  ts_subtree_release(&self->tree_pool, lookahead);
1580  needs_lex = true;
1581  continue;
1582  }
1583 
1584  // At this point, the current lookahead token is definitely not valid
1585  // for this parse stack version. Mark this version as paused and continue
1586  // processing any other stack versions that might exist. If some other
1587  // version advances successfully, then this version can simply be removed.
1588  // But if all versions end up paused, then error recovery is needed.
1589  LOG("detect_error");
1590  ts_stack_pause(self->stack, version, lookahead);
1591  return true;
1592  }
1593 }
static bool clock_is_gt(TSClock self, TSClock other)
Definition: clock.h:135
static TSClock clock_now(void)
Definition: clock.h:123
static bool clock_is_null(TSClock self)
Definition: clock.h:131
#define NULL
Definition: cris-opc.c:27
#define ERROR_STATE
Definition: error_costs.h:4
void ts_language_table_entry(const TSLanguage *self, TSStateId state, TSSymbol symbol, TableEntry *result)
Definition: language.c:18
static TSStateId ts_language_next_state(const TSLanguage *self, TSStateId state, TSSymbol symbol)
Definition: language.h:181
static void reusable_node_advance(ReusableNode *self)
Definition: reusable_node.h:39
uint16_t TSStateId
Definition: parser.h:16
#define ts_builtin_sym_end
Definition: parser.h:13
@ TSParseActionTypeAccept
Definition: parser.h:56
@ TSParseActionTypeShift
Definition: parser.h:54
@ TSParseActionTypeRecover
Definition: parser.h:57
@ TSParseActionTypeReduce
Definition: parser.h:55
#define STACK_VERSION_NONE
Definition: stack.h:16
unsigned StackVersion
Definition: stack.h:15
uint32_t bytes
Definition: length.h:10
TSSymbol keyword_capture_token
Definition: parser.h:116
unsigned operation_count
Definition: parser.c:103
const volatile size_t * cancellation_flag
Definition: parser.c:104
ReusableNode reusable_node
Definition: parser.c:97
TSClock end_clock
Definition: parser.c:100
uint32_t action_count
Definition: language.h:15
const TSParseAction * actions
Definition: language.h:14
Definition: dis.h:43
void ts_stack_renumber_version(Stack *self, StackVersion v1, StackVersion v2)
Definition: stack.c:648
void ts_stack_pause(Stack *self, StackVersion version, Subtree lookahead)
Definition: stack.c:709
TSStateId ts_stack_state(const Stack *self, StackVersion version)
Definition: stack.c:447
Length ts_stack_position(const Stack *self, StackVersion version)
Definition: stack.c:451
Subtree ts_stack_last_external_token(const Stack *self, StackVersion version)
Definition: stack.c:455
MutableSubtree ts_subtree_make_mut(SubtreePool *pool, Subtree self)
Definition: subtree.c:284
void ts_subtree_set_symbol(MutableSubtree *self, TSSymbol symbol, const TSLanguage *language)
Definition: subtree.c:226
static bool ts_subtree_is_keyword(Subtree self)
Definition: subtree.h:219
static TSSymbol ts_subtree_leaf_symbol(Subtree self)
Definition: subtree.h:244
static size_t atomic_load(const volatile size_t *p)
Definition: atomic.h:40
static Subtree ts_parser__lex(TSParser *self, StackVersion version, TSStateId parse_state)
Definition: parser.c:385
static Subtree ts_parser__get_cached_token(TSParser *self, TSStateId state, size_t position, Subtree last_external_token, TableEntry *table_entry)
Definition: parser.c:577
static void ts_parser__accept(TSParser *self, StackVersion version, Subtree lookahead)
Definition: parser.c:920
static const unsigned OP_COUNT_PER_TIMEOUT_CHECK
Definition: parser.c:78
#define LOG(...)
Definition: parser.c:22
static bool ts_parser__breakdown_top_of_stack(TSParser *self, StackVersion version)
Definition: parser.c:170
static void ts_parser__recover(TSParser *self, StackVersion version, Subtree lookahead)
Definition: parser.c:1122
static void ts_parser__shift(TSParser *self, StackVersion version, TSStateId state, Subtree lookahead, bool extra)
Definition: parser.c:782
#define LOG_STACK()
Definition: parser.c:58
static void ts_parser__breakdown_lookahead(TSParser *self, Subtree *lookahead, TSStateId state, ReusableNode *reusable_node)
Definition: parser.c:218
static Subtree ts_parser__reuse_node(TSParser *self, StackVersion version, TSStateId *state, uint32_t position, Subtree last_external_token, TableEntry *table_entry)
Definition: parser.c:627
static void ts_parser__set_cached_token(TSParser *self, size_t byte_index, Subtree last_external_token, Subtree token)
Definition: parser.c:598
#define TREE_NAME(tree)
Definition: parser.c:72
#define SYM_NAME(symbol)
Definition: parser.c:70
static StackVersion ts_parser__reduce(TSParser *self, StackVersion version, TSSymbol symbol, uint32_t count, int dynamic_precedence, uint16_t production_id, bool is_fragile, bool end_of_non_terminal_extra)
Definition: parser.c:805

References test-lz4-speed::action, TableEntry::action_count, TableEntry::actions, atomic_load(), Length::bytes, clock_is_gt(), clock_is_null(), clock_now(), ERROR_STATE, i, LOG, LOG_STACK, NULL, NULL_SUBTREE, OP_COUNT_PER_TIMEOUT_CHECK, Subtree::ptr, reusable_node_advance(), STACK_VERSION_NONE, SYM_NAME, TREE_NAME, ts_builtin_sym_end, ts_language_next_state(), ts_language_table_entry(), ts_parser__accept(), ts_parser__breakdown_lookahead(), ts_parser__breakdown_top_of_stack(), ts_parser__get_cached_token(), ts_parser__lex(), ts_parser__recover(), ts_parser__reduce(), ts_parser__reuse_node(), ts_parser__set_cached_token(), ts_parser__shift(), ts_stack_last_external_token(), ts_stack_pause(), ts_stack_position(), ts_stack_renumber_version(), ts_stack_state(), ts_subtree_child_count(), ts_subtree_from_mut(), ts_subtree_is_keyword(), ts_subtree_leaf_symbol(), ts_subtree_make_mut(), ts_subtree_release(), ts_subtree_set_symbol(), ts_subtree_symbol(), TSParseActionTypeAccept, TSParseActionTypeRecover, TSParseActionTypeReduce, and TSParseActionTypeShift.

Referenced by ts_parser_parse().

◆ ts_parser__better_version_exists()

static bool ts_parser__better_version_exists ( TSParser self,
StackVersion  version,
bool  is_in_error,
unsigned  cost 
)
static

Definition at line 298 of file parser.c.

303  {
304  if (self->finished_tree.ptr && ts_subtree_error_cost(self->finished_tree) <= cost) {
305  return true;
306  }
307 
308  Length position = ts_stack_position(self->stack, version);
309  ErrorStatus status = {
310  .cost = cost,
311  .is_in_error = is_in_error,
312  .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version),
313  .node_count = ts_stack_node_count_since_error(self->stack, version),
314  };
315 
316  for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) {
317  if (i == version ||
318  !ts_stack_is_active(self->stack, i) ||
319  ts_stack_position(self->stack, i).bytes < position.bytes) continue;
320  ErrorStatus status_i = ts_parser__version_status(self, i);
321  switch (ts_parser__compare_versions(self, status, status_i)) {
323  return true;
325  if (ts_stack_can_merge(self->stack, i, version)) return true;
326  break;
327  default:
328  break;
329  }
330  }
331 
332  return false;
333 }
static const char struct stat static buf struct stat static buf static vhangup int status
Definition: sflib.h:145
int n
Definition: mipsasm.c:19
Definition: length.h:9
bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version2)
Definition: stack.c:693
int ts_stack_dynamic_precedence(Stack *self, StackVersion version)
Definition: stack.c:615
uint32_t ts_stack_version_count(const Stack *self)
Definition: stack.c:443
bool ts_stack_is_active(const Stack *self, StackVersion version)
Definition: stack.c:716
unsigned ts_stack_node_count_since_error(const Stack *self, StackVersion version)
Definition: stack.c:477
static uint32_t ts_subtree_error_cost(Subtree self)
Definition: subtree.h:302
static ErrorComparison ts_parser__compare_versions(TSParser *self, ErrorStatus a, ErrorStatus b)
Definition: parser.c:240
static ErrorStatus ts_parser__version_status(TSParser *self, StackVersion version)
Definition: parser.c:283

References Length::bytes, ErrorComparisonPreferRight, ErrorComparisonTakeRight, i, n, status, ts_parser__compare_versions(), ts_parser__version_status(), ts_stack_can_merge(), ts_stack_dynamic_precedence(), ts_stack_is_active(), ts_stack_node_count_since_error(), ts_stack_position(), ts_stack_version_count(), and ts_subtree_error_cost().

Referenced by ts_parser__recover().

◆ ts_parser__breakdown_lookahead()

static void ts_parser__breakdown_lookahead ( TSParser self,
Subtree lookahead,
TSStateId  state,
ReusableNode reusable_node 
)
static

Definition at line 218 of file parser.c.

223  {
224  bool did_descend = false;
225  Subtree tree = reusable_node_tree(reusable_node);
226  while (ts_subtree_child_count(tree) > 0 && ts_subtree_parse_state(tree) != state) {
227  LOG("state_mismatch sym:%s", TREE_NAME(tree));
228  reusable_node_descend(reusable_node);
229  tree = reusable_node_tree(reusable_node);
230  did_descend = true;
231  }
232 
233  if (did_descend) {
234  ts_subtree_release(&self->tree_pool, *lookahead);
235  *lookahead = tree;
236  ts_subtree_retain(*lookahead);
237  }
238 }
static bool reusable_node_descend(ReusableNode *self)
Definition: reusable_node.h:62
static Subtree reusable_node_tree(ReusableNode *self)
Definition: reusable_node.h:23
static TSStateId ts_subtree_parse_state(Subtree self)
Definition: subtree.h:220

References LOG, reusable_node_descend(), reusable_node_tree(), TREE_NAME, ts_subtree_child_count(), ts_subtree_parse_state(), ts_subtree_release(), and ts_subtree_retain().

Referenced by ts_parser__advance(), and ts_parser__handle_error().

◆ ts_parser__breakdown_top_of_stack()

static bool ts_parser__breakdown_top_of_stack ( TSParser self,
StackVersion  version 
)
static

Definition at line 170 of file parser.c.

173  {
174  bool did_break_down = false;
175  bool pending = false;
176 
177  do {
178  StackSliceArray pop = ts_stack_pop_pending(self->stack, version);
179  if (!pop.size) break;
180 
181  did_break_down = true;
182  pending = false;
183  for (uint32_t i = 0; i < pop.size; i++) {
184  StackSlice slice = pop.contents[i];
185  TSStateId state = ts_stack_state(self->stack, slice.version);
186  Subtree parent = *array_front(&slice.subtrees);
187 
188  for (uint32_t j = 0, n = ts_subtree_child_count(parent); j < n; j++) {
189  Subtree child = ts_subtree_children(parent)[j];
190  pending = ts_subtree_child_count(child) > 0;
191 
192  if (ts_subtree_is_error(child)) {
193  state = ERROR_STATE;
194  } else if (!ts_subtree_extra(child)) {
196  }
197 
198  ts_subtree_retain(child);
199  ts_stack_push(self->stack, slice.version, child, pending, state);
200  }
201 
202  for (uint32_t j = 1; j < slice.subtrees.size; j++) {
203  Subtree tree = slice.subtrees.contents[j];
204  ts_stack_push(self->stack, slice.version, tree, false, state);
205  }
206 
207  ts_subtree_release(&self->tree_pool, parent);
208  array_delete(&slice.subtrees);
209 
210  LOG("breakdown_top_of_stack tree:%s", TREE_NAME(parent));
211  LOG_STACK();
212  }
213  } while (pending);
214 
215  return did_break_down;
216 }
#define array_front(self)
Definition: array.h:31
#define array_delete(self)
Definition: array.h:41
SubtreeArray subtrees
Definition: stack.h:19
StackVersion version
Definition: stack.h:20
StackSliceArray ts_stack_pop_pending(Stack *self, StackVersion version)
Definition: stack.c:524
static bool ts_subtree_is_error(Subtree self)
Definition: subtree.h:342

References array_delete, array_front, ERROR_STATE, i, LOG, LOG_STACK, n, StackSlice::subtrees, TREE_NAME, ts_language_next_state(), ts_stack_pop_pending(), ts_stack_push(), ts_stack_state(), ts_subtree_child_count(), ts_subtree_children, ts_subtree_extra(), ts_subtree_is_error(), ts_subtree_release(), ts_subtree_retain(), ts_subtree_symbol(), and StackSlice::version.

Referenced by ts_parser__advance(), and ts_parser__reuse_node().

◆ ts_parser__can_reuse_first_leaf()

static bool ts_parser__can_reuse_first_leaf ( TSParser self,
TSStateId  state,
Subtree  tree,
TableEntry table_entry 
)
static

Definition at line 350 of file parser.c.

355  {
356  TSLexMode current_lex_mode = self->language->lex_modes[state];
357  TSSymbol leaf_symbol = ts_subtree_leaf_symbol(tree);
358  TSStateId leaf_state = ts_subtree_leaf_parse_state(tree);
359  TSLexMode leaf_lex_mode = self->language->lex_modes[leaf_state];
360 
361  // At the end of a non-terminal extra node, the lexer normally returns
362  // NULL, which indicates that the parser should look for a reduce action
363  // at symbol `0`. Avoid reusing tokens in this situation to ensure that
364  // the same thing happens when incrementally reparsing.
365  if (current_lex_mode.lex_state == (uint16_t)(-1)) return false;
366 
367  // If the token was created in a state with the same set of lookaheads, it is reusable.
368  if (
369  table_entry->action_count > 0 &&
370  memcmp(&leaf_lex_mode, &current_lex_mode, sizeof(TSLexMode)) == 0 &&
371  (
372  leaf_symbol != self->language->keyword_capture_token ||
374  )
375  ) return true;
376 
377  // Empty tokens are not reusable in states with different lookaheads.
378  if (ts_subtree_size(tree).bytes == 0 && leaf_symbol != ts_builtin_sym_end) return false;
379 
380  // If the current state allows external tokens or other tokens that conflict with this
381  // token, this token is not reusable.
382  return current_lex_mode.external_lex_state == 0 && table_entry->is_reusable;
383 }
static ut8 bytes[32]
Definition: asm_arc.c:23
uint16_t TSSymbol
Definition: parser.h:19
unsigned short uint16_t
Definition: sftypes.h:30
uint16_t external_lex_state
Definition: parser.h:79
uint16_t lex_state
Definition: parser.h:78
bool is_reusable
Definition: language.h:16
static Length ts_subtree_size(Subtree self)
Definition: subtree.h:265
static TSStateId ts_subtree_leaf_parse_state(Subtree self)
Definition: subtree.h:250

References TableEntry::action_count, bytes, TSLexMode::external_lex_state, TableEntry::is_reusable, TSLexMode::lex_state, ts_builtin_sym_end, ts_subtree_is_keyword(), ts_subtree_leaf_parse_state(), ts_subtree_leaf_symbol(), ts_subtree_parse_state(), and ts_subtree_size().

Referenced by ts_parser__get_cached_token(), and ts_parser__reuse_node().

◆ ts_parser__compare_versions()

static ErrorComparison ts_parser__compare_versions ( TSParser self,
ErrorStatus  a,
ErrorStatus  b 
)
static

Definition at line 240 of file parser.c.

244  {
245  (void)self;
246  if (!a.is_in_error && b.is_in_error) {
247  if (a.cost < b.cost) {
249  } else {
251  }
252  }
253 
254  if (a.is_in_error && !b.is_in_error) {
255  if (b.cost < a.cost) {
257  } else {
259  }
260  }
261 
262  if (a.cost < b.cost) {
263  if ((b.cost - a.cost) * (1 + a.node_count) > MAX_COST_DIFFERENCE) {
265  } else {
267  }
268  }
269 
270  if (b.cost < a.cost) {
271  if ((a.cost - b.cost) * (1 + b.node_count) > MAX_COST_DIFFERENCE) {
273  } else {
275  }
276  }
277 
278  if (a.dynamic_precedence > b.dynamic_precedence) return ErrorComparisonPreferLeft;
279  if (b.dynamic_precedence > a.dynamic_precedence) return ErrorComparisonPreferRight;
280  return ErrorComparisonNone;
281 }
#define b(i)
Definition: sha256.c:42
#define a(i)
Definition: sha256.c:41
static const unsigned MAX_COST_DIFFERENCE
Definition: parser.c:77

References a, b, ErrorComparisonNone, ErrorComparisonPreferLeft, ErrorComparisonPreferRight, ErrorComparisonTakeLeft, ErrorComparisonTakeRight, and MAX_COST_DIFFERENCE.

Referenced by ts_parser__better_version_exists(), and ts_parser__condense_stack().

◆ ts_parser__condense_stack()

static unsigned ts_parser__condense_stack ( TSParser self)
static

Definition at line 1595 of file parser.c.

1595  {
1596  bool made_changes = false;
1597  unsigned min_error_cost = UINT_MAX;
1598  for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) {
1599  // Prune any versions that have been marked for removal.
1600  if (ts_stack_is_halted(self->stack, i)) {
1602  i--;
1603  continue;
1604  }
1605 
1606  // Keep track of the minimum error cost of any stack version so
1607  // that it can be returned.
1608  ErrorStatus status_i = ts_parser__version_status(self, i);
1609  if (!status_i.is_in_error && status_i.cost < min_error_cost) {
1610  min_error_cost = status_i.cost;
1611  }
1612 
1613  // Examine each pair of stack versions, removing any versions that
1614  // are clearly worse than another version. Ensure that the versions
1615  // are ordered from most promising to least promising.
1616  for (StackVersion j = 0; j < i; j++) {
1617  ErrorStatus status_j = ts_parser__version_status(self, j);
1618 
1619  switch (ts_parser__compare_versions(self, status_j, status_i)) {
1621  made_changes = true;
1623  i--;
1624  j = i;
1625  break;
1626 
1628  case ErrorComparisonNone:
1629  if (ts_stack_merge(self->stack, j, i)) {
1630  made_changes = true;
1631  i--;
1632  j = i;
1633  }
1634  break;
1635 
1637  made_changes = true;
1638  if (ts_stack_merge(self->stack, j, i)) {
1639  i--;
1640  j = i;
1641  } else {
1642  ts_stack_swap_versions(self->stack, i, j);
1643  }
1644  break;
1645 
1647  made_changes = true;
1648  ts_stack_remove_version(self->stack, j);
1649  i--;
1650  j--;
1651  break;
1652  }
1653  }
1654  }
1655 
1656  // Enfore a hard upper bound on the number of stack versions by
1657  // discarding the least promising versions.
1660  made_changes = true;
1661  }
1662 
1663  // If the best-performing stack version is currently paused, or all
1664  // versions are paused, then resume the best paused version and begin
1665  // the error recovery process. Otherwise, remove the paused versions.
1666  if (ts_stack_version_count(self->stack) > 0) {
1667  bool has_unpaused_version = false;
1668  for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) {
1669  if (ts_stack_is_paused(self->stack, i)) {
1670  if (!has_unpaused_version && self->accept_count < MAX_VERSION_COUNT) {
1671  LOG("resume version:%u", i);
1672  min_error_cost = ts_stack_error_cost(self->stack, i);
1673  Subtree lookahead = ts_stack_resume(self->stack, i);
1674  ts_parser__handle_error(self, i, lookahead);
1675  has_unpaused_version = true;
1676  } else {
1678  i--;
1679  n--;
1680  }
1681  } else {
1682  has_unpaused_version = true;
1683  }
1684  }
1685  }
1686 
1687  if (made_changes) {
1688  LOG("condense");
1689  LOG_STACK();
1690  }
1691 
1692  return min_error_cost;
1693 }
unsigned cost
Definition: parser.c:111
bool is_in_error
Definition: parser.c:114
unsigned accept_count
Definition: parser.c:102
#define UINT_MAX
Definition: md5.h:55
void ts_stack_swap_versions(Stack *self, StackVersion v1, StackVersion v2)
Definition: stack.c:663
Subtree ts_stack_resume(Stack *self, StackVersion version)
Definition: stack.c:728
bool ts_stack_merge(Stack *self, StackVersion version1, StackVersion version2)
Definition: stack.c:679
bool ts_stack_is_halted(const Stack *self, StackVersion version)
Definition: stack.c:720
unsigned ts_stack_error_cost(const Stack *self, StackVersion version)
Definition: stack.c:466
bool ts_stack_is_paused(const Stack *self, StackVersion version)
Definition: stack.c:724
static const unsigned MAX_VERSION_COUNT
Definition: parser.c:74
static void ts_parser__handle_error(TSParser *self, StackVersion version, Subtree lookahead)
Definition: parser.c:1290

References ErrorStatus::cost, ErrorComparisonNone, ErrorComparisonPreferLeft, ErrorComparisonPreferRight, ErrorComparisonTakeLeft, ErrorComparisonTakeRight, i, ErrorStatus::is_in_error, LOG, LOG_STACK, MAX_VERSION_COUNT, n, ts_parser__compare_versions(), ts_parser__handle_error(), ts_parser__version_status(), ts_stack_error_cost(), ts_stack_is_halted(), ts_stack_is_paused(), ts_stack_merge(), ts_stack_remove_version(), ts_stack_resume(), ts_stack_swap_versions(), ts_stack_version_count(), and UINT_MAX.

Referenced by ts_parser_parse().

◆ ts_parser__do_all_potential_reductions()

static bool ts_parser__do_all_potential_reductions ( TSParser self,
StackVersion  starting_version,
TSSymbol  lookahead_symbol 
)
static

Definition at line 973 of file parser.c.

977  {
978  uint32_t initial_version_count = ts_stack_version_count(self->stack);
979 
980  bool can_shift_lookahead_symbol = false;
981  StackVersion version = starting_version;
982  for (unsigned i = 0; true; i++) {
983  uint32_t version_count = ts_stack_version_count(self->stack);
984  if (version >= version_count) break;
985 
986  bool merged = false;
987  for (StackVersion i = initial_version_count; i < version; i++) {
988  if (ts_stack_merge(self->stack, i, version)) {
989  merged = true;
990  break;
991  }
992  }
993  if (merged) continue;
994 
996  bool has_shift_action = false;
997  array_clear(&self->reduce_actions);
998 
999  TSSymbol first_symbol, end_symbol;
1000  if (lookahead_symbol != 0) {
1001  first_symbol = lookahead_symbol;
1002  end_symbol = lookahead_symbol + 1;
1003  } else {
1004  first_symbol = 1;
1005  end_symbol = self->language->token_count;
1006  }
1007 
1008  for (TSSymbol symbol = first_symbol; symbol < end_symbol; symbol++) {
1009  TableEntry entry;
1010  ts_language_table_entry(self->language, state, symbol, &entry);
1011  for (uint32_t i = 0; i < entry.action_count; i++) {
1012  TSParseAction action = entry.actions[i];
1013  switch (action.type) {
1016  if (!action.shift.extra && !action.shift.repetition) has_shift_action = true;
1017  break;
1019  if (action.reduce.child_count > 0)
1021  .symbol = action.reduce.symbol,
1022  .count = action.reduce.child_count,
1023  .dynamic_precedence = action.reduce.dynamic_precedence,
1024  .production_id = action.reduce.production_id,
1025  });
1026  break;
1027  default:
1028  break;
1029  }
1030  }
1031  }
1032 
1033  StackVersion reduction_version = STACK_VERSION_NONE;
1034  for (uint32_t i = 0; i < self->reduce_actions.size; i++) {
1035  ReduceAction action = self->reduce_actions.contents[i];
1036 
1037  reduction_version = ts_parser__reduce(
1038  self, version, action.symbol, action.count,
1039  action.dynamic_precedence, action.production_id,
1040  true, false
1041  );
1042  }
1043 
1044  if (has_shift_action) {
1045  can_shift_lookahead_symbol = true;
1046  } else if (reduction_version != STACK_VERSION_NONE && i < MAX_VERSION_COUNT) {
1047  ts_stack_renumber_version(self->stack, reduction_version, version);
1048  continue;
1049  } else if (lookahead_symbol != 0) {
1051  }
1052 
1053  if (version == starting_version) {
1054  version = version_count;
1055  } else {
1056  version++;
1057  }
1058  }
1059 
1060  return can_shift_lookahead_symbol;
1061 }
static char * version
Definition: acr.h:4
#define array_clear(self)
Definition: array.h:35
static void ts_reduce_action_set_add(ReduceActionSet *self, ReduceAction new_action)
Definition: reduce_action.h:20
ReduceActionSet reduce_actions
Definition: parser.c:91
Definition: zipcmp.c:77

References test-lz4-speed::action, array_clear, i, MAX_VERSION_COUNT, STACK_VERSION_NONE, ts_language_table_entry(), ts_parser__reduce(), ts_reduce_action_set_add(), ts_stack_merge(), ts_stack_remove_version(), ts_stack_renumber_version(), ts_stack_state(), ts_stack_version_count(), TSParseActionTypeRecover, TSParseActionTypeReduce, TSParseActionTypeShift, and version.

Referenced by ts_parser__handle_error().

◆ ts_parser__get_cached_token()

static Subtree ts_parser__get_cached_token ( TSParser self,
TSStateId  state,
size_t  position,
Subtree  last_external_token,
TableEntry table_entry 
)
static

Definition at line 577 of file parser.c.

583  {
584  TokenCache *cache = &self->token_cache;
585  if (
586  cache->token.ptr && cache->byte_index == position &&
587  ts_subtree_external_scanner_state_eq(cache->last_external_token, last_external_token)
588  ) {
589  ts_language_table_entry(self->language, state, ts_subtree_symbol(cache->token), table_entry);
590  if (ts_parser__can_reuse_first_leaf(self, state, cache->token, table_entry)) {
591  ts_subtree_retain(cache->token);
592  return cache->token;
593  }
594  }
595  return NULL_SUBTREE;
596 }
Subtree last_external_token
Definition: parser.c:82
Subtree token
Definition: parser.c:81
uint32_t byte_index
Definition: parser.c:83
bool ts_subtree_external_scanner_state_eq(Subtree self, Subtree other)
Definition: subtree.c:1027
static bool ts_parser__can_reuse_first_leaf(TSParser *self, TSStateId state, Subtree tree, TableEntry *table_entry)
Definition: parser.c:350

References TokenCache::byte_index, TokenCache::last_external_token, NULL_SUBTREE, Subtree::ptr, TokenCache::token, ts_language_table_entry(), ts_parser__can_reuse_first_leaf(), ts_subtree_external_scanner_state_eq(), ts_subtree_retain(), and ts_subtree_symbol().

Referenced by ts_parser__advance().

◆ ts_parser__handle_error()

static void ts_parser__handle_error ( TSParser self,
StackVersion  version,
Subtree  lookahead 
)
static

Definition at line 1290 of file parser.c.

1294  {
1295  uint32_t previous_version_count = ts_stack_version_count(self->stack);
1296 
1297  // Perform any reductions that can happen in this state, regardless of the lookahead. After
1298  // skipping one or more invalid tokens, the parser might find a token that would have allowed
1299  // a reduction to take place.
1301  uint32_t version_count = ts_stack_version_count(self->stack);
1302  Length position = ts_stack_position(self->stack, version);
1303 
1304  // Push a discontinuity onto the stack. Merge all of the stack versions that
1305  // were created in the previous step.
1306  bool did_insert_missing_token = false;
1307  for (StackVersion v = version; v < version_count;) {
1308  if (!did_insert_missing_token) {
1309  TSStateId state = ts_stack_state(self->stack, v);
1310  for (
1311  TSSymbol missing_symbol = 1;
1312  missing_symbol < self->language->token_count;
1313  missing_symbol++
1314  ) {
1315  TSStateId state_after_missing_symbol = ts_language_next_state(
1316  self->language, state, missing_symbol
1317  );
1318  if (state_after_missing_symbol == 0 || state_after_missing_symbol == state) {
1319  continue;
1320  }
1321 
1323  self->language,
1324  state_after_missing_symbol,
1325  ts_subtree_leaf_symbol(lookahead)
1326  )) {
1327  // In case the parser is currently outside of any included range, the lexer will
1328  // snap to the beginning of the next included range. The missing token's padding
1329  // must be assigned to position it within the next included range.
1330  ts_lexer_reset(&self->lexer, position);
1331  ts_lexer_mark_end(&self->lexer);
1332  Length padding = length_sub(self->lexer.token_end_position, position);
1333  uint32_t lookahead_bytes = ts_subtree_total_bytes(lookahead) + ts_subtree_lookahead_bytes(lookahead);
1334 
1335  StackVersion version_with_missing_tree = ts_stack_copy_version(self->stack, v);
1336  Subtree missing_tree = ts_subtree_new_missing_leaf(
1337  &self->tree_pool, missing_symbol,
1338  padding, lookahead_bytes,
1339  self->language
1340  );
1341  ts_stack_push(
1342  self->stack, version_with_missing_tree,
1343  missing_tree, false,
1344  state_after_missing_symbol
1345  );
1346 
1348  self, version_with_missing_tree,
1349  ts_subtree_leaf_symbol(lookahead)
1350  )) {
1351  LOG(
1352  "recover_with_missing symbol:%s, state:%u",
1353  SYM_NAME(missing_symbol),
1354  ts_stack_state(self->stack, version_with_missing_tree)
1355  );
1356  did_insert_missing_token = true;
1357  break;
1358  }
1359  }
1360  }
1361  }
1362 
1363  ts_stack_push(self->stack, v, NULL_SUBTREE, false, ERROR_STATE);
1364  v = (v == version) ? previous_version_count : v + 1;
1365  }
1366 
1367  for (unsigned i = previous_version_count; i < version_count; i++) {
1368  bool did_merge = ts_stack_merge(self->stack, version, previous_version_count);
1369  assert(did_merge);
1370  }
1371 
1373 
1374  // Begin recovery with the current lookahead node, rather than waiting for the
1375  // next turn of the parse loop. This ensures that the tree accounts for the the
1376  // current lookahead token's "lookahead bytes" value, which describes how far
1377  // the lexer needed to look ahead beyond the content of the token in order to
1378  // recognize it.
1379  if (ts_subtree_child_count(lookahead) > 0) {
1380  ts_parser__breakdown_lookahead(self, &lookahead, ERROR_STATE, &self->reusable_node);
1381  }
1382  ts_parser__recover(self, version, lookahead);
1383 
1384  LOG_STACK();
1385 }
const char * v
Definition: dsignal.c:12
static bool ts_language_has_reduce_action(const TSLanguage *self, TSStateId state, TSSymbol symbol)
Definition: language.h:57
static Length length_sub(Length len1, Length len2)
Definition: length.h:32
void ts_lexer_reset(Lexer *self, Length position)
Definition: lexer.c:316
void ts_lexer_mark_end(Lexer *self)
Definition: lexer.c:363
Length token_end_position
Definition: lexer.h:17
Lexer lexer
Definition: parser.c:87
void ts_stack_record_summary(Stack *self, StackVersion version, unsigned max_depth)
Definition: stack.c:596
StackVersion ts_stack_copy_version(Stack *self, StackVersion version)
Definition: stack.c:669
Subtree ts_subtree_new_missing_leaf(SubtreePool *pool, TSSymbol symbol, Length padding, uint32_t lookahead_bytes, const TSLanguage *language)
Definition: subtree.c:558
static uint32_t ts_subtree_total_bytes(Subtree self)
Definition: subtree.h:278
static uint32_t ts_subtree_lookahead_bytes(Subtree self)
Definition: subtree.h:221
static const unsigned MAX_SUMMARY_DEPTH
Definition: parser.c:76
static bool ts_parser__do_all_potential_reductions(TSParser *self, StackVersion starting_version, TSSymbol lookahead_symbol)
Definition: parser.c:973

References assert(), ERROR_STATE, i, length_sub(), LOG, LOG_STACK, MAX_SUMMARY_DEPTH, NULL_SUBTREE, SYM_NAME, ts_language_has_reduce_action(), ts_language_next_state(), ts_lexer_mark_end(), ts_lexer_reset(), ts_parser__breakdown_lookahead(), ts_parser__do_all_potential_reductions(), ts_parser__recover(), ts_stack_copy_version(), ts_stack_merge(), ts_stack_position(), ts_stack_push(), ts_stack_record_summary(), ts_stack_state(), ts_stack_version_count(), ts_subtree_child_count(), ts_subtree_leaf_symbol(), ts_subtree_lookahead_bytes(), ts_subtree_new_missing_leaf(), ts_subtree_total_bytes(), v, and version.

Referenced by ts_parser__condense_stack().

◆ ts_parser__has_included_range_difference()

static bool ts_parser__has_included_range_difference ( const TSParser self,
uint32_t  start_position,
uint32_t  end_position 
)
static

Definition at line 614 of file parser.c.

618  {
622  start_position,
623  end_position
624  );
625 }
bool ts_range_array_intersects(const TSRangeArray *self, unsigned start_index, uint32_t start_byte, uint32_t end_byte)
unsigned included_range_difference_index
Definition: parser.c:107
TSRangeArray included_range_differences
Definition: parser.c:106

References ts_range_array_intersects().

Referenced by ts_parser__reuse_node().

◆ ts_parser__lex()

static Subtree ts_parser__lex ( TSParser self,
StackVersion  version,
TSStateId  parse_state 
)
static

Definition at line 385 of file parser.c.

389  {
390  TSLexMode lex_mode = self->language->lex_modes[parse_state];
391  if (lex_mode.lex_state == (uint16_t)-1) {
392  LOG("no_lookahead_after_non_terminal_extra");
393  return NULL_SUBTREE;
394  }
395 
396  Length start_position = ts_stack_position(self->stack, version);
397  Subtree external_token = ts_stack_last_external_token(self->stack, version);
398  const bool *valid_external_tokens = ts_language_enabled_external_tokens(
399  self->language,
400  lex_mode.external_lex_state
401  );
402 
403  bool found_external_token = false;
404  bool error_mode = parse_state == ERROR_STATE;
405  bool skipped_error = false;
406  bool called_get_column = false;
407  int32_t first_error_character = 0;
408  Length error_start_position = length_zero();
409  Length error_end_position = length_zero();
410  uint32_t lookahead_end_byte = 0;
411  ts_lexer_reset(&self->lexer, start_position);
412 
413  for (;;) {
414  Length current_position = self->lexer.current_position;
415 
416  if (valid_external_tokens) {
417  LOG(
418  "lex_external state:%d, row:%u, column:%u",
419  lex_mode.external_lex_state,
420  current_position.extent.row,
421  current_position.extent.column
422  );
423  ts_lexer_start(&self->lexer);
424  ts_parser__restore_external_scanner(self, external_token);
425  bool found_token = self->language->external_scanner.scan(
427  &self->lexer.data,
428  valid_external_tokens
429  );
430  ts_lexer_finish(&self->lexer, &lookahead_end_byte);
431 
432  // Zero-length external tokens are generally allowed, but they're not
433  // allowed right after a syntax error. This is for two reasons:
434  // 1. After a syntax error, the lexer is looking for any possible token,
435  // as opposed to the specific set of tokens that are valid in some
436  // parse state. In this situation, it's very easy for an external
437  // scanner to produce unwanted zero-length tokens.
438  // 2. The parser sometimes inserts *missing* tokens to recover from
439  // errors. These tokens are also zero-length. If we allow more
440  // zero-length tokens to be created after missing tokens, it
441  // can lead to infinite loops. Forbidding zero-length tokens
442  // right at the point of error recovery is a conservative strategy
443  // for preventing this kind of infinite loop.
444  if (found_token && (
445  self->lexer.token_end_position.bytes > current_position.bytes ||
446  (!error_mode && ts_stack_has_advanced_since_error(self->stack, version))
447  )) {
448  found_external_token = true;
449  called_get_column = self->lexer.did_get_column;
450  break;
451  }
452 
453  ts_lexer_reset(&self->lexer, current_position);
454  }
455 
456  LOG(
457  "lex_internal state:%d, row:%u, column:%u",
458  lex_mode.lex_state,
459  current_position.extent.row,
460  current_position.extent.column
461  );
462  ts_lexer_start(&self->lexer);
463  bool found_token = self->language->lex_fn(&self->lexer.data, lex_mode.lex_state);
464  ts_lexer_finish(&self->lexer, &lookahead_end_byte);
465  if (found_token) break;
466 
467  if (!error_mode) {
468  error_mode = true;
469  lex_mode = self->language->lex_modes[ERROR_STATE];
470  valid_external_tokens = ts_language_enabled_external_tokens(
471  self->language,
472  lex_mode.external_lex_state
473  );
474  ts_lexer_reset(&self->lexer, start_position);
475  continue;
476  }
477 
478  if (!skipped_error) {
479  LOG("skip_unrecognized_character");
480  skipped_error = true;
481  error_start_position = self->lexer.token_start_position;
482  error_end_position = self->lexer.token_start_position;
483  first_error_character = self->lexer.data.lookahead;
484  }
485 
486  if (self->lexer.current_position.bytes == error_end_position.bytes) {
487  if (self->lexer.data.eof(&self->lexer.data)) {
488  self->lexer.data.result_symbol = ts_builtin_sym_error;
489  break;
490  }
491  self->lexer.data.advance(&self->lexer.data, false);
492  }
493 
494  error_end_position = self->lexer.current_position;
495  }
496 
497  Subtree result;
498  if (skipped_error) {
499  Length padding = length_sub(error_start_position, start_position);
500  Length size = length_sub(error_end_position, error_start_position);
501  uint32_t lookahead_bytes = lookahead_end_byte - error_end_position.bytes;
502  result = ts_subtree_new_error(
503  &self->tree_pool,
504  first_error_character,
505  padding,
506  size,
507  lookahead_bytes,
508  parse_state,
509  self->language
510  );
511 
513  SYM_NAME(ts_subtree_symbol(result)),
515  );
516  } else {
518  self->lexer.token_start_position = self->lexer.token_end_position;
519  }
520 
521  bool is_keyword = false;
522  TSSymbol symbol = self->lexer.data.result_symbol;
523  Length padding = length_sub(self->lexer.token_start_position, start_position);
525  uint32_t lookahead_bytes = lookahead_end_byte - self->lexer.token_end_position.bytes;
526 
527  if (found_external_token) {
528  symbol = self->language->external_scanner.symbol_map[symbol];
529  } else if (symbol == self->language->keyword_capture_token && symbol != 0) {
530  uint32_t end_byte = self->lexer.token_end_position.bytes;
532  ts_lexer_start(&self->lexer);
533  if (
534  self->language->keyword_lex_fn(&self->lexer.data, 0) &&
535  self->lexer.token_end_position.bytes == end_byte &&
536  ts_language_has_actions(self->language, parse_state, self->lexer.data.result_symbol)
537  ) {
538  is_keyword = true;
539  symbol = self->lexer.data.result_symbol;
540  }
541  }
542 
543  result = ts_subtree_new_leaf(
544  &self->tree_pool,
545  symbol,
546  padding,
547  size,
548  lookahead_bytes,
549  parse_state,
550  found_external_token,
551  called_get_column,
552  is_keyword,
553  self->language
554  );
555 
556  if (found_external_token) {
557  unsigned length = self->language->external_scanner.serialize(
559  self->lexer.debug_buffer
560  );
562  &((SubtreeHeapData *)result.ptr)->external_scanner_state,
563  self->lexer.debug_buffer,
564  length
565  );
566  }
567 
569  SYM_NAME(ts_subtree_symbol(result)),
571  );
572  }
573 
574  return result;
575 }
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void static offset struct stat static buf void long static basep static whence static length const void static len static semflg const void static shmflg const struct timespec struct timespec static rem const char static group const void length
Definition: sflib.h:133
static bool ts_language_has_actions(const TSLanguage *self, TSStateId state, TSSymbol symbol)
Definition: language.h:96
static const bool * ts_language_enabled_external_tokens(const TSLanguage *self, unsigned external_scanner_state)
Definition: language.h:216
static Length length_zero(void)
Definition: length.h:39
void ts_lexer_finish(Lexer *self, uint32_t *lookahead_end_byte)
Definition: lexer.c:337
void ts_lexer_start(Lexer *self)
Definition: lexer.c:322
#define ts_builtin_sym_error
Definition: parser.h:12
int int32_t
Definition: sftypes.h:33
TSPoint extent
Definition: length.h:11
TSLexer data
Definition: lexer.h:14
Length token_start_position
Definition: lexer.h:16
char debug_buffer[TREE_SITTER_SERIALIZATION_BUFFER_SIZE]
Definition: lexer.h:31
Length current_position
Definition: lexer.h:15
bool(* keyword_lex_fn)(TSLexer *, TSStateId)
Definition: parser.h:115
bool(* eof)(const TSLexer *)
Definition: parser.h:50
TSSymbol result_symbol
Definition: parser.h:45
void * external_scanner_payload
Definition: parser.c:98
uint32_t row
Definition: api.h:56
uint32_t column
Definition: api.h:57
bool ts_stack_has_advanced_since_error(const Stack *self, StackVersion version)
Definition: stack.c:619
Subtree ts_subtree_new_leaf(SubtreePool *pool, TSSymbol symbol, Length padding, Length size, uint32_t lookahead_bytes, TSStateId parse_state, bool has_external_tokens, bool depends_on_column, bool is_keyword, const TSLanguage *language)
Definition: subtree.c:167
Subtree ts_subtree_new_error(SubtreePool *pool, int32_t lookahead_char, Length padding, Length size, uint32_t bytes_scanned, TSStateId parse_state, const TSLanguage *language)
Definition: subtree.c:244
void ts_external_scanner_state_init(ExternalScannerState *self, const char *data, unsigned length)
Definition: subtree.c:28
static Length ts_subtree_total_size(Subtree self)
Definition: subtree.h:274
#define LOG_LOOKAHEAD(symbol_name, size)
Definition: parser.c:28
static void ts_parser__restore_external_scanner(TSParser *self, Subtree external_token)
Definition: parser.c:335

References bytes, Length::bytes, TSPoint::column, ERROR_STATE, Length::extent, TSLexMode::external_lex_state, length, length_sub(), length_zero(), TSLexMode::lex_state, LOG, LOG_LOOKAHEAD, NULL_SUBTREE, Subtree::ptr, TSPoint::row, SYM_NAME, ts_builtin_sym_error, ts_external_scanner_state_init(), ts_language_enabled_external_tokens(), ts_language_has_actions(), ts_lexer_finish(), ts_lexer_reset(), ts_lexer_start(), ts_parser__restore_external_scanner(), ts_stack_has_advanced_since_error(), ts_stack_last_external_token(), ts_stack_position(), ts_subtree_new_error(), ts_subtree_new_leaf(), ts_subtree_symbol(), and ts_subtree_total_size().

Referenced by ts_parser__advance().

◆ ts_parser__log()

static void ts_parser__log ( TSParser self)
static

Definition at line 151 of file parser.c.

151  {
152  if (self->lexer.logger.log) {
153  self->lexer.logger.log(
154  self->lexer.logger.payload,
156  self->lexer.debug_buffer
157  );
158  }
159 
160  if (self->dot_graph_file) {
161  fprintf(self->dot_graph_file, "graph {\nlabel=\"");
162  for (char *c = &self->lexer.debug_buffer[0]; *c != 0; c++) {
163  if (*c == '"') fputc('\\', self->dot_graph_file);
164  fputc(*c, self->dot_graph_file);
165  }
166  fprintf(self->dot_graph_file, "\"\n}\n\n");
167  }
168 }
@ TSLogTypeParse
Definition: api.h:74
#define c(i)
Definition: sha256.c:43
TSLogger logger
Definition: lexer.h:22
void * payload
Definition: api.h:79
void(* log)(void *payload, TSLogType, const char *)
Definition: api.h:80
FILE * dot_graph_file
Definition: parser.c:99

References c, and TSLogTypeParse.

◆ ts_parser__recover()

static void ts_parser__recover ( TSParser self,
StackVersion  version,
Subtree  lookahead 
)
static

Definition at line 1122 of file parser.c.

1126  {
1127  bool did_recover = false;
1128  unsigned previous_version_count = ts_stack_version_count(self->stack);
1129  Length position = ts_stack_position(self->stack, version);
1130  StackSummary *summary = ts_stack_get_summary(self->stack, version);
1131  unsigned node_count_since_error = ts_stack_node_count_since_error(self->stack, version);
1132  unsigned current_error_cost = ts_stack_error_cost(self->stack, version);
1133 
1134  // When the parser is in the error state, there are two strategies for recovering with a
1135  // given lookahead token:
1136  // 1. Find a previous state on the stack in which that lookahead token would be valid. Then,
1137  // create a new stack version that is in that state again. This entails popping all of the
1138  // subtrees that have been pushed onto the stack since that previous state, and wrapping
1139  // them in an ERROR node.
1140  // 2. Wrap the lookahead token in an ERROR node, push that ERROR node onto the stack, and
1141  // move on to the next lookahead token, remaining in the error state.
1142  //
1143  // First, try the strategy 1. Upon entering the error state, the parser recorded a summary
1144  // of the previous parse states and their depths. Look at each state in the summary, to see
1145  // if the current lookahead token would be valid in that state.
1146  if (summary && !ts_subtree_is_error(lookahead)) {
1147  for (unsigned i = 0; i < summary->size; i++) {
1148  StackSummaryEntry entry = summary->contents[i];
1149 
1150  if (entry.state == ERROR_STATE) continue;
1151  if (entry.position.bytes == position.bytes) continue;
1152  unsigned depth = entry.depth;
1153  if (node_count_since_error > 0) depth++;
1154 
1155  // Do not recover in ways that create redundant stack versions.
1156  bool would_merge = false;
1157  for (unsigned j = 0; j < previous_version_count; j++) {
1158  if (
1159  ts_stack_state(self->stack, j) == entry.state &&
1160  ts_stack_position(self->stack, j).bytes == position.bytes
1161  ) {
1162  would_merge = true;
1163  break;
1164  }
1165  }
1166  if (would_merge) continue;
1167 
1168  // Do not recover if the result would clearly be worse than some existing stack version.
1169  unsigned new_cost =
1170  current_error_cost +
1172  (position.bytes - entry.position.bytes) * ERROR_COST_PER_SKIPPED_CHAR +
1173  (position.extent.row - entry.position.extent.row) * ERROR_COST_PER_SKIPPED_LINE;
1174  if (ts_parser__better_version_exists(self, version, false, new_cost)) break;
1175 
1176  // If the current lookahead token is valid in some previous state, recover to that state.
1177  // Then stop looking for further recoveries.
1178  if (ts_language_has_actions(self->language, entry.state, ts_subtree_symbol(lookahead))) {
1179  if (ts_parser__recover_to_state(self, version, depth, entry.state)) {
1180  did_recover = true;
1181  LOG("recover_to_previous state:%u, depth:%u", entry.state, depth);
1182  LOG_STACK();
1183  break;
1184  }
1185  }
1186  }
1187  }
1188 
1189  // In the process of attempting to recover, some stack versions may have been created
1190  // and subsequently halted. Remove those versions.
1191  for (unsigned i = previous_version_count; i < ts_stack_version_count(self->stack); i++) {
1192  if (!ts_stack_is_active(self->stack, i)) {
1193  ts_stack_remove_version(self->stack, i--);
1194  }
1195  }
1196 
1197  // If strategy 1 succeeded, a new stack version will have been created which is able to handle
1198  // the current lookahead token. Now, in addition, try strategy 2 described above: skip the
1199  // current lookahead token by wrapping it in an ERROR node.
1200 
1201  // Don't pursue this additional strategy if there are already too many stack versions.
1202  if (did_recover && ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) {
1203  ts_stack_halt(self->stack, version);
1204  ts_subtree_release(&self->tree_pool, lookahead);
1205  return;
1206  }
1207 
1208  // If the parser is still in the error state at the end of the file, just wrap everything
1209  // in an ERROR node and terminate.
1210  if (ts_subtree_is_eof(lookahead)) {
1211  LOG("recover_eof");
1212  SubtreeArray children = array_new();
1213  Subtree parent = ts_subtree_new_error_node(&children, false, self->language);
1214  ts_stack_push(self->stack, version, parent, false, 1);
1215  ts_parser__accept(self, version, lookahead);
1216  return;
1217  }
1218 
1219  // Do not recover if the result would clearly be worse than some existing stack version.
1220  unsigned new_cost =
1221  current_error_cost + ERROR_COST_PER_SKIPPED_TREE +
1224  if (ts_parser__better_version_exists(self, version, false, new_cost)) {
1225  ts_stack_halt(self->stack, version);
1226  ts_subtree_release(&self->tree_pool, lookahead);
1227  return;
1228  }
1229 
1230  // If the current lookahead token is an extra token, mark it as extra. This means it won't
1231  // be counted in error cost calculations.
1232  unsigned n;
1233  const TSParseAction *actions = ts_language_actions(self->language, 1, ts_subtree_symbol(lookahead), &n);
1234  if (n > 0 && actions[n - 1].type == TSParseActionTypeShift && actions[n - 1].shift.extra) {
1235  MutableSubtree mutable_lookahead = ts_subtree_make_mut(&self->tree_pool, lookahead);
1236  ts_subtree_set_extra(&mutable_lookahead, true);
1237  lookahead = ts_subtree_from_mut(mutable_lookahead);
1238  }
1239 
1240  // Wrap the lookahead token in an ERROR.
1241  LOG("skip_token symbol:%s", TREE_NAME(lookahead));
1242  SubtreeArray children = array_new();
1243  array_reserve(&children, 1);
1244  array_push(&children, lookahead);
1245  MutableSubtree error_repeat = ts_subtree_new_node(
1247  &children,
1248  0,
1249  self->language
1250  );
1251 
1252  // If other tokens have already been skipped, so there is already an ERROR at the top of the
1253  // stack, then pop that ERROR off the stack and wrap the two ERRORs together into one larger
1254  // ERROR.
1255  if (node_count_since_error > 0) {
1256  StackSliceArray pop = ts_stack_pop_count(self->stack, version, 1);
1257 
1258  // TODO: Figure out how to make this condition occur.
1259  // See https://github.com/atom/atom/issues/18450#issuecomment-439579778
1260  // If multiple stack versions have merged at this point, just pick one of the errors
1261  // arbitrarily and discard the rest.
1262  if (pop.size > 1) {
1263  for (unsigned i = 1; i < pop.size; i++) {
1264  ts_subtree_array_delete(&self->tree_pool, &pop.contents[i].subtrees);
1265  }
1266  while (ts_stack_version_count(self->stack) > pop.contents[0].version + 1) {
1267  ts_stack_remove_version(self->stack, pop.contents[0].version + 1);
1268  }
1269  }
1270 
1271  ts_stack_renumber_version(self->stack, pop.contents[0].version, version);
1272  array_push(&pop.contents[0].subtrees, ts_subtree_from_mut(error_repeat));
1273  error_repeat = ts_subtree_new_node(
1275  &pop.contents[0].subtrees,
1276  0,
1277  self->language
1278  );
1279  }
1280 
1281  // Push the new ERROR onto the stack.
1282  ts_stack_push(self->stack, version, ts_subtree_from_mut(error_repeat), false, ERROR_STATE);
1283  if (ts_subtree_has_external_tokens(lookahead)) {
1285  self->stack, version, ts_subtree_last_external_token(lookahead)
1286  );
1287  }
1288 }
static RZ_NULLABLE RzILOpBitVector * shift(RzILOpBitVector *val, RZ_NULLABLE RzILOpBool **carry_out, arm_shifter type, RZ_OWN RzILOpBitVector *dist)
Definition: arm_il32.c:190
#define array_new()
Definition: array.h:25
#define array_push(self, element)
Definition: array.h:43
#define array_reserve(self, new_capacity)
Definition: array.h:37
#define ERROR_COST_PER_SKIPPED_TREE
Definition: error_costs.h:7
#define ERROR_COST_PER_SKIPPED_LINE
Definition: error_costs.h:8
#define ERROR_COST_PER_SKIPPED_CHAR
Definition: error_costs.h:9
static const TSParseAction * ts_language_actions(const TSLanguage *self, TSStateId state, TSSymbol symbol, uint32_t *count)
Definition: language.h:45
#define ts_builtin_sym_error_repeat
Definition: language.h:11
int type
Definition: mipsasm.c:17
StackSummary * ts_stack_get_summary(Stack *self, StackVersion version)
Definition: stack.c:611
StackSliceArray ts_stack_pop_count(Stack *self, StackVersion version, uint32_t count)
Definition: stack.c:507
void ts_stack_set_last_external_token(Stack *self, StackVersion version, Subtree token)
Definition: stack.c:459
Subtree ts_subtree_new_error_node(SubtreeArray *children, bool extra, const TSLanguage *language)
Definition: subtree.c:542
void ts_subtree_array_delete(SubtreePool *pool, SubtreeArray *self)
Definition: subtree.c:90
Subtree ts_subtree_last_external_token(Subtree tree)
Definition: subtree.c:802
static bool ts_subtree_has_external_tokens(Subtree self)
Definition: subtree.h:330
static void ts_subtree_set_extra(MutableSubtree *self, bool is_extra)
Definition: subtree.h:236
static bool ts_parser__recover_to_state(TSParser *self, StackVersion version, unsigned depth, TSStateId goal_state)
Definition: parser.c:1063
static bool ts_parser__better_version_exists(TSParser *self, StackVersion version, bool is_in_error, unsigned cost)
Definition: parser.c:298
int summary
Definition: zipcmp.c:234

References array_new, array_push, array_reserve, Length::bytes, ERROR_COST_PER_SKIPPED_CHAR, ERROR_COST_PER_SKIPPED_LINE, ERROR_COST_PER_SKIPPED_TREE, ERROR_STATE, Length::extent, i, LOG, LOG_STACK, MAX_VERSION_COUNT, n, TSPoint::row, shift(), summary, TREE_NAME, ts_builtin_sym_error_repeat, ts_language_actions(), ts_language_has_actions(), ts_parser__accept(), ts_parser__better_version_exists(), ts_parser__recover_to_state(), ts_stack_error_cost(), ts_stack_get_summary(), ts_stack_halt(), ts_stack_is_active(), ts_stack_node_count_since_error(), ts_stack_pop_count(), ts_stack_position(), ts_stack_push(), ts_stack_remove_version(), ts_stack_renumber_version(), ts_stack_set_last_external_token(), ts_stack_state(), ts_stack_version_count(), ts_subtree_array_delete(), ts_subtree_from_mut(), ts_subtree_has_external_tokens(), ts_subtree_is_eof(), ts_subtree_is_error(), ts_subtree_last_external_token(), ts_subtree_make_mut(), ts_subtree_new_error_node(), ts_subtree_new_node(), ts_subtree_release(), ts_subtree_set_extra(), ts_subtree_symbol(), ts_subtree_total_bytes(), ts_subtree_total_size(), TSParseActionTypeShift, and type.

Referenced by ts_parser__advance(), and ts_parser__handle_error().

◆ ts_parser__recover_to_state()

static bool ts_parser__recover_to_state ( TSParser self,
StackVersion  version,
unsigned  depth,
TSStateId  goal_state 
)
static

Definition at line 1063 of file parser.c.

1068  {
1069  StackSliceArray pop = ts_stack_pop_count(self->stack, version, depth);
1070  StackVersion previous_version = STACK_VERSION_NONE;
1071 
1072  for (unsigned i = 0; i < pop.size; i++) {
1073  StackSlice slice = pop.contents[i];
1074 
1075  if (slice.version == previous_version) {
1076  ts_subtree_array_delete(&self->tree_pool, &slice.subtrees);
1077  array_erase(&pop, i--);
1078  continue;
1079  }
1080 
1081  if (ts_stack_state(self->stack, slice.version) != goal_state) {
1082  ts_stack_halt(self->stack, slice.version);
1083  ts_subtree_array_delete(&self->tree_pool, &slice.subtrees);
1084  array_erase(&pop, i--);
1085  continue;
1086  }
1087 
1088  SubtreeArray error_trees = ts_stack_pop_error(self->stack, slice.version);
1089  if (error_trees.size > 0) {
1090  assert(error_trees.size == 1);
1091  Subtree error_tree = error_trees.contents[0];
1092  uint32_t error_child_count = ts_subtree_child_count(error_tree);
1093  if (error_child_count > 0) {
1094  array_splice(&slice.subtrees, 0, 0, error_child_count, ts_subtree_children(error_tree));
1095  for (unsigned j = 0; j < error_child_count; j++) {
1096  ts_subtree_retain(slice.subtrees.contents[j]);
1097  }
1098  }
1099  ts_subtree_array_delete(&self->tree_pool, &error_trees);
1100  }
1101 
1103 
1104  if (slice.subtrees.size > 0) {
1105  Subtree error = ts_subtree_new_error_node(&slice.subtrees, true, self->language);
1106  ts_stack_push(self->stack, slice.version, error, false, goal_state);
1107  } else {
1108  array_delete(&slice.subtrees);
1109  }
1110 
1111  for (unsigned j = 0; j < self->trailing_extras.size; j++) {
1112  Subtree tree = self->trailing_extras.contents[j];
1113  ts_stack_push(self->stack, slice.version, tree, false, goal_state);
1114  }
1115 
1116  previous_version = slice.version;
1117  }
1118 
1119  return previous_version != STACK_VERSION_NONE;
1120 }
#define array_erase(self, index)
Definition: array.h:79
SubtreeArray trailing_extras
Definition: parser.c:93
SubtreeArray ts_stack_pop_error(Stack *self, StackVersion version)
Definition: stack.c:547
void ts_subtree_array_remove_trailing_extras(SubtreeArray *self, SubtreeArray *destination)
Definition: subtree.c:95
void error(const char *msg)
Definition: untgz.c:593

References array_delete, array_erase, array_splice, assert(), error(), i, STACK_VERSION_NONE, StackSlice::subtrees, ts_stack_halt(), ts_stack_pop_count(), ts_stack_pop_error(), ts_stack_push(), ts_stack_state(), ts_subtree_array_delete(), ts_subtree_array_remove_trailing_extras(), ts_subtree_child_count(), ts_subtree_children, ts_subtree_new_error_node(), ts_subtree_retain(), and StackSlice::version.

Referenced by ts_parser__recover().

◆ ts_parser__reduce()

static StackVersion ts_parser__reduce ( TSParser self,
StackVersion  version,
TSSymbol  symbol,
uint32_t  count,
int  dynamic_precedence,
uint16_t  production_id,
bool  is_fragile,
bool  end_of_non_terminal_extra 
)
static

Definition at line 805 of file parser.c.

814  {
815  uint32_t initial_version_count = ts_stack_version_count(self->stack);
816 
817  // Pop the given number of nodes from the given version of the parse stack.
818  // If stack versions have previously merged, then there may be more than one
819  // path back through the stack. For each path, create a new parent node to
820  // contain the popped children, and push it onto the stack in place of the
821  // children.
822  StackSliceArray pop = ts_stack_pop_count(self->stack, version, count);
823  uint32_t removed_version_count = 0;
824  for (uint32_t i = 0; i < pop.size; i++) {
825  StackSlice slice = pop.contents[i];
826  StackVersion slice_version = slice.version - removed_version_count;
827 
828  // This is where new versions are added to the parse stack. The versions
829  // will all be sorted and truncated at the end of the outer parsing loop.
830  // Allow the maximum version count to be temporarily exceeded, but only
831  // by a limited threshold.
832  if (slice_version > MAX_VERSION_COUNT + MAX_VERSION_COUNT_OVERFLOW) {
833  ts_stack_remove_version(self->stack, slice_version);
835  removed_version_count++;
836  while (i + 1 < pop.size) {
837  StackSlice next_slice = pop.contents[i + 1];
838  if (next_slice.version != slice.version) break;
839  ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees);
840  i++;
841  }
842  continue;
843  }
844 
845  // Extra tokens on top of the stack should not be included in this new parent
846  // node. They will be re-pushed onto the stack after the parent node is
847  // created and pushed.
848  SubtreeArray children = slice.subtrees;
850 
852  symbol, &children, production_id, self->language
853  );
854 
855  // This pop operation may have caused multiple stack versions to collapse
856  // into one, because they all diverged from a common state. In that case,
857  // choose one of the arrays of trees to be the parent node's children, and
858  // delete the rest of the tree arrays.
859  while (i + 1 < pop.size) {
860  StackSlice next_slice = pop.contents[i + 1];
861  if (next_slice.version != slice.version) break;
862  i++;
863 
864  SubtreeArray children = next_slice.subtrees;
866 
868  self,
869  ts_subtree_from_mut(parent),
870  &children
871  )) {
875  parent = ts_subtree_new_node(
876  symbol, &children, production_id, self->language
877  );
878  } else {
880  ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees);
881  }
882  }
883 
884  TSStateId state = ts_stack_state(self->stack, slice_version);
885  TSStateId next_state = ts_language_next_state(self->language, state, symbol);
886  if (end_of_non_terminal_extra && next_state == state) {
887  parent.ptr->extra = true;
888  }
889  if (is_fragile || pop.size > 1 || initial_version_count > 1) {
890  parent.ptr->fragile_left = true;
891  parent.ptr->fragile_right = true;
893  } else {
894  parent.ptr->parse_state = state;
895  }
896  parent.ptr->dynamic_precedence += dynamic_precedence;
897 
898  // Push the parent node onto the stack, along with any extra tokens that
899  // were previously on top of the stack.
900  ts_stack_push(self->stack, slice_version, ts_subtree_from_mut(parent), false, next_state);
901  for (uint32_t j = 0; j < self->trailing_extras.size; j++) {
902  ts_stack_push(self->stack, slice_version, self->trailing_extras.contents[j], false, next_state);
903  }
904 
905  for (StackVersion j = 0; j < slice_version; j++) {
906  if (j == version) continue;
907  if (ts_stack_merge(self->stack, j, slice_version)) {
908  removed_version_count++;
909  break;
910  }
911  }
912  }
913 
914  // Return the first new stack version that was created.
915  return ts_stack_version_count(self->stack) > initial_version_count
916  ? initial_version_count
918 }
#define array_swap(self, other)
Definition: array.h:87
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void count
Definition: sflib.h:98
int32_t dynamic_precedence
Definition: subtree.h:139
bool fragile_right
Definition: subtree.h:125
bool fragile_left
Definition: subtree.h:124
TSStateId parse_state
Definition: subtree.h:119
SubtreeArray trailing_extras2
Definition: parser.c:94
void ts_subtree_array_clear(SubtreePool *pool, SubtreeArray *self)
Definition: subtree.c:83
#define TS_TREE_STATE_NONE
Definition: subtree.h:18
static const unsigned MAX_VERSION_COUNT_OVERFLOW
Definition: parser.c:75
static bool ts_parser__select_children(TSParser *self, Subtree left, const SubtreeArray *children)
Definition: parser.c:757
SubtreeHeapData * ptr
Definition: subtree.h:164

References array_clear, array_swap, count, SubtreeHeapData::dynamic_precedence, SubtreeHeapData::extra, SubtreeHeapData::fragile_left, SubtreeHeapData::fragile_right, i, MAX_VERSION_COUNT, MAX_VERSION_COUNT_OVERFLOW, SubtreeHeapData::parse_state, MutableSubtree::ptr, STACK_VERSION_NONE, StackSlice::subtrees, ts_language_next_state(), ts_parser__select_children(), ts_stack_merge(), ts_stack_pop_count(), ts_stack_push(), ts_stack_remove_version(), ts_stack_state(), ts_stack_version_count(), ts_subtree_array_clear(), ts_subtree_array_delete(), ts_subtree_array_remove_trailing_extras(), ts_subtree_from_mut(), ts_subtree_new_node(), ts_subtree_release(), TS_TREE_STATE_NONE, and StackSlice::version.

Referenced by ts_parser__advance(), and ts_parser__do_all_potential_reductions().

◆ ts_parser__restore_external_scanner()

static void ts_parser__restore_external_scanner ( TSParser self,
Subtree  external_token 
)
static

Definition at line 335 of file parser.c.

338  {
339  if (external_token.ptr) {
340  self->language->external_scanner.deserialize(
343  external_token.ptr->external_scanner_state.length
344  );
345  } else {
346  self->language->external_scanner.deserialize(self->external_scanner_payload, NULL, 0);
347  }
348 }
uint32_t length
Definition: subtree.h:36
ExternalScannerState external_scanner_state
Definition: subtree.h:148
const char * ts_external_scanner_state_data(const ExternalScannerState *self)
Definition: subtree.c:53

References SubtreeHeapData::external_scanner_state, ExternalScannerState::length, NULL, Subtree::ptr, and ts_external_scanner_state_data().

Referenced by ts_parser__lex().

◆ ts_parser__reuse_node()

static Subtree ts_parser__reuse_node ( TSParser self,
StackVersion  version,
TSStateId state,
uint32_t  position,
Subtree  last_external_token,
TableEntry table_entry 
)
static

Definition at line 627 of file parser.c.

634  {
635  Subtree result;
636  while ((result = reusable_node_tree(&self->reusable_node)).ptr) {
637  uint32_t byte_offset = reusable_node_byte_offset(&self->reusable_node);
638  uint32_t end_byte_offset = byte_offset + ts_subtree_total_bytes(result);
639 
640  // Do not reuse an EOF node if the included ranges array has changes
641  // later on in the file.
642  if (ts_subtree_is_eof(result)) end_byte_offset = UINT32_MAX;
643 
644  if (byte_offset > position) {
645  LOG("before_reusable_node symbol:%s", TREE_NAME(result));
646  break;
647  }
648 
649  if (byte_offset < position) {
650  LOG("past_reusable_node symbol:%s", TREE_NAME(result));
651  if (end_byte_offset <= position || !reusable_node_descend(&self->reusable_node)) {
653  }
654  continue;
655  }
656 
658  LOG("reusable_node_has_different_external_scanner_state symbol:%s", TREE_NAME(result));
660  continue;
661  }
662 
663  const char *reason = NULL;
664  if (ts_subtree_has_changes(result)) {
665  reason = "has_changes";
666  } else if (ts_subtree_is_error(result)) {
667  reason = "is_error";
668  } else if (ts_subtree_missing(result)) {
669  reason = "is_missing";
670  } else if (ts_subtree_is_fragile(result)) {
671  reason = "is_fragile";
672  } else if (ts_parser__has_included_range_difference(self, byte_offset, end_byte_offset)) {
673  reason = "contains_different_included_range";
674  }
675 
676  if (reason) {
677  LOG("cant_reuse_node_%s tree:%s", reason, TREE_NAME(result));
678  if (!reusable_node_descend(&self->reusable_node)) {
681  *state = ts_stack_state(self->stack, version);
682  }
683  continue;
684  }
685 
686  TSSymbol leaf_symbol = ts_subtree_leaf_symbol(result);
687  ts_language_table_entry(self->language, *state, leaf_symbol, table_entry);
688  if (!ts_parser__can_reuse_first_leaf(self, *state, result, table_entry)) {
689  LOG(
690  "cant_reuse_node symbol:%s, first_leaf_symbol:%s",
691  TREE_NAME(result),
692  SYM_NAME(leaf_symbol)
693  );
695  break;
696  }
697 
698  LOG("reuse_node symbol:%s", TREE_NAME(result));
699  ts_subtree_retain(result);
700  return result;
701  }
702 
703  return NULL_SUBTREE;
704 }
static uint32_t reusable_node_byte_offset(ReusableNode *self)
Definition: reusable_node.h:29
static void reusable_node_advance_past_leaf(ReusableNode *self)
Definition: reusable_node.h:76
#define UINT32_MAX
Subtree last_external_token
Definition: reusable_node.h:11
static bool ts_subtree_has_changes(Subtree self)
Definition: subtree.h:217
static bool ts_subtree_missing(Subtree self)
Definition: subtree.h:218
static bool ts_subtree_is_fragile(Subtree self)
Definition: subtree.h:338
static bool ts_parser__has_included_range_difference(const TSParser *self, uint32_t start_position, uint32_t end_position)
Definition: parser.c:614

References LOG, NULL, NULL_SUBTREE, reusable_node_advance(), reusable_node_advance_past_leaf(), reusable_node_byte_offset(), reusable_node_descend(), reusable_node_tree(), SYM_NAME, TREE_NAME, ts_language_table_entry(), ts_parser__breakdown_top_of_stack(), ts_parser__can_reuse_first_leaf(), ts_parser__has_included_range_difference(), ts_stack_state(), ts_subtree_external_scanner_state_eq(), ts_subtree_has_changes(), ts_subtree_is_eof(), ts_subtree_is_error(), ts_subtree_is_fragile(), ts_subtree_leaf_symbol(), ts_subtree_missing(), ts_subtree_retain(), ts_subtree_total_bytes(), and UINT32_MAX.

Referenced by ts_parser__advance().

◆ ts_parser__select_children()

static bool ts_parser__select_children ( TSParser self,
Subtree  left,
const SubtreeArray *  children 
)
static

Definition at line 757 of file parser.c.

761  {
762  array_assign(&self->scratch_trees, children);
763 
764  // Create a temporary subtree using the scratch trees array. This node does
765  // not perform any allocation except for possibly growing the array to make
766  // room for its own heap data. The scratch tree is never explicitly released,
767  // so the same 'scratch trees' array can be reused again later.
768  MutableSubtree scratch_tree = ts_subtree_new_node(
769  ts_subtree_symbol(left),
770  &self->scratch_trees,
771  0,
772  self->language
773  );
774 
775  return ts_parser__select_tree(
776  self,
777  left,
778  ts_subtree_from_mut(scratch_tree)
779  );
780 }
#define array_assign(self, other)
Definition: array.h:84
SubtreeArray scratch_trees
Definition: parser.c:95

References array_assign, ts_parser__select_tree(), ts_subtree_from_mut(), ts_subtree_new_node(), and ts_subtree_symbol().

Referenced by ts_parser__reduce().

◆ ts_parser__select_tree()

static bool ts_parser__select_tree ( TSParser self,
Subtree  left,
Subtree  right 
)
static

Definition at line 710 of file parser.c.

710  {
711  if (!left.ptr) return true;
712  if (!right.ptr) return false;
713 
714  if (ts_subtree_error_cost(right) < ts_subtree_error_cost(left)) {
715  LOG("select_smaller_error symbol:%s, over_symbol:%s", TREE_NAME(right), TREE_NAME(left));
716  return true;
717  }
718 
719  if (ts_subtree_error_cost(left) < ts_subtree_error_cost(right)) {
720  LOG("select_smaller_error symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right));
721  return false;
722  }
723 
725  LOG("select_higher_precedence symbol:%s, prec:%u, over_symbol:%s, other_prec:%u",
726  TREE_NAME(right), ts_subtree_dynamic_precedence(right), TREE_NAME(left),
728  return true;
729  }
730 
732  LOG("select_higher_precedence symbol:%s, prec:%u, over_symbol:%s, other_prec:%u",
735  return false;
736  }
737 
738  if (ts_subtree_error_cost(left) > 0) return true;
739 
740  int comparison = ts_subtree_compare(left, right);
741  switch (comparison) {
742  case -1:
743  LOG("select_earlier symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right));
744  return false;
745  break;
746  case 1:
747  LOG("select_earlier symbol:%s, over_symbol:%s", TREE_NAME(right), TREE_NAME(left));
748  return true;
749  default:
750  LOG("select_existing symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right));
751  return false;
752  }
753 }
int ts_subtree_compare(Subtree left, Subtree right)
Definition: subtree.c:615
static int32_t ts_subtree_dynamic_precedence(Subtree self)
Definition: subtree.h:310

References LOG, Subtree::ptr, TREE_NAME, ts_subtree_compare(), ts_subtree_dynamic_precedence(), and ts_subtree_error_cost().

Referenced by ts_parser__accept(), and ts_parser__select_children().

◆ ts_parser__set_cached_token()

static void ts_parser__set_cached_token ( TSParser self,
size_t  byte_index,
Subtree  last_external_token,
Subtree  token 
)
static

Definition at line 598 of file parser.c.

603  {
604  TokenCache *cache = &self->token_cache;
605  if (token.ptr) ts_subtree_retain(token);
606  if (last_external_token.ptr) ts_subtree_retain(last_external_token);
607  if (cache->token.ptr) ts_subtree_release(&self->tree_pool, cache->token);
609  cache->token = token;
610  cache->byte_index = byte_index;
611  cache->last_external_token = last_external_token;
612 }

References TokenCache::byte_index, TokenCache::last_external_token, Subtree::ptr, TokenCache::token, ts_subtree_release(), and ts_subtree_retain().

Referenced by ts_parser__advance(), ts_parser_delete(), ts_parser_new(), and ts_parser_reset().

◆ ts_parser__shift()

static void ts_parser__shift ( TSParser self,
StackVersion  version,
TSStateId  state,
Subtree  lookahead,
bool  extra 
)
static

Definition at line 782 of file parser.c.

788  {
789  bool is_leaf = ts_subtree_child_count(lookahead) == 0;
790  Subtree subtree_to_push = lookahead;
791  if (extra != ts_subtree_extra(lookahead) && is_leaf) {
792  MutableSubtree result = ts_subtree_make_mut(&self->tree_pool, lookahead);
793  ts_subtree_set_extra(&result, extra);
794  subtree_to_push = ts_subtree_from_mut(result);
795  }
796 
797  ts_stack_push(self->stack, version, subtree_to_push, !is_leaf, state);
798  if (ts_subtree_has_external_tokens(subtree_to_push)) {
800  self->stack, version, ts_subtree_last_external_token(subtree_to_push)
801  );
802  }
803 }

References ts_stack_push(), ts_stack_set_last_external_token(), ts_subtree_child_count(), ts_subtree_extra(), ts_subtree_from_mut(), ts_subtree_has_external_tokens(), ts_subtree_last_external_token(), ts_subtree_make_mut(), and ts_subtree_set_extra().

Referenced by ts_parser__advance().

◆ ts_parser__version_status()

static ErrorStatus ts_parser__version_status ( TSParser self,
StackVersion  version 
)
static

Definition at line 283 of file parser.c.

286  {
287  unsigned cost = ts_stack_error_cost(self->stack, version);
288  bool is_paused = ts_stack_is_paused(self->stack, version);
289  if (is_paused) cost += ERROR_COST_PER_SKIPPED_TREE;
290  return (ErrorStatus) {
291  .cost = cost,
292  .node_count = ts_stack_node_count_since_error(self->stack, version),
293  .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version),
294  .is_in_error = is_paused || ts_stack_state(self->stack, version) == ERROR_STATE
295  };
296 }

References ERROR_COST_PER_SKIPPED_TREE, ERROR_STATE, ts_stack_dynamic_precedence(), ts_stack_error_cost(), ts_stack_is_paused(), ts_stack_node_count_since_error(), and ts_stack_state().

Referenced by ts_parser__better_version_exists(), and ts_parser__condense_stack().

◆ ts_parser_cancellation_flag()

const size_t* ts_parser_cancellation_flag ( const TSParser self)

Get the parser's current cancellation flag pointer.

Definition at line 1795 of file parser.c.

1795  {
1796  return (const size_t *)self->cancellation_flag;
1797 }

◆ ts_parser_delete()

void ts_parser_delete ( TSParser parser)

Delete the parser, freeing all of the memory that it used.

Definition at line 1725 of file parser.c.

1725  {
1726  if (!self) return;
1727 
1729  ts_stack_delete(self->stack);
1730  if (self->reduce_actions.contents) {
1731  array_delete(&self->reduce_actions);
1732  }
1733  if (self->included_range_differences.contents) {
1735  }
1736  if (self->old_tree.ptr) {
1737  ts_subtree_release(&self->tree_pool, self->old_tree);
1738  self->old_tree = NULL_SUBTREE;
1739  }
1740  ts_lexer_delete(&self->lexer);
1744  array_delete(&self->trailing_extras);
1746  array_delete(&self->scratch_trees);
1747  ts_free(self);
1748 }
#define ts_free
Definition: alloc.h:30
void ts_lexer_delete(Lexer *self)
Definition: lexer.c:304
static void reusable_node_delete(ReusableNode *self)
Definition: reusable_node.h:35
Subtree old_tree
Definition: parser.c:105
void ts_stack_delete(Stack *self)
Definition: stack.c:424
void ts_subtree_pool_delete(SubtreePool *self)
Definition: subtree.c:129
bool ts_parser_set_language(TSParser *self, const TSLanguage *language)
Definition: parser.c:1754

References array_delete, NULL, NULL_SUBTREE, reusable_node_delete(), ts_free, ts_lexer_delete(), ts_parser__set_cached_token(), ts_parser_set_language(), ts_stack_delete(), ts_subtree_pool_delete(), and ts_subtree_release().

Referenced by core_cmd_tsrzcmd(), guess_data_free(), guess_next_autocmplt_token(), rz_type_parse_string_declaration_single(), rz_type_parse_string_single(), and type_parse_string().

◆ ts_parser_has_outstanding_parse()

static bool ts_parser_has_outstanding_parse ( TSParser self)
static

Definition at line 1695 of file parser.c.

1695  {
1696  return (
1697  ts_stack_state(self->stack, 0) != 1 ||
1698  ts_stack_node_count_since_error(self->stack, 0) != 0
1699  );
1700 }

References ts_stack_node_count_since_error(), and ts_stack_state().

Referenced by ts_parser_parse().

◆ ts_parser_included_ranges()

const TSRange* ts_parser_included_ranges ( const TSParser self,
uint32_t length 
)

Get the ranges of text that the parser will include when parsing.

The returned pointer is owned by the parser. The caller should not free it or write to it. The length of the array will be written to the given length pointer.

Definition at line 1819 of file parser.c.

1819  {
1820  return ts_lexer_included_ranges(&self->lexer, count);
1821 }
TSRange * ts_lexer_included_ranges(const Lexer *self, uint32_t *count)
Definition: lexer.c:395

References count, and ts_lexer_included_ranges().

◆ ts_parser_language()

const TSLanguage* ts_parser_language ( const TSParser self)

Get the parser's current language.

Definition at line 1750 of file parser.c.

1750  {
1751  return self->language;
1752 }

◆ ts_parser_logger()

TSLogger ts_parser_logger ( const TSParser self)

Get the parser's current logger.

Definition at line 1775 of file parser.c.

1775  {
1776  return self->lexer.logger;
1777 }

◆ ts_parser_new()

TSParser* ts_parser_new ( void  )

Create a new parser.

Definition at line 1704 of file parser.c.

1704  {
1705  TSParser *self = ts_calloc(1, sizeof(TSParser));
1706  ts_lexer_init(&self->lexer);
1707  array_init(&self->reduce_actions);
1708  array_reserve(&self->reduce_actions, 4);
1709  self->tree_pool = ts_subtree_pool_new(32);
1710  self->stack = ts_stack_new(&self->tree_pool);
1711  self->finished_tree = NULL_SUBTREE;
1712  self->reusable_node = reusable_node_new();
1713  self->dot_graph_file = NULL;
1714  self->cancellation_flag = NULL;
1715  self->timeout_duration = 0;
1716  self->end_clock = clock_null();
1717  self->operation_count = 0;
1718  self->old_tree = NULL_SUBTREE;
1719  self->included_range_differences = (TSRangeArray) array_new();
1720  self->included_range_difference_index = 0;
1722  return self;
1723 }
#define ts_calloc
Definition: alloc.h:24
#define array_init(self)
Definition: array.h:22
static TSClock clock_null(void)
Definition: clock.h:119
void ts_lexer_init(Lexer *self)
Definition: lexer.c:275
static ReusableNode reusable_node_new(void)
Definition: reusable_node.h:14
Stack * ts_stack_new(SubtreePool *subtree_pool)
Definition: stack.c:405
SubtreePool ts_subtree_pool_new(uint32_t capacity)
Definition: subtree.c:123

References array_init, array_new, array_reserve, clock_null(), NULL, NULL_SUBTREE, reusable_node_new(), ts_calloc, ts_lexer_init(), ts_parser__set_cached_token(), ts_stack_new(), and ts_subtree_pool_new().

Referenced by core_cmd_tsrzcmd(), guess_next_autocmplt_token(), rz_type_parse_string_declaration_single(), rz_type_parse_string_single(), ts_parser_new_wasm(), and type_parse_string().

◆ ts_parser_parse()

TSTree* ts_parser_parse ( TSParser self,
const TSTree old_tree,
TSInput  input 
)

Use the parser to parse some source code and create a syntax tree.

If you are parsing this document for the first time, pass NULL for the old_tree parameter. Otherwise, if you have already parsed an earlier version of this document and the document has since been edited, pass the previous syntax tree so that the unchanged parts of it can be reused. This will save time and memory. For this to work correctly, you must have already edited the old syntax tree using the ts_tree_edit function in a way that exactly matches the source code changes.

The TSInput parameter lets you specify how to read the text. It has the following three fields:

  1. read: A function to retrieve a chunk of text at a given byte offset and (row, column) position. The function should return a pointer to the text and write its length to the bytes_read pointer. The parser does not take ownership of this buffer; it just borrows it until it has finished reading it. The function should write a zero value to the bytes_read pointer to indicate the end of the document.
  2. payload: An arbitrary pointer that will be passed to each invocation of the read function.
  3. encoding: An indication of how the text is encoded. Either TSInputEncodingUTF8 or TSInputEncodingUTF16.

This function returns a syntax tree on success, and NULL on failure. There are three possible reasons for failure:

  1. The parser does not have a language assigned. Check for this using the ts_parser_language function.
  2. Parsing was cancelled due to a timeout that was set by an earlier call to the ts_parser_set_timeout_micros function. You can resume parsing from where the parser left out by calling ts_parser_parse again with the same arguments. Or you can start parsing from scratch by first calling ts_parser_reset.
  3. Parsing was cancelled using a cancellation flag that was set by an earlier call to ts_parser_set_cancellation_flag. You can resume parsing from where the parser left out by calling ts_parser_parse again with the same arguments.

Definition at line 1844 of file parser.c.

1848  {
1849  if (!self->language || !input.read) return NULL;
1850 
1851  ts_lexer_set_input(&self->lexer, input);
1852 
1854  self->included_range_difference_index = 0;
1855 
1856  if (ts_parser_has_outstanding_parse(self)) {
1857  LOG("resume_parsing");
1858  } else if (old_tree) {
1859  ts_subtree_retain(old_tree->root);
1860  self->old_tree = old_tree->root;
1862  old_tree->included_ranges, old_tree->included_range_count,
1865  );
1866  reusable_node_reset(&self->reusable_node, old_tree->root);
1867  LOG("parse_after_edit");
1868  LOG_TREE(self->old_tree);
1869  for (unsigned i = 0; i < self->included_range_differences.size; i++) {
1870  TSRange *range = &self->included_range_differences.contents[i];
1871  LOG("different_included_range %u - %u", range->start_byte, range->end_byte);
1872  }
1873  } else {
1875  LOG("new_parse");
1876  }
1877 
1878  self->operation_count = 0;
1879  if (self->timeout_duration) {
1880  self->end_clock = clock_after(clock_now(), self->timeout_duration);
1881  } else {
1882  self->end_clock = clock_null();
1883  }
1884 
1885  uint32_t position = 0, last_position = 0, version_count = 0;
1886  do {
1887  for (
1888  StackVersion version = 0;
1889  version_count = ts_stack_version_count(self->stack),
1890  version < version_count;
1891  version++
1892  ) {
1893  bool allow_node_reuse = version_count == 1;
1894  while (ts_stack_is_active(self->stack, version)) {
1895  LOG(
1896  "process version:%d, version_count:%u, state:%d, row:%u, col:%u",
1897  version,
1899  ts_stack_state(self->stack, version),
1902  );
1903 
1904  if (!ts_parser__advance(self, version, allow_node_reuse)) return NULL;
1905  LOG_STACK();
1906 
1907  position = ts_stack_position(self->stack, version).bytes;
1908  if (position > last_position || (version > 0 && position == last_position)) {
1909  last_position = position;
1910  break;
1911  }
1912  }
1913  }
1914 
1915  unsigned min_error_cost = ts_parser__condense_stack(self);
1916  if (self->finished_tree.ptr && ts_subtree_error_cost(self->finished_tree) < min_error_cost) {
1917  break;
1918  }
1919 
1920  while (self->included_range_difference_index < self->included_range_differences.size) {
1921  TSRange *range = &self->included_range_differences.contents[self->included_range_difference_index];
1922  if (range->end_byte <= position) {
1923  self->included_range_difference_index++;
1924  } else {
1925  break;
1926  }
1927  }
1928  } while (version_count != 0);
1929 
1930  ts_subtree_balance(self->finished_tree, &self->tree_pool, self->language);
1931  LOG("done");
1932  LOG_TREE(self->finished_tree);
1933 
1934  TSTree *result = ts_tree_new(
1935  self->finished_tree,
1936  self->language,
1937  self->lexer.included_ranges,
1939  );
1940  self->finished_tree = NULL_SUBTREE;
1941  ts_parser_reset(self);
1942  return result;
1943 }
static TSClock clock_after(TSClock base, TSDuration duration)
Definition: clock.h:127
void ts_range_array_get_changed_ranges(const TSRange *old_ranges, unsigned old_range_count, const TSRange *new_ranges, unsigned new_range_count, TSRangeArray *differences)
void ts_lexer_set_input(Lexer *self, TSInput input)
Definition: lexer.c:308
static void reusable_node_reset(ReusableNode *self, Subtree tree)
Definition: reusable_node.h:81
static void reusable_node_clear(ReusableNode *self)
Definition: reusable_node.h:18
TSRange * included_ranges
Definition: lexer.h:19
uint32_t included_range_count
Definition: lexer.h:24
TSDuration timeout_duration
Definition: parser.c:101
Definition: api.h:60
Definition: tree.h:15
Subtree root
Definition: tree.h:16
TSRange * included_ranges
Definition: tree.h:18
unsigned included_range_count
Definition: tree.h:19
TSTree * ts_tree_new(Subtree root, const TSLanguage *language, const TSRange *included_ranges, unsigned included_range_count)
Definition: tree.c:8
void ts_subtree_balance(Subtree self, SubtreePool *pool, const TSLanguage *language)
Definition: subtree.c:338
static unsigned ts_parser__condense_stack(TSParser *self)
Definition: parser.c:1595
void ts_parser_reset(TSParser *self)
Definition: parser.c:1823
static bool ts_parser__advance(TSParser *self, StackVersion version, bool allow_node_reuse)
Definition: parser.c:1387
#define LOG_TREE(tree)
Definition: parser.c:64
static bool ts_parser_has_outstanding_parse(TSParser *self)
Definition: parser.c:1695
static bool input(void *ud, zip_uint8_t *data, zip_uint64_t length)

References array_clear, Length::bytes, clock_after(), clock_now(), clock_null(), TSPoint::column, Length::extent, i, TSTree::included_range_count, TSTree::included_ranges, input(), LOG, LOG_STACK, LOG_TREE, NULL, NULL_SUBTREE, capstone::range, reusable_node_clear(), reusable_node_reset(), TSTree::root, TSPoint::row, ts_lexer_set_input(), ts_parser__advance(), ts_parser__condense_stack(), ts_parser_has_outstanding_parse(), ts_parser_reset(), ts_range_array_get_changed_ranges(), ts_stack_is_active(), ts_stack_position(), ts_stack_state(), ts_stack_version_count(), ts_subtree_balance(), ts_subtree_error_cost(), ts_subtree_retain(), and ts_tree_new().

Referenced by ts_parser_parse_string_encoding(), and ts_parser_parse_wasm().

◆ ts_parser_parse_string()

TSTree* ts_parser_parse_string ( TSParser self,
const TSTree old_tree,
const char *  string,
uint32_t  length 
)

Use the parser to parse some source code stored in one contiguous buffer. The first two parameters are the same as in the ts_parser_parse function above. The second two parameters indicate the location of the buffer and its length in bytes.

Definition at line 1945 of file parser.c.

1950  {
1951  return ts_parser_parse_string_encoding(self, old_tree, string, length, TSInputEncodingUTF8);
1952 }
@ TSInputEncodingUTF8
Definition: api.h:45
TSTree * ts_parser_parse_string_encoding(TSParser *self, const TSTree *old_tree, const char *string, uint32_t length, TSInputEncoding encoding)
Definition: parser.c:1954

References length, ts_parser_parse_string_encoding(), and TSInputEncodingUTF8.

Referenced by apply_edits(), core_cmd_tsrzcmd(), guess_next_autocmplt_token(), rz_type_parse_string_declaration_single(), rz_type_parse_string_single(), and type_parse_string().

◆ ts_parser_parse_string_encoding()

TSTree* ts_parser_parse_string_encoding ( TSParser self,
const TSTree old_tree,
const char *  string,
uint32_t  length,
TSInputEncoding  encoding 
)

Use the parser to parse some source code stored in one contiguous buffer with a given encoding. The first four parameters work the same as in the ts_parser_parse_string method above. The final parameter indicates whether the text is encoded as UTF8 or UTF16.

Definition at line 1954 of file parser.c.

1960  {
1961  TSStringInput input = {string, length};
1962  return ts_parser_parse(self, old_tree, (TSInput) {
1963  &input,
1965  encoding,
1966  });
1967 }
Definition: api.h:67
TSTree * ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input)
Definition: parser.c:1844
static const char * ts_string_input_read(void *_self, uint32_t byte, TSPoint pt, uint32_t *length)
Definition: parser.c:132

References cmd_descs_generate::encoding, input(), length, ts_parser_parse(), and ts_string_input_read().

Referenced by ts_parser_parse_string().

◆ ts_parser_print_dot_graphs()

void ts_parser_print_dot_graphs ( TSParser self,
int  file 
)

Set the file descriptor to which the parser should write debugging graphs during parsing. The graphs are formatted in the DOT language. You may want to pipe these graphs directly to a dot(1) process in order to generate SVG output. You can turn off this logging by passing a negative number.

Definition at line 1783 of file parser.c.

1783  {
1784  if (self->dot_graph_file) {
1785  fclose(self->dot_graph_file);
1786  }
1787 
1788  if (fd >= 0) {
1789  self->dot_graph_file = fdopen(fd, "a");
1790  } else {
1791  self->dot_graph_file = NULL;
1792  }
1793 }
static const z80_opcode fd[]
Definition: z80_tab.h:997

References fd, and NULL.

◆ ts_parser_reset()

void ts_parser_reset ( TSParser self)

Instruct the parser to start the next parse from the beginning.

If the parser previously failed because of a timeout or a cancellation, then by default, it will resume where it left off on the next call to ts_parser_parse or other parsing functions. If you don't want to resume, and instead intend to use this parser to parse some other document, you must call ts_parser_reset first.

Definition at line 1823 of file parser.c.

1823  {
1824  if (self->language && self->language->external_scanner.deserialize) {
1825  self->language->external_scanner.deserialize(self->external_scanner_payload, NULL, 0);
1826  }
1827 
1828  if (self->old_tree.ptr) {
1829  ts_subtree_release(&self->tree_pool, self->old_tree);
1830  self->old_tree = NULL_SUBTREE;
1831  }
1832 
1834  ts_lexer_reset(&self->lexer, length_zero());
1835  ts_stack_clear(self->stack);
1837  if (self->finished_tree.ptr) {
1839  self->finished_tree = NULL_SUBTREE;
1840  }
1841  self->accept_count = 0;
1842 }
void(* deserialize)(void *, const char *, unsigned)
Definition: parser.h:124
struct TSLanguage::@436 external_scanner
void ts_stack_clear(Stack *self)
Definition: stack.c:737

References length_zero(), NULL, NULL_SUBTREE, reusable_node_clear(), ts_lexer_reset(), ts_parser__set_cached_token(), ts_stack_clear(), and ts_subtree_release().

Referenced by ts_parser_parse(), and ts_parser_set_language().

◆ ts_parser_set_cancellation_flag()

void ts_parser_set_cancellation_flag ( TSParser self,
const size_t flag 
)

Set the parser's current cancellation flag pointer.

If a non-null pointer is assigned, then the parser will periodically read from this pointer during parsing. If it reads a non-zero value, it will halt early, returning NULL. See ts_parser_parse for more information.

Definition at line 1799 of file parser.c.

1799  {
1800  self->cancellation_flag = (const volatile size_t *)flag;
1801 }

◆ ts_parser_set_included_ranges()

bool ts_parser_set_included_ranges ( TSParser self,
const TSRange ranges,
uint32_t  length 
)

Set the ranges of text that the parser should include when parsing.

By default, the parser will always include entire documents. This function allows you to parse only a portion of a document but still return a syntax tree whose ranges match up with the document as a whole. You can also pass multiple disjoint ranges.

The second and third parameters specify the location and length of an array of ranges. The parser does not take ownership of these ranges; it copies the data, so it doesn't matter how these ranges are allocated.

If length is zero, then the entire document will be parsed. Otherwise, the given ranges must be ordered from earliest to latest in the document, and they must not overlap. That is, the following must hold for all i < length - 1: ranges[i].end_byte <= ranges[i + 1].start_byte

If this requirement is not satisfied, the operation will fail, the ranges will not be assigned, and this function will return false. On success, this function returns true

Definition at line 1811 of file parser.c.

1815  {
1816  return ts_lexer_set_included_ranges(&self->lexer, ranges, count);
1817 }
bool ts_lexer_set_included_ranges(Lexer *self, const TSRange *ranges, uint32_t count)
Definition: lexer.c:367

References count, and ts_lexer_set_included_ranges().

Referenced by ts_parser_parse_wasm().

◆ ts_parser_set_language()

bool ts_parser_set_language ( TSParser self,
const TSLanguage language 
)

Set the language that the parser should use for parsing.

Returns a boolean indicating whether or not the language was successfully assigned. True means assignment succeeded. False means there was a version mismatch: the language was generated with an incompatible version of the Tree-sitter CLI. Check the language's version using ts_language_version and compare it to this library's TREE_SITTER_LANGUAGE_VERSION and TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION constants.

Definition at line 1754 of file parser.c.

1754  {
1755  if (language) {
1756  if (language->version > TREE_SITTER_LANGUAGE_VERSION) return false;
1757  if (language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION) return false;
1758  }
1759 
1761  self->language->external_scanner.destroy(self->external_scanner_payload);
1762  }
1763 
1764  if (language && language->external_scanner.create) {
1765  self->external_scanner_payload = language->external_scanner.create();
1766  } else {
1767  self->external_scanner_payload = NULL;
1768  }
1769 
1770  self->language = language;
1771  ts_parser_reset(self);
1772  return true;
1773 }
#define TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
Definition: api.h:30
#define TREE_SITTER_LANGUAGE_VERSION
Definition: api.h:24
void *(* create)(void)
Definition: parser.h:120
void(* destroy)(void *)
Definition: parser.h:121
uint32_t version
Definition: parser.h:91

References TSLanguage::create, TSLanguage::external_scanner, NULL, TREE_SITTER_LANGUAGE_VERSION, TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION, ts_parser_reset(), and TSLanguage::version.

Referenced by core_cmd_tsrzcmd(), guess_next_autocmplt_token(), rz_type_parse_string_declaration_single(), rz_type_parse_string_single(), ts_parser_delete(), and type_parse_string().

◆ ts_parser_set_logger()

void ts_parser_set_logger ( TSParser self,
TSLogger  logger 
)

Set the logger that a parser should use during parsing.

The parser does not take ownership over the logger payload. If a logger was previously assigned, the caller is responsible for releasing any memory owned by the previous logger.

Definition at line 1779 of file parser.c.

1779  {
1780  self->lexer.logger = logger;
1781 }

Referenced by ts_parser_enable_logger_wasm().

◆ ts_parser_set_timeout_micros()

void ts_parser_set_timeout_micros ( TSParser self,
uint64_t  timeout 
)

Set the maximum duration in microseconds that parsing should be allowed to take before halting.

If parsing takes longer than this, it will halt early, returning NULL. See ts_parser_parse for more information.

Definition at line 1807 of file parser.c.

1807  {
1808  self->timeout_duration = duration_from_micros(timeout_micros);
1809 }
static TSDuration duration_from_micros(uint64_t micros)
Definition: clock.h:111

References duration_from_micros().

◆ ts_parser_timeout_micros()

uint64_t ts_parser_timeout_micros ( const TSParser self)

Get the duration in microseconds that parsing is allowed to take.

Definition at line 1803 of file parser.c.

1803  {
1804  return duration_to_micros(self->timeout_duration);
1805 }
static uint64_t duration_to_micros(TSDuration self)
Definition: clock.h:115

References duration_to_micros().

◆ ts_string_input_read()

static const char* ts_string_input_read ( void *  _self,
uint32_t  byte,
TSPoint  pt,
uint32_t length 
)
static

Definition at line 132 of file parser.c.

137  {
138  (void)pt;
139  TSStringInput *self = (TSStringInput *)_self;
140  if (byte >= self->length) {
141  *length = 0;
142  return "";
143  } else {
144  *length = self->length - byte;
145  return self->string + byte;
146  }
147 }

References length.

Referenced by ts_parser_parse_string_encoding().

Variable Documentation

◆ MAX_COST_DIFFERENCE

const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE
static

Definition at line 77 of file parser.c.

Referenced by ts_parser__compare_versions().

◆ MAX_SUMMARY_DEPTH

const unsigned MAX_SUMMARY_DEPTH = 16
static

Definition at line 76 of file parser.c.

Referenced by ts_parser__handle_error().

◆ MAX_VERSION_COUNT

const unsigned MAX_VERSION_COUNT = 6
static

◆ MAX_VERSION_COUNT_OVERFLOW

const unsigned MAX_VERSION_COUNT_OVERFLOW = 4
static

Definition at line 75 of file parser.c.

Referenced by ts_parser__reduce().

◆ OP_COUNT_PER_TIMEOUT_CHECK

const unsigned OP_COUNT_PER_TIMEOUT_CHECK = 100
static

Definition at line 78 of file parser.c.

Referenced by ts_parser__advance().