Rizin
unix-like reverse engineering framework and cli tools
query.c
Go to the documentation of this file.
1 #include "tree_sitter/api.h"
2 #include "./alloc.h"
3 #include "./array.h"
4 #include "./language.h"
5 #include "./point.h"
6 #include "./tree_cursor.h"
7 #include "./unicode.h"
8 #include <wctype.h>
9 
10 // #define DEBUG_ANALYZE_QUERY
11 // #define DEBUG_EXECUTE_QUERY
12 
13 #define MAX_STEP_CAPTURE_COUNT 3
14 #define MAX_NEGATED_FIELD_COUNT 8
15 #define MAX_STATE_PREDECESSOR_COUNT 256
16 #define MAX_ANALYSIS_STATE_DEPTH 8
17 #define MAX_ANALYSIS_ITERATION_COUNT 256
18 
19 /*
20  * Stream - A sequence of unicode characters derived from a UTF8 string.
21  * This struct is used in parsing queries from S-expressions.
22  */
23 typedef struct {
24  const char *input;
25  const char *start;
26  const char *end;
29 } Stream;
30 
31 /*
32  * QueryStep - A step in the process of matching a query. Each node within
33  * a query S-expression corresponds to one of these steps. An entire pattern
34  * is represented as a sequence of these steps. The basic properties of a
35  * node are represented by these fields:
36  * - `symbol` - The grammar symbol to match. A zero value represents the
37  * wildcard symbol, '_'.
38  * - `field` - The field name to match. A zero value means that a field name
39  * was not specified.
40  * - `capture_ids` - An array of integers representing the names of captures
41  * associated with this node in the pattern, terminated by a `NONE` value.
42  * - `depth` - The depth where this node occurs in the pattern. The root node
43  * of the pattern has depth zero.
44  * - `negated_field_list_id` - An id representing a set of fields that must
45  * that must not be present on a node matching this step.
46  *
47  * Steps have some additional fields in order to handle the `.` (or "anchor") operator,
48  * which forbids additional child nodes:
49  * - `is_immediate` - Indicates that the node matching this step cannot be preceded
50  * by other sibling nodes that weren't specified in the pattern.
51  * - `is_last_child` - Indicates that the node matching this step cannot have any
52  * subsequent named siblings.
53  *
54  * For simple patterns, steps are matched in sequential order. But in order to
55  * handle alternative/repeated/optional sub-patterns, query steps are not always
56  * structured as a linear sequence; they sometimes need to split and merge. This
57  * is done using the following fields:
58  * - `alternative_index` - The index of a different query step that serves as
59  * an alternative to this step. A `NONE` value represents no alternative.
60  * When a query state reaches a step with an alternative index, the state
61  * is duplicated, with one copy remaining at the original step, and one copy
62  * moving to the alternative step. The alternative may have its own alternative
63  * step, so this splitting is an iterative process.
64  * - `is_dead_end` - Indicates that this state cannot be passed directly, and
65  * exists only in order to redirect to an alternative index, with no splitting.
66  * - `is_pass_through` - Indicates that state has no matching logic of its own,
67  * and exists only to split a state. One copy of the state advances immediately
68  * to the next step, and one moves to the alternative step.
69  * - `alternative_is_immediate` - Indicates that this step's alternative step
70  * should be treated as if `is_immediate` is true.
71  *
72  * Steps also store some derived state that summarizes how they relate to other
73  * steps within the same pattern. This is used to optimize the matching process:
74  * - `contains_captures` - Indicates that this step or one of its child steps
75  * has a non-empty `capture_ids` list.
76  * - `parent_pattern_guaranteed` - Indicates that if this step is reached, then
77  * it and all of its subsequent sibling steps within the same parent pattern
78  * are guaranteed to match.
79  * - `root_pattern_guaranteed` - Similar to `parent_pattern_guaranteed`, but
80  * for the entire top-level pattern. When iterating through a query's
81  * captures using `ts_query_cursor_next_capture`, this field is used to
82  * detect that a capture can safely be returned from a match that has not
83  * even completed yet.
84  */
85 typedef struct {
93  bool is_named: 1;
94  bool is_immediate: 1;
95  bool is_last_child: 1;
96  bool is_pass_through: 1;
97  bool is_dead_end: 1;
102 } QueryStep;
103 
104 /*
105  * Slice - A slice of an external array. Within a query, capture names,
106  * literal string values, and predicate step informations are stored in three
107  * contiguous arrays. Individual captures, string values, and predicates are
108  * represented as slices of these three arrays.
109  */
110 typedef struct {
113 } Slice;
114 
115 /*
116  * SymbolTable - a two-way mapping of strings to ids.
117  */
118 typedef struct {
119  Array(char) characters;
120  Array(Slice) slices;
121 } SymbolTable;
122 
126 typedef Array(uint8_t) CaptureQuantifiers;
127 
128 /*
129  * PatternEntry - Information about the starting point for matching a particular
130  * pattern. These entries are stored in a 'pattern map' - a sorted array that
131  * makes it possible to efficiently lookup patterns based on the symbol for their
132  * first step. The entry consists of the following fields:
133  * - `pattern_index` - the index of the pattern within the query
134  * - `step_index` - the index of the pattern's first step in the shared `steps` array
135  * - `is_rooted` - whether or not the pattern has a single root node. This property
136  * affects decisions about whether or not to start the pattern for nodes outside
137  * of a QueryCursor's range restriction.
138  */
139 typedef struct {
140  uint16_t step_index;
141  uint16_t pattern_index;
142  bool is_rooted;
144 
145 typedef struct {
149 } QueryPattern;
150 
151 typedef struct {
154 } StepOffset;
155 
156 /*
157  * QueryState - The state of an in-progress match of a particular pattern
158  * in a query. While executing, a `TSQueryCursor` must keep track of a number
159  * of possible in-progress matches. Each of those possible matches is
160  * represented as one of these states. Fields:
161  * - `id` - A numeric id that is exposed to the public API. This allows the
162  * caller to remove a given match, preventing any more of its captures
163  * from being returned.
164  * - `start_depth` - The depth in the tree where the first step of the state's
165  * pattern was matched.
166  * - `pattern_index` - The pattern that the state is matching.
167  * - `consumed_capture_count` - The number of captures from this match that
168  * have already been returned.
169  * - `capture_list_id` - A numeric id that can be used to retrieve the state's
170  * list of captures from the `CaptureListPool`.
171  * - `seeking_immediate_match` - A flag that indicates that the state's next
172  * step must be matched by the very next sibling. This is used when
173  * processing repetitions.
174  * - `has_in_progress_alternatives` - A flag that indicates that there is are
175  * other states that have the same captures as this state, but are at
176  * different steps in their pattern. This means that in order to obey the
177  * 'longest-match' rule, this state should not be returned as a match until
178  * it is clear that there can be no other alternative match with more captures.
179  */
180 typedef struct {
189  bool dead: 1;
190  bool needs_parent: 1;
191 } QueryState;
192 
193 typedef Array(TSQueryCapture) CaptureList;
194 
195 /*
196  * CaptureListPool - A collection of *lists* of captures. Each query state needs
197  * to maintain its own list of captures. To avoid repeated allocations, this struct
198  * maintains a fixed set of capture lists, and keeps track of which ones are
199  * currently in use by a query state.
200  */
201 typedef struct {
202  Array(CaptureList) list;
203  CaptureList empty_list;
204  // The maximum number of capture lists that we are allowed to allocate. We
205  // never allow `list` to allocate more entries than this, dropping pending
206  // matches if needed to stay under the limit.
207  uint32_t max_capture_list_count;
208  // The number of capture lists allocated in `list` that are not currently in
209  // use. We reuse those existing-but-unused capture lists before trying to
210  // allocate any new ones. We use an invalid value (UINT32_MAX) for a capture
211  // list's length to indicate that it's not in use.
212  uint32_t free_capture_list_count;
214 
215 /*
216  * AnalysisState - The state needed for walking the parse table when analyzing
217  * a query pattern, to determine at which steps the pattern might fail to match.
218  */
219 typedef struct {
224  bool done: 1;
226 
227 typedef struct {
231 } AnalysisState;
232 
233 typedef Array(AnalysisState *) AnalysisStateSet;
234 
235 typedef Array(AnalysisState *) AnalysisStatePool;
236 
237 /*
238  * AnalysisSubgraph - A subset of the states in the parse table that are used
239  * in constructing nodes with a certain symbol. Each state is accompanied by
240  * some information about the possible node that could be produced in
241  * downstream states.
242  */
243 typedef struct {
245  uint8_t production_id;
246  uint8_t child_index: 7;
247  bool done: 1;
249 
250 typedef struct {
252  Array(TSStateId) start_states;
255 
256 /*
257  * StatePredecessorMap - A map that stores the predecessors of each parse state.
258  * This is used during query analysis to determine which parse states can lead
259  * to which reduce actions.
260  */
261 typedef struct {
264 
265 /*
266  * TSQuery - A tree query, compiled from a string of S-expressions. The query
267  * itself is immutable. The mutable state used in the process of executing the
268  * query is stored in a `TSQueryCursor`.
269  */
270 struct TSQuery {
272  Array(CaptureQuantifiers) capture_quantifiers;
273  SymbolTable predicate_values;
274  Array(QueryStep) steps;
275  Array(PatternEntry) pattern_map;
276  Array(TSQueryPredicateStep) predicate_steps;
278  Array(StepOffset) step_offsets;
279  Array(TSFieldId) negated_fields;
280  Array(char) string_buffer;
281  const TSLanguage *language;
282  uint16_t wildcard_root_pattern_count;
283 };
284 
285 /*
286  * TSQueryCursor - A stateful struct used to execute a query on a tree.
287  */
289  const TSQuery *query;
292  Array(QueryState) finished_states;
293  CaptureListPool capture_list_pool;
294  uint32_t depth;
295  uint32_t start_byte;
296  uint32_t end_byte;
297  TSPoint start_point;
298  TSPoint end_point;
299  uint32_t next_state_id;
300  bool ascending;
301  bool halted;
302  bool did_exceed_match_limit;
303 };
304 
305 static const TSQueryError PARENT_DONE = -1;
307 static const uint16_t NONE = UINT16_MAX;
308 static const TSSymbol WILDCARD_SYMBOL = 0;
309 
310 /**********
311  * Stream
312  **********/
313 
314 // Advance to the next unicode code point in the stream.
315 static bool stream_advance(Stream *self) {
316  self->input += self->next_size;
317  if (self->input < self->end) {
319  (const uint8_t *)self->input,
320  self->end - self->input,
321  &self->next
322  );
323  if (size > 0) {
324  self->next_size = size;
325  return true;
326  }
327  } else {
328  self->next_size = 0;
329  self->next = '\0';
330  }
331  return false;
332 }
333 
334 // Reset the stream to the given input position, represented as a pointer
335 // into the input string.
336 static void stream_reset(Stream *self, const char *input) {
337  self->input = input;
338  self->next_size = 0;
339  stream_advance(self);
340 }
341 
342 static Stream stream_new(const char *string, uint32_t length) {
343  Stream self = {
344  .next = 0,
345  .input = string,
346  .start = string,
347  .end = string + length,
348  };
349  stream_advance(&self);
350  return self;
351 }
352 
353 static void stream_skip_whitespace(Stream *self) {
354  for (;;) {
355  if (iswspace(self->next)) {
356  stream_advance(self);
357  } else if (self->next == ';') {
358  // skip over comments
359  stream_advance(self);
360  while (self->next && self->next != '\n') {
361  if (!stream_advance(self)) break;
362  }
363  } else {
364  break;
365  }
366  }
367 }
368 
369 static bool stream_is_ident_start(Stream *self) {
370  return iswalnum(self->next) || self->next == '_' || self->next == '-';
371 }
372 
374  do {
376  } while (
377  iswalnum(stream->next) ||
378  stream->next == '_' ||
379  stream->next == '-' ||
380  stream->next == '.' ||
381  stream->next == '?' ||
382  stream->next == '!'
383  );
384 }
385 
387  return self->input - self->start;
388 }
389 
390 /******************
391  * CaptureListPool
392  ******************/
393 
395  return (CaptureListPool) {
396  .list = array_new(),
397  .empty_list = array_new(),
398  .max_capture_list_count = UINT32_MAX,
399  .free_capture_list_count = 0,
400  };
401 }
402 
404  for (uint16_t i = 0; i < self->list.size; i++) {
405  // This invalid size means that the list is not in use.
406  self->list.contents[i].size = UINT32_MAX;
407  }
408  self->free_capture_list_count = self->list.size;
409 }
410 
412  for (uint16_t i = 0; i < self->list.size; i++) {
413  array_delete(&self->list.contents[i]);
414  }
415  array_delete(&self->list);
416 }
417 
418 static const CaptureList *capture_list_pool_get(const CaptureListPool *self, uint16_t id) {
419  if (id >= self->list.size) return &self->empty_list;
420  return &self->list.contents[id];
421 }
422 
423 static CaptureList *capture_list_pool_get_mut(CaptureListPool *self, uint16_t id) {
424  assert(id < self->list.size);
425  return &self->list.contents[id];
426 }
427 
428 static bool capture_list_pool_is_empty(const CaptureListPool *self) {
429  // The capture list pool is empty if all allocated lists are in use, and we
430  // have reached the maximum allowed number of allocated lists.
431  return self->free_capture_list_count == 0 && self->list.size >= self->max_capture_list_count;
432 }
433 
435  // First see if any already allocated capture list is currently unused.
436  if (self->free_capture_list_count > 0) {
437  for (uint16_t i = 0; i < self->list.size; i++) {
438  if (self->list.contents[i].size == UINT32_MAX) {
439  array_clear(&self->list.contents[i]);
440  self->free_capture_list_count--;
441  return i;
442  }
443  }
444  }
445 
446  // Otherwise allocate and initialize a new capture list, as long as that
447  // doesn't put us over the requested maximum.
448  uint32_t i = self->list.size;
449  if (i >= self->max_capture_list_count) {
450  return NONE;
451  }
452  CaptureList list;
453  array_init(&list);
454  array_push(&self->list, list);
455  return i;
456 }
457 
459  if (id >= self->list.size) return;
460  self->list.contents[id].size = UINT32_MAX;
461  self->free_capture_list_count++;
462 }
463 
464 /**************
465  * Quantifiers
466  **************/
467 
469  TSQuantifier left,
470  TSQuantifier right
471 ) {
472  switch (left)
473  {
474  case TSQuantifierZero:
475  return TSQuantifierZero;
477  switch (right) {
478  case TSQuantifierZero:
479  return TSQuantifierZero;
481  case TSQuantifierOne:
482  return TSQuantifierZeroOrOne;
485  return TSQuantifierZeroOrMore;
486  };
487  break;
489  switch (right) {
490  case TSQuantifierZero:
491  return TSQuantifierZero;
494  case TSQuantifierOne:
496  return TSQuantifierZeroOrMore;
497  };
498  break;
499  case TSQuantifierOne:
500  return right;
502  switch (right) {
503  case TSQuantifierZero:
504  return TSQuantifierZero;
507  return TSQuantifierZeroOrMore;
508  case TSQuantifierOne:
510  return TSQuantifierOneOrMore;
511  };
512  break;
513  }
514  return TSQuantifierZero; // to make compiler happy, but all cases should be covered above!
515 }
516 
518  TSQuantifier left,
519  TSQuantifier right
520 ) {
521  switch (left)
522  {
523  case TSQuantifierZero:
524  switch (right) {
525  case TSQuantifierZero:
526  return TSQuantifierZero;
528  case TSQuantifierOne:
529  return TSQuantifierZeroOrOne;
532  return TSQuantifierZeroOrMore;
533  };
534  break;
536  switch (right) {
537  case TSQuantifierZero:
539  case TSQuantifierOne:
540  return TSQuantifierZeroOrOne;
541  break;
544  return TSQuantifierZeroOrMore;
545  break;
546  };
547  break;
549  return TSQuantifierZeroOrMore;
550  case TSQuantifierOne:
551  switch (right) {
552  case TSQuantifierZero:
554  return TSQuantifierZeroOrOne;
556  return TSQuantifierZeroOrMore;
557  case TSQuantifierOne:
558  return TSQuantifierOne;
560  return TSQuantifierOneOrMore;
561  };
562  break;
564  switch (right) {
565  case TSQuantifierZero:
568  return TSQuantifierZeroOrMore;
569  case TSQuantifierOne:
571  return TSQuantifierOneOrMore;
572  };
573  break;
574  }
575  return TSQuantifierZero; // to make compiler happy, but all cases should be covered above!
576 }
577 
579  TSQuantifier left,
580  TSQuantifier right
581 ) {
582  switch (left)
583  {
584  case TSQuantifierZero:
585  return right;
587  switch (right) {
588  case TSQuantifierZero:
589  return TSQuantifierZeroOrOne;
592  return TSQuantifierZeroOrMore;
593  case TSQuantifierOne:
595  return TSQuantifierOneOrMore;
596  };
597  break;
599  switch (right) {
600  case TSQuantifierZero:
601  return TSQuantifierZeroOrMore;
604  return TSQuantifierZeroOrMore;
605  case TSQuantifierOne:
607  return TSQuantifierOneOrMore;
608  };
609  break;
610  case TSQuantifierOne:
611  switch (right) {
612  case TSQuantifierZero:
613  return TSQuantifierOne;
616  case TSQuantifierOne:
618  return TSQuantifierOneOrMore;
619  };
620  break;
622  return TSQuantifierOneOrMore;
623  }
624  return TSQuantifierZero; // to make compiler happy, but all cases should be covered above!
625 }
626 
627 // Create new capture quantifiers structure
628 static CaptureQuantifiers capture_quantifiers_new(void) {
629  return (CaptureQuantifiers) array_new();
630 }
631 
632 // Delete capture quantifiers structure
634  CaptureQuantifiers *self
635 ) {
636  array_delete(self);
637 }
638 
639 // Clear capture quantifiers structure
641  CaptureQuantifiers *self
642 ) {
643  array_clear(self);
644 }
645 
646 // Replace capture quantifiers with the given quantifiers
648  CaptureQuantifiers *self,
649  CaptureQuantifiers *quantifiers
650 ) {
651  array_clear(self);
652  array_push_all(self, quantifiers);
653 }
654 
655 // Return capture quantifier for the given capture id
657  const CaptureQuantifiers *self,
658  uint16_t id
659 ) {
660  return (self->size <= id) ? TSQuantifierZero : (TSQuantifier) *array_get(self, id);
661 }
662 
663 // Add the given quantifier to the current value for id
665  CaptureQuantifiers *self,
666  uint16_t id,
667  TSQuantifier quantifier
668 ) {
669  if (self->size <= id) {
670  array_grow_by(self, id + 1 - self->size);
671  }
672  uint8_t *own_quantifier = array_get(self, id);
673  *own_quantifier = (uint8_t) quantifier_add((TSQuantifier) *own_quantifier, quantifier);
674 }
675 
676 // Point-wise add the given quantifiers to the current values
678  CaptureQuantifiers *self,
679  CaptureQuantifiers *quantifiers
680 ) {
681  if (self->size < quantifiers->size) {
682  array_grow_by(self, quantifiers->size - self->size);
683  }
684  for (uint16_t id = 0; id < quantifiers->size; id++) {
685  uint8_t *quantifier = array_get(quantifiers, id);
686  uint8_t *own_quantifier = array_get(self, id);
687  *own_quantifier = (uint8_t) quantifier_add((TSQuantifier) *own_quantifier, (TSQuantifier) *quantifier);
688  }
689 }
690 
691 // Join the given quantifier with the current values
693  CaptureQuantifiers *self,
694  TSQuantifier quantifier
695 ) {
696  for (uint16_t id = 0; id < self->size; id++) {
697  uint8_t *own_quantifier = array_get(self, id);
698  *own_quantifier = (uint8_t) quantifier_mul((TSQuantifier) *own_quantifier, quantifier);
699  }
700 }
701 
702 // Point-wise join the quantifiers from a list of alternatives with the current values
704  CaptureQuantifiers *self,
705  CaptureQuantifiers *quantifiers
706 ) {
707  if (self->size < quantifiers->size) {
708  array_grow_by(self, quantifiers->size - self->size);
709  }
710  for (uint32_t id = 0; id < quantifiers->size; id++) {
711  uint8_t *quantifier = array_get(quantifiers, id);
712  uint8_t *own_quantifier = array_get(self, id);
713  *own_quantifier = (uint8_t) quantifier_join((TSQuantifier) *own_quantifier, (TSQuantifier) *quantifier);
714  }
715  for (uint32_t id = quantifiers->size; id < self->size; id++) {
716  uint8_t *own_quantifier = array_get(self, id);
717  *own_quantifier = (uint8_t) quantifier_join((TSQuantifier) *own_quantifier, TSQuantifierZero);
718  }
719 }
720 
721 /**************
722  * SymbolTable
723  **************/
724 
726  return (SymbolTable) {
727  .characters = array_new(),
728  .slices = array_new(),
729  };
730 }
731 
732 static void symbol_table_delete(SymbolTable *self) {
733  array_delete(&self->characters);
734  array_delete(&self->slices);
735 }
736 
738  const SymbolTable *self,
739  const char *name,
741 ) {
742  for (unsigned i = 0; i < self->slices.size; i++) {
743  Slice slice = self->slices.contents[i];
744  if (
745  slice.length == length &&
746  !strncmp(&self->characters.contents[slice.offset], name, length)
747  ) return i;
748  }
749  return -1;
750 }
751 
752 static const char *symbol_table_name_for_id(
753  const SymbolTable *self,
754  uint16_t id,
756 ) {
757  Slice slice = self->slices.contents[id];
758  *length = slice.length;
759  return &self->characters.contents[slice.offset];
760 }
761 
763  SymbolTable *self,
764  const char *name,
766 ) {
767  int id = symbol_table_id_for_name(self, name, length);
768  if (id >= 0) return (uint16_t)id;
769  Slice slice = {
770  .offset = self->characters.size,
771  .length = length,
772  };
773  array_grow_by(&self->characters, length + 1);
774  memcpy(&self->characters.contents[slice.offset], name, length);
775  self->characters.contents[self->characters.size - 1] = 0;
776  array_push(&self->slices, slice);
777  return self->slices.size - 1;
778 }
779 
780 /************
781  * QueryStep
782  ************/
783 
785  TSSymbol symbol,
786  uint16_t depth,
787  bool is_immediate
788 ) {
789  return (QueryStep) {
790  .symbol = symbol,
791  .depth = depth,
792  .field = 0,
793  .capture_ids = {NONE, NONE, NONE},
794  .alternative_index = NONE,
795  .negated_field_list_id = 0,
796  .contains_captures = false,
797  .is_last_child = false,
798  .is_named = false,
799  .is_pass_through = false,
800  .is_dead_end = false,
801  .root_pattern_guaranteed = false,
802  .is_immediate = is_immediate,
803  .alternative_is_immediate = false,
804  };
805 }
806 
807 static void query_step__add_capture(QueryStep *self, uint16_t capture_id) {
808  for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) {
809  if (self->capture_ids[i] == NONE) {
810  self->capture_ids[i] = capture_id;
811  break;
812  }
813  }
814 }
815 
816 static void query_step__remove_capture(QueryStep *self, uint16_t capture_id) {
817  for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) {
818  if (self->capture_ids[i] == capture_id) {
819  self->capture_ids[i] = NONE;
820  while (i + 1 < MAX_STEP_CAPTURE_COUNT) {
821  if (self->capture_ids[i + 1] == NONE) break;
822  self->capture_ids[i] = self->capture_ids[i + 1];
823  self->capture_ids[i + 1] = NONE;
824  i++;
825  }
826  break;
827  }
828  }
829 }
830 
831 /**********************
832  * StatePredecessorMap
833  **********************/
834 
836  const TSLanguage *language
837 ) {
838  return (StatePredecessorMap) {
839  .contents = ts_calloc(
840  (size_t)language->state_count * (MAX_STATE_PREDECESSOR_COUNT + 1),
841  sizeof(TSStateId)
842  ),
843  };
844 }
845 
847  ts_free(self->contents);
848 }
849 
850 static inline void state_predecessor_map_add(
851  StatePredecessorMap *self,
853  TSStateId predecessor
854 ) {
855  size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1);
856  TSStateId *count = &self->contents[index];
857  if (
858  *count == 0 ||
859  (*count < MAX_STATE_PREDECESSOR_COUNT && self->contents[index + *count] != predecessor)
860  ) {
861  (*count)++;
862  self->contents[index + *count] = predecessor;
863  }
864 }
865 
867  const StatePredecessorMap *self,
869  unsigned *count
870 ) {
871  size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1);
872  *count = self->contents[index];
873  return &self->contents[index + 1];
874 }
875 
876 /****************
877  * AnalysisState
878  ****************/
879 
880 static unsigned analysis_state__recursion_depth(const AnalysisState *self) {
881  unsigned result = 0;
882  for (unsigned i = 0; i < self->depth; i++) {
883  TSSymbol symbol = self->stack[i].parent_symbol;
884  for (unsigned j = 0; j < i; j++) {
885  if (self->stack[j].parent_symbol == symbol) {
886  result++;
887  break;
888  }
889  }
890  }
891  return result;
892 }
893 
895  AnalysisState *const *self,
896  AnalysisState *const *other
897 ) {
898  for (unsigned i = 0; i < (*self)->depth; i++) {
899  if (i >= (*other)->depth) return -1;
900  if ((*self)->stack[i].child_index < (*other)->stack[i].child_index) return -1;
901  if ((*self)->stack[i].child_index > (*other)->stack[i].child_index) return 1;
902  }
903  if ((*self)->depth < (*other)->depth) return 1;
904  if ((*self)->step_index < (*other)->step_index) return -1;
905  if ((*self)->step_index > (*other)->step_index) return 1;
906  return 0;
907 }
908 
909 static inline int analysis_state__compare(
910  AnalysisState *const *self,
911  AnalysisState *const *other
912 ) {
913  int result = analysis_state__compare_position(self, other);
914  if (result != 0) return result;
915  for (unsigned i = 0; i < (*self)->depth; i++) {
916  if ((*self)->stack[i].parent_symbol < (*other)->stack[i].parent_symbol) return -1;
917  if ((*self)->stack[i].parent_symbol > (*other)->stack[i].parent_symbol) return 1;
918  if ((*self)->stack[i].parse_state < (*other)->stack[i].parse_state) return -1;
919  if ((*self)->stack[i].parse_state > (*other)->stack[i].parse_state) return 1;
920  if ((*self)->stack[i].field_id < (*other)->stack[i].field_id) return -1;
921  if ((*self)->stack[i].field_id > (*other)->stack[i].field_id) return 1;
922  }
923  return 0;
924 }
925 
927  return &self->stack[self->depth - 1];
928 }
929 
930 static inline bool analysis_state__has_supertype(AnalysisState *self, TSSymbol symbol) {
931  for (unsigned i = 0; i < self->depth; i++) {
932  if (self->stack[i].parent_symbol == symbol) return true;
933  }
934  return false;
935 }
936 
937 static inline AnalysisState *analysis_state__clone(AnalysisState const *self) {
938  AnalysisState *new_state = ts_malloc(sizeof(AnalysisState));
939  *new_state = *self;
940  return new_state;
941 }
942 
943 /****************
944  * AnalysisStateSet
945  ****************/
946 
947 // Obtains an `AnalysisState` instance, either by consuming one from this set's object pool, or by
948 // cloning one from scratch.
950  AnalysisStatePool *self,
951  AnalysisState *borrowed_item
952 ) {
953  AnalysisState *new_item;
954  if (self->size) {
955  new_item = array_pop(self);
956  *new_item = *borrowed_item;
957  } else {
958  new_item = analysis_state__clone(borrowed_item);
959  }
960 
961  return new_item;
962 }
963 
964 // Inserts a clone of the passed-in item at the appropriate position to maintain ordering in this
965 // set. The set does not contain duplicates, so if the item is already present, it will not be
966 // inserted, and no clone will be made.
967 //
968 // The caller retains ownership of the passed-in memory. However, the clone that is created by this
969 // function will be managed by the state set.
971  AnalysisStateSet *self,
972  AnalysisStatePool *pool,
973  AnalysisState *borrowed_item
974 ) {
975  unsigned index, exists;
976  array_search_sorted_with(self, analysis_state__compare, &borrowed_item, &index, &exists);
977  if (!exists) {
978  AnalysisState *new_item = analysis_state_pool__clone_or_reuse(pool, borrowed_item);
979  array_insert(self, index, new_item);
980  }
981 }
982 
983 // Inserts a clone of the passed-in item at the end position of this list.
984 //
985 // IMPORTANT: The caller MUST ENSURE that this item is larger (by the comparison function
986 // `analysis_state__compare`) than largest item already in this set. If items are inserted in the
987 // wrong order, the set will not function properly for future use.
988 //
989 // The caller retains ownership of the passed-in memory. However, the clone that is created by this
990 // function will be managed by the state set.
992  AnalysisStateSet *self,
993  AnalysisStatePool *pool,
994  AnalysisState *borrowed_item
995 ) {
996  AnalysisState *new_item = analysis_state_pool__clone_or_reuse(pool, borrowed_item);
997  array_push(self, new_item);
998 }
999 
1000 // Removes all items from this set, returning it to an empty state.
1001 static inline void analysis_state_set__clear(AnalysisStateSet *self, AnalysisStatePool *pool) {
1002  array_push_all(pool, self);
1003  array_clear(self);
1004 }
1005 
1006 // Releases all memory that is managed with this state set, including any items currently present.
1007 // After calling this function, the set is no longer suitable for use.
1008 static inline void analysis_state_set__delete(AnalysisStateSet *self) {
1009  for (unsigned i = 0; i < self->size; i++) {
1010  ts_free(self->contents[i]);
1011  }
1012  array_delete(self);
1013 }
1014 
1015 /***********************
1016  * AnalysisSubgraphNode
1017  ***********************/
1018 
1019 static inline int analysis_subgraph_node__compare(const AnalysisSubgraphNode *self, const AnalysisSubgraphNode *other) {
1020  if (self->state < other->state) return -1;
1021  if (self->state > other->state) return 1;
1022  if (self->child_index < other->child_index) return -1;
1023  if (self->child_index > other->child_index) return 1;
1024  if (self->done < other->done) return -1;
1025  if (self->done > other->done) return 1;
1026  if (self->production_id < other->production_id) return -1;
1027  if (self->production_id > other->production_id) return 1;
1028  return 0;
1029 }
1030 
1031 /*********
1032  * Query
1033  *********/
1034 
1035 // The `pattern_map` contains a mapping from TSSymbol values to indices in the
1036 // `steps` array. For a given syntax node, the `pattern_map` makes it possible
1037 // to quickly find the starting steps of all of the patterns whose root matches
1038 // that node. Each entry has two fields: a `pattern_index`, which identifies one
1039 // of the patterns in the query, and a `step_index`, which indicates the start
1040 // offset of that pattern's steps within the `steps` array.
1041 //
1042 // The entries are sorted by the patterns' root symbols, and lookups use a
1043 // binary search. This ensures that the cost of this initial lookup step
1044 // scales logarithmically with the number of patterns in the query.
1045 //
1046 // This returns `true` if the symbol is present and `false` otherwise.
1047 // If the symbol is not present `*result` is set to the index where the
1048 // symbol should be inserted.
1049 static inline bool ts_query__pattern_map_search(
1050  const TSQuery *self,
1051  TSSymbol needle,
1052  uint32_t *result
1053 ) {
1054  uint32_t base_index = self->wildcard_root_pattern_count;
1055  uint32_t size = self->pattern_map.size - base_index;
1056  if (size == 0) {
1057  *result = base_index;
1058  return false;
1059  }
1060  while (size > 1) {
1061  uint32_t half_size = size / 2;
1062  uint32_t mid_index = base_index + half_size;
1063  TSSymbol mid_symbol = self->steps.contents[
1064  self->pattern_map.contents[mid_index].step_index
1065  ].symbol;
1066  if (needle > mid_symbol) base_index = mid_index;
1067  size -= half_size;
1068  }
1069 
1070  TSSymbol symbol = self->steps.contents[
1071  self->pattern_map.contents[base_index].step_index
1072  ].symbol;
1073 
1074  if (needle > symbol) {
1075  base_index++;
1076  if (base_index < self->pattern_map.size) {
1077  symbol = self->steps.contents[
1078  self->pattern_map.contents[base_index].step_index
1079  ].symbol;
1080  }
1081  }
1082 
1083  *result = base_index;
1084  return needle == symbol;
1085 }
1086 
1087 // Insert a new pattern's start index into the pattern map, maintaining
1088 // the pattern map's ordering invariant.
1089 static inline void ts_query__pattern_map_insert(
1090  TSQuery *self,
1091  TSSymbol symbol,
1092  PatternEntry new_entry
1093 ) {
1094  uint32_t index;
1095  ts_query__pattern_map_search(self, symbol, &index);
1096 
1097  // Ensure that the entries are sorted not only by symbol, but also
1098  // by pattern_index. This way, states for earlier patterns will be
1099  // initiated first, which allows the ordering of the states array
1100  // to be maintained more efficiently.
1101  while (index < self->pattern_map.size) {
1102  PatternEntry *entry = &self->pattern_map.contents[index];
1103  if (
1104  self->steps.contents[entry->step_index].symbol == symbol &&
1105  entry->pattern_index < new_entry.pattern_index
1106  ) {
1107  index++;
1108  } else {
1109  break;
1110  }
1111  }
1112 
1113  array_insert(&self->pattern_map, index, new_entry);
1114 }
1115 
1116 static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
1117  // Walk forward through all of the steps in the query, computing some
1118  // basic information about each step. Mark all of the steps that contain
1119  // captures, and record the indices of all of the steps that have child steps.
1120  Array(uint32_t) parent_step_indices = array_new();
1121  for (unsigned i = 0; i < self->steps.size; i++) {
1122  QueryStep *step = &self->steps.contents[i];
1123  if (step->depth == PATTERN_DONE_MARKER) {
1124  step->parent_pattern_guaranteed = true;
1125  step->root_pattern_guaranteed = true;
1126  continue;
1127  }
1128 
1129  bool has_children = false;
1130  bool is_wildcard = step->symbol == WILDCARD_SYMBOL;
1131  step->contains_captures = step->capture_ids[0] != NONE;
1132  for (unsigned j = i + 1; j < self->steps.size; j++) {
1133  QueryStep *next_step = &self->steps.contents[j];
1134  if (
1135  next_step->depth == PATTERN_DONE_MARKER ||
1136  next_step->depth <= step->depth
1137  ) break;
1138  if (next_step->capture_ids[0] != NONE) {
1139  step->contains_captures = true;
1140  }
1141  if (!is_wildcard) {
1142  next_step->root_pattern_guaranteed = true;
1143  next_step->parent_pattern_guaranteed = true;
1144  }
1145  has_children = true;
1146  }
1147 
1148  if (has_children && !is_wildcard) {
1149  array_push(&parent_step_indices, i);
1150  }
1151  }
1152 
1153  // For every parent symbol in the query, initialize an 'analysis subgraph'.
1154  // This subgraph lists all of the states in the parse table that are directly
1155  // involved in building subtrees for this symbol.
1156  //
1157  // In addition to the parent symbols in the query, construct subgraphs for all
1158  // of the hidden symbols in the grammar, because these might occur within
1159  // one of the parent nodes, such that their children appear to belong to the
1160  // parent.
1161  Array(AnalysisSubgraph) subgraphs = array_new();
1162  for (unsigned i = 0; i < parent_step_indices.size; i++) {
1163  uint32_t parent_step_index = parent_step_indices.contents[i];
1164  TSSymbol parent_symbol = self->steps.contents[parent_step_index].symbol;
1165  AnalysisSubgraph subgraph = { .symbol = parent_symbol };
1166  array_insert_sorted_by(&subgraphs, .symbol, subgraph);
1167  }
1168  for (TSSymbol sym = self->language->token_count; sym < self->language->symbol_count; sym++) {
1169  if (!ts_language_symbol_metadata(self->language, sym).visible) {
1170  AnalysisSubgraph subgraph = { .symbol = sym };
1171  array_insert_sorted_by(&subgraphs, .symbol, subgraph);
1172  }
1173  }
1174 
1175  // Scan the parse table to find the data needed to populate these subgraphs.
1176  // Collect three things during this scan:
1177  // 1) All of the parse states where one of these symbols can start.
1178  // 2) All of the parse states where one of these symbols can end, along
1179  // with information about the node that would be created.
1180  // 3) A list of predecessor states for each state.
1181  StatePredecessorMap predecessor_map = state_predecessor_map_new(self->language);
1182  for (TSStateId state = 1; state < self->language->state_count; state++) {
1183  unsigned subgraph_index, exists;
1184  LookaheadIterator lookahead_iterator = ts_language_lookaheads(self->language, state);
1185  while (ts_lookahead_iterator_next(&lookahead_iterator)) {
1186  if (lookahead_iterator.action_count) {
1187  for (unsigned i = 0; i < lookahead_iterator.action_count; i++) {
1188  const TSParseAction *action = &lookahead_iterator.actions[i];
1189  if (action->type == TSParseActionTypeReduce) {
1190  const TSSymbol *aliases, *aliases_end;
1192  self->language,
1193  action->reduce.symbol,
1194  &aliases,
1195  &aliases_end
1196  );
1197  for (const TSSymbol *symbol = aliases; symbol < aliases_end; symbol++) {
1199  &subgraphs,
1200  .symbol,
1201  *symbol,
1202  &subgraph_index,
1203  &exists
1204  );
1205  if (exists) {
1206  AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index];
1207  if (subgraph->nodes.size == 0 || array_back(&subgraph->nodes)->state != state) {
1208  array_push(&subgraph->nodes, ((AnalysisSubgraphNode) {
1209  .state = state,
1210  .production_id = action->reduce.production_id,
1211  .child_index = action->reduce.child_count,
1212  .done = true,
1213  }));
1214  }
1215  }
1216  }
1217  } else if (action->type == TSParseActionTypeShift && !action->shift.extra) {
1218  TSStateId next_state = action->shift.state;
1219  state_predecessor_map_add(&predecessor_map, next_state, state);
1220  }
1221  }
1222  } else if (lookahead_iterator.next_state != 0) {
1223  if (lookahead_iterator.next_state != state) {
1224  state_predecessor_map_add(&predecessor_map, lookahead_iterator.next_state, state);
1225  }
1226  if (ts_language_state_is_primary(self->language, state)) {
1227  const TSSymbol *aliases, *aliases_end;
1229  self->language,
1230  lookahead_iterator.symbol,
1231  &aliases,
1232  &aliases_end
1233  );
1234  for (const TSSymbol *symbol = aliases; symbol < aliases_end; symbol++) {
1236  &subgraphs,
1237  .symbol,
1238  *symbol,
1239  &subgraph_index,
1240  &exists
1241  );
1242  if (exists) {
1243  AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index];
1244  if (
1245  subgraph->start_states.size == 0 ||
1246  *array_back(&subgraph->start_states) != state
1247  )
1248  array_push(&subgraph->start_states, state);
1249  }
1250  }
1251  }
1252  }
1253  }
1254  }
1255 
1256  // For each subgraph, compute the preceding states by walking backward
1257  // from the end states using the predecessor map.
1258  Array(AnalysisSubgraphNode) next_nodes = array_new();
1259  for (unsigned i = 0; i < subgraphs.size; i++) {
1260  AnalysisSubgraph *subgraph = &subgraphs.contents[i];
1261  if (subgraph->nodes.size == 0) {
1262  array_delete(&subgraph->start_states);
1263  array_erase(&subgraphs, i);
1264  i--;
1265  continue;
1266  }
1267  array_assign(&next_nodes, &subgraph->nodes);
1268  while (next_nodes.size > 0) {
1269  AnalysisSubgraphNode node = array_pop(&next_nodes);
1270  if (node.child_index > 1) {
1271  unsigned predecessor_count;
1272  const TSStateId *predecessors = state_predecessor_map_get(
1273  &predecessor_map,
1274  node.state,
1275  &predecessor_count
1276  );
1277  for (unsigned j = 0; j < predecessor_count; j++) {
1278  AnalysisSubgraphNode predecessor_node = {
1279  .state = predecessors[j],
1280  .child_index = node.child_index - 1,
1281  .production_id = node.production_id,
1282  .done = false,
1283  };
1284  unsigned index, exists;
1286  &subgraph->nodes, analysis_subgraph_node__compare, &predecessor_node,
1287  &index, &exists
1288  );
1289  if (!exists) {
1290  array_insert(&subgraph->nodes, index, predecessor_node);
1291  array_push(&next_nodes, predecessor_node);
1292  }
1293  }
1294  }
1295  }
1296  }
1297 
1298  #ifdef DEBUG_ANALYZE_QUERY
1299  printf("\nSubgraphs:\n");
1300  for (unsigned i = 0; i < subgraphs.size; i++) {
1301  AnalysisSubgraph *subgraph = &subgraphs.contents[i];
1302  printf(" %u, %s:\n", subgraph->symbol, ts_language_symbol_name(self->language, subgraph->symbol));
1303  for (unsigned j = 0; j < subgraph->start_states.size; j++) {
1304  printf(
1305  " {state: %u}\n",
1306  subgraph->start_states.contents[j]
1307  );
1308  }
1309  for (unsigned j = 0; j < subgraph->nodes.size; j++) {
1310  AnalysisSubgraphNode *node = &subgraph->nodes.contents[j];
1311  printf(
1312  " {state: %u, child_index: %u, production_id: %u, done: %d}\n",
1313  node->state, node->child_index, node->production_id, node->done
1314  );
1315  }
1316  printf("\n");
1317  }
1318  #endif
1319 
1320  // For each non-terminal pattern, determine if the pattern can successfully match,
1321  // and identify all of the possible children within the pattern where matching could fail.
1322  bool all_patterns_are_valid = true;
1323  AnalysisStateSet states = array_new();
1324  AnalysisStateSet next_states = array_new();
1325  AnalysisStateSet deeper_states = array_new();
1326  AnalysisStatePool state_pool = array_new();
1327  Array(uint16_t) final_step_indices = array_new();
1328  for (unsigned i = 0; i < parent_step_indices.size; i++) {
1329  uint16_t parent_step_index = parent_step_indices.contents[i];
1330  uint16_t parent_depth = self->steps.contents[parent_step_index].depth;
1331  TSSymbol parent_symbol = self->steps.contents[parent_step_index].symbol;
1332  if (parent_symbol == ts_builtin_sym_error) continue;
1333 
1334  // Find the subgraph that corresponds to this pattern's root symbol. If the pattern's
1335  // root symbol is a terminal, then return an error.
1336  unsigned subgraph_index, exists;
1337  array_search_sorted_by(&subgraphs, .symbol, parent_symbol, &subgraph_index, &exists);
1338  if (!exists) {
1339  unsigned first_child_step_index = parent_step_index + 1;
1340  uint32_t i, exists;
1341  array_search_sorted_by(&self->step_offsets, .step_index, first_child_step_index, &i, &exists);
1342  assert(exists);
1343  *error_offset = self->step_offsets.contents[i].byte_offset;
1344  all_patterns_are_valid = false;
1345  break;
1346  }
1347 
1348  // Initialize an analysis state at every parse state in the table where
1349  // this parent symbol can occur.
1350  AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index];
1351  analysis_state_set__clear(&states, &state_pool);
1352  analysis_state_set__clear(&deeper_states, &state_pool);
1353  for (unsigned j = 0; j < subgraph->start_states.size; j++) {
1354  TSStateId parse_state = subgraph->start_states.contents[j];
1356  .step_index = parent_step_index + 1,
1357  .stack = {
1358  [0] = {
1359  .parse_state = parse_state,
1360  .parent_symbol = parent_symbol,
1361  .child_index = 0,
1362  .field_id = 0,
1363  .done = false,
1364  },
1365  },
1366  .depth = 1,
1367  }));
1368  }
1369 
1370  // Walk the subgraph for this non-terminal, tracking all of the possible
1371  // sequences of progress within the pattern.
1372  bool can_finish_pattern = false;
1373  bool did_abort_analysis = false;
1374  unsigned recursion_depth_limit = 0;
1375  unsigned prev_final_step_count = 0;
1376  array_clear(&final_step_indices);
1377  for (unsigned iteration = 0;; iteration++) {
1378  if (iteration == MAX_ANALYSIS_ITERATION_COUNT) {
1379  did_abort_analysis = true;
1380  break;
1381  }
1382 
1383  #ifdef DEBUG_ANALYZE_QUERY
1384  printf("Iteration: %u. Final step indices:", iteration);
1385  for (unsigned j = 0; j < final_step_indices.size; j++) {
1386  printf(" %4u", final_step_indices.contents[j]);
1387  }
1388  printf("\nWalk states for %u %s:\n", i, ts_language_symbol_name(self->language, parent_symbol));
1389  for (unsigned j = 0; j < states.size; j++) {
1390  AnalysisState *state = states.contents[j];
1391  printf(" %3u: step: %u, stack: [", j, state->step_index);
1392  for (unsigned k = 0; k < state->depth; k++) {
1393  printf(
1394  " {%s, child: %u, state: %4u",
1395  self->language->symbol_names[state->stack[k].parent_symbol],
1396  state->stack[k].child_index,
1397  state->stack[k].parse_state
1398  );
1399  if (state->stack[k].field_id) printf(", field: %s", self->language->field_names[state->stack[k].field_id]);
1400  if (state->stack[k].done) printf(", DONE");
1401  printf("}");
1402  }
1403  printf(" ]\n");
1404  }
1405  #endif
1406 
1407  // If no further progress can be made within the current recursion depth limit, then
1408  // bump the depth limit by one, and continue to process the states the exceeded the
1409  // limit. But only allow this if progress has been made since the last time the depth
1410  // limit was increased.
1411  if (states.size == 0) {
1412  if (
1413  deeper_states.size > 0
1414  && final_step_indices.size > prev_final_step_count
1415  ) {
1416  #ifdef DEBUG_ANALYZE_QUERY
1417  printf("Increase recursion depth limit to %u\n", recursion_depth_limit + 1);
1418  #endif
1419 
1420  prev_final_step_count = final_step_indices.size;
1421  recursion_depth_limit++;
1422  AnalysisStateSet _states = states;
1423  states = deeper_states;
1424  deeper_states = _states;
1425  continue;
1426  }
1427 
1428  break;
1429  }
1430 
1431  analysis_state_set__clear(&next_states, &state_pool);
1432  for (unsigned j = 0; j < states.size; j++) {
1433  AnalysisState * const state = states.contents[j];
1434 
1435  // For efficiency, it's important to avoid processing the same analysis state more
1436  // than once. To achieve this, keep the states in order of ascending position within
1437  // their hypothetical syntax trees. In each iteration of this loop, start by advancing
1438  // the states that have made the least progress. Avoid advancing states that have already
1439  // made more progress.
1440  if (next_states.size > 0) {
1441  int comparison = analysis_state__compare_position(
1442  &state,
1443  array_back(&next_states)
1444  );
1445  if (comparison == 0) {
1446  #ifdef DEBUG_ANALYZE_QUERY
1447  printf("Skip iteration for state %u\n", j);
1448  #endif
1449  analysis_state_set__insert_sorted_by_clone(&next_states, &state_pool, state);
1450  continue;
1451  } else if (comparison > 0) {
1452  #ifdef DEBUG_ANALYZE_QUERY
1453  printf("Terminate iteration at state %u\n", j);
1454  #endif
1455  while (j < states.size) {
1457  &next_states,
1458  &state_pool,
1459  states.contents[j]
1460  );
1461  j++;
1462  }
1463  break;
1464  }
1465  }
1466 
1467  const TSStateId parse_state = analysis_state__top(state)->parse_state;
1468  const TSSymbol parent_symbol = analysis_state__top(state)->parent_symbol;
1469  const TSFieldId parent_field_id = analysis_state__top(state)->field_id;
1470  const unsigned child_index = analysis_state__top(state)->child_index;
1471  const QueryStep * const step = &self->steps.contents[state->step_index];
1472 
1473  unsigned subgraph_index, exists;
1474  array_search_sorted_by(&subgraphs, .symbol, parent_symbol, &subgraph_index, &exists);
1475  if (!exists) continue;
1476  const AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index];
1477 
1478  // Follow every possible path in the parse table, but only visit states that
1479  // are part of the subgraph for the current symbol.
1480  LookaheadIterator lookahead_iterator = ts_language_lookaheads(self->language, parse_state);
1481  while (ts_lookahead_iterator_next(&lookahead_iterator)) {
1482  TSSymbol sym = lookahead_iterator.symbol;
1483 
1484  AnalysisSubgraphNode successor = {
1485  .state = parse_state,
1486  .child_index = child_index,
1487  };
1488  if (lookahead_iterator.action_count) {
1489  const TSParseAction *action = &lookahead_iterator.actions[lookahead_iterator.action_count - 1];
1490  if (action->type == TSParseActionTypeShift) {
1491  if (!action->shift.extra) {
1492  successor.state = action->shift.state;
1493  successor.child_index++;
1494  }
1495  } else {
1496  continue;
1497  }
1498  } else if (lookahead_iterator.next_state != 0) {
1499  successor.state = lookahead_iterator.next_state;
1500  successor.child_index++;
1501  } else {
1502  continue;
1503  }
1504 
1505  unsigned node_index;
1507  &subgraph->nodes,
1508  analysis_subgraph_node__compare, &successor,
1509  &node_index, &exists
1510  );
1511  while (node_index < subgraph->nodes.size) {
1512  AnalysisSubgraphNode *node = &subgraph->nodes.contents[node_index++];
1513  if (node->state != successor.state || node->child_index != successor.child_index) break;
1514 
1515  // Use the subgraph to determine what alias and field will eventually be applied
1516  // to this child node.
1517  TSSymbol alias = ts_language_alias_at(self->language, node->production_id, child_index);
1518  TSSymbol visible_symbol = alias
1519  ? alias
1520  : self->language->symbol_metadata[sym].visible
1521  ? self->language->public_symbol_map[sym]
1522  : 0;
1523  TSFieldId field_id = parent_field_id;
1524  if (!field_id) {
1525  const TSFieldMapEntry *field_map, *field_map_end;
1526  ts_language_field_map(self->language, node->production_id, &field_map, &field_map_end);
1527  for (; field_map != field_map_end; field_map++) {
1528  if (!field_map->inherited && field_map->child_index == child_index) {
1529  field_id = field_map->field_id;
1530  break;
1531  }
1532  }
1533  }
1534 
1535  // Create a new state that has advanced past this hypothetical subtree.
1536  AnalysisState next_state = *state;
1537  AnalysisStateEntry *next_state_top = analysis_state__top(&next_state);
1538  next_state_top->child_index = successor.child_index;
1539  next_state_top->parse_state = successor.state;
1540  if (node->done) next_state_top->done = true;
1541 
1542  // Determine if this hypothetical child node would match the current step
1543  // of the query pattern.
1544  bool does_match = false;
1545  if (visible_symbol) {
1546  does_match = true;
1547  if (step->symbol == WILDCARD_SYMBOL) {
1548  if (
1549  step->is_named &&
1550  !self->language->symbol_metadata[visible_symbol].named
1551  ) does_match = false;
1552  } else if (step->symbol != visible_symbol) {
1553  does_match = false;
1554  }
1555  if (step->field && step->field != field_id) {
1556  does_match = false;
1557  }
1558  if (
1559  step->supertype_symbol &&
1560  !analysis_state__has_supertype(state, step->supertype_symbol)
1561  ) does_match = false;
1562  }
1563 
1564  // If this child is hidden, then descend into it and walk through its children.
1565  // If the top entry of the stack is at the end of its rule, then that entry can
1566  // be replaced. Otherwise, push a new entry onto the stack.
1567  else if (sym >= self->language->token_count) {
1568  if (!next_state_top->done) {
1569  if (next_state.depth + 1 >= MAX_ANALYSIS_STATE_DEPTH) {
1570  #ifdef DEBUG_ANALYZE_QUERY
1571  printf("Exceeded depth limit for state %u\n", j);
1572  #endif
1573 
1574  did_abort_analysis = true;
1575  continue;
1576  }
1577 
1578  next_state.depth++;
1579  next_state_top = analysis_state__top(&next_state);
1580  }
1581 
1582  *next_state_top = (AnalysisStateEntry) {
1583  .parse_state = parse_state,
1584  .parent_symbol = sym,
1585  .child_index = 0,
1586  .field_id = field_id,
1587  .done = false,
1588  };
1589 
1590  if (analysis_state__recursion_depth(&next_state) > recursion_depth_limit) {
1592  &deeper_states,
1593  &state_pool,
1594  &next_state
1595  );
1596  continue;
1597  }
1598  }
1599 
1600  // Pop from the stack when this state reached the end of its current syntax node.
1601  while (next_state.depth > 0 && next_state_top->done) {
1602  next_state.depth--;
1603  next_state_top = analysis_state__top(&next_state);
1604  }
1605 
1606  // If this hypothetical child did match the current step of the query pattern,
1607  // then advance to the next step at the current depth. This involves skipping
1608  // over any descendant steps of the current child.
1609  const QueryStep *next_step = step;
1610  if (does_match) {
1611  for (;;) {
1612  next_state.step_index++;
1613  next_step = &self->steps.contents[next_state.step_index];
1614  if (
1615  next_step->depth == PATTERN_DONE_MARKER ||
1616  next_step->depth <= parent_depth + 1
1617  ) break;
1618  }
1619  } else if (successor.state == parse_state) {
1620  continue;
1621  }
1622 
1623  for (;;) {
1624  // Skip pass-through states. Although these states have alternatives, they are only
1625  // used to implement repetitions, and query analysis does not need to process
1626  // repetitions in order to determine whether steps are possible and definite.
1627  if (next_step->is_pass_through) {
1628  next_state.step_index++;
1629  next_step++;
1630  continue;
1631  }
1632 
1633  // If the pattern is finished or hypothetical parent node is complete, then
1634  // record that matching can terminate at this step of the pattern. Otherwise,
1635  // add this state to the list of states to process on the next iteration.
1636  if (!next_step->is_dead_end) {
1637  bool did_finish_pattern = self->steps.contents[next_state.step_index].depth != parent_depth + 1;
1638  if (did_finish_pattern) can_finish_pattern = true;
1639  if (did_finish_pattern || next_state.depth == 0) {
1640  array_insert_sorted_by(&final_step_indices, , next_state.step_index);
1641  } else {
1642  analysis_state_set__insert_sorted_by_clone(&next_states, &state_pool, &next_state);
1643  }
1644  }
1645 
1646  // If the state has advanced to a step with an alternative step, then add another state
1647  // at that alternative step. This process is simpler than the process of actually matching a
1648  // pattern during query exection, because for the purposes of query analysis, there is no
1649  // need to process repetitions.
1650  if (
1651  does_match &&
1652  next_step->alternative_index != NONE &&
1653  next_step->alternative_index > next_state.step_index
1654  ) {
1655  next_state.step_index = next_step->alternative_index;
1656  next_step = &self->steps.contents[next_state.step_index];
1657  } else {
1658  break;
1659  }
1660  }
1661  }
1662  }
1663  }
1664 
1665  AnalysisStateSet _states = states;
1666  states = next_states;
1667  next_states = _states;
1668  }
1669 
1670  // If this pattern could not be fully analyzed, then every step should
1671  // be considered fallible.
1672  if (did_abort_analysis) {
1673  for (unsigned j = parent_step_index + 1; j < self->steps.size; j++) {
1674  QueryStep *step = &self->steps.contents[j];
1675  if (
1676  step->depth <= parent_depth ||
1677  step->depth == PATTERN_DONE_MARKER
1678  ) break;
1679  if (!step->is_dead_end) {
1680  step->parent_pattern_guaranteed = false;
1681  step->root_pattern_guaranteed = false;
1682  }
1683  }
1684  continue;
1685  }
1686 
1687  // If this pattern cannot match, store the pattern index so that it can be
1688  // returned to the caller.
1689  if (!can_finish_pattern) {
1690  assert(final_step_indices.size > 0);
1691  uint16_t impossible_step_index = *array_back(&final_step_indices);
1692  uint32_t i, exists;
1693  array_search_sorted_by(&self->step_offsets, .step_index, impossible_step_index, &i, &exists);
1694  if (i >= self->step_offsets.size) i = self->step_offsets.size - 1;
1695  *error_offset = self->step_offsets.contents[i].byte_offset;
1696  all_patterns_are_valid = false;
1697  break;
1698  }
1699 
1700  // Mark as fallible any step where a match terminated.
1701  // Later, this property will be propagated to all of the step's predecessors.
1702  for (unsigned j = 0; j < final_step_indices.size; j++) {
1703  uint32_t final_step_index = final_step_indices.contents[j];
1704  QueryStep *step = &self->steps.contents[final_step_index];
1705  if (
1706  step->depth != PATTERN_DONE_MARKER &&
1707  step->depth > parent_depth &&
1708  !step->is_dead_end
1709  ) {
1710  step->parent_pattern_guaranteed = false;
1711  step->root_pattern_guaranteed = false;
1712  }
1713  }
1714  }
1715 
1716  // Mark as indefinite any step with captures that are used in predicates.
1717  Array(uint16_t) predicate_capture_ids = array_new();
1718  for (unsigned i = 0; i < self->patterns.size; i++) {
1719  QueryPattern *pattern = &self->patterns.contents[i];
1720 
1721  // Gather all of the captures that are used in predicates for this pattern.
1722  array_clear(&predicate_capture_ids);
1723  for (
1724  unsigned start = pattern->predicate_steps.offset,
1725  end = start + pattern->predicate_steps.length,
1726  j = start; j < end; j++
1727  ) {
1728  TSQueryPredicateStep *step = &self->predicate_steps.contents[j];
1729  if (step->type == TSQueryPredicateStepTypeCapture) {
1730  array_insert_sorted_by(&predicate_capture_ids, , step->value_id);
1731  }
1732  }
1733 
1734  // Find all of the steps that have these captures.
1735  for (
1736  unsigned start = pattern->steps.offset,
1737  end = start + pattern->steps.length,
1738  j = start; j < end; j++
1739  ) {
1740  QueryStep *step = &self->steps.contents[j];
1741  for (unsigned k = 0; k < MAX_STEP_CAPTURE_COUNT; k++) {
1742  uint16_t capture_id = step->capture_ids[k];
1743  if (capture_id == NONE) break;
1744  unsigned index, exists;
1745  array_search_sorted_by(&predicate_capture_ids, , capture_id, &index, &exists);
1746  if (exists) {
1747  step->root_pattern_guaranteed = false;
1748  break;
1749  }
1750  }
1751  }
1752  }
1753 
1754  // Propagate fallibility. If a pattern is fallible at a given step, then it is
1755  // fallible at all of its preceding steps.
1756  bool done = self->steps.size == 0;
1757  while (!done) {
1758  done = true;
1759  for (unsigned i = self->steps.size - 1; i > 0; i--) {
1760  QueryStep *step = &self->steps.contents[i];
1761  if (step->depth == PATTERN_DONE_MARKER) continue;
1762 
1763  // Determine if this step is definite or has definite alternatives.
1764  bool parent_pattern_guaranteed = false;
1765  for (;;) {
1766  if (step->root_pattern_guaranteed) {
1767  parent_pattern_guaranteed = true;
1768  break;
1769  }
1770  if (step->alternative_index == NONE || step->alternative_index < i) {
1771  break;
1772  }
1773  step = &self->steps.contents[step->alternative_index];
1774  }
1775 
1776  // If not, mark its predecessor as indefinite.
1777  if (!parent_pattern_guaranteed) {
1778  QueryStep *prev_step = &self->steps.contents[i - 1];
1779  if (
1780  !prev_step->is_dead_end &&
1781  prev_step->depth != PATTERN_DONE_MARKER &&
1782  prev_step->root_pattern_guaranteed
1783  ) {
1784  prev_step->root_pattern_guaranteed = false;
1785  done = false;
1786  }
1787  }
1788  }
1789  }
1790 
1791  #ifdef DEBUG_ANALYZE_QUERY
1792  printf("Steps:\n");
1793  for (unsigned i = 0; i < self->steps.size; i++) {
1794  QueryStep *step = &self->steps.contents[i];
1795  if (step->depth == PATTERN_DONE_MARKER) {
1796  printf(" %u: DONE\n", i);
1797  } else {
1798  printf(
1799  " %u: {symbol: %s, field: %s, depth: %u, parent_pattern_guaranteed: %d, root_pattern_guaranteed: %d}\n",
1800  i,
1801  (step->symbol == WILDCARD_SYMBOL)
1802  ? "ANY"
1803  : ts_language_symbol_name(self->language, step->symbol),
1804  (step->field ? ts_language_field_name_for_id(self->language, step->field) : "-"),
1805  step->depth,
1806  step->parent_pattern_guaranteed,
1807  step->root_pattern_guaranteed
1808  );
1809  }
1810  }
1811  #endif
1812 
1813  // Cleanup
1814  for (unsigned i = 0; i < subgraphs.size; i++) {
1815  array_delete(&subgraphs.contents[i].start_states);
1816  array_delete(&subgraphs.contents[i].nodes);
1817  }
1818  array_delete(&subgraphs);
1819  for (unsigned i = 0; i < state_pool.size; i++) {
1820  ts_free(state_pool.contents[i]);
1821  }
1822  array_delete(&state_pool);
1823  array_delete(&next_nodes);
1825  analysis_state_set__delete(&next_states);
1826  analysis_state_set__delete(&deeper_states);
1827  array_delete(&final_step_indices);
1828  array_delete(&parent_step_indices);
1829  array_delete(&predicate_capture_ids);
1830  state_predecessor_map_delete(&predecessor_map);
1831 
1832  return all_patterns_are_valid;
1833 }
1834 
1836  TSQuery *self,
1837  uint16_t step_index,
1838  TSFieldId *field_ids,
1839  uint16_t field_count
1840 ) {
1841  QueryStep *step = &self->steps.contents[step_index];
1842 
1843  // The negated field array stores a list of field lists, separated by zeros.
1844  // Try to find the start index of an existing list that matches this new list.
1845  bool failed_match = false;
1846  unsigned match_count = 0;
1847  unsigned start_i = 0;
1848  for (unsigned i = 0; i < self->negated_fields.size; i++) {
1849  TSFieldId existing_field_id = self->negated_fields.contents[i];
1850 
1851  // At each zero value, terminate the match attempt. If we've exactly
1852  // matched the new field list, then reuse this index. Otherwise,
1853  // start over the matching process.
1854  if (existing_field_id == 0) {
1855  if (match_count == field_count) {
1856  step->negated_field_list_id = start_i;
1857  return;
1858  } else {
1859  start_i = i + 1;
1860  match_count = 0;
1861  failed_match = false;
1862  }
1863  }
1864 
1865  // If the existing list matches our new list so far, then advance
1866  // to the next element of the new list.
1867  else if (
1868  match_count < field_count &&
1869  existing_field_id == field_ids[match_count] &&
1870  !failed_match
1871  ) {
1872  match_count++;
1873  }
1874 
1875  // Otherwise, this existing list has failed to match.
1876  else {
1877  match_count = 0;
1878  failed_match = true;
1879  }
1880  }
1881 
1882  step->negated_field_list_id = self->negated_fields.size;
1883  array_extend(&self->negated_fields, field_count, field_ids);
1884  array_push(&self->negated_fields, 0);
1885 }
1886 
1888  TSQuery *self,
1889  Stream *stream
1890 ) {
1891  const char *string_start = stream->input;
1892  if (stream->next != '"') return TSQueryErrorSyntax;
1894  const char *prev_position = stream->input;
1895 
1896  bool is_escaped = false;
1897  array_clear(&self->string_buffer);
1898  for (;;) {
1899  if (is_escaped) {
1900  is_escaped = false;
1901  switch (stream->next) {
1902  case 'n':
1903  array_push(&self->string_buffer, '\n');
1904  break;
1905  case 'r':
1906  array_push(&self->string_buffer, '\r');
1907  break;
1908  case 't':
1909  array_push(&self->string_buffer, '\t');
1910  break;
1911  case '0':
1912  array_push(&self->string_buffer, '\0');
1913  break;
1914  default:
1915  array_extend(&self->string_buffer, stream->next_size, stream->input);
1916  break;
1917  }
1918  prev_position = stream->input + stream->next_size;
1919  } else {
1920  if (stream->next == '\\') {
1921  array_extend(&self->string_buffer, (stream->input - prev_position), prev_position);
1922  prev_position = stream->input + 1;
1923  is_escaped = true;
1924  } else if (stream->next == '"') {
1925  array_extend(&self->string_buffer, (stream->input - prev_position), prev_position);
1927  return TSQueryErrorNone;
1928  } else if (stream->next == '\n') {
1929  stream_reset(stream, string_start);
1930  return TSQueryErrorSyntax;
1931  }
1932  }
1933  if (!stream_advance(stream)) {
1934  stream_reset(stream, string_start);
1935  return TSQueryErrorSyntax;
1936  }
1937  }
1938 }
1939 
1940 // Parse a single predicate associated with a pattern, adding it to the
1941 // query's internal `predicate_steps` array. Predicates are arbitrary
1942 // S-expressions associated with a pattern which are meant to be handled at
1943 // a higher level of abstraction, such as the Rust/JavaScript bindings. They
1944 // can contain '@'-prefixed capture names, double-quoted strings, and bare
1945 // symbols, which also represent strings.
1947  TSQuery *self,
1948  Stream *stream
1949 ) {
1951  const char *predicate_name = stream->input;
1953  uint32_t length = stream->input - predicate_name;
1955  &self->predicate_values,
1956  predicate_name,
1957  length
1958  );
1959  array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
1960  .type = TSQueryPredicateStepTypeString,
1961  .value_id = id,
1962  }));
1964 
1965  for (;;) {
1966  if (stream->next == ')') {
1969  array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
1970  .type = TSQueryPredicateStepTypeDone,
1971  .value_id = 0,
1972  }));
1973  break;
1974  }
1975 
1976  // Parse an '@'-prefixed capture name
1977  else if (stream->next == '@') {
1979 
1980  // Parse the capture name
1982  const char *capture_name = stream->input;
1984  uint32_t length = stream->input - capture_name;
1985 
1986  // Add the capture id to the first step of the pattern
1987  int capture_id = symbol_table_id_for_name(
1988  &self->captures,
1989  capture_name,
1990  length
1991  );
1992  if (capture_id == -1) {
1993  stream_reset(stream, capture_name);
1994  return TSQueryErrorCapture;
1995  }
1996 
1997  array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
1998  .type = TSQueryPredicateStepTypeCapture,
1999  .value_id = capture_id,
2000  }));
2001  }
2002 
2003  // Parse a string literal
2004  else if (stream->next == '"') {
2006  if (e) return e;
2008  &self->predicate_values,
2009  self->string_buffer.contents,
2010  self->string_buffer.size
2011  );
2012  array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
2013  .type = TSQueryPredicateStepTypeString,
2014  .value_id = id,
2015  }));
2016  }
2017 
2018  // Parse a bare symbol
2019  else if (stream_is_ident_start(stream)) {
2020  const char *symbol_start = stream->input;
2022  uint32_t length = stream->input - symbol_start;
2024  &self->predicate_values,
2025  symbol_start,
2026  length
2027  );
2028  array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
2029  .type = TSQueryPredicateStepTypeString,
2030  .value_id = id,
2031  }));
2032  }
2033 
2034  else {
2035  return TSQueryErrorSyntax;
2036  }
2037 
2039  }
2040 
2041  return 0;
2042 }
2043 
2044 // Read one S-expression pattern from the stream, and incorporate it into
2045 // the query's internal state machine representation. For nested patterns,
2046 // this function calls itself recursively.
2047 //
2048 // The caller is repsonsible for passing in a dedicated CaptureQuantifiers.
2049 // These should not be shared between different calls to ts_query__parse_pattern!
2051  TSQuery *self,
2052  Stream *stream,
2053  uint32_t depth,
2054  bool is_immediate,
2055  CaptureQuantifiers *capture_quantifiers
2056 ) {
2057  if (stream->next == 0) return TSQueryErrorSyntax;
2058  if (stream->next == ')' || stream->next == ']') return PARENT_DONE;
2059 
2060  const uint32_t starting_step_index = self->steps.size;
2061 
2062  // Store the byte offset of each step in the query.
2063  if (
2064  self->step_offsets.size == 0 ||
2065  array_back(&self->step_offsets)->step_index != starting_step_index
2066  ) {
2067  array_push(&self->step_offsets, ((StepOffset) {
2068  .step_index = starting_step_index,
2069  .byte_offset = stream_offset(stream),
2070  }));
2071  }
2072 
2073  // An open bracket is the start of an alternation.
2074  if (stream->next == '[') {
2077 
2078  // Parse each branch, and add a placeholder step in between the branches.
2079  Array(uint32_t) branch_step_indices = array_new();
2080  CaptureQuantifiers branch_capture_quantifiers = capture_quantifiers_new();
2081  for (;;) {
2082  uint32_t start_index = self->steps.size;
2084  self,
2085  stream,
2086  depth,
2087  is_immediate,
2088  &branch_capture_quantifiers
2089  );
2090 
2091  if (e == PARENT_DONE) {
2092  if (stream->next == ']' && branch_step_indices.size > 0) {
2094  break;
2095  }
2097  }
2098  if (e) {
2099  capture_quantifiers_delete(&branch_capture_quantifiers);
2100  array_delete(&branch_step_indices);
2101  return e;
2102  }
2103 
2104  if(start_index == starting_step_index) {
2105  capture_quantifiers_replace(capture_quantifiers, &branch_capture_quantifiers);
2106  } else {
2107  capture_quantifiers_join_all(capture_quantifiers, &branch_capture_quantifiers);
2108  }
2109 
2110  array_push(&branch_step_indices, start_index);
2111  array_push(&self->steps, query_step__new(0, depth, false));
2112  capture_quantifiers_clear(&branch_capture_quantifiers);
2113  }
2114  (void)array_pop(&self->steps);
2115 
2116  // For all of the branches except for the last one, add the subsequent branch as an
2117  // alternative, and link the end of the branch to the current end of the steps.
2118  for (unsigned i = 0; i < branch_step_indices.size - 1; i++) {
2119  uint32_t step_index = branch_step_indices.contents[i];
2120  uint32_t next_step_index = branch_step_indices.contents[i + 1];
2121  QueryStep *start_step = &self->steps.contents[step_index];
2122  QueryStep *end_step = &self->steps.contents[next_step_index - 1];
2123  start_step->alternative_index = next_step_index;
2124  end_step->alternative_index = self->steps.size;
2125  end_step->is_dead_end = true;
2126  }
2127 
2128  capture_quantifiers_delete(&branch_capture_quantifiers);
2129  array_delete(&branch_step_indices);
2130  }
2131 
2132  // An open parenthesis can be the start of three possible constructs:
2133  // * A grouped sequence
2134  // * A predicate
2135  // * A named node
2136  else if (stream->next == '(') {
2139 
2140  // If this parenthesis is followed by a node, then it represents a grouped sequence.
2141  if (stream->next == '(' || stream->next == '"' || stream->next == '[') {
2142  bool child_is_immediate = false;
2143  CaptureQuantifiers child_capture_quantifiers = capture_quantifiers_new();
2144  for (;;) {
2145  if (stream->next == '.') {
2146  child_is_immediate = true;
2149  }
2151  self,
2152  stream,
2153  depth,
2154  child_is_immediate,
2155  &child_capture_quantifiers
2156  );
2157  if (e == PARENT_DONE) {
2158  if (stream->next == ')') {
2160  break;
2161  }
2163  }
2164  if (e) {
2165  capture_quantifiers_delete(&child_capture_quantifiers);
2166  return e;
2167  }
2168 
2169  capture_quantifiers_add_all(capture_quantifiers, &child_capture_quantifiers);
2170 
2171  child_is_immediate = false;
2172  capture_quantifiers_clear(&child_capture_quantifiers);
2173  }
2174  capture_quantifiers_delete(&child_capture_quantifiers);
2175  }
2176 
2177  // A dot/pound character indicates the start of a predicate.
2178  else if (stream->next == '.' || stream->next == '#') {
2180  return ts_query__parse_predicate(self, stream);
2181  }
2182 
2183  // Otherwise, this parenthesis is the start of a named node.
2184  else {
2185  TSSymbol symbol;
2186 
2187  // Parse a normal node name
2189  const char *node_name = stream->input;
2191  uint32_t length = stream->input - node_name;
2192 
2193  // TODO - remove.
2194  // For temporary backward compatibility, handle predicates without the leading '#' sign.
2195  if (length > 0 && (node_name[length - 1] == '!' || node_name[length - 1] == '?')) {
2196  stream_reset(stream, node_name);
2197  return ts_query__parse_predicate(self, stream);
2198  }
2199 
2200  // Parse the wildcard symbol
2201  else if (length == 1 && node_name[0] == '_') {
2202  symbol = WILDCARD_SYMBOL;
2203  }
2204 
2205  else {
2206  symbol = ts_language_symbol_for_name(
2207  self->language,
2208  node_name,
2209  length,
2210  true
2211  );
2212  if (!symbol) {
2213  stream_reset(stream, node_name);
2214  return TSQueryErrorNodeType;
2215  }
2216  }
2217  } else {
2218  return TSQueryErrorSyntax;
2219  }
2220 
2221  // Add a step for the node.
2222  array_push(&self->steps, query_step__new(symbol, depth, is_immediate));
2223  QueryStep *step = array_back(&self->steps);
2224  if (ts_language_symbol_metadata(self->language, symbol).supertype) {
2225  step->supertype_symbol = step->symbol;
2226  step->symbol = WILDCARD_SYMBOL;
2227  }
2228  if (symbol == WILDCARD_SYMBOL) {
2229  step->is_named = true;
2230  }
2231 
2233 
2234  if (stream->next == '/') {
2236  if (!stream_is_ident_start(stream)) {
2237  return TSQueryErrorSyntax;
2238  }
2239 
2240  const char *node_name = stream->input;
2242  uint32_t length = stream->input - node_name;
2243 
2245  self->language,
2246  node_name,
2247  length,
2248  true
2249  );
2250  if (!step->symbol) {
2251  stream_reset(stream, node_name);
2252  return TSQueryErrorNodeType;
2253  }
2254 
2256  }
2257 
2258  // Parse the child patterns
2259  bool child_is_immediate = false;
2260  uint16_t last_child_step_index = 0;
2261  uint16_t negated_field_count = 0;
2262  TSFieldId negated_field_ids[MAX_NEGATED_FIELD_COUNT];
2263  CaptureQuantifiers child_capture_quantifiers = capture_quantifiers_new();
2264  for (;;) {
2265  // Parse a negated field assertion
2266  if (stream->next == '!') {
2269  if (!stream_is_ident_start(stream)) {
2270  capture_quantifiers_delete(&child_capture_quantifiers);
2271  return TSQueryErrorSyntax;
2272  }
2273  const char *field_name = stream->input;
2275  uint32_t length = stream->input - field_name;
2277 
2279  self->language,
2280  field_name,
2281  length
2282  );
2283  if (!field_id) {
2284  stream->input = field_name;
2285  capture_quantifiers_delete(&child_capture_quantifiers);
2286  return TSQueryErrorField;
2287  }
2288 
2289  // Keep the field ids sorted.
2290  if (negated_field_count < MAX_NEGATED_FIELD_COUNT) {
2291  negated_field_ids[negated_field_count] = field_id;
2292  negated_field_count++;
2293  }
2294 
2295  continue;
2296  }
2297 
2298  // Parse a sibling anchor
2299  if (stream->next == '.') {
2300  child_is_immediate = true;
2303  }
2304 
2305  uint16_t step_index = self->steps.size;
2307  self,
2308  stream,
2309  depth + 1,
2310  child_is_immediate,
2311  &child_capture_quantifiers
2312  );
2313  if (e == PARENT_DONE) {
2314  if (stream->next == ')') {
2315  if (child_is_immediate) {
2316  if (last_child_step_index == 0) {
2317  capture_quantifiers_delete(&child_capture_quantifiers);
2318  return TSQueryErrorSyntax;
2319  }
2320  self->steps.contents[last_child_step_index].is_last_child = true;
2321  }
2322 
2323  if (negated_field_count) {
2325  self,
2326  starting_step_index,
2327  negated_field_ids,
2328  negated_field_count
2329  );
2330  }
2331 
2333  break;
2334  }
2336  }
2337  if (e) {
2338  capture_quantifiers_delete(&child_capture_quantifiers);
2339  return e;
2340  }
2341 
2342  capture_quantifiers_add_all(capture_quantifiers, &child_capture_quantifiers);
2343 
2344  last_child_step_index = step_index;
2345  child_is_immediate = false;
2346  capture_quantifiers_clear(&child_capture_quantifiers);
2347  }
2348  capture_quantifiers_delete(&child_capture_quantifiers);
2349  }
2350  }
2351 
2352  // Parse a wildcard pattern
2353  else if (stream->next == '_') {
2356 
2357  // Add a step that matches any kind of node
2358  array_push(&self->steps, query_step__new(WILDCARD_SYMBOL, depth, is_immediate));
2359  }
2360 
2361  // Parse a double-quoted anonymous leaf node expression
2362  else if (stream->next == '"') {
2363  const char *string_start = stream->input;
2365  if (e) return e;
2366 
2367  // Add a step for the node
2369  self->language,
2370  self->string_buffer.contents,
2371  self->string_buffer.size,
2372  false
2373  );
2374  if (!symbol) {
2375  stream_reset(stream, string_start + 1);
2376  return TSQueryErrorNodeType;
2377  }
2378  array_push(&self->steps, query_step__new(symbol, depth, is_immediate));
2379  }
2380 
2381  // Parse a field-prefixed pattern
2382  else if (stream_is_ident_start(stream)) {
2383  // Parse the field name
2384  const char *field_name = stream->input;
2386  uint32_t length = stream->input - field_name;
2388 
2389  if (stream->next != ':') {
2391  return TSQueryErrorSyntax;
2392  }
2395 
2396  // Parse the pattern
2397  CaptureQuantifiers field_capture_quantifiers = capture_quantifiers_new();
2399  self,
2400  stream,
2401  depth,
2402  is_immediate,
2403  &field_capture_quantifiers
2404  );
2405  if (e) {
2406  capture_quantifiers_delete(&field_capture_quantifiers);
2407  if (e == PARENT_DONE) e = TSQueryErrorSyntax;
2408  return e;
2409  }
2410 
2411  // Add the field name to the first step of the pattern
2413  self->language,
2414  field_name,
2415  length
2416  );
2417  if (!field_id) {
2418  stream->input = field_name;
2419  return TSQueryErrorField;
2420  }
2421 
2422  uint32_t step_index = starting_step_index;
2423  QueryStep *step = &self->steps.contents[step_index];
2424  for (;;) {
2425  step->field = field_id;
2426  if (
2427  step->alternative_index != NONE &&
2428  step->alternative_index > step_index &&
2429  step->alternative_index < self->steps.size
2430  ) {
2431  step_index = step->alternative_index;
2432  step = &self->steps.contents[step_index];
2433  } else {
2434  break;
2435  }
2436  }
2437 
2438  capture_quantifiers_add_all(capture_quantifiers, &field_capture_quantifiers);
2439  capture_quantifiers_delete(&field_capture_quantifiers);
2440  }
2441 
2442  else {
2443  return TSQueryErrorSyntax;
2444  }
2445 
2447 
2448  // Parse suffixes modifiers for this pattern
2449  TSQuantifier quantifier = TSQuantifierOne;
2450  for (;;) {
2451  // Parse the one-or-more operator.
2452  if (stream->next == '+') {
2453  quantifier = quantifier_join(TSQuantifierOneOrMore, quantifier);
2454 
2457 
2458  QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false);
2459  repeat_step.alternative_index = starting_step_index;
2460  repeat_step.is_pass_through = true;
2461  repeat_step.alternative_is_immediate = true;
2462  array_push(&self->steps, repeat_step);
2463  }
2464 
2465  // Parse the zero-or-more repetition operator.
2466  else if (stream->next == '*') {
2467  quantifier = quantifier_join(TSQuantifierZeroOrMore, quantifier);
2468 
2471 
2472  QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false);
2473  repeat_step.alternative_index = starting_step_index;
2474  repeat_step.is_pass_through = true;
2475  repeat_step.alternative_is_immediate = true;
2476  array_push(&self->steps, repeat_step);
2477 
2478  QueryStep *step = &self->steps.contents[starting_step_index];
2479  while (step->alternative_index != NONE) {
2480  step = &self->steps.contents[step->alternative_index];
2481  }
2482  step->alternative_index = self->steps.size;
2483  }
2484 
2485  // Parse the optional operator.
2486  else if (stream->next == '?') {
2487  quantifier = quantifier_join(TSQuantifierZeroOrOne, quantifier);
2488 
2491 
2492  QueryStep *step = &self->steps.contents[starting_step_index];
2493  while (step->alternative_index != NONE) {
2494  step = &self->steps.contents[step->alternative_index];
2495  }
2496  step->alternative_index = self->steps.size;
2497  }
2498 
2499  // Parse an '@'-prefixed capture pattern
2500  else if (stream->next == '@') {
2503  const char *capture_name = stream->input;
2505  uint32_t length = stream->input - capture_name;
2507 
2508  // Add the capture id to the first step of the pattern
2509  uint16_t capture_id = symbol_table_insert_name(
2510  &self->captures,
2511  capture_name,
2512  length
2513  );
2514 
2515  // Add the capture quantifier
2516  capture_quantifiers_add_for_id(capture_quantifiers, capture_id, TSQuantifierOne);
2517 
2518  uint32_t step_index = starting_step_index;
2519  for (;;) {
2520  QueryStep *step = &self->steps.contents[step_index];
2521  query_step__add_capture(step, capture_id);
2522  if (
2523  step->alternative_index != NONE &&
2524  step->alternative_index > step_index &&
2525  step->alternative_index < self->steps.size
2526  ) {
2527  step_index = step->alternative_index;
2528  step = &self->steps.contents[step_index];
2529  } else {
2530  break;
2531  }
2532  }
2533  }
2534 
2535  // No more suffix modifiers
2536  else {
2537  break;
2538  }
2539  }
2540 
2541  capture_quantifiers_mul(capture_quantifiers, quantifier);
2542 
2543  return 0;
2544 }
2545 
2547  const TSLanguage *language,
2548  const char *source,
2549  uint32_t source_len,
2550  uint32_t *error_offset,
2551  TSQueryError *error_type
2552 ) {
2553  if (
2554  !language ||
2555  language->version > TREE_SITTER_LANGUAGE_VERSION ||
2557  ) {
2558  *error_type = TSQueryErrorLanguage;
2559  return NULL;
2560  }
2561 
2562  TSQuery *self = ts_malloc(sizeof(TSQuery));
2563  *self = (TSQuery) {
2564  .steps = array_new(),
2565  .pattern_map = array_new(),
2566  .captures = symbol_table_new(),
2567  .capture_quantifiers = array_new(),
2568  .predicate_values = symbol_table_new(),
2569  .predicate_steps = array_new(),
2570  .patterns = array_new(),
2571  .step_offsets = array_new(),
2572  .string_buffer = array_new(),
2573  .negated_fields = array_new(),
2574  .wildcard_root_pattern_count = 0,
2575  .language = language,
2576  };
2577 
2578  array_push(&self->negated_fields, 0);
2579 
2580  // Parse all of the S-expressions in the given string.
2581  Stream stream = stream_new(source, source_len);
2583  while (stream.input < stream.end) {
2584  uint32_t pattern_index = self->patterns.size;
2585  uint32_t start_step_index = self->steps.size;
2586  uint32_t start_predicate_step_index = self->predicate_steps.size;
2587  array_push(&self->patterns, ((QueryPattern) {
2588  .steps = (Slice) {.offset = start_step_index},
2589  .predicate_steps = (Slice) {.offset = start_predicate_step_index},
2590  .start_byte = stream_offset(&stream),
2591  }));
2592  CaptureQuantifiers capture_quantifiers = capture_quantifiers_new();
2593  *error_type = ts_query__parse_pattern(self, &stream, 0, false, &capture_quantifiers);
2594  array_push(&self->steps, query_step__new(0, PATTERN_DONE_MARKER, false));
2595 
2596  QueryPattern *pattern = array_back(&self->patterns);
2597  pattern->steps.length = self->steps.size - start_step_index;
2598  pattern->predicate_steps.length = self->predicate_steps.size - start_predicate_step_index;
2599 
2600  // If any pattern could not be parsed, then report the error information
2601  // and terminate.
2602  if (*error_type) {
2603  if (*error_type == PARENT_DONE) *error_type = TSQueryErrorSyntax;
2604  *error_offset = stream_offset(&stream);
2605  capture_quantifiers_delete(&capture_quantifiers);
2606  ts_query_delete(self);
2607  return NULL;
2608  }
2609 
2610  // Maintain a list of capture quantifiers for each pattern
2611  array_push(&self->capture_quantifiers, capture_quantifiers);
2612 
2613  // Maintain a map that can look up patterns for a given root symbol.
2614  uint16_t wildcard_root_alternative_index = NONE;
2615  for (;;) {
2616  QueryStep *step = &self->steps.contents[start_step_index];
2617 
2618  // If a pattern has a wildcard at its root, but it has a non-wildcard child,
2619  // then optimize the matching process by skipping matching the wildcard.
2620  // Later, during the matching process, the query cursor will check that
2621  // there is a parent node, and capture it if necessary.
2622  if (step->symbol == WILDCARD_SYMBOL && step->depth == 0 && !step->field) {
2623  QueryStep *second_step = &self->steps.contents[start_step_index + 1];
2624  if (second_step->symbol != WILDCARD_SYMBOL && second_step->depth == 1) {
2625  wildcard_root_alternative_index = step->alternative_index;
2626  start_step_index += 1;
2627  step = second_step;
2628  }
2629  }
2630 
2631  // Determine whether the pattern has a single root node. This affects
2632  // decisions about whether or not to start matching the pattern when
2633  // a query cursor has a range restriction.
2634  bool is_rooted = true;
2635  uint32_t start_depth = step->depth;
2636  for (uint32_t step_index = start_step_index + 1; step_index < self->steps.size; step_index++) {
2637  QueryStep *step = &self->steps.contents[step_index];
2638  if (step->depth == start_depth) {
2639  is_rooted = false;
2640  break;
2641  }
2642  }
2643 
2645  .step_index = start_step_index,
2646  .pattern_index = pattern_index,
2647  .is_rooted = is_rooted
2648  });
2649  if (step->symbol == WILDCARD_SYMBOL) {
2650  self->wildcard_root_pattern_count++;
2651  }
2652 
2653  // If there are alternatives or options at the root of the pattern,
2654  // then add multiple entries to the pattern map.
2655  if (step->alternative_index != NONE) {
2656  start_step_index = step->alternative_index;
2657  step->alternative_index = NONE;
2658  } else if (wildcard_root_alternative_index != NONE) {
2659  start_step_index = wildcard_root_alternative_index;
2660  wildcard_root_alternative_index = NONE;
2661  } else {
2662  break;
2663  }
2664  }
2665  }
2666 
2667  if (!ts_query__analyze_patterns(self, error_offset)) {
2668  *error_type = TSQueryErrorStructure;
2669  ts_query_delete(self);
2670  return NULL;
2671  }
2672 
2673  array_delete(&self->string_buffer);
2674  return self;
2675 }
2676 
2678  if (self) {
2679  array_delete(&self->steps);
2680  array_delete(&self->pattern_map);
2681  array_delete(&self->predicate_steps);
2682  array_delete(&self->patterns);
2683  array_delete(&self->step_offsets);
2684  array_delete(&self->string_buffer);
2685  array_delete(&self->negated_fields);
2686  symbol_table_delete(&self->captures);
2687  symbol_table_delete(&self->predicate_values);
2688  for (uint32_t index = 0; index < self->capture_quantifiers.size; index++) {
2689  CaptureQuantifiers *capture_quantifiers = array_get(&self->capture_quantifiers, index);
2690  capture_quantifiers_delete(capture_quantifiers);
2691  }
2692  array_delete(&self->capture_quantifiers);
2693  ts_free(self);
2694  }
2695 }
2696 
2698  return self->patterns.size;
2699 }
2700 
2702  return self->captures.slices.size;
2703 }
2704 
2706  return self->predicate_values.slices.size;
2707 }
2708 
2710  const TSQuery *self,
2711  uint32_t index,
2712  uint32_t *length
2713 ) {
2714  return symbol_table_name_for_id(&self->captures, index, length);
2715 }
2716 
2718  const TSQuery *self,
2719  uint32_t pattern_index,
2720  uint32_t capture_index
2721 ) {
2722  CaptureQuantifiers *capture_quantifiers = array_get(&self->capture_quantifiers, pattern_index);
2723  return capture_quantifier_for_id(capture_quantifiers, capture_index);
2724 }
2725 
2727  const TSQuery *self,
2728  uint32_t index,
2729  uint32_t *length
2730 ) {
2731  return symbol_table_name_for_id(&self->predicate_values, index, length);
2732 }
2733 
2735  const TSQuery *self,
2736  uint32_t pattern_index,
2737  uint32_t *step_count
2738 ) {
2739  Slice slice = self->patterns.contents[pattern_index].predicate_steps;
2740  *step_count = slice.length;
2741  if (self->predicate_steps.contents == NULL) {
2742  return NULL;
2743  }
2744  return &self->predicate_steps.contents[slice.offset];
2745 }
2746 
2748  const TSQuery *self,
2749  uint32_t pattern_index
2750 ) {
2751  return self->patterns.contents[pattern_index].start_byte;
2752 }
2753 
2755  const TSQuery *self,
2756  uint32_t byte_offset
2757 ) {
2758  uint32_t step_index = UINT32_MAX;
2759  for (unsigned i = 0; i < self->step_offsets.size; i++) {
2760  StepOffset *step_offset = &self->step_offsets.contents[i];
2761  if (step_offset->byte_offset > byte_offset) break;
2762  step_index = step_offset->step_index;
2763  }
2764  if (step_index < self->steps.size) {
2765  return self->steps.contents[step_index].root_pattern_guaranteed;
2766  } else {
2767  return false;
2768  }
2769 }
2770 
2772  const TSQuery *self,
2773  uint16_t step_index
2774 ) {
2775  assert((uint32_t)step_index + 1 < self->steps.size);
2776  QueryStep *step = &self->steps.contents[step_index];
2777  QueryStep *next_step = &self->steps.contents[step_index + 1];
2778  return (
2779  next_step->depth != PATTERN_DONE_MARKER &&
2780  next_step->depth > step->depth &&
2781  !next_step->parent_pattern_guaranteed
2782  );
2783 }
2784 
2786  TSQuery *self,
2787  const char *name,
2789 ) {
2790  // Remove capture information for any pattern step that previously
2791  // captured with the given name.
2792  int id = symbol_table_id_for_name(&self->captures, name, length);
2793  if (id != -1) {
2794  for (unsigned i = 0; i < self->steps.size; i++) {
2795  QueryStep *step = &self->steps.contents[i];
2797  }
2798  }
2799 }
2800 
2802  TSQuery *self,
2803  uint32_t pattern_index
2804 ) {
2805  // Remove the given pattern from the pattern map. Its steps will still
2806  // be in the `steps` array, but they will never be read.
2807  for (unsigned i = 0; i < self->pattern_map.size; i++) {
2808  PatternEntry *pattern = &self->pattern_map.contents[i];
2809  if (pattern->pattern_index == pattern_index) {
2810  array_erase(&self->pattern_map, i);
2811  i--;
2812  }
2813  }
2814 }
2815 
2816 /***************
2817  * QueryCursor
2818  ***************/
2819 
2821  TSQueryCursor *self = ts_malloc(sizeof(TSQueryCursor));
2822  *self = (TSQueryCursor) {
2823  .did_exceed_match_limit = false,
2824  .ascending = false,
2825  .halted = false,
2826  .states = array_new(),
2827  .finished_states = array_new(),
2828  .capture_list_pool = capture_list_pool_new(),
2829  .start_byte = 0,
2830  .end_byte = UINT32_MAX,
2831  .start_point = {0, 0},
2832  .end_point = POINT_MAX,
2833  };
2834  array_reserve(&self->states, 8);
2835  array_reserve(&self->finished_states, 8);
2836  return self;
2837 }
2838 
2840  array_delete(&self->states);
2841  array_delete(&self->finished_states);
2842  ts_tree_cursor_delete(&self->cursor);
2843  capture_list_pool_delete(&self->capture_list_pool);
2844  ts_free(self);
2845 }
2846 
2848  return self->did_exceed_match_limit;
2849 }
2850 
2852  return self->capture_list_pool.max_capture_list_count;
2853 }
2854 
2856  self->capture_list_pool.max_capture_list_count = limit;
2857 }
2858 
2860  TSQueryCursor *self,
2861  const TSQuery *query,
2862  TSNode node
2863 ) {
2864  array_clear(&self->states);
2865  array_clear(&self->finished_states);
2866  ts_tree_cursor_reset(&self->cursor, node);
2867  capture_list_pool_reset(&self->capture_list_pool);
2868  self->next_state_id = 0;
2869  self->depth = 0;
2870  self->ascending = false;
2871  self->halted = false;
2872  self->query = query;
2873  self->did_exceed_match_limit = false;
2874 }
2875 
2877  TSQueryCursor *self,
2878  uint32_t start_byte,
2879  uint32_t end_byte
2880 ) {
2881  if (end_byte == 0) {
2882  end_byte = UINT32_MAX;
2883  }
2884  self->start_byte = start_byte;
2885  self->end_byte = end_byte;
2886 }
2887 
2889  TSQueryCursor *self,
2890  TSPoint start_point,
2891  TSPoint end_point
2892 ) {
2893  if (end_point.row == 0 && end_point.column == 0) {
2894  end_point = POINT_MAX;
2895  }
2896  self->start_point = start_point;
2897  self->end_point = end_point;
2898 }
2899 
2900 // Search through all of the in-progress states, and find the captured
2901 // node that occurs earliest in the document.
2903  TSQueryCursor *self,
2904  uint32_t *state_index,
2905  uint32_t *byte_offset,
2906  uint32_t *pattern_index,
2907  bool *root_pattern_guaranteed
2908 ) {
2909  bool result = false;
2910  *state_index = UINT32_MAX;
2911  *byte_offset = UINT32_MAX;
2912  *pattern_index = UINT32_MAX;
2913  for (unsigned i = 0; i < self->states.size; i++) {
2914  QueryState *state = &self->states.contents[i];
2915  if (state->dead) continue;
2916 
2917  const CaptureList *captures = capture_list_pool_get(
2918  &self->capture_list_pool,
2919  state->capture_list_id
2920  );
2921  if (state->consumed_capture_count >= captures->size) {
2922  continue;
2923  }
2924 
2925  TSNode node = captures->contents[state->consumed_capture_count].node;
2926  if (
2927  ts_node_end_byte(node) <= self->start_byte ||
2928  point_lte(ts_node_end_point(node), self->start_point)
2929  ) {
2930  state->consumed_capture_count++;
2931  i--;
2932  continue;
2933  }
2934 
2935  uint32_t node_start_byte = ts_node_start_byte(node);
2936  if (
2937  !result ||
2938  node_start_byte < *byte_offset ||
2939  (node_start_byte == *byte_offset && state->pattern_index < *pattern_index)
2940  ) {
2941  QueryStep *step = &self->query->steps.contents[state->step_index];
2942  if (root_pattern_guaranteed) {
2943  *root_pattern_guaranteed = step->root_pattern_guaranteed;
2944  } else if (step->root_pattern_guaranteed) {
2945  continue;
2946  }
2947 
2948  result = true;
2949  *state_index = i;
2950  *byte_offset = node_start_byte;
2951  *pattern_index = state->pattern_index;
2952  }
2953  }
2954  return result;
2955 }
2956 
2957 // Determine which node is first in a depth-first traversal
2959  if (left.id != right.id) {
2960  uint32_t left_start = ts_node_start_byte(left);
2961  uint32_t right_start = ts_node_start_byte(right);
2962  if (left_start < right_start) return -1;
2963  if (left_start > right_start) return 1;
2964  uint32_t left_node_count = ts_node_end_byte(left);
2965  uint32_t right_node_count = ts_node_end_byte(right);
2966  if (left_node_count > right_node_count) return -1;
2967  if (left_node_count < right_node_count) return 1;
2968  }
2969  return 0;
2970 }
2971 
2972 // Determine if either state contains a superset of the other state's captures.
2974  TSQueryCursor *self,
2975  QueryState *left_state,
2976  QueryState *right_state,
2977  bool *left_contains_right,
2978  bool *right_contains_left
2979 ) {
2980  const CaptureList *left_captures = capture_list_pool_get(
2981  &self->capture_list_pool,
2982  left_state->capture_list_id
2983  );
2984  const CaptureList *right_captures = capture_list_pool_get(
2985  &self->capture_list_pool,
2986  right_state->capture_list_id
2987  );
2988  *left_contains_right = true;
2989  *right_contains_left = true;
2990  unsigned i = 0, j = 0;
2991  for (;;) {
2992  if (i < left_captures->size) {
2993  if (j < right_captures->size) {
2994  TSQueryCapture *left = &left_captures->contents[i];
2995  TSQueryCapture *right = &right_captures->contents[j];
2996  if (left->node.id == right->node.id && left->index == right->index) {
2997  i++;
2998  j++;
2999  } else {
3000  switch (ts_query_cursor__compare_nodes(left->node, right->node)) {
3001  case -1:
3002  *right_contains_left = false;
3003  i++;
3004  break;
3005  case 1:
3006  *left_contains_right = false;
3007  j++;
3008  break;
3009  default:
3010  *right_contains_left = false;
3011  *left_contains_right = false;
3012  i++;
3013  j++;
3014  break;
3015  }
3016  }
3017  } else {
3018  *right_contains_left = false;
3019  break;
3020  }
3021  } else {
3022  if (j < right_captures->size) {
3023  *left_contains_right = false;
3024  }
3025  break;
3026  }
3027  }
3028 }
3029 
3030 #ifdef DEBUG_EXECUTE_QUERY
3031 #define LOG(...) fprintf(stderr, __VA_ARGS__)
3032 #else
3033 #define LOG(...)
3034 #endif
3035 
3037  TSQueryCursor *self,
3038  const PatternEntry *pattern
3039 ) {
3040  QueryStep *step = &self->query->steps.contents[pattern->step_index];
3041  uint32_t start_depth = self->depth - step->depth;
3042 
3043  // Keep the states array in ascending order of start_depth and pattern_index,
3044  // so that it can be processed more efficiently elsewhere. Usually, there is
3045  // no work to do here because of two facts:
3046  // * States with lower start_depth are naturally added first due to the
3047  // order in which nodes are visited.
3048  // * Earlier patterns are naturally added first because of the ordering of the
3049  // pattern_map data structure that's used to initiate matches.
3050  //
3051  // This loop is only needed in cases where two conditions hold:
3052  // * A pattern consists of more than one sibling node, so that its states
3053  // remain in progress after exiting the node that started the match.
3054  // * The first node in the pattern matches against multiple nodes at the
3055  // same depth.
3056  //
3057  // An example of this is the pattern '((comment)* (function))'. If multiple
3058  // `comment` nodes appear in a row, then we may initiate a new state for this
3059  // pattern while another state for the same pattern is already in progress.
3060  // If there are multiple patterns like this in a query, then this loop will
3061  // need to execute in order to keep the states ordered by pattern_index.
3062  uint32_t index = self->states.size;
3063  while (index > 0) {
3064  QueryState *prev_state = &self->states.contents[index - 1];
3065  if (prev_state->start_depth < start_depth) break;
3066  if (prev_state->start_depth == start_depth) {
3067  // Avoid inserting an unnecessary duplicate state, which would be
3068  // immediately pruned by the longest-match criteria.
3069  if (
3070  prev_state->pattern_index == pattern->pattern_index &&
3071  prev_state->step_index == pattern->step_index
3072  ) return;
3073  if (prev_state->pattern_index <= pattern->pattern_index) break;
3074  }
3075  index--;
3076  }
3077 
3078  LOG(
3079  " start state. pattern:%u, step:%u\n",
3080  pattern->pattern_index,
3081  pattern->step_index
3082  );
3083  array_insert(&self->states, index, ((QueryState) {
3084  .id = UINT32_MAX,
3085  .capture_list_id = NONE,
3086  .step_index = pattern->step_index,
3087  .pattern_index = pattern->pattern_index,
3088  .start_depth = start_depth,
3089  .consumed_capture_count = 0,
3090  .seeking_immediate_match = true,
3091  .has_in_progress_alternatives = false,
3092  .needs_parent = step->depth == 1,
3093  .dead = false,
3094  }));
3095 }
3096 
3097 // Acquire a capture list for this state. If there are no capture lists left in the
3098 // pool, this will steal the capture list from another existing state, and mark that
3099 // other state as 'dead'.
3101  TSQueryCursor *self,
3102  QueryState *state,
3103  unsigned state_index_to_preserve
3104 ) {
3105  if (state->capture_list_id == NONE) {
3106  state->capture_list_id = capture_list_pool_acquire(&self->capture_list_pool);
3107 
3108  // If there are no capture lists left in the pool, then terminate whichever
3109  // state has captured the earliest node in the document, and steal its
3110  // capture list.
3111  if (state->capture_list_id == NONE) {
3112  self->did_exceed_match_limit = true;
3113  uint32_t state_index, byte_offset, pattern_index;
3114  if (
3116  self,
3117  &state_index,
3118  &byte_offset,
3119  &pattern_index,
3120  NULL
3121  ) &&
3122  state_index != state_index_to_preserve
3123  ) {
3124  LOG(
3125  " abandon state. index:%u, pattern:%u, offset:%u.\n",
3126  state_index, pattern_index, byte_offset
3127  );
3128  QueryState *other_state = &self->states.contents[state_index];
3129  state->capture_list_id = other_state->capture_list_id;
3130  other_state->capture_list_id = NONE;
3131  other_state->dead = true;
3132  CaptureList *list = capture_list_pool_get_mut(
3133  &self->capture_list_pool,
3134  state->capture_list_id
3135  );
3136  array_clear(list);
3137  return list;
3138  } else {
3139  LOG(" ran out of capture lists");
3140  return NULL;
3141  }
3142  }
3143  }
3144  return capture_list_pool_get_mut(&self->capture_list_pool, state->capture_list_id);
3145 }
3146 
3148  TSQueryCursor *self,
3149  QueryState *state,
3150  QueryStep *step,
3151  TSNode node
3152 ) {
3153  if (state->dead) return;
3154  CaptureList *capture_list = ts_query_cursor__prepare_to_capture(self, state, UINT32_MAX);
3155  if (!capture_list) {
3156  state->dead = true;
3157  return;
3158  }
3159 
3160  for (unsigned j = 0; j < MAX_STEP_CAPTURE_COUNT; j++) {
3161  uint16_t capture_id = step->capture_ids[j];
3162  if (step->capture_ids[j] == NONE) break;
3163  array_push(capture_list, ((TSQueryCapture) { node, capture_id }));
3164  LOG(
3165  " capture node. type:%s, pattern:%u, capture_id:%u, capture_count:%u\n",
3166  ts_node_type(node),
3167  state->pattern_index,
3168  capture_id,
3169  capture_list->size
3170  );
3171  }
3172 }
3173 
3174 // Duplicate the given state and insert the newly-created state immediately after
3175 // the given state in the `states` array. Ensures that the given state reference is
3176 // still valid, even if the states array is reallocated.
3178  TSQueryCursor *self,
3179  QueryState **state_ref
3180 ) {
3181  const QueryState *state = *state_ref;
3182  uint32_t state_index = state - self->states.contents;
3183  QueryState copy = *state;
3184  copy.capture_list_id = NONE;
3185 
3186  // If the state has captures, copy its capture list.
3187  if (state->capture_list_id != NONE) {
3188  CaptureList *new_captures = ts_query_cursor__prepare_to_capture(self, &copy, state_index);
3189  if (!new_captures) return NULL;
3190  const CaptureList *old_captures = capture_list_pool_get(
3191  &self->capture_list_pool,
3192  state->capture_list_id
3193  );
3194  array_push_all(new_captures, old_captures);
3195  }
3196 
3197  array_insert(&self->states, state_index + 1, copy);
3198  *state_ref = &self->states.contents[state_index];
3199  return &self->states.contents[state_index + 1];
3200 }
3201 
3202 // Walk the tree, processing patterns until at least one pattern finishes,
3203 // If one or more patterns finish, return `true` and store their states in the
3204 // `finished_states` array. Multiple patterns can finish on the same node. If
3205 // there are no more matches, return `false`.
3206 static inline bool ts_query_cursor__advance(
3207  TSQueryCursor *self,
3208  bool stop_on_definite_step
3209 ) {
3210  bool did_match = false;
3211  for (;;) {
3212  if (self->halted) {
3213  while (self->states.size > 0) {
3214  QueryState state = array_pop(&self->states);
3216  &self->capture_list_pool,
3217  state.capture_list_id
3218  );
3219  }
3220  }
3221 
3222  if (did_match || self->halted) return did_match;
3223 
3224  // Exit the current node.
3225  if (self->ascending) {
3226  LOG(
3227  "leave node. depth:%u, type:%s\n",
3228  self->depth,
3230  );
3231 
3232  // Leave this node by stepping to its next sibling or to its parent.
3233  if (ts_tree_cursor_goto_next_sibling(&self->cursor)) {
3234  self->ascending = false;
3235  } else if (ts_tree_cursor_goto_parent(&self->cursor)) {
3236  self->depth--;
3237  } else {
3238  LOG("halt at root\n");
3239  self->halted = true;
3240  }
3241 
3242  // After leaving a node, remove any states that cannot make further progress.
3243  uint32_t deleted_count = 0;
3244  for (unsigned i = 0, n = self->states.size; i < n; i++) {
3245  QueryState *state = &self->states.contents[i];
3246  QueryStep *step = &self->query->steps.contents[state->step_index];
3247 
3248  // If a state completed its pattern inside of this node, but was deferred from finishing
3249  // in order to search for longer matches, mark it as finished.
3250  if (step->depth == PATTERN_DONE_MARKER) {
3251  if (state->start_depth > self->depth || self->halted) {
3252  LOG(" finish pattern %u\n", state->pattern_index);
3253  array_push(&self->finished_states, *state);
3254  did_match = true;
3255  deleted_count++;
3256  continue;
3257  }
3258  }
3259 
3260  // If a state needed to match something within this node, then remove that state
3261  // as it has failed to match.
3262  else if ((uint32_t)state->start_depth + (uint32_t)step->depth > self->depth) {
3263  LOG(
3264  " failed to match. pattern:%u, step:%u\n",
3265  state->pattern_index,
3266  state->step_index
3267  );
3269  &self->capture_list_pool,
3270  state->capture_list_id
3271  );
3272  deleted_count++;
3273  continue;
3274  }
3275 
3276  if (deleted_count > 0) {
3277  self->states.contents[i - deleted_count] = *state;
3278  }
3279  }
3280  self->states.size -= deleted_count;
3281  }
3282 
3283  // Enter a new node.
3284  else {
3285  // Get the properties of the current node.
3286  TSNode node = ts_tree_cursor_current_node(&self->cursor);
3287  TSNode parent_node = ts_tree_cursor_parent_node(&self->cursor);
3288  TSSymbol symbol = ts_node_symbol(node);
3289  bool is_named = ts_node_is_named(node);
3290  bool has_later_siblings;
3291  bool has_later_named_siblings;
3292  bool can_have_later_siblings_with_this_field;
3293  TSFieldId field_id = 0;
3294  TSSymbol supertypes[8] = {0};
3295  unsigned supertype_count = 8;
3297  &self->cursor,
3298  &field_id,
3299  &has_later_siblings,
3300  &has_later_named_siblings,
3301  &can_have_later_siblings_with_this_field,
3302  supertypes,
3303  &supertype_count
3304  );
3305  LOG(
3306  "enter node. depth:%u, type:%s, field:%s, row:%u state_count:%u, finished_state_count:%u\n",
3307  self->depth,
3308  ts_node_type(node),
3309  ts_language_field_name_for_id(self->query->language, field_id),
3310  ts_node_start_point(node).row,
3311  self->states.size,
3312  self->finished_states.size
3313  );
3314 
3315  bool node_intersects_range = (
3316  ts_node_end_byte(node) > self->start_byte &&
3317  ts_node_start_byte(node) < self->end_byte &&
3318  point_gt(ts_node_end_point(node), self->start_point) &&
3319  point_lt(ts_node_start_point(node), self->end_point)
3320  );
3321 
3322  bool parent_intersects_range = ts_node_is_null(parent_node) || (
3323  ts_node_end_byte(parent_node) > self->start_byte &&
3324  ts_node_start_byte(parent_node) < self->end_byte &&
3325  point_gt(ts_node_end_point(parent_node), self->start_point) &&
3326  point_lt(ts_node_start_point(parent_node), self->end_point)
3327  );
3328 
3329  // Add new states for any patterns whose root node is a wildcard.
3330  for (unsigned i = 0; i < self->query->wildcard_root_pattern_count; i++) {
3331  PatternEntry *pattern = &self->query->pattern_map.contents[i];
3332 
3333  // If this node matches the first step of the pattern, then add a new
3334  // state at the start of this pattern.
3335  QueryStep *step = &self->query->steps.contents[pattern->step_index];
3336  if (
3337  (node_intersects_range || (!pattern->is_rooted && parent_intersects_range)) &&
3338  (!step->field || field_id == step->field) &&
3339  (!step->supertype_symbol || supertype_count > 0)
3340  ) {
3341  ts_query_cursor__add_state(self, pattern);
3342  }
3343  }
3344 
3345  // Add new states for any patterns whose root node matches this node.
3346  unsigned i;
3347  if (ts_query__pattern_map_search(self->query, symbol, &i)) {
3348  PatternEntry *pattern = &self->query->pattern_map.contents[i];
3349 
3350  QueryStep *step = &self->query->steps.contents[pattern->step_index];
3351  do {
3352  // If this node matches the first step of the pattern, then add a new
3353  // state at the start of this pattern.
3354  if (
3355  (node_intersects_range || (!pattern->is_rooted && parent_intersects_range)) &&
3356  (!step->field || field_id == step->field)
3357  ) {
3358  ts_query_cursor__add_state(self, pattern);
3359  }
3360 
3361  // Advance to the next pattern whose root node matches this node.
3362  i++;
3363  if (i == self->query->pattern_map.size) break;
3364  pattern = &self->query->pattern_map.contents[i];
3365  step = &self->query->steps.contents[pattern->step_index];
3366  } while (step->symbol == symbol);
3367  }
3368 
3369  // Update all of the in-progress states with current node.
3370  for (unsigned i = 0, copy_count = 0; i < self->states.size; i += 1 + copy_count) {
3371  QueryState *state = &self->states.contents[i];
3372  QueryStep *step = &self->query->steps.contents[state->step_index];
3373  state->has_in_progress_alternatives = false;
3374  copy_count = 0;
3375 
3376  // Check that the node matches all of the criteria for the next
3377  // step of the pattern.
3378  if ((uint32_t)state->start_depth + (uint32_t)step->depth != self->depth) continue;
3379 
3380  // Determine if this node matches this step of the pattern, and also
3381  // if this node can have later siblings that match this step of the
3382  // pattern.
3383  bool node_does_match = false;
3384  if (step->symbol == WILDCARD_SYMBOL) {
3385  node_does_match = is_named || !step->is_named;
3386  } else {
3387  node_does_match = symbol == step->symbol;
3388  }
3389  bool later_sibling_can_match = has_later_siblings;
3390  if ((step->is_immediate && is_named) || state->seeking_immediate_match) {
3391  later_sibling_can_match = false;
3392  }
3393  if (step->is_last_child && has_later_named_siblings) {
3394  node_does_match = false;
3395  }
3396  if (step->supertype_symbol) {
3397  bool has_supertype = false;
3398  for (unsigned j = 0; j < supertype_count; j++) {
3399  if (supertypes[j] == step->supertype_symbol) {
3400  has_supertype = true;
3401  break;
3402  }
3403  }
3404  if (!has_supertype) node_does_match = false;
3405  }
3406  if (step->field) {
3407  if (step->field == field_id) {
3408  if (!can_have_later_siblings_with_this_field) {
3409  later_sibling_can_match = false;
3410  }
3411  } else {
3412  node_does_match = false;
3413  }
3414  }
3415 
3416  if (step->negated_field_list_id) {
3417  TSFieldId *negated_field_ids = &self->query->negated_fields.contents[step->negated_field_list_id];
3418  for (;;) {
3419  TSFieldId negated_field_id = *negated_field_ids;
3420  if (negated_field_id) {
3421  negated_field_ids++;
3422  if (ts_node_child_by_field_id(node, negated_field_id).id) {
3423  node_does_match = false;
3424  break;
3425  }
3426  } else {
3427  break;
3428  }
3429  }
3430  }
3431 
3432  // Remove states immediately if it is ever clear that they cannot match.
3433  if (!node_does_match) {
3434  if (!later_sibling_can_match) {
3435  LOG(
3436  " discard state. pattern:%u, step:%u\n",
3437  state->pattern_index,
3438  state->step_index
3439  );
3441  &self->capture_list_pool,
3442  state->capture_list_id
3443  );
3444  array_erase(&self->states, i);
3445  i--;
3446  }
3447  continue;
3448  }
3449 
3450  // Some patterns can match their root node in multiple ways, capturing different
3451  // children. If this pattern step could match later children within the same
3452  // parent, then this query state cannot simply be updated in place. It must be
3453  // split into two states: one that matches this node, and one which skips over
3454  // this node, to preserve the possibility of matching later siblings.
3455  if (later_sibling_can_match && (
3456  step->contains_captures ||
3457  ts_query__step_is_fallible(self->query, state->step_index)
3458  )) {
3459  if (ts_query_cursor__copy_state(self, &state)) {
3460  LOG(
3461  " split state for capture. pattern:%u, step:%u\n",
3462  state->pattern_index,
3463  state->step_index
3464  );
3465  copy_count++;
3466  }
3467  }
3468 
3469  // If this pattern started with a wildcard, such that the pattern map
3470  // actually points to the *second* step of the pattern, then check
3471  // that the node has a parent, and capture the parent node if necessary.
3472  if (state->needs_parent) {
3473  TSNode parent = ts_tree_cursor_parent_node(&self->cursor);
3474  if (ts_node_is_null(parent)) {
3475  LOG(" missing parent node\n");
3476  state->dead = true;
3477  } else {
3478  state->needs_parent = false;
3479  QueryStep *skipped_wildcard_step = step;
3480  do {
3481  skipped_wildcard_step--;
3482  } while (
3483  skipped_wildcard_step->is_dead_end ||
3484  skipped_wildcard_step->is_pass_through ||
3485  skipped_wildcard_step->depth > 0
3486  );
3487  if (skipped_wildcard_step->capture_ids[0] != NONE) {
3488  LOG(" capture wildcard parent\n");
3490  self,
3491  state,
3492  skipped_wildcard_step,
3493  parent
3494  );
3495  }
3496  }
3497  }
3498 
3499  // If the current node is captured in this pattern, add it to the capture list.
3500  if (step->capture_ids[0] != NONE) {
3501  ts_query_cursor__capture(self, state, step, node);
3502  }
3503 
3504  if (state->dead) {
3505  array_erase(&self->states, i);
3506  i--;
3507  continue;
3508  }
3509 
3510  // Advance this state to the next step of its pattern.
3511  state->step_index++;
3512  state->seeking_immediate_match = false;
3513  LOG(
3514  " advance state. pattern:%u, step:%u\n",
3515  state->pattern_index,
3516  state->step_index
3517  );
3518 
3519  QueryStep *next_step = &self->query->steps.contents[state->step_index];
3520  if (stop_on_definite_step && next_step->root_pattern_guaranteed) did_match = true;
3521 
3522  // If this state's next step has an alternative step, then copy the state in order
3523  // to pursue both alternatives. The alternative step itself may have an alternative,
3524  // so this is an interative process.
3525  unsigned end_index = i + 1;
3526  for (unsigned j = i; j < end_index; j++) {
3527  QueryState *state = &self->states.contents[j];
3528  QueryStep *next_step = &self->query->steps.contents[state->step_index];
3529  if (next_step->alternative_index != NONE) {
3530  // A "dead-end" step exists only to add a non-sequential jump into the step sequence,
3531  // via its alternative index. When a state reaches a dead-end step, it jumps straight
3532  // to the step's alternative.
3533  if (next_step->is_dead_end) {
3534  state->step_index = next_step->alternative_index;
3535  j--;
3536  continue;
3537  }
3538 
3539  // A "pass-through" step exists only to add a branch into the step sequence,
3540  // via its alternative_index. When a state reaches a pass-through step, it splits
3541  // in order to process the alternative step, and then it advances to the next step.
3542  if (next_step->is_pass_through) {
3543  state->step_index++;
3544  j--;
3545  }
3546 
3548  if (copy) {
3549  LOG(
3550  " split state for branch. pattern:%u, from_step:%u, to_step:%u, immediate:%d, capture_count: %u\n",
3551  copy->pattern_index,
3552  copy->step_index,
3553  next_step->alternative_index,
3554  next_step->alternative_is_immediate,
3555  capture_list_pool_get(&self->capture_list_pool, copy->capture_list_id)->size
3556  );
3557  end_index++;
3558  copy_count++;
3559  copy->step_index = next_step->alternative_index;
3560  if (next_step->alternative_is_immediate) {
3561  copy->seeking_immediate_match = true;
3562  }
3563  }
3564  }
3565  }
3566  }
3567 
3568  for (unsigned i = 0; i < self->states.size; i++) {
3569  QueryState *state = &self->states.contents[i];
3570  if (state->dead) {
3571  array_erase(&self->states, i);
3572  i--;
3573  continue;
3574  }
3575 
3576  // Enfore the longest-match criteria. When a query pattern contains optional or
3577  // repeated nodes, this is necessary to avoid multiple redundant states, where
3578  // one state has a strict subset of another state's captures.
3579  bool did_remove = false;
3580  for (unsigned j = i + 1; j < self->states.size; j++) {
3581  QueryState *other_state = &self->states.contents[j];
3582 
3583  // Query states are kept in ascending order of start_depth and pattern_index.
3584  // Since the longest-match criteria is only used for deduping matches of the same
3585  // pattern and root node, we only need to perform pairwise comparisons within a
3586  // small slice of the states array.
3587  if (
3588  other_state->start_depth != state->start_depth ||
3589  other_state->pattern_index != state->pattern_index
3590  ) break;
3591 
3592  bool left_contains_right, right_contains_left;
3594  self,
3595  state,
3596  other_state,
3597  &left_contains_right,
3598  &right_contains_left
3599  );
3600  if (left_contains_right) {
3601  if (state->step_index == other_state->step_index) {
3602  LOG(
3603  " drop shorter state. pattern: %u, step_index: %u\n",
3604  state->pattern_index,
3605  state->step_index
3606  );
3607  capture_list_pool_release(&self->capture_list_pool, other_state->capture_list_id);
3608  array_erase(&self->states, j);
3609  j--;
3610  continue;
3611  }
3612  other_state->has_in_progress_alternatives = true;
3613  }
3614  if (right_contains_left) {
3615  if (state->step_index == other_state->step_index) {
3616  LOG(
3617  " drop shorter state. pattern: %u, step_index: %u\n",
3618  state->pattern_index,
3619  state->step_index
3620  );
3621  capture_list_pool_release(&self->capture_list_pool, state->capture_list_id);
3622  array_erase(&self->states, i);
3623  i--;
3624  did_remove = true;
3625  break;
3626  }
3627  state->has_in_progress_alternatives = true;
3628  }
3629  }
3630 
3631  // If the state is at the end of its pattern, remove it from the list
3632  // of in-progress states and add it to the list of finished states.
3633  if (!did_remove) {
3634  LOG(
3635  " keep state. pattern: %u, start_depth: %u, step_index: %u, capture_count: %u\n",
3636  state->pattern_index,
3637  state->start_depth,
3638  state->step_index,
3639  capture_list_pool_get(&self->capture_list_pool, state->capture_list_id)->size
3640  );
3641  QueryStep *next_step = &self->query->steps.contents[state->step_index];
3642  if (next_step->depth == PATTERN_DONE_MARKER) {
3643  if (state->has_in_progress_alternatives) {
3644  LOG(" defer finishing pattern %u\n", state->pattern_index);
3645  } else {
3646  LOG(" finish pattern %u\n", state->pattern_index);
3647  array_push(&self->finished_states, *state);
3648  array_erase(&self->states, state - self->states.contents);
3649  did_match = true;
3650  i--;
3651  }
3652  }
3653  }
3654  }
3655 
3656  // When the current node ends prior to the desired start offset,
3657  // only descend for the purpose of continuing in-progress matches.
3658  bool should_descend = node_intersects_range;
3659  if (!should_descend) {
3660  for (unsigned i = 0; i < self->states.size; i++) {
3661  QueryState *state = &self->states.contents[i];;
3662  QueryStep *next_step = &self->query->steps.contents[state->step_index];
3663  if (
3664  next_step->depth != PATTERN_DONE_MARKER &&
3665  state->start_depth + next_step->depth > self->depth
3666  ) {
3667  should_descend = true;
3668  break;
3669  }
3670  }
3671  }
3672 
3673  if (!should_descend) {
3674  LOG(
3675  " not descending. node end byte: %u, start byte: %u\n",
3676  ts_node_end_byte(node),
3677  self->start_byte
3678  );
3679  }
3680 
3681  if (should_descend && ts_tree_cursor_goto_first_child(&self->cursor)) {
3682  self->depth++;
3683  } else {
3684  self->ascending = true;
3685  }
3686  }
3687  }
3688 }
3689 
3691  TSQueryCursor *self,
3693 ) {
3694  if (self->finished_states.size == 0) {
3695  if (!ts_query_cursor__advance(self, false)) {
3696  return false;
3697  }
3698  }
3699 
3700  QueryState *state = &self->finished_states.contents[0];
3701  if (state->id == UINT32_MAX) state->id = self->next_state_id++;
3702  match->id = state->id;
3703  match->pattern_index = state->pattern_index;
3704  const CaptureList *captures = capture_list_pool_get(
3705  &self->capture_list_pool,
3706  state->capture_list_id
3707  );
3708  match->captures = captures->contents;
3709  match->capture_count = captures->size;
3710  capture_list_pool_release(&self->capture_list_pool, state->capture_list_id);
3711  array_erase(&self->finished_states, 0);
3712  return true;
3713 }
3714 
3716  TSQueryCursor *self,
3717  uint32_t match_id
3718 ) {
3719  for (unsigned i = 0; i < self->finished_states.size; i++) {
3720  const QueryState *state = &self->finished_states.contents[i];
3721  if (state->id == match_id) {
3723  &self->capture_list_pool,
3724  state->capture_list_id
3725  );
3726  array_erase(&self->finished_states, i);
3727  return;
3728  }
3729  }
3730 
3731  // Remove unfinished query states as well to prevent future
3732  // captures for a match being removed.
3733  for (unsigned i = 0; i < self->states.size; i++) {
3734  const QueryState *state = &self->states.contents[i];
3735  if (state->id == match_id) {
3737  &self->capture_list_pool,
3738  state->capture_list_id
3739  );
3740  array_erase(&self->states, i);
3741  return;
3742  }
3743  }
3744 }
3745 
3747  TSQueryCursor *self,
3749  uint32_t *capture_index
3750 ) {
3751  // The goal here is to return captures in order, even though they may not
3752  // be discovered in order, because patterns can overlap. Search for matches
3753  // until there is a finished capture that is before any unfinished capture.
3754  for (;;) {
3755  // First, find the earliest capture in an unfinished match.
3756  uint32_t first_unfinished_capture_byte;
3757  uint32_t first_unfinished_pattern_index;
3758  uint32_t first_unfinished_state_index;
3759  bool first_unfinished_state_is_definite = false;
3761  self,
3762  &first_unfinished_state_index,
3763  &first_unfinished_capture_byte,
3764  &first_unfinished_pattern_index,
3765  &first_unfinished_state_is_definite
3766  );
3767 
3768  // Then find the earliest capture in a finished match. It must occur
3769  // before the first capture in an *unfinished* match.
3770  QueryState *first_finished_state = NULL;
3771  uint32_t first_finished_capture_byte = first_unfinished_capture_byte;
3772  uint32_t first_finished_pattern_index = first_unfinished_pattern_index;
3773  for (unsigned i = 0; i < self->finished_states.size;) {
3774  QueryState *state = &self->finished_states.contents[i];
3775  const CaptureList *captures = capture_list_pool_get(
3776  &self->capture_list_pool,
3777  state->capture_list_id
3778  );
3779 
3780  // Remove states whose captures are all consumed.
3781  if (state->consumed_capture_count >= captures->size) {
3783  &self->capture_list_pool,
3784  state->capture_list_id
3785  );
3786  array_erase(&self->finished_states, i);
3787  continue;
3788  }
3789 
3790  // Skip captures that precede the cursor's start byte.
3791  TSNode node = captures->contents[state->consumed_capture_count].node;
3792  if (ts_node_end_byte(node) <= self->start_byte) {
3793  state->consumed_capture_count++;
3794  continue;
3795  }
3796 
3797  uint32_t node_start_byte = ts_node_start_byte(node);
3798  if (
3799  node_start_byte < first_finished_capture_byte ||
3800  (
3801  node_start_byte == first_finished_capture_byte &&
3802  state->pattern_index < first_finished_pattern_index
3803  )
3804  ) {
3805  first_finished_state = state;
3806  first_finished_capture_byte = node_start_byte;
3807  first_finished_pattern_index = state->pattern_index;
3808  }
3809  i++;
3810  }
3811 
3812  // If there is finished capture that is clearly before any unfinished
3813  // capture, then return its match, and its capture index. Internally
3814  // record the fact that the capture has been 'consumed'.
3815  QueryState *state;
3816  if (first_finished_state) {
3817  state = first_finished_state;
3818  } else if (first_unfinished_state_is_definite) {
3819  state = &self->states.contents[first_unfinished_state_index];
3820  } else {
3821  state = NULL;
3822  }
3823 
3824  if (state) {
3825  if (state->id == UINT32_MAX) state->id = self->next_state_id++;
3826  match->id = state->id;
3827  match->pattern_index = state->pattern_index;
3828  const CaptureList *captures = capture_list_pool_get(
3829  &self->capture_list_pool,
3830  state->capture_list_id
3831  );
3832  match->captures = captures->contents;
3833  match->capture_count = captures->size;
3834  *capture_index = state->consumed_capture_count;
3835  state->consumed_capture_count++;
3836  return true;
3837  }
3838 
3839  if (capture_list_pool_is_empty(&self->capture_list_pool)) {
3840  LOG(
3841  " abandon state. index:%u, pattern:%u, offset:%u.\n",
3842  first_unfinished_state_index,
3843  first_unfinished_pattern_index,
3844  first_unfinished_capture_byte
3845  );
3847  &self->capture_list_pool,
3848  self->states.contents[first_unfinished_state_index].capture_list_id
3849  );
3850  array_erase(&self->states, first_unfinished_state_index);
3851  }
3852 
3853  // If there are no finished matches that are ready to be returned, then
3854  // continue finding more matches.
3855  if (
3856  !ts_query_cursor__advance(self, true) &&
3857  self->finished_states.size == 0
3858  ) return false;
3859  }
3860 }
3861 
3862 #undef LOG
#define ts_malloc
Definition: alloc.h:21
#define ts_free
Definition: alloc.h:30
#define ts_calloc
Definition: alloc.h:24
#define e(frag)
lzma_index ** i
Definition: index.h:629
const char * ts_node_type(TSNode)
Definition: node.c:420
TSNode ts_node_child_by_field_id(TSNode, TSFieldId)
Definition: node.c:500
#define TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
Definition: api.h:30
TSQueryError
Definition: api.h:135
@ TSQueryErrorLanguage
Definition: api.h:142
@ TSQueryErrorSyntax
Definition: api.h:137
@ TSQueryErrorNodeType
Definition: api.h:138
@ TSQueryErrorNone
Definition: api.h:136
@ TSQueryErrorField
Definition: api.h:139
@ TSQueryErrorStructure
Definition: api.h:141
@ TSQueryErrorCapture
Definition: api.h:140
bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *)
Definition: tree_cursor.c:206
@ TSQueryPredicateStepTypeCapture
Definition: api.h:126
void ts_tree_cursor_delete(TSTreeCursor *)
Definition: tree_cursor.c:94
uint32_t ts_node_start_byte(TSNode)
Definition: node.c:36
void ts_tree_cursor_reset(TSTreeCursor *, TSNode)
Definition: tree_cursor.c:76
bool ts_node_is_null(TSNode)
Definition: node.c:434
TSSymbol ts_node_symbol(TSNode)
Definition: node.c:414
TSQuantifier
Definition: api.h:109
@ TSQuantifierZeroOrMore
Definition: api.h:112
@ TSQuantifierZeroOrOne
Definition: api.h:111
@ TSQuantifierZero
Definition: api.h:110
@ TSQuantifierOneOrMore
Definition: api.h:114
@ TSQuantifierOne
Definition: api.h:113
TSPoint ts_node_start_point(TSNode)
Definition: node.c:40
TSFieldId ts_language_field_id_for_name(const TSLanguage *, const char *, uint32_t)
Definition: language.c:119
const char * ts_language_symbol_name(const TSLanguage *, TSSymbol)
Definition: language.c:59
#define TREE_SITTER_LANGUAGE_VERSION
Definition: api.h:24
bool ts_tree_cursor_goto_parent(TSTreeCursor *)
Definition: tree_cursor.c:239
const char * ts_language_field_name_for_id(const TSLanguage *, TSFieldId)
Definition: language.c:107
uint32_t ts_node_end_byte(TSNode)
Definition: node.c:406
bool ts_node_is_named(TSNode)
Definition: node.c:442
struct TSQueryCursor TSQueryCursor
Definition: api.h:42
TSNode ts_tree_cursor_current_node(const TSTreeCursor *)
Definition: tree_cursor.c:262
TSSymbol ts_language_symbol_for_name(const TSLanguage *self, const char *string, uint32_t length, bool is_named)
Definition: language.c:74
TSPoint ts_node_end_point(TSNode)
Definition: node.c:410
bool ts_tree_cursor_goto_first_child(TSTreeCursor *)
Definition: tree_cursor.c:101
struct TSQuery TSQuery
Definition: api.h:41
#define array_extend(self, count, contents)
Definition: array.h:59
#define array_new()
Definition: array.h:25
#define array_init(self)
Definition: array.h:22
#define array_insert_sorted_by(self, field, value)
Definition: array.h:121
#define array_erase(self, index)
Definition: array.h:79
#define array_get(self, index)
Definition: array.h:28
#define array_back(self)
Definition: array.h:33
#define array_assign(self, other)
Definition: array.h:84
#define array_push_all(self, other)
Definition: array.h:54
#define array_push(self, element)
Definition: array.h:43
#define array_pop(self)
Definition: array.h:82
#define array_search_sorted_with(self, compare, needle, index, exists)
Definition: array.h:98
#define array_reserve(self, new_capacity)
Definition: array.h:37
#define array_insert(self, index, element)
Definition: array.h:75
#define array_delete(self)
Definition: array.h:41
#define array_clear(self)
Definition: array.h:35
#define array_grow_by(self, count)
Definition: array.h:49
#define array_search_sorted_by(self, field, needle, index, exists)
Definition: array.h:105
#define NULL
Definition: cris-opc.c:27
_Use_decl_annotations_ int __cdecl printf(const char *const _Format,...)
Definition: cs_driver.c:93
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void count
Definition: sflib.h:98
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void static offset struct stat static buf void long static basep static whence static length const void static len static semflg const void static shmflg const struct timespec struct timespec static rem const char static group const void start
Definition: sflib.h:133
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void static offset struct stat static buf void long static basep static whence static length const void static len static semflg const void static shmflg const struct timespec struct timespec static rem const char static group const void length
Definition: sflib.h:133
const char * k
Definition: dsignal.c:11
static states step(struct re_guts *, sopno, sopno, states, int, states)
Definition: engine.c:888
struct tab * done
Definition: enough.c:233
voidpf void uLong size
Definition: ioapi.h:138
voidpf stream
Definition: ioapi.h:138
TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *self, TSSymbol symbol)
Definition: language.c:38
static void ts_language_field_map(const TSLanguage *self, uint32_t production_id, const TSFieldMapEntry **start, const TSFieldMapEntry **end)
Definition: language.h:246
static bool ts_language_state_is_primary(const TSLanguage *self, TSStateId state)
Definition: language.h:205
static bool ts_lookahead_iterator_next(LookaheadIterator *self)
Definition: language.h:137
static void ts_language_aliases_for_symbol(const TSLanguage *self, TSSymbol original_symbol, const TSSymbol **start, const TSSymbol **end)
Definition: language.h:263
static TSSymbol ts_language_alias_at(const TSLanguage *self, uint32_t production_id, uint32_t child_index)
Definition: language.h:236
static LookaheadIterator ts_language_lookaheads(const TSLanguage *self, TSStateId state)
Definition: language.h:110
memcpy(mem, inblock.get(), min(CONTAINING_RECORD(inblock.get(), MEMBLOCK, data) ->size, size))
static void list(RzEgg *egg)
Definition: rz-gg.c:52
const char * source
Definition: lz4.h:699
assert(limit<=UINT32_MAX/2)
static uint32_t const uint8_t uint32_t uint32_t limit
Definition: memcmplen.h:45
int n
Definition: mipsasm.c:19
int id
Definition: op.c:540
static bool point_lt(TSPoint a, TSPoint b)
Definition: point.h:32
static bool point_lte(TSPoint a, TSPoint b)
Definition: point.h:28
#define POINT_MAX
Definition: point.h:7
static bool point_gt(TSPoint a, TSPoint b)
Definition: point.h:36
static int is_immediate(ut32 instr)
#define states
Definition: regexec.c:104
@ field_name
Definition: parser.c:1737
@ field_id
Definition: parser.c:1736
uint16_t TSStateId
Definition: parser.h:16
uint16_t TSFieldId
Definition: parser.h:20
@ TSParseActionTypeShift
Definition: parser.h:54
@ TSParseActionTypeReduce
Definition: parser.h:55
uint16_t TSSymbol
Definition: parser.h:19
#define ts_builtin_sym_error
Definition: parser.h:12
unsigned short uint16_t
Definition: sftypes.h:30
int int32_t
Definition: sftypes.h:33
int size_t
Definition: sftypes.h:40
unsigned int uint32_t
Definition: sftypes.h:29
unsigned char uint8_t
Definition: sftypes.h:31
#define UINT16_MAX
#define UINT32_MAX
uint16_t child_index
Definition: query.c:222
TSSymbol parent_symbol
Definition: query.c:221
TSFieldId field_id
Definition: query.c:223
TSStateId parse_state
Definition: query.c:220
uint16_t depth
Definition: query.c:229
uint16_t step_index
Definition: query.c:230
TSSymbol symbol
Definition: query.c:251
TSSymbol symbol
Definition: language.h:30
const TSParseAction * actions
Definition: language.h:29
TSStateId next_state
Definition: language.h:31
uint16_t action_count
Definition: language.h:32
Slice predicate_steps
Definition: query.c:147
Slice steps
Definition: query.c:146
uint32_t start_byte
Definition: query.c:148
bool dead
Definition: query.c:189
bool needs_parent
Definition: query.c:190
uint32_t capture_list_id
Definition: query.c:182
uint16_t start_depth
Definition: query.c:183
uint16_t step_index
Definition: query.c:184
uint16_t pattern_index
Definition: query.c:185
bool seeking_immediate_match
Definition: query.c:187
bool has_in_progress_alternatives
Definition: query.c:188
uint16_t consumed_capture_count
Definition: query.c:186
uint32_t id
Definition: query.c:181
uint16_t capture_ids[MAX_STEP_CAPTURE_COUNT]
Definition: query.c:89
bool parent_pattern_guaranteed
Definition: query.c:101
bool alternative_is_immediate
Definition: query.c:98
uint16_t depth
Definition: query.c:90
bool is_last_child
Definition: query.c:95
bool is_pass_through
Definition: query.c:96
TSSymbol symbol
Definition: query.c:86
bool is_named
Definition: query.c:93
uint16_t alternative_index
Definition: query.c:91
bool root_pattern_guaranteed
Definition: query.c:100
bool is_immediate
Definition: query.c:94
uint16_t negated_field_list_id
Definition: query.c:92
TSSymbol supertype_symbol
Definition: query.c:87
TSFieldId field
Definition: query.c:88
bool is_dead_end
Definition: query.c:97
bool contains_captures
Definition: query.c:99
Definition: query.c:110
uint32_t offset
Definition: query.c:111
uint32_t length
Definition: query.c:112
TSStateId * contents
Definition: query.c:262
uint16_t step_index
Definition: query.c:153
uint32_t byte_offset
Definition: query.c:152
Definition: query.c:23
const char * start
Definition: query.c:25
uint8_t next_size
Definition: query.c:28
const char * input
Definition: query.c:24
int32_t next
Definition: query.c:27
const char * end
Definition: query.c:26
uint8_t child_index
Definition: parser.h:26
bool inherited
Definition: parser.h:27
TSFieldId field_id
Definition: parser.h:25
uint32_t version
Definition: parser.h:91
uint32_t state_count
Definition: parser.h:96
Definition: api.h:92
const void * id
Definition: api.h:94
Definition: api.h:55
uint32_t row
Definition: api.h:56
uint32_t column
Definition: api.h:57
TSNode node
Definition: api.h:105
uint32_t index
Definition: api.h:106
const TSQuery * query
Definition: query.c:289
TSTreeCursor cursor
Definition: query.c:290
Definition: query.c:270
SymbolTable captures
Definition: query.c:271
bool visible
Definition: parser.h:36
bool supertype
Definition: parser.h:38
Definition: zipcmp.c:77
Definition: engine.c:71
Definition: z80asm.h:102
Definition: z80asm.h:140
Definition: dis.h:43
static void ts_query__add_negated_fields(TSQuery *self, uint16_t step_index, TSFieldId *field_ids, uint16_t field_count)
Definition: query.c:1835
static bool analysis_state__has_supertype(AnalysisState *self, TSSymbol symbol)
Definition: query.c:930
static void capture_list_pool_reset(CaptureListPool *self)
Definition: query.c:403
uint32_t ts_query_string_count(const TSQuery *self)
Definition: query.c:2705
uint32_t ts_query_pattern_count(const TSQuery *self)
Definition: query.c:2697
static void stream_scan_identifier(Stream *stream)
Definition: query.c:373
static void capture_quantifiers_mul(CaptureQuantifiers *self, TSQuantifier quantifier)
Definition: query.c:692
void ts_query_cursor_exec(TSQueryCursor *self, const TSQuery *query, TSNode node)
Definition: query.c:2859
TSQuantifier ts_query_capture_quantifier_for_id(const TSQuery *self, uint32_t pattern_index, uint32_t capture_index)
Definition: query.c:2717
static bool ts_query_cursor__first_in_progress_capture(TSQueryCursor *self, uint32_t *state_index, uint32_t *byte_offset, uint32_t *pattern_index, bool *root_pattern_guaranteed)
Definition: query.c:2902
bool ts_query_cursor_next_match(TSQueryCursor *self, TSQueryMatch *match)
Definition: query.c:3690
const char * ts_query_string_value_for_id(const TSQuery *self, uint32_t index, uint32_t *length)
Definition: query.c:2726
#define MAX_STATE_PREDECESSOR_COUNT
Definition: query.c:15
static void state_predecessor_map_delete(StatePredecessorMap *self)
Definition: query.c:846
static void capture_quantifiers_add_for_id(CaptureQuantifiers *self, uint16_t id, TSQuantifier quantifier)
Definition: query.c:664
static int symbol_table_id_for_name(const SymbolTable *self, const char *name, uint32_t length)
Definition: query.c:737
#define MAX_ANALYSIS_STATE_DEPTH
Definition: query.c:16
static TSQueryError ts_query__parse_predicate(TSQuery *self, Stream *stream)
Definition: query.c:1946
uint32_t ts_query_start_byte_for_pattern(const TSQuery *self, uint32_t pattern_index)
Definition: query.c:2747
void ts_query_delete(TSQuery *self)
Definition: query.c:2677
#define LOG(...)
Definition: query.c:3033
static bool ts_query_cursor__advance(TSQueryCursor *self, bool stop_on_definite_step)
Definition: query.c:3206
static AnalysisStateEntry * analysis_state__top(AnalysisState *self)
Definition: query.c:926
void ts_query_cursor_remove_match(TSQueryCursor *self, uint32_t match_id)
Definition: query.c:3715
static void capture_quantifiers_join_all(CaptureQuantifiers *self, CaptureQuantifiers *quantifiers)
Definition: query.c:703
typedef Array(uint8_t)
Definition: query.c:126
static const TSSymbol WILDCARD_SYMBOL
Definition: query.c:308
bool ts_query__step_is_fallible(const TSQuery *self, uint16_t step_index)
Definition: query.c:2771
static SymbolTable symbol_table_new(void)
Definition: query.c:725
static CaptureQuantifiers capture_quantifiers_new(void)
Definition: query.c:628
TSQuery * ts_query_new(const TSLanguage *language, const char *source, uint32_t source_len, uint32_t *error_offset, TSQueryError *error_type)
Definition: query.c:2546
static void state_predecessor_map_add(StatePredecessorMap *self, TSStateId state, TSStateId predecessor)
Definition: query.c:850
static const CaptureList * capture_list_pool_get(const CaptureListPool *self, uint16_t id)
Definition: query.c:418
static void analysis_state_set__delete(AnalysisStateSet *self)
Definition: query.c:1008
void ts_query_cursor_set_match_limit(TSQueryCursor *self, uint32_t limit)
Definition: query.c:2855
static uint16_t capture_list_pool_acquire(CaptureListPool *self)
Definition: query.c:434
static uint32_t stream_offset(Stream *self)
Definition: query.c:386
static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset)
Definition: query.c:1116
void ts_query_disable_capture(TSQuery *self, const char *name, uint32_t length)
Definition: query.c:2785
static StatePredecessorMap state_predecessor_map_new(const TSLanguage *language)
Definition: query.c:835
static void stream_skip_whitespace(Stream *self)
Definition: query.c:353
static QueryState * ts_query_cursor__copy_state(TSQueryCursor *self, QueryState **state_ref)
Definition: query.c:3177
static TSQueryError ts_query__parse_pattern(TSQuery *self, Stream *stream, uint32_t depth, bool is_immediate, CaptureQuantifiers *capture_quantifiers)
Definition: query.c:2050
static void capture_quantifiers_delete(CaptureQuantifiers *self)
Definition: query.c:633
static uint16_t symbol_table_insert_name(SymbolTable *self, const char *name, uint32_t length)
Definition: query.c:762
static TSQueryError ts_query__parse_string_literal(TSQuery *self, Stream *stream)
Definition: query.c:1887
static void ts_query__pattern_map_insert(TSQuery *self, TSSymbol symbol, PatternEntry new_entry)
Definition: query.c:1089
#define MAX_STEP_CAPTURE_COUNT
Definition: query.c:13
bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *self)
Definition: query.c:2847
static CaptureList * ts_query_cursor__prepare_to_capture(TSQueryCursor *self, QueryState *state, unsigned state_index_to_preserve)
Definition: query.c:3100
static unsigned analysis_state__recursion_depth(const AnalysisState *self)
Definition: query.c:880
static bool ts_query__pattern_map_search(const TSQuery *self, TSSymbol needle, uint32_t *result)
Definition: query.c:1049
uint32_t ts_query_cursor_match_limit(const TSQueryCursor *self)
Definition: query.c:2851
static TSQuantifier quantifier_mul(TSQuantifier left, TSQuantifier right)
Definition: query.c:468
static AnalysisState * analysis_state__clone(AnalysisState const *self)
Definition: query.c:937
static void query_step__add_capture(QueryStep *self, uint16_t capture_id)
Definition: query.c:807
bool ts_query_cursor_next_capture(TSQueryCursor *self, TSQueryMatch *match, uint32_t *capture_index)
Definition: query.c:3746
#define MAX_NEGATED_FIELD_COUNT
Definition: query.c:14
static int analysis_state__compare_position(AnalysisState *const *self, AnalysisState *const *other)
Definition: query.c:894
static AnalysisState * analysis_state_pool__clone_or_reuse(AnalysisStatePool *self, AnalysisState *borrowed_item)
Definition: query.c:949
static const TSStateId * state_predecessor_map_get(const StatePredecessorMap *self, TSStateId state, unsigned *count)
Definition: query.c:866
static void capture_quantifiers_replace(CaptureQuantifiers *self, CaptureQuantifiers *quantifiers)
Definition: query.c:647
TSQueryCursor * ts_query_cursor_new(void)
Definition: query.c:2820
void ts_query_cursor_delete(TSQueryCursor *self)
Definition: query.c:2839
static CaptureList * capture_list_pool_get_mut(CaptureListPool *self, uint16_t id)
Definition: query.c:423
static TSQuantifier quantifier_add(TSQuantifier left, TSQuantifier right)
Definition: query.c:578
CaptureListPool
Definition: query.c:213
static void symbol_table_delete(SymbolTable *self)
Definition: query.c:732
int ts_query_cursor__compare_nodes(TSNode left, TSNode right)
Definition: query.c:2958
static void analysis_state_set__push_by_clone(AnalysisStateSet *self, AnalysisStatePool *pool, AnalysisState *borrowed_item)
Definition: query.c:991
bool ts_query_is_pattern_guaranteed_at_step(const TSQuery *self, uint32_t byte_offset)
Definition: query.c:2754
static CaptureListPool capture_list_pool_new(void)
Definition: query.c:394
static TSQuantifier capture_quantifier_for_id(const CaptureQuantifiers *self, uint16_t id)
Definition: query.c:656
static void capture_quantifiers_clear(CaptureQuantifiers *self)
Definition: query.c:640
uint32_t ts_query_capture_count(const TSQuery *self)
Definition: query.c:2701
static void capture_quantifiers_add_all(CaptureQuantifiers *self, CaptureQuantifiers *quantifiers)
Definition: query.c:677
static int analysis_subgraph_node__compare(const AnalysisSubgraphNode *self, const AnalysisSubgraphNode *other)
Definition: query.c:1019
void ts_query_disable_pattern(TSQuery *self, uint32_t pattern_index)
Definition: query.c:2801
static Stream stream_new(const char *string, uint32_t length)
Definition: query.c:342
static bool capture_list_pool_is_empty(const CaptureListPool *self)
Definition: query.c:428
static void capture_list_pool_delete(CaptureListPool *self)
Definition: query.c:411
static void ts_query_cursor__add_state(TSQueryCursor *self, const PatternEntry *pattern)
Definition: query.c:3036
static const uint16_t NONE
Definition: query.c:307
static QueryStep query_step__new(TSSymbol symbol, uint16_t depth, bool is_immediate)
Definition: query.c:784
static void ts_query_cursor__capture(TSQueryCursor *self, QueryState *state, QueryStep *step, TSNode node)
Definition: query.c:3147
static void stream_reset(Stream *self, const char *input)
Definition: query.c:336
const TSQueryPredicateStep * ts_query_predicates_for_pattern(const TSQuery *self, uint32_t pattern_index, uint32_t *step_count)
Definition: query.c:2734
static TSQuantifier quantifier_join(TSQuantifier left, TSQuantifier right)
Definition: query.c:517
static const TSQueryError PARENT_DONE
Definition: query.c:305
PatternEntry
Definition: query.c:143
static void analysis_state_set__insert_sorted_by_clone(AnalysisStateSet *self, AnalysisStatePool *pool, AnalysisState *borrowed_item)
Definition: query.c:970
static const uint16_t PATTERN_DONE_MARKER
Definition: query.c:306
AnalysisSubgraphNode
Definition: query.c:248
static bool stream_is_ident_start(Stream *self)
Definition: query.c:369
static int analysis_state__compare(AnalysisState *const *self, AnalysisState *const *other)
Definition: query.c:909
static bool stream_advance(Stream *self)
Definition: query.c:315
static const char * symbol_table_name_for_id(const SymbolTable *self, uint16_t id, uint32_t *length)
Definition: query.c:752
void ts_query_cursor_set_byte_range(TSQueryCursor *self, uint32_t start_byte, uint32_t end_byte)
Definition: query.c:2876
void ts_query_cursor_set_point_range(TSQueryCursor *self, TSPoint start_point, TSPoint end_point)
Definition: query.c:2888
static void capture_list_pool_release(CaptureListPool *self, uint16_t id)
Definition: query.c:458
const char * ts_query_capture_name_for_id(const TSQuery *self, uint32_t index, uint32_t *length)
Definition: query.c:2709
static void analysis_state_set__clear(AnalysisStateSet *self, AnalysisStatePool *pool)
Definition: query.c:1001
#define MAX_ANALYSIS_ITERATION_COUNT
Definition: query.c:17
void ts_query_cursor__compare_captures(TSQueryCursor *self, QueryState *left_state, QueryState *right_state, bool *left_contains_right, bool *right_contains_left)
Definition: query.c:2973
static void query_step__remove_capture(QueryStep *self, uint16_t capture_id)
Definition: query.c:816
TSNode ts_tree_cursor_parent_node(const TSTreeCursor *_self)
Definition: tree_cursor.c:404
void ts_tree_cursor_current_status(const TSTreeCursor *_self, TSFieldId *field_id, bool *has_later_siblings, bool *has_later_named_siblings, bool *can_have_later_siblings_with_this_field, TSSymbol *supertypes, unsigned *supertype_count)
Definition: tree_cursor.c:284
static uint32_t ts_decode_utf8(const uint8_t *string, uint32_t length, int32_t *code_point)
Definition: unicode.h:26
TSStateId state
Definition: parser.h:63
static bool input(void *ud, zip_uint8_t *data, zip_uint64_t length)