Rizin
unix-like reverse engineering framework and cli tools
get_changed_ranges.c
Go to the documentation of this file.
1 #include "./get_changed_ranges.h"
2 #include "./subtree.h"
3 #include "./language.h"
4 #include "./error_costs.h"
5 #include "./tree_cursor.h"
6 #include <assert.h>
7 
8 // #define DEBUG_GET_CHANGED_RANGES
9 
10 static void ts_range_array_add(
11  TSRangeArray *self,
12  Length start,
13  Length end
14 ) {
15  if (self->size > 0) {
16  TSRange *last_range = array_back(self);
17  if (start.bytes <= last_range->end_byte) {
18  last_range->end_byte = end.bytes;
19  last_range->end_point = end.extent;
20  return;
21  }
22  }
23 
24  if (start.bytes < end.bytes) {
25  TSRange range = { start.extent, end.extent, start.bytes, end.bytes };
26  array_push(self, range);
27  }
28 }
29 
31  const TSRangeArray *self,
32  unsigned start_index,
33  uint32_t start_byte,
34  uint32_t end_byte
35 ) {
36  for (unsigned i = start_index; i < self->size; i++) {
37  TSRange *range = &self->contents[i];
38  if (range->end_byte > start_byte) {
39  if (range->start_byte >= end_byte) break;
40  return true;
41  }
42  }
43  return false;
44 }
45 
47  const TSRange *old_ranges, unsigned old_range_count,
48  const TSRange *new_ranges, unsigned new_range_count,
49  TSRangeArray *differences
50 ) {
51  unsigned new_index = 0;
52  unsigned old_index = 0;
53  Length current_position = length_zero();
54  bool in_old_range = false;
55  bool in_new_range = false;
56 
57  while (old_index < old_range_count || new_index < new_range_count) {
58  const TSRange *old_range = &old_ranges[old_index];
59  const TSRange *new_range = &new_ranges[new_index];
60 
61  Length next_old_position;
62  if (in_old_range) {
63  next_old_position = (Length) {old_range->end_byte, old_range->end_point};
64  } else if (old_index < old_range_count) {
65  next_old_position = (Length) {old_range->start_byte, old_range->start_point};
66  } else {
67  next_old_position = LENGTH_MAX;
68  }
69 
70  Length next_new_position;
71  if (in_new_range) {
72  next_new_position = (Length) {new_range->end_byte, new_range->end_point};
73  } else if (new_index < new_range_count) {
74  next_new_position = (Length) {new_range->start_byte, new_range->start_point};
75  } else {
76  next_new_position = LENGTH_MAX;
77  }
78 
79  if (next_old_position.bytes < next_new_position.bytes) {
80  if (in_old_range != in_new_range) {
81  ts_range_array_add(differences, current_position, next_old_position);
82  }
83  if (in_old_range) old_index++;
84  current_position = next_old_position;
85  in_old_range = !in_old_range;
86  } else if (next_new_position.bytes < next_old_position.bytes) {
87  if (in_old_range != in_new_range) {
88  ts_range_array_add(differences, current_position, next_new_position);
89  }
90  if (in_new_range) new_index++;
91  current_position = next_new_position;
92  in_new_range = !in_new_range;
93  } else {
94  if (in_old_range != in_new_range) {
95  ts_range_array_add(differences, current_position, next_new_position);
96  }
97  if (in_old_range) old_index++;
98  if (in_new_range) new_index++;
99  in_old_range = !in_old_range;
100  in_new_range = !in_new_range;
101  current_position = next_new_position;
102  }
103  }
104 }
105 
106 typedef struct {
109  unsigned visible_depth;
111 } Iterator;
112 
114  TreeCursor *cursor,
115  const Subtree *tree,
116  const TSLanguage *language
117 ) {
118  array_clear(&cursor->stack);
119  array_push(&cursor->stack, ((TreeCursorEntry) {
120  .subtree = tree,
121  .position = length_zero(),
122  .child_index = 0,
123  .structural_child_index = 0,
124  }));
125  return (Iterator) {
126  .cursor = *cursor,
127  .language = language,
128  .visible_depth = 1,
129  .in_padding = false,
130  };
131 }
132 
133 static bool iterator_done(Iterator *self) {
134  return self->cursor.stack.size == 0;
135 }
136 
138  TreeCursorEntry entry = *array_back(&self->cursor.stack);
139  if (self->in_padding) {
140  return entry.position;
141  } else {
142  return length_add(entry.position, ts_subtree_padding(*entry.subtree));
143  }
144 }
145 
147  TreeCursorEntry entry = *array_back(&self->cursor.stack);
148  Length result = length_add(entry.position, ts_subtree_padding(*entry.subtree));
149  if (self->in_padding) {
150  return result;
151  } else {
152  return length_add(result, ts_subtree_size(*entry.subtree));
153  }
154 }
155 
156 static bool iterator_tree_is_visible(const Iterator *self) {
157  TreeCursorEntry entry = *array_back(&self->cursor.stack);
158  if (ts_subtree_visible(*entry.subtree)) return true;
159  if (self->cursor.stack.size > 1) {
160  Subtree parent = *self->cursor.stack.contents[self->cursor.stack.size - 2].subtree;
161  return ts_language_alias_at(
162  self->language,
163  parent.ptr->production_id,
164  entry.structural_child_index
165  ) != 0;
166  }
167  return false;
168 }
169 
171  const Iterator *self,
172  Subtree *tree,
173  TSSymbol *alias_symbol,
174  uint32_t *start_byte
175 ) {
176  uint32_t i = self->cursor.stack.size - 1;
177 
178  if (self->in_padding) {
179  if (i == 0) return;
180  i--;
181  }
182 
183  for (; i + 1 > 0; i--) {
184  TreeCursorEntry entry = self->cursor.stack.contents[i];
185 
186  if (i > 0) {
187  const Subtree *parent = self->cursor.stack.contents[i - 1].subtree;
188  *alias_symbol = ts_language_alias_at(
189  self->language,
190  parent->ptr->production_id,
191  entry.structural_child_index
192  );
193  }
194 
195  if (ts_subtree_visible(*entry.subtree) || *alias_symbol) {
196  *tree = *entry.subtree;
197  *start_byte = entry.position.bytes;
198  break;
199  }
200  }
201 }
202 
203 static void iterator_ascend(Iterator *self) {
204  if (iterator_done(self)) return;
205  if (iterator_tree_is_visible(self) && !self->in_padding) self->visible_depth--;
206  if (array_back(&self->cursor.stack)->child_index > 0) self->in_padding = false;
207  self->cursor.stack.size--;
208 }
209 
210 static bool iterator_descend(Iterator *self, uint32_t goal_position) {
211  if (self->in_padding) return false;
212 
213  bool did_descend;
214  do {
215  did_descend = false;
216  TreeCursorEntry entry = *array_back(&self->cursor.stack);
217  Length position = entry.position;
218  uint32_t structural_child_index = 0;
219  for (uint32_t i = 0, n = ts_subtree_child_count(*entry.subtree); i < n; i++) {
220  const Subtree *child = &ts_subtree_children(*entry.subtree)[i];
221  Length child_left = length_add(position, ts_subtree_padding(*child));
222  Length child_right = length_add(child_left, ts_subtree_size(*child));
223 
224  if (child_right.bytes > goal_position) {
225  array_push(&self->cursor.stack, ((TreeCursorEntry) {
226  .subtree = child,
227  .position = position,
228  .child_index = i,
229  .structural_child_index = structural_child_index,
230  }));
231 
232  if (iterator_tree_is_visible(self)) {
233  if (child_left.bytes > goal_position) {
234  self->in_padding = true;
235  } else {
236  self->visible_depth++;
237  }
238  return true;
239  }
240 
241  did_descend = true;
242  break;
243  }
244 
245  position = child_right;
246  if (!ts_subtree_extra(*child)) structural_child_index++;
247  }
248  } while (did_descend);
249 
250  return false;
251 }
252 
253 static void iterator_advance(Iterator *self) {
254  if (self->in_padding) {
255  self->in_padding = false;
256  if (iterator_tree_is_visible(self)) {
257  self->visible_depth++;
258  } else {
259  iterator_descend(self, 0);
260  }
261  return;
262  }
263 
264  for (;;) {
265  if (iterator_tree_is_visible(self)) self->visible_depth--;
266  TreeCursorEntry entry = array_pop(&self->cursor.stack);
267  if (iterator_done(self)) return;
268 
269  const Subtree *parent = array_back(&self->cursor.stack)->subtree;
270  uint32_t child_index = entry.child_index + 1;
271  if (ts_subtree_child_count(*parent) > child_index) {
272  Length position = length_add(entry.position, ts_subtree_total_size(*entry.subtree));
273  uint32_t structural_child_index = entry.structural_child_index;
274  if (!ts_subtree_extra(*entry.subtree)) structural_child_index++;
275  const Subtree *next_child = &ts_subtree_children(*parent)[child_index];
276 
277  array_push(&self->cursor.stack, ((TreeCursorEntry) {
278  .subtree = next_child,
279  .position = position,
280  .child_index = child_index,
281  .structural_child_index = structural_child_index,
282  }));
283 
284  if (iterator_tree_is_visible(self)) {
285  if (ts_subtree_padding(*next_child).bytes > 0) {
286  self->in_padding = true;
287  } else {
288  self->visible_depth++;
289  }
290  } else {
291  iterator_descend(self, 0);
292  }
293  break;
294  }
295  }
296 }
297 
298 typedef enum {
303 
305  const Iterator *old_iter,
306  const Iterator *new_iter
307 ) {
308  Subtree old_tree = NULL_SUBTREE;
309  Subtree new_tree = NULL_SUBTREE;
310  uint32_t old_start = 0;
311  uint32_t new_start = 0;
312  TSSymbol old_alias_symbol = 0;
313  TSSymbol new_alias_symbol = 0;
314  iterator_get_visible_state(old_iter, &old_tree, &old_alias_symbol, &old_start);
315  iterator_get_visible_state(new_iter, &new_tree, &new_alias_symbol, &new_start);
316 
317  if (!old_tree.ptr && !new_tree.ptr) return IteratorMatches;
318  if (!old_tree.ptr || !new_tree.ptr) return IteratorDiffers;
319 
320  if (
321  old_alias_symbol == new_alias_symbol &&
322  ts_subtree_symbol(old_tree) == ts_subtree_symbol(new_tree)
323  ) {
324  if (old_start == new_start &&
325  !ts_subtree_has_changes(old_tree) &&
327  ts_subtree_size(old_tree).bytes == ts_subtree_size(new_tree).bytes &&
330  (ts_subtree_parse_state(old_tree) == ERROR_STATE) ==
331  (ts_subtree_parse_state(new_tree) == ERROR_STATE)) {
332  return IteratorMatches;
333  } else {
334  return IteratorMayDiffer;
335  }
336  }
337 
338  return IteratorDiffers;
339 }
340 
341 #ifdef DEBUG_GET_CHANGED_RANGES
342 static inline void iterator_print_state(Iterator *self) {
343  TreeCursorEntry entry = *array_back(&self->cursor.stack);
346  const char *name = ts_language_symbol_name(self->language, ts_subtree_symbol(*entry.subtree));
347  printf(
348  "(%-25s %s\t depth:%u [%u, %u] - [%u, %u])",
349  name, self->in_padding ? "(p)" : " ",
350  self->visible_depth,
351  start.row + 1, start.column,
352  end.row + 1, end.column
353  );
354 }
355 #endif
356 
358  const Subtree *old_tree, const Subtree *new_tree,
359  TreeCursor *cursor1, TreeCursor *cursor2,
360  const TSLanguage *language,
361  const TSRangeArray *included_range_differences,
362  TSRange **ranges
363 ) {
364  TSRangeArray results = array_new();
365 
366  Iterator old_iter = iterator_new(cursor1, old_tree, language);
367  Iterator new_iter = iterator_new(cursor2, new_tree, language);
368 
369  unsigned included_range_difference_index = 0;
370 
371  Length position = iterator_start_position(&old_iter);
372  Length next_position = iterator_start_position(&new_iter);
373  if (position.bytes < next_position.bytes) {
374  ts_range_array_add(&results, position, next_position);
375  position = next_position;
376  } else if (position.bytes > next_position.bytes) {
377  ts_range_array_add(&results, next_position, position);
378  next_position = position;
379  }
380 
381  do {
382  #ifdef DEBUG_GET_CHANGED_RANGES
383  printf("At [%-2u, %-2u] Compare ", position.extent.row + 1, position.extent.column);
384  iterator_print_state(&old_iter);
385  printf("\tvs\t");
386  iterator_print_state(&new_iter);
387  puts("");
388  #endif
389 
390  // Compare the old and new subtrees.
391  IteratorComparison comparison = iterator_compare(&old_iter, &new_iter);
392 
393  // Even if the two subtrees appear to be identical, they could differ
394  // internally if they contain a range of text that was previously
395  // excluded from the parse, and is now included, or vice-versa.
396  if (comparison == IteratorMatches && ts_range_array_intersects(
397  included_range_differences,
398  included_range_difference_index,
399  position.bytes,
400  iterator_end_position(&old_iter).bytes
401  )) {
402  comparison = IteratorMayDiffer;
403  }
404 
405  bool is_changed = false;
406  switch (comparison) {
407  // If the subtrees are definitely identical, move to the end
408  // of both subtrees.
409  case IteratorMatches:
410  next_position = iterator_end_position(&old_iter);
411  break;
412 
413  // If the subtrees might differ internally, descend into both
414  // subtrees, finding the first child that spans the current position.
415  case IteratorMayDiffer:
416  if (iterator_descend(&old_iter, position.bytes)) {
417  if (!iterator_descend(&new_iter, position.bytes)) {
418  is_changed = true;
419  next_position = iterator_end_position(&old_iter);
420  }
421  } else if (iterator_descend(&new_iter, position.bytes)) {
422  is_changed = true;
423  next_position = iterator_end_position(&new_iter);
424  } else {
425  next_position = length_min(
426  iterator_end_position(&old_iter),
427  iterator_end_position(&new_iter)
428  );
429  }
430  break;
431 
432  // If the subtrees are different, record a change and then move
433  // to the end of both subtrees.
434  case IteratorDiffers:
435  is_changed = true;
436  next_position = length_min(
437  iterator_end_position(&old_iter),
438  iterator_end_position(&new_iter)
439  );
440  break;
441  }
442 
443  // Ensure that both iterators are caught up to the current position.
444  while (
445  !iterator_done(&old_iter) &&
446  iterator_end_position(&old_iter).bytes <= next_position.bytes
447  ) iterator_advance(&old_iter);
448  while (
449  !iterator_done(&new_iter) &&
450  iterator_end_position(&new_iter).bytes <= next_position.bytes
451  ) iterator_advance(&new_iter);
452 
453  // Ensure that both iterators are at the same depth in the tree.
454  while (old_iter.visible_depth > new_iter.visible_depth) {
455  iterator_ascend(&old_iter);
456  }
457  while (new_iter.visible_depth > old_iter.visible_depth) {
458  iterator_ascend(&new_iter);
459  }
460 
461  if (is_changed) {
462  #ifdef DEBUG_GET_CHANGED_RANGES
463  printf(
464  " change: [[%u, %u] - [%u, %u]]\n",
465  position.extent.row + 1, position.extent.column,
466  next_position.extent.row + 1, next_position.extent.column
467  );
468  #endif
469 
470  ts_range_array_add(&results, position, next_position);
471  }
472 
473  position = next_position;
474 
475  // Keep track of the current position in the included range differences
476  // array in order to avoid scanning the entire array on each iteration.
477  while (included_range_difference_index < included_range_differences->size) {
478  const TSRange *range = &included_range_differences->contents[
479  included_range_difference_index
480  ];
481  if (range->end_byte <= position.bytes) {
482  included_range_difference_index++;
483  } else {
484  break;
485  }
486  }
487  } while (!iterator_done(&old_iter) && !iterator_done(&new_iter));
488 
489  Length old_size = ts_subtree_total_size(*old_tree);
490  Length new_size = ts_subtree_total_size(*new_tree);
491  if (old_size.bytes < new_size.bytes) {
492  ts_range_array_add(&results, old_size, new_size);
493  } else if (new_size.bytes < old_size.bytes) {
494  ts_range_array_add(&results, new_size, old_size);
495  }
496 
497  *cursor1 = old_iter.cursor;
498  *cursor2 = new_iter.cursor;
499  *ranges = results.contents;
500  return results.size;
501 }
lzma_index ** i
Definition: index.h:629
const char * ts_language_symbol_name(const TSLanguage *, TSSymbol)
Definition: language.c:59
#define array_new()
Definition: array.h:25
#define array_back(self)
Definition: array.h:33
#define array_push(self, element)
Definition: array.h:43
#define array_pop(self)
Definition: array.h:82
#define array_clear(self)
Definition: array.h:35
static ut8 bytes[32]
Definition: asm_arc.c:23
_Use_decl_annotations_ int __cdecl printf(const char *const _Format,...)
Definition: cs_driver.c:93
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void static offset struct stat static buf void long static basep static whence static length const void static len static semflg const void static shmflg const struct timespec struct timespec static rem const char static group const void start
Definition: sflib.h:133
#define ERROR_STATE
Definition: error_costs.h:4
static void iterator_ascend(Iterator *self)
static void ts_range_array_add(TSRangeArray *self, Length start, Length end)
static IteratorComparison iterator_compare(const Iterator *old_iter, const Iterator *new_iter)
static bool iterator_done(Iterator *self)
static void iterator_advance(Iterator *self)
static bool iterator_tree_is_visible(const Iterator *self)
static bool iterator_descend(Iterator *self, uint32_t goal_position)
static void iterator_get_visible_state(const Iterator *self, Subtree *tree, TSSymbol *alias_symbol, uint32_t *start_byte)
static Length iterator_end_position(Iterator *self)
void ts_range_array_get_changed_ranges(const TSRange *old_ranges, unsigned old_range_count, const TSRange *new_ranges, unsigned new_range_count, TSRangeArray *differences)
static Iterator iterator_new(TreeCursor *cursor, const Subtree *tree, const TSLanguage *language)
static Length iterator_start_position(Iterator *self)
bool ts_range_array_intersects(const TSRangeArray *self, unsigned start_index, uint32_t start_byte, uint32_t end_byte)
IteratorComparison
@ IteratorDiffers
@ IteratorMatches
@ IteratorMayDiffer
unsigned ts_subtree_get_changed_ranges(const Subtree *old_tree, const Subtree *new_tree, TreeCursor *cursor1, TreeCursor *cursor2, const TSLanguage *language, const TSRangeArray *included_range_differences, TSRange **ranges)
voidpf void uLong size
Definition: ioapi.h:138
static TSSymbol ts_language_alias_at(const TSLanguage *self, uint32_t production_id, uint32_t child_index)
Definition: language.h:236
static Length length_zero(void)
Definition: length.h:39
static Length length_add(Length len1, Length len2)
Definition: length.h:25
static Length length_min(Length len1, Length len2)
Definition: length.h:21
static const Length LENGTH_MAX
Definition: length.h:15
int n
Definition: mipsasm.c:19
uint16_t TSSymbol
Definition: parser.h:19
#define ts_builtin_sym_error
Definition: parser.h:12
unsigned int uint32_t
Definition: sftypes.h:29
const TSLanguage * language
TreeCursor cursor
unsigned visible_depth
Definition: length.h:9
uint32_t bytes
Definition: length.h:10
TSPoint extent
Definition: length.h:11
uint16_t production_id
Definition: subtree.h:140
Definition: api.h:55
uint32_t row
Definition: api.h:56
uint32_t column
Definition: api.h:57
Definition: api.h:60
TSPoint end_point
Definition: api.h:62
uint32_t start_byte
Definition: api.h:63
TSPoint start_point
Definition: api.h:61
uint32_t end_byte
Definition: api.h:64
Definition: zipcmp.c:77
Definition: z80asm.h:102
#define ts_subtree_children(self)
Definition: subtree.h:233
static bool ts_subtree_visible(Subtree self)
Definition: subtree.h:214
static Length ts_subtree_total_size(Subtree self)
Definition: subtree.h:274
#define TS_TREE_STATE_NONE
Definition: subtree.h:18
static bool ts_subtree_extra(Subtree self)
Definition: subtree.h:216
static bool ts_subtree_has_changes(Subtree self)
Definition: subtree.h:217
static Length ts_subtree_size(Subtree self)
Definition: subtree.h:265
static TSStateId ts_subtree_parse_state(Subtree self)
Definition: subtree.h:220
static uint32_t ts_subtree_child_count(Subtree self)
Definition: subtree.h:282
#define NULL_SUBTREE
Definition: subtree.h:19
static Length ts_subtree_padding(Subtree self)
Definition: subtree.h:256
static TSSymbol ts_subtree_symbol(Subtree self)
Definition: subtree.h:213
const SubtreeHeapData * ptr
Definition: subtree.h:158