Rizin
unix-like reverse engineering framework and cli tools
lexer.h File Reference
#include "./length.h"
#include "./subtree.h"
#include "tree_sitter/api.h"
#include "tree_sitter/parser.h"

Go to the source code of this file.

Classes

struct  Lexer
 

Functions

void ts_lexer_init (Lexer *)
 
void ts_lexer_delete (Lexer *)
 
void ts_lexer_set_input (Lexer *, TSInput)
 
void ts_lexer_reset (Lexer *, Length)
 
void ts_lexer_start (Lexer *)
 
void ts_lexer_finish (Lexer *, uint32_t *)
 
void ts_lexer_advance_to_end (Lexer *)
 
void ts_lexer_mark_end (Lexer *)
 
bool ts_lexer_set_included_ranges (Lexer *self, const TSRange *ranges, uint32_t count)
 
TSRange * ts_lexer_included_ranges (const Lexer *self, uint32_t *count)
 

Function Documentation

◆ ts_lexer_advance_to_end()

void ts_lexer_advance_to_end ( Lexer * self)

Definition at line 357 of file lexer.c.

357  {
358  while (self->chunk) {
359  ts_lexer__advance(&self->data, false);
360  }
361 }
static void ts_lexer__advance(TSLexer *_self, bool skip)
Definition: lexer.c:200
TSLexer data
Definition: lexer.h:14
const char * chunk
Definition: lexer.h:20

References ts_lexer__advance().

Referenced by parser__halt_parse().

◆ ts_lexer_delete()

void ts_lexer_delete ( Lexer * self)

Definition at line 304 of file lexer.c.

304  {
305  ts_free(self->included_ranges);
306 }
#define ts_free
Definition: alloc.h:30
TSRange * included_ranges
Definition: lexer.h:19

References ts_free.

Referenced by ts_parser_delete().

◆ ts_lexer_finish()

void ts_lexer_finish ( Lexer * self,
uint32_t * lookahead_end_byte 
)

Definition at line 337 of file lexer.c.

337  {
338  if (length_is_undefined(self->token_end_position)) {
339  ts_lexer__mark_end(&self->data);
340  }
341 
342  uint32_t current_lookahead_end_byte = self->current_position.bytes + 1;
343 
344  // In order to determine that a byte sequence is invalid UTF8 or UTF16,
345  // the character decoding algorithm may have looked at the following byte.
346  // Therefore, the next byte *after* the current (invalid) character
347  // affects the interpretation of the current character.
348  if (self->data.lookahead == TS_DECODE_ERROR) {
349  current_lookahead_end_byte++;
350  }
351 
352  if (current_lookahead_end_byte > *lookahead_end_byte) {
353  *lookahead_end_byte = current_lookahead_end_byte;
354  }
355 }
static bool length_is_undefined(Length length)
Definition: length.h:17
static void ts_lexer__mark_end(TSLexer *_self)
Definition: lexer.c:215
unsigned int uint32_t
Definition: sftypes.h:29
Length token_end_position
Definition: lexer.h:17
int32_t lookahead
Definition: parser.h:44
static const int32_t TS_DECODE_ERROR
Definition: unicode.h:16

References length_is_undefined(), TS_DECODE_ERROR, and ts_lexer__mark_end().

Referenced by ts_parser__lex().

◆ ts_lexer_included_ranges()

TSRange* ts_lexer_included_ranges ( const Lexer * self,
uint32_t * count 
)

Definition at line 395 of file lexer.c.

395  {
396  *count = self->included_range_count;
397  return self->included_ranges;
398 }
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void count
Definition: sflib.h:98

References count.

Referenced by ts_parser_included_ranges().

◆ ts_lexer_init()

void ts_lexer_init ( Lexer * self)

Definition at line 275 of file lexer.c.

275  {
276  *self = (Lexer) {
277  .data = {
278  // The lexer's methods are stored as struct fields so that generated
279  // parsers can call them without needing to be linked against this
280  // library.
281  .advance = ts_lexer__advance,
282  .mark_end = ts_lexer__mark_end,
283  .get_column = ts_lexer__get_column,
284  .is_at_included_range_start = ts_lexer__is_at_included_range_start,
285  .eof = ts_lexer__eof,
286  .lookahead = 0,
287  .result_symbol = 0,
288  },
289  .chunk = NULL,
290  .chunk_size = 0,
291  .chunk_start = 0,
292  .current_position = {0, {0, 0}},
293  .logger = {
294  .payload = NULL,
295  .log = NULL
296  },
297  .included_ranges = NULL,
298  .included_range_count = 0,
299  .current_included_range_index = 0,
300  };
301  ts_lexer_set_included_ranges(self, NULL, 0);
302 }
#define NULL
Definition: cris-opc.c:27
static uint32_t ts_lexer__get_column(TSLexer *_self)
Definition: lexer.c:239
static bool ts_lexer__eof(const TSLexer *_self)
Definition: lexer.c:42
static bool ts_lexer__is_at_included_range_start(const TSLexer *_self)
Definition: lexer.c:265
bool ts_lexer_set_included_ranges(Lexer *self, const TSRange *ranges, uint32_t count)
Definition: lexer.c:367
Lexer
Definition: lexer.h:13

References NULL, ts_lexer__advance(), ts_lexer__eof(), ts_lexer__get_column(), ts_lexer__is_at_included_range_start(), ts_lexer__mark_end(), and ts_lexer_set_included_ranges().

Referenced by parser_init(), and ts_parser_new().

◆ ts_lexer_mark_end()

void ts_lexer_mark_end ( Lexer * self)

Definition at line 363 of file lexer.c.

363  {
364  ts_lexer__mark_end(&self->data);
365 }

References ts_lexer__mark_end().

Referenced by ts_parser__handle_error().

◆ ts_lexer_reset()

void ts_lexer_reset ( Lexer * self,
Length  position 
)

Definition at line 316 of file lexer.c.

316  {
317  if (position.bytes != self->current_position.bytes) {
318  ts_lexer_goto(self, position);
319  }
320 }
static void ts_lexer_goto(Lexer *self, Length position)
Definition: lexer.c:105
uint32_t bytes
Definition: length.h:10
Length current_position
Definition: lexer.h:15

References Length::bytes, and ts_lexer_goto().

Referenced by parser__lex(), ts_parser__handle_error(), ts_parser__lex(), and ts_parser_reset().

◆ ts_lexer_set_included_ranges()

bool ts_lexer_set_included_ranges ( Lexer * self,
const TSRange * ranges,
uint32_t  count 
)

Definition at line 367 of file lexer.c.

371  {
372  if (count == 0 || !ranges) {
373  ranges = &DEFAULT_RANGE;
374  count = 1;
375  } else {
376  uint32_t previous_byte = 0;
377  for (unsigned i = 0; i < count; i++) {
378  const TSRange *range = &ranges[i];
379  if (
380  range->start_byte < previous_byte ||
381  range->end_byte < range->start_byte
382  ) return false;
383  previous_byte = range->end_byte;
384  }
385  }
386 
387  size_t size = count * sizeof(TSRange);
388  self->included_ranges = ts_realloc(self->included_ranges, size);
389  memcpy(self->included_ranges, ranges, size);
390  self->included_range_count = count;
391  ts_lexer_goto(self, self->current_position);
392  return true;
393 }
#define ts_realloc
Definition: alloc.h:27
lzma_index ** i
Definition: index.h:629
voidpf void uLong size
Definition: ioapi.h:138
static const TSRange DEFAULT_RANGE
Definition: lexer.c:26
memcpy(mem, inblock.get(), min(CONTAINING_RECORD(inblock.get(), MEMBLOCK, data) ->size, size))
Definition: api.h:60

References count, DEFAULT_RANGE, i, memcpy(), capstone::range, ts_lexer_goto(), and ts_realloc.

Referenced by ts_lexer_init(), and ts_parser_set_included_ranges().

◆ ts_lexer_set_input()

void ts_lexer_set_input ( Lexer * self,
TSInput  input 
)

Definition at line 308 of file lexer.c.

308  {
309  self->input = input;
310  ts_lexer__clear_chunk(self);
311  ts_lexer_goto(self, self->current_position);
312 }
static void ts_lexer__clear_chunk(Lexer *self)
Definition: lexer.c:49
static bool input(void *ud, zip_uint8_t *data, zip_uint64_t length)

References input(), ts_lexer__clear_chunk(), and ts_lexer_goto().

Referenced by parser__start(), and ts_parser_parse().

◆ ts_lexer_start()

void ts_lexer_start ( Lexer * self)

Definition at line 322 of file lexer.c.

322  {
323  self->token_start_position = self->current_position;
324  self->token_end_position = LENGTH_UNDEFINED;
325  self->data.result_symbol = 0;
326  self->did_get_column = false;
327  if (!ts_lexer__eof(&self->data)) {
328  if (!self->chunk_size) ts_lexer__get_chunk(self);
329  if (!self->lookahead_size) ts_lexer__get_lookahead(self);
330  if (
331  self->current_position.bytes == 0 &&
332  self->data.lookahead == BYTE_ORDER_MARK
333  ) ts_lexer__advance(&self->data, true);
334  }
335 }
static const Length LENGTH_UNDEFINED
Definition: length.h:14
static void ts_lexer__get_lookahead(Lexer *self)
Definition: lexer.c:74
static const int32_t BYTE_ORDER_MARK
Definition: lexer.c:24
static void ts_lexer__get_chunk(Lexer *self)
Definition: lexer.c:57
uint32_t lookahead_size
Definition: lexer.h:28
uint32_t chunk_size
Definition: lexer.h:27

References BYTE_ORDER_MARK, LENGTH_UNDEFINED, ts_lexer__advance(), ts_lexer__eof(), ts_lexer__get_chunk(), and ts_lexer__get_lookahead().

Referenced by parser__lex(), and ts_parser__lex().