Rizin
unix-like reverse engineering framework and cli tools
scanner.c
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: 2020 ret2libc <sirmy15@gmail.com>
2 // SPDX-License-Identifier: LGPL-3.0-only
3 
4 #include <tree_sitter/parser.h>
5 #include <ctype.h>
6 #include <wctype.h>
7 #include <stdio.h>
8 #include <string.h>
9 
10 #define CMD_IDENTIFIER_MAX_LENGTH 32
11 #define ESCAPE_CHAR '\\'
12 
/**
 * External token kinds produced by this scanner.
 *
 * The scanner indexes the valid_symbols array with these values, so the
 * order of the enumerators is significant.
 */
enum TokenType {
	CMD_IDENTIFIER,
	HELP_STMT,
	FILE_DESCRIPTOR,
	EQ_SEP_CONCAT,
	CONCAT,
	CONCAT_PF_DOT,
	SPEC_SEP,
};
22 
/**
 * Create the external scanner payload.
 *
 * \return Always NULL: no payload object is allocated.
 */
void *tree_sitter_rzcmd_external_scanner_create(void) {
	return NULL;
}
26 
/**
 * Release the external scanner payload.
 *
 * Intentionally empty: there is nothing to free.
 *
 * \param payload Scanner payload (unused).
 */
void tree_sitter_rzcmd_external_scanner_destroy(void *payload) {
	(void)payload;
}
29 
/**
 * Serialize the scanner state.
 *
 * Nothing is written to \p buffer.
 *
 * \param payload Scanner payload (unused).
 * \param buffer Destination buffer (left untouched).
 * \return Number of bytes written, always 0.
 */
unsigned tree_sitter_rzcmd_external_scanner_serialize(void *payload, char *buffer) {
	(void)payload;
	(void)buffer;
	return 0;
}
33 
/**
 * Restore the scanner state from a serialized buffer.
 *
 * Intentionally empty: the input is ignored.
 *
 * \param payload Scanner payload (unused).
 * \param buffer Serialized state (unused).
 * \param length Size of \p buffer in bytes (unused).
 */
void tree_sitter_rzcmd_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
	(void)payload;
	(void)buffer;
	(void)length;
}
36 
/**
 * Check whether \p s is a "pf"-family command name.
 *
 * \param s NUL-terminated command name.
 * \return true if \p s is exactly "Cf", or starts with "pf" and is not
 *         exactly "pfo"; false otherwise.
 */
static bool is_pf_cmd(const char *s) {
	if (strcmp(s, "Cf") == 0) {
		return true;
	}
	if (strcmp(s, "pfo") == 0) {
		return false;
	}
	return strncmp(s, "pf", 2) == 0;
}
40 
/**
 * Check whether \p s starts with "env".
 *
 * \param s NUL-terminated command name.
 * \return true if the first three characters of \p s are "env".
 */
static bool is_env_cmd(const char *s) {
	static const char prefix[] = "env";
	return strncmp(s, prefix, sizeof(prefix) - 1) == 0;
}
44 
/**
 * Check whether \p s begins with '@'.
 *
 * \param s NUL-terminated command name.
 * \return true if the first character of \p s is '@'.
 */
static bool is_at_cmd(const char *s) {
	return *s == '@';
}
48 
/**
 * Check whether \p s begins with 'R'.
 *
 * \param s NUL-terminated command name.
 * \return true if the first character of \p s is 'R'.
 */
static bool is_remote_cmd(const char *s) {
	return *s == 'R';
}
52 
/**
 * Check whether \p s is a comment introducer.
 *
 * \param s NUL-terminated scanned text.
 * \return true if \p s starts with the two characters "/" and "*", or is
 *         exactly "#".
 */
static bool is_comment(const char *s) {
	if (s[0] == '/' && s[1] == '*') {
		return true;
	}
	return s[0] == '#' && s[1] == '\0';
}
56 
/**
 * Check whether \p s begins with '.'.
 *
 * \param s NUL-terminated command name.
 * \return true if the first character of \p s is '.'.
 */
static bool is_interpret_cmd(const char *s) {
	return *s == '.';
}
60 
/**
 * Check whether \p ch is one of the "special" leading characters:
 * '*', '(', '@', '|', '>', '.', '%', '~' or '!'.
 *
 * \param ch Character to classify.
 * \return true if \p ch is in the set above.
 */
static bool is_special_start(const int32_t ch) {
	switch (ch) {
	case '*':
	case '(':
	case '@':
	case '|':
	case '>':
	case '.':
	case '%':
	case '~':
	case '!':
		return true;
	default:
		return false;
	}
}
66 
/**
 * Check whether \p ch may be the first character of a command name.
 *
 * Accepted: any alphabetic character (per iswalpha), any character accepted
 * by is_special_start(), and the punctuation listed in the switch below.
 *
 * \param ch Character to classify.
 * \return true if a command name may begin with \p ch.
 */
static bool is_start_of_command(const int32_t ch) {
	if (iswalpha(ch) || is_special_start(ch)) {
		return true;
	}
	switch (ch) {
	case '$':
	case '?':
	case ':':
	case '+':
	case '=':
	case '/':
	case '_':
	case '#':
	case '\\':
	case '-':
	case '<':
	case '&':
		return true;
	default:
		return false;
	}
}
72 
73 static bool is_mid_command(const char *res, int len, const int32_t ch) {
74  if (ch == ESCAPE_CHAR) {
75  return true;
76  }
77  if (res[0] == '#') {
78  if (len == 1) {
79  return ch == '!' || ch == '?';
80  }
81  return ch == '?';
82  } else if (res[0] == '<') {
83  return ch == '?';
84  }
85  return iswalnum (ch) || ch == '$' || ch == '?' || ch == '.' || ch == '!' ||
86  ch == '+' || ch == '=' || ch == '/' || ch == '*' ||
87  ch == '-' || ch == '&' || ch == '_' ||
88  (is_interpret_cmd (res) && ch == '(') ||
89  (is_remote_cmd (res) && ch == '<') || (is_at_cmd (res) && ch == '@');
90 }
91 
/**
 * Check whether \p ch can continue a concatenation.
 *
 * Rejected: NUL, whitespace (per iswspace) and the characters
 * '#', '@', '|', '>', ';', ')', '`', '~' and '\\'.
 *
 * \param ch Character to classify.
 * \return true if \p ch is none of the rejected characters.
 */
static bool is_concat(const int32_t ch) {
	if (ch == '\0' || iswspace(ch)) {
		return false;
	}
	switch (ch) {
	case '#':
	case '@':
	case '|':
	case '>':
	case ';':
	case ')':
	case '`':
	case '~':
	case '\\':
		return false;
	default:
		return true;
	}
}
97 
/**
 * Variant of is_concat() that additionally rejects '='.
 *
 * \param ch Character to classify.
 * \return true if \p ch is accepted by is_concat() and is not '='.
 */
static bool is_concat_pf_dot(const int32_t ch) {
	if (ch == '=') {
		return false;
	}
	return is_concat(ch);
}
101 
/**
 * Variant of is_concat() that additionally rejects '='.
 *
 * \param ch Character to classify.
 * \return true if \p ch is accepted by is_concat() and is not '='.
 */
static bool is_concat_eq_sep(const int32_t ch) {
	if (ch == '=') {
		return false;
	}
	return is_concat(ch);
}
105 
/**
 * Check whether a name ends with the two characters "?*".
 *
 * \param before_last_ch Second-to-last character of the name.
 * \param last_ch Last character of the name.
 * \return true if the pair is ('?', '*').
 */
static bool is_recursive_help(const int32_t before_last_ch, const int32_t last_ch) {
	if (last_ch != '*') {
		return false;
	}
	return before_last_ch == '?';
}
109 
/**
 * Check whether a name ends with the three characters "?*j".
 *
 * \param trd_last_ch Third-to-last character of the name.
 * \param snd_last_ch Second-to-last character of the name.
 * \param last_ch Last character of the name.
 * \return true if the triple is ('?', '*', 'j').
 */
static bool is_recursive_help_json(const int32_t trd_last_ch, const int32_t snd_last_ch, const int32_t last_ch) {
	if (last_ch != 'j') {
		return false;
	}
	if (snd_last_ch != '*') {
		return false;
	}
	return trd_last_ch == '?';
}
113 
114 static bool scan_number(TSLexer *lexer, const bool *valid_symbols) {
115  if (!valid_symbols[FILE_DESCRIPTOR]) {
116  return false;
117  }
118 
119  // skip spaces at the beginning
120  while (iswspace (lexer->lookahead)) {
121  lexer->advance (lexer, true);
122  }
123 
124  if (!iswdigit (lexer->lookahead)) {
125  return false;
126  }
127  lexer->advance (lexer, false);
128  for (;;) {
129  if (iswdigit (lexer->lookahead)) {
130  lexer->advance (lexer, false);
131  } else if (lexer->lookahead != '>') {
132  return false;
133  } else {
134  break;
135  }
136  }
137  if (lexer->lookahead == '>') {
139  return true;
140  }
141  return false;
142 }
143 
144 bool tree_sitter_rzcmd_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
145  if (valid_symbols[SPEC_SEP] && lexer->lookahead == ':') {
146  lexer->advance(lexer, false);
147  lexer->result_symbol = SPEC_SEP;
148  return true;
149  } else if (valid_symbols[CONCAT] && is_concat(lexer->lookahead)) {
150  lexer->result_symbol = CONCAT;
151  return true;
152  } else if (valid_symbols[CONCAT_PF_DOT] && is_concat_pf_dot(lexer->lookahead)) {
153  lexer->result_symbol = CONCAT_PF_DOT;
154  return true;
155  } else if (valid_symbols[EQ_SEP_CONCAT] && is_concat_eq_sep(lexer->lookahead)) {
156  lexer->result_symbol = EQ_SEP_CONCAT;
157  return true;
158  }
159  if (valid_symbols[CMD_IDENTIFIER] || valid_symbols[HELP_STMT]) {
160  char res[CMD_IDENTIFIER_MAX_LENGTH + 1];
161  int i_res = 0;
162 
163  while (iswspace (lexer->lookahead)) {
164  lexer->advance (lexer, true);
165  }
166 
167  if (!is_start_of_command (lexer->lookahead)) {
168  return false;
169  }
170  res[i_res++] = lexer->lookahead;
171  lexer->advance (lexer, false);
172  while (lexer->lookahead && i_res < CMD_IDENTIFIER_MAX_LENGTH && is_mid_command (res, i_res, lexer->lookahead)) {
173  if (lexer->lookahead == ESCAPE_CHAR) {
174  // ignore escape char and just get the next one, whatever it is
175  lexer->advance (lexer, false);
176  }
177  res[i_res++] = lexer->lookahead;
178  lexer->advance (lexer, false);
179  }
180  res[i_res] = '\0';
181  if (is_comment (res)) {
182  return false;
183  }
184  // ?? is not considered an help command, just a regular one
185  if ((res[i_res - 1] == '?' && strcmp (res, "??") != 0) ||
186  (i_res > 2 && is_recursive_help (res[i_res - 2], res[i_res - 1])) ||
187  (i_res > 3 && is_recursive_help_json (res[i_res - 3], res[i_res - 2], res[i_res - 1]))) {
188  if (i_res == 1) {
189  return false;
190  }
191  lexer->result_symbol = HELP_STMT;
192  } else {
193  if ((is_special_start(res[0]) && strcmp(res, "R=!")) || is_pf_cmd(res) || is_env_cmd(res) || is_at_cmd(res) || !valid_symbols[CMD_IDENTIFIER]) {
194  return false;
195  }
196  lexer->result_symbol = CMD_IDENTIFIER;
197  }
198  return true;
199  }
200  if (valid_symbols[FILE_DESCRIPTOR]) {
201  return scan_number (lexer, valid_symbols);
202  }
203  return false;
204 }
size_t len
Definition: 6502dis.c:15
#define NULL
Definition: cris-opc.c:27
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void static offset struct stat static buf void long static basep static whence static length const void static len static semflg const void static shmflg const struct timespec struct timespec static rem const char static group const void length
Definition: sflib.h:133
static RzSocket * s
Definition: rtr.c:28
static bool scan_number(TSLexer *lexer, const bool *valid_symbols)
Definition: scanner.c:114
#define CMD_IDENTIFIER_MAX_LENGTH
Definition: scanner.c:10
static bool is_recursive_help_json(const int32_t trd_last_ch, const int32_t snd_last_ch, const int32_t last_ch)
Definition: scanner.c:110
static bool is_concat(const int32_t ch)
Definition: scanner.c:92
static bool is_interpret_cmd(const char *s)
Definition: scanner.c:57
static bool is_concat_eq_sep(const int32_t ch)
Definition: scanner.c:102
void tree_sitter_rzcmd_external_scanner_destroy(void *payload)
Definition: scanner.c:27
void * tree_sitter_rzcmd_external_scanner_create()
Definition: scanner.c:23
static bool is_recursive_help(const int32_t before_last_ch, const int32_t last_ch)
Definition: scanner.c:106
static bool is_at_cmd(const char *s)
Definition: scanner.c:45
static bool is_mid_command(const char *res, int len, const int32_t ch)
Definition: scanner.c:73
static bool is_pf_cmd(const char *s)
Definition: scanner.c:37
void tree_sitter_rzcmd_external_scanner_deserialize(void *payload, const char *buffer, unsigned length)
Definition: scanner.c:34
bool tree_sitter_rzcmd_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols)
Definition: scanner.c:144
static bool is_env_cmd(const char *s)
Definition: scanner.c:41
TokenType
Definition: scanner.c:13
@ EQ_SEP_CONCAT
Definition: scanner.c:17
@ HELP_STMT
Definition: scanner.c:15
@ CMD_IDENTIFIER
Definition: scanner.c:14
@ FILE_DESCRIPTOR
Definition: scanner.c:16
@ SPEC_SEP
Definition: scanner.c:20
@ CONCAT
Definition: scanner.c:18
@ CONCAT_PF_DOT
Definition: scanner.c:19
static bool is_comment(const char *s)
Definition: scanner.c:53
#define ESCAPE_CHAR
Definition: scanner.c:11
static bool is_special_start(const int32_t ch)
Definition: scanner.c:61
static bool is_concat_pf_dot(const int32_t ch)
Definition: scanner.c:98
static bool is_start_of_command(const int32_t ch)
Definition: scanner.c:67
unsigned tree_sitter_rzcmd_external_scanner_serialize(void *payload, char *buffer)
Definition: scanner.c:30
static bool is_remote_cmd(const char *s)
Definition: scanner.c:49
int int32_t
Definition: sftypes.h:33
Definition: parser.h:43
void(* advance)(TSLexer *, bool)
Definition: parser.h:46
int32_t lookahead
Definition: parser.h:44
TSSymbol result_symbol
Definition: parser.h:45
Definition: buffer.h:15