Rizin
unix-like reverse engineering framework and cli tools
engine.c
Go to the documentation of this file.
1 /* $OpenBSD: engine.c,v 1.15 2005/08/05 13:03:00 espie Exp $ */
2 
3 /*-
4  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
5  * Copyright (c) 1992, 1993, 1994
6  * The Regents of the University of California. All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Henry Spencer.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  * notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  * notice, this list of conditions and the following disclaimer in the
18  * documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  * may be used to endorse or promote products derived from this software
21  * without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  * @(#)engine.c 8.5 (Berkeley) 3/20/94
36  */
37 
38 /*
39  * The matching engine and friends. This file is #included by regexec.c
40  * after suitable #defines of a variety of macros used herein, so that
41  * different state representations can be used without duplicating masses
42  * of code.
43  */
44 
45 #ifdef SNAMES
46 #define matcher smatcher
47 #define fast sfast
48 #define slow sslow
49 #define dissect sdissect
50 #define backref sbackref
51 #define step sstep
52 #define print sprint
53 #define at sat
54 #define match smat
55 #define nope snope
56 #endif
57 #ifdef LNAMES
58 #define matcher lmatcher
59 #define fast lfast
60 #define slow lslow
61 #define dissect ldissect
62 #define backref lbackref
63 #define step lstep
64 #define print lprint
65 #define at lat
66 #define match lmat
67 #define nope lnope
68 #endif
69 
70 /* another structure passed up and down to avoid zillions of parameters */
71 struct match {
72  struct re_guts *g;
73  int eflags;
74  RzRegexMatch *pmatch; /* [nsub+1] (0 element unused) */
75  char *offp; /* offsets work from here */
76  char *beginp; /* start of string -- virtual NUL precedes */
77  char *endp; /* end of string -- virtual NUL here */
78  char *coldp; /* can be no match starting before here */
79  char **lastpos; /* [nplus+1] */
81  states st; /* current states */
82  states fresh; /* states for a fresh start */
83  states tmp; /* temporary */
84  states empty; /* empty set of states */
85 };
86 
87 static int matcher(struct re_guts *, char *, size_t, RzRegexMatch[], int);
88 static char *dissect(struct match *, char *, char *, sopno, sopno);
89 static char *backref(struct match *, char *, char *, sopno, sopno, sopno, int);
90 static char *fast(struct match *, char *, char *, sopno, sopno);
91 static char *slow(struct match *, char *, char *, sopno, sopno);
92 static states step(struct re_guts *, sopno, sopno, states, int, states);
93 #define MAX_RECURSION 100
94 #define BOL (OUT + 1)
95 #define EOL (BOL + 1)
96 #define BOLEOL (BOL + 2)
97 #define NOTHING (BOL + 3)
98 #define BOW (BOL + 4)
99 #define EOW (BOL + 5)
100 #define CODEMAX (BOL + 5) /* highest code used */
101 #define NONCHAR(c) ((c) > OUT)
102 #define NNONCHAR (CODEMAX - OUT)
103 #ifdef REDEBUG
104 static void print(struct match *, char *, states, int, FILE *);
105 #endif
106 #ifdef REDEBUG
107 static void at(struct match *, char *, char *, char *, sopno, sopno);
108 #endif
109 #ifdef REDEBUG
110 static char *pchar(int);
111 #endif
112 
113 #ifdef REDEBUG
114 #define SP(t, s, c) print(m, t, s, c, stdout)
115 #define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2)
116 #define NOTE(str) \
117  { \
118  if (m->eflags & RZ_REGEX_TRACE) \
119  (void)printf("=%s\n", (str)); \
120  }
121 static int nope = 0;
122 #else
123 #define SP(t, s, c) /* nothing */
124 #define AT(t, p1, p2, s1, s2) /* nothing */
125 #define NOTE(s) /* nothing */
126 #endif
127 
128 /*
129  - matcher - the actual matching engine
130  */
131 static int /* 0 success, RZ_REGEX_NOMATCH failure */
132 matcher(struct re_guts *g, char *string, size_t nmatch, RzRegexMatch pmatch[],
133  int eflags) {
134  char *endp;
135  int i;
136  struct match mv;
137  struct match *m = &mv;
138  char *dp;
139  const sopno gf = g->firststate + 1; /* +1 for OEND */
140  const sopno gl = g->laststate;
141  char *start;
142  char *stop;
143 
144  /* simplify the situation where possible */
145  if (g->cflags & RZ_REGEX_NOSUB)
146  nmatch = 0;
147  if (eflags & RZ_REGEX_STARTEND) {
148  start = string + pmatch[0].rm_so;
149  stop = string + pmatch[0].rm_eo;
150  } else {
151  start = string;
152  stop = start + strlen(start);
153  }
154  if (stop < start)
155  return (RZ_REGEX_INVARG);
156 
157  /* prescreening; this does wonders for this rather slow code */
158  if (g->must != NULL) {
159  for (dp = start; dp < stop; dp++)
160  if (*dp == g->must[0] && stop - dp >= g->mlen &&
161  memcmp(dp, g->must, (size_t)g->mlen) == 0)
162  break;
163  if (dp == stop) /* we didn't find g->must */
164  return (RZ_REGEX_NOMATCH);
165  }
166 
167  /* match struct setup */
168  m->g = g;
169  m->eflags = eflags;
170  m->pmatch = NULL;
171  m->lastpos = NULL;
172  m->offp = string;
173  m->beginp = start;
174  m->endp = stop;
175 
176  if (m->g->nstates * 4 < m->g->nstates)
177  return RZ_REGEX_NOMATCH;
178  STATESETUP(m, 4);
179  SETUP(m->st);
180  SETUP(m->fresh);
181  SETUP(m->tmp);
182  SETUP(m->empty);
183  CLEAR(m->empty);
184 
185  /* this loop does only one repetition except for backrefs */
186  for (;;) {
187  endp = fast(m, start, stop, gf, gl);
188  if (!endp) { /* a miss */
189  free(m->pmatch);
190  free(m->lastpos);
191  STATETEARDOWN(m);
192  return (RZ_REGEX_NOMATCH);
193  }
194  if (nmatch == 0 && !g->backrefs)
195  break; /* no further info needed */
196 
197  /* where? */
198  if (!m->coldp) {
199  break;
200  }
201  for (;;) {
202  NOTE("finding start");
203  endp = slow(m, m->coldp, stop, gf, gl);
204  if (endp || m->coldp > m->endp) {
205  break;
206  }
207  m->coldp++;
208  }
209  if (nmatch == 1 && !g->backrefs)
210  break; /* no further info needed */
211 
212  /* oh my, he wants the subexpressions... */
213  if (!m->pmatch) {
214  if ((m->g->nsub + 1) * sizeof(RzRegexMatch) < m->g->nsub) {
215  return RZ_REGEX_ESPACE;
216  }
217  m->pmatch = (RzRegexMatch *)malloc((m->g->nsub + 1) *
218  sizeof(RzRegexMatch));
219  }
220  if (!m->pmatch) {
221  STATETEARDOWN(m);
222  return (RZ_REGEX_ESPACE);
223  }
224  for (i = 1; i <= m->g->nsub; i++)
225  m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
226  if (!g->backrefs && !(m->eflags & RZ_REGEX_BACKR)) {
227  NOTE("dissecting");
228  dp = dissect(m, m->coldp, endp, gf, gl);
229  } else {
230  if (g->nplus > 0 && !m->lastpos) {
231  if ((g->nplus + 1) * sizeof(char *) < g->nplus) {
232  free(m->pmatch);
233  STATETEARDOWN(m);
234  return RZ_REGEX_ESPACE;
235  }
236  m->lastpos = (char **)malloc((g->nplus + 1) *
237  sizeof(char *));
238  }
239  if (g->nplus > 0 && !m->lastpos) {
240  free(m->pmatch);
241  STATETEARDOWN(m);
242  return (RZ_REGEX_ESPACE);
243  }
244  NOTE("backref dissect");
245  dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
246  }
247  if (dp) {
248  break;
249  }
250  /* uh-oh... we couldn't find a subexpression-level match */
251  if (!g->backrefs) { /* must be back references doing it */
252  break;
253  }
254  if (g->nplus || !m->lastpos) {
255  break;
256  }
257  for (;;) {
258  if (dp != NULL || endp <= m->coldp)
259  break; /* defeat */
260  NOTE("backoff");
261  endp = slow(m, m->coldp, endp - 1, gf, gl);
262  if (!endp)
263  break; /* defeat */
264  /* try it on a shorter possibility */
265 #ifndef NDEBUG
266  for (i = 1; i <= m->g->nsub; i++) {
267  if (m->pmatch[i].rm_so != -1) {
268  break;
269  }
270  if (m->pmatch[i].rm_eo != -1) {
271  break;
272  }
273  }
274 #endif
275  NOTE("backoff dissect");
276  dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
277  }
278  if (dp != NULL || dp != endp) /* found a shorter one */
279  break;
280 
281  /* despite initial appearances, there is no match here */
282  NOTE("false alarm");
283  if (m->coldp == stop)
284  break;
285  start = m->coldp + 1; /* recycle starting later */
286  }
287 
288  /* fill in the details if requested */
289  if (nmatch > 0) {
290  pmatch[0].rm_so = m->coldp - m->offp;
291  pmatch[0].rm_eo = endp - m->offp;
292  }
293  if (nmatch > 1) {
294  if (m->pmatch) {
295  for (i = 1; i < nmatch; i++) {
296  if (i <= m->g->nsub) {
297  pmatch[i] = m->pmatch[i];
298  } else {
299  pmatch[i].rm_so = -1;
300  pmatch[i].rm_eo = -1;
301  }
302  }
303  }
304  }
305 
306  if (m->pmatch != NULL)
307  free((char *)m->pmatch);
308  if (m->lastpos != NULL)
309  free((char *)m->lastpos);
310  STATETEARDOWN(m);
311  return (0);
312 }
313 
314 /*
315  - dissect - figure out what matched what, no back references
316  */
317 static char * /* == stop (success) always */
318 dissect(struct match *m, char *start, char *stop, sopno startst, sopno stopst) {
319  int i;
320  sopno ss; /* start sop of current subRE */
321  sopno es; /* end sop of current subRE */
322  char *sp; /* start of string matched by it */
323  char *stp; /* string matched by it cannot pass here */
324  char *rest; /* start of rest of string */
325  char *tail; /* string unmatched by rest of RE */
326  sopno ssub; /* start sop of subsubRE */
327  sopno esub; /* end sop of subsubRE */
328  char *ssp; /* start of string matched by subsubRE */
329  char *sep; /* end of string matched by subsubRE */
330  char *oldssp; /* previous ssp */
331  char *dp;
332 
333  AT("diss", start, stop, startst, stopst);
334  sp = start;
335  for (ss = startst; ss < stopst; ss = es) {
336  /* identify end of subRE */
337  es = ss;
338  switch (OP(m->g->strip[es])) {
339  case OPLUS_:
340  case OQUEST_:
341  es += OPND(m->g->strip[es]);
342  break;
343  case OCH_:
344  while (OP(m->g->strip[es]) != O_CH)
345  es += OPND(m->g->strip[es]);
346  break;
347  }
348  es++;
349 
350  /* figure out what it matched */
351  switch (OP(m->g->strip[ss])) {
352  case OEND:
353  break;
354  case OCHAR:
355  sp++;
356  break;
357  case OBOL:
358  case OEOL:
359  case OBOW:
360  case OEOW:
361  break;
362  case OANY:
363  case OANYOF:
364  sp++;
365  break;
366  case OBACK_:
367  case O_BACK:
368  break;
369  /* cases where length of match is hard to find */
370  case OQUEST_:
371  stp = stop;
372  for (;;) {
373  /* how long could this one be? */
374  rest = slow(m, sp, stp, ss, es);
375  if (rest) { /* it did match */
376  /* could the rest match the rest? */
377  tail = slow(m, rest, stop, es, stopst);
378  if (tail == stop)
379  break; /* yes! */
380  /* no -- try a shorter match for this one */
381  stp = rest - 1;
382  }
383  }
384  ssub = ss + 1;
385  esub = es - 1;
386  /* did innards match? */
387  if (slow(m, sp, rest, ssub, esub) != NULL) {
388  dp = dissect(m, sp, rest, ssub, esub);
389  if (dp != rest)
390  return NULL;
391  } else if (sp != rest)
392  return NULL;
393  sp = rest;
394  break;
395  case OPLUS_:
396  stp = stop;
397  for (;;) {
398  /* how long could this one be? */
399  rest = slow(m, sp, stp, ss, es);
400  if (rest != NULL) { /* it did match */
401  /* could the rest match the rest? */
402  tail = slow(m, rest, stop, es, stopst);
403  if (tail == stop)
404  break; /* yes! */
405  /* no -- try a shorter match for this one */
406  stp = rest - 1;
407  }
408  }
409  ssub = ss + 1;
410  esub = es - 1;
411  ssp = sp;
412  oldssp = ssp;
413  for (;;) { /* find last match of innards */
414  sep = slow(m, ssp, rest, ssub, esub);
415  if (!sep || sep == ssp)
416  break; /* failed or matched null */
417  oldssp = ssp; /* on to next try */
418  ssp = sep;
419  }
420  if (!sep) {
421  /* last successful match */
422  sep = ssp;
423  ssp = oldssp;
424  }
425  if (sep == rest) { /* must exhaust substring */
426  if (slow(m, ssp, sep, ssub, esub) == rest) {
427  dp = dissect(m, ssp, sep, ssub, esub);
428  if (dp == sep) {
429  sp = rest;
430  }
431  }
432  }
433  break;
434  case OCH_:
435  stp = stop;
436  for (;;) {
437  /* how long could this one be? */
438  rest = slow(m, sp, stp, ss, es);
439  if (rest) { /* it did match */
440  /* could the rest match the rest? */
441  tail = slow(m, rest, stop, es, stopst);
442  if (tail == stop)
443  break; /* yes! */
444  /* no -- try a shorter match for this one */
445  stp = rest - 1;
446  }
447  }
448  ssub = ss + 1;
449  esub = ss + OPND(m->g->strip[ss]) - 1;
450  if (OP(m->g->strip[esub]) != OOR1) {
451  break;
452  }
453  for (;;) { /* find first matching branch */
454  if (slow(m, sp, rest, ssub, esub) == rest)
455  break; /* it matched all of it */
456  /* that one missed, try next one */
457  if (OP(m->g->strip[esub]) == OOR1) {
458  esub++;
459  if (OP(m->g->strip[esub]) == OOR2) {
460  ssub = esub + 1;
461  esub += OPND(m->g->strip[esub]);
462  if (OP(m->g->strip[esub]) == OOR2) {
463  esub--;
464  } else {
465  if (OP(m->g->strip[esub]) != O_CH) {
466  break;
467  }
468  }
469  }
470  }
471  }
472  dp = dissect(m, sp, rest, ssub, esub);
473  if (dp == rest) {
474  sp = rest;
475  }
476  break;
477  case O_PLUS:
478  case O_QUEST:
479  case OOR1:
480  case OOR2:
481  case O_CH:
482  break;
483  case OLPAREN:
484  i = OPND(m->g->strip[ss]);
485  if (i > 0 && i <= m->g->nsub) {
486  m->pmatch[i].rm_so = sp - m->offp;
487  }
488  break;
489  case ORPAREN:
490  i = OPND(m->g->strip[ss]);
491  if (i > 0 && i <= m->g->nsub) {
492  m->pmatch[i].rm_eo = sp - m->offp;
493  }
494  break;
495  default: /* uh oh */
496  break;
497  }
498  }
499 
500  if (sp == stop) {
501  return sp;
502  } else {
503  return NULL;
504  }
505 }
506 
507 /*
508  - backref - figure out what matched what, figuring in back references
509  */
510 static char * /* == stop (success) or NULL (failure) */
511 backref(struct match *m, char *start, char *stop, sopno startst, sopno stopst,
512  sopno lev, int rec) /* PLUS nesting level */
513 {
514  int i;
515  sopno ss; /* start sop of current subRE */
516  char *sp; /* start of string matched by it */
517  sopno ssub; /* start sop of subsubRE */
518  sopno esub; /* end sop of subsubRE */
519  char *ssp; /* start of string matched by subsubRE */
520  char *dp;
521  size_t len;
522  int hard;
523  sop s;
524  ut64 offsave;
525  cset *cs;
526 
527  AT("back", start, stop, startst, stopst);
528  sp = start;
529 
530  /* get as far as we can with easy stuff */
531  hard = 0;
532  for (ss = startst; !hard && ss < stopst; ss++)
533  switch (OP(s = m->g->strip[ss])) {
534  case OCHAR:
535  if (sp == stop || *sp++ != (char)OPND(s))
536  return (NULL);
537  break;
538  case OANY:
539  if (sp == stop)
540  return (NULL);
541  sp++;
542  break;
543  case OANYOF:
544  cs = &m->g->sets[OPND(s)];
545  if (sp == stop || !CHIN(cs, *sp++))
546  return (NULL);
547  break;
548  case OBOL:
549  if ((sp == m->beginp && !(m->eflags & RZ_REGEX_NOTBOL)) ||
550  (sp < m->endp && *(sp - 1) == '\n' &&
551  (m->g->cflags & RZ_REGEX_NEWLINE))) { /* yes */
552  } else
553  return (NULL);
554  break;
555  case OEOL:
556  if ((sp == m->endp && !(m->eflags & RZ_REGEX_NOTEOL)) ||
557  (sp < m->endp && *sp == '\n' &&
558  (m->g->cflags & RZ_REGEX_NEWLINE))) { /* yes */
559  } else
560  return (NULL);
561  break;
562  case OBOW:
563  if (((sp == m->beginp && !(m->eflags & RZ_REGEX_NOTBOL)) ||
564  (sp < m->endp && *(sp - 1) == '\n' &&
565  (m->g->cflags & RZ_REGEX_NEWLINE)) ||
566  (sp > m->beginp &&
567  !ISWORD((unsigned char)*(sp - 1)))) &&
568  (sp < m->endp && ISWORD((unsigned char)*sp))) { /* yes */
569  } else
570  return (NULL);
571  break;
572  case OEOW:
573  if (((sp == m->endp && !(m->eflags & RZ_REGEX_NOTEOL)) ||
574  (sp < m->endp && *sp == '\n' &&
575  (m->g->cflags & RZ_REGEX_NEWLINE)) ||
576  (sp < m->endp && !ISWORD((unsigned char)*sp))) &&
577  (sp > m->beginp && ISWORD((unsigned char)*(sp - 1)))) { /* yes */
578  } else
579  return (NULL);
580  break;
581  case O_QUEST:
582  break;
583  case OOR1: /* matches null but needs to skip */
584  ss++;
585  s = m->g->strip[ss];
586  do {
587  if (OP(s) == OOR2) {
588  ss += OPND(s);
589  }
590  } while (OP(s = m->g->strip[ss]) != O_CH);
591  /* note that the ss++ gets us past the O_CH */
592  break;
593  default: /* have to make a choice */
594  hard = 1;
595  break;
596  }
597  if (!hard) { /* that was it! */
598  if (sp != stop)
599  return (NULL);
600  return (sp);
601  }
602  ss--; /* adjust for the for's final increment */
603 
604  /* the hard stuff */
605  AT("hard", sp, stop, ss, stopst);
606  s = m->g->strip[ss];
607  switch (OP(s)) {
608  case OBACK_: /* the vilest depths */
609  i = OPND(s);
610  if (i > 0 && i <= m->g->nsub) {
611  if (m->pmatch[i].rm_eo == -1) {
612  return NULL;
613  }
614  }
615  if (m->pmatch[i].rm_so != -1) {
616  len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
617  if (len == 0 && rec++ > MAX_RECURSION)
618  return (NULL);
619  if (stop - m->beginp >= len) {
620  if (sp > stop - len) {
621  return (NULL); /* not enough left to match */
622  }
623  }
624  ssp = m->offp + m->pmatch[i].rm_so;
625  if (memcmp(sp, ssp, len) != 0)
626  return (NULL);
627  while (m->g->strip[ss] != SOP(O_BACK, i))
628  ss++;
629  return (backref(m, sp + len, stop, ss + 1, stopst, lev, rec));
630  }
631  break;
632  case OQUEST_: /* to null or not */
633  dp = backref(m, sp, stop, ss + 1, stopst, lev, rec);
634  if (dp != NULL)
635  return (dp); /* not */
636  return (backref(m, sp, stop, ss + OPND(s) + 1, stopst, lev, rec));
637  break;
638  case OPLUS_:
639  if (m->lastpos && (lev + 1 <= m->g->nplus)) {
640  m->lastpos[lev + 1] = sp;
641  return (backref(m, sp, stop, ss + 1, stopst, lev + 1, rec));
642  }
643  break;
644  case O_PLUS:
645  if (sp == m->lastpos[lev]) /* last pass matched null */
646  return (backref(m, sp, stop, ss + 1, stopst, lev - 1, rec));
647  /* try another pass */
648  m->lastpos[lev] = sp;
649  dp = backref(m, sp, stop, ss - OPND(s) + 1, stopst, lev, rec);
650  if (!dp)
651  return (backref(m, sp, stop, ss + 1, stopst, lev - 1, rec));
652  else
653  return (dp);
654  break;
655  case OCH_: /* find the right one, if any */
656  ssub = ss + 1;
657  esub = ss + OPND(s) - 1;
658  if (OP(m->g->strip[esub]) != OOR1) {
659  break;
660  }
661  for (;;) { /* find first matching branch */
662  dp = backref(m, sp, stop, ssub, esub, lev, rec);
663  if (dp != NULL)
664  return (dp);
665  /* that one missed, try next one */
666  if (OP(m->g->strip[esub]) == O_CH)
667  return (NULL); /* there is none */
668  esub++;
669  if (OP(m->g->strip[esub]) != OOR2) {
670  break;
671  }
672  ssub = esub + 1;
673  esub += OPND(m->g->strip[esub]);
674  if (OP(m->g->strip[esub]) == OOR2)
675  esub--;
676  else if (OP(m->g->strip[esub]) != O_CH) {
677  break;
678  }
679  }
680  break;
681  case OLPAREN: /* must undo assignment if rest fails */
682  i = OPND(s);
683  if (i > 0 && i <= m->g->nsub) {
684  offsave = m->pmatch[i].rm_so;
685  m->pmatch[i].rm_so = sp - m->offp;
686  dp = backref(m, sp, stop, ss + 1, stopst, lev, rec);
687  if (dp != NULL)
688  return (dp);
689  m->pmatch[i].rm_so = offsave;
690  return (NULL);
691  }
692  break;
693  case ORPAREN: /* must undo assignment if rest fails */
694  i = OPND(s);
695  if (i > 0 && i <= m->g->nsub) {
696  offsave = m->pmatch[i].rm_eo;
697  m->pmatch[i].rm_eo = sp - m->offp;
698  dp = backref(m, sp, stop, ss + 1, stopst, lev, rec);
699  if (dp != NULL)
700  return (dp);
701  m->pmatch[i].rm_eo = offsave;
702  return (NULL);
703  }
704  break;
705  default: /* uh oh */
706  break;
707  }
708 
709  /* NOTREACHED */
710  return NULL;
711 }
712 
713 /*
714  - fast - step through the string at top speed
715  */
716 static char * /* where tentative match ended, or NULL */
717 fast(struct match *m, char *start, char *stop, sopno startst, sopno stopst) {
718  states st = m->st;
719  states fresh = m->fresh;
720  states tmp = m->tmp;
721  char *p = start;
722  int c = (start == m->beginp) ? OUT : *(start - 1);
723  int lastc; /* previous c */
724  int flagch;
725  int i;
726  char *coldp; /* last p after which no match was underway */
727 
728  CLEAR(st);
729  SET1(st, startst);
730  st = step(m->g, startst, stopst, st, NOTHING, st);
731  ASSIGN(fresh, st);
732  SP("start", st, *p);
733  coldp = NULL;
734  for (;;) {
735  /* next character */
736  lastc = c;
737  c = (p == m->endp) ? OUT : *p;
738  if (EQ(st, fresh)) {
739  coldp = p;
740  }
741 
742  /* is there an EOL and/or BOL between lastc and c? */
743  flagch = '\0';
744  i = 0;
745  if ((lastc == '\n' && m->g->cflags & RZ_REGEX_NEWLINE) ||
746  (lastc == OUT && !(m->eflags & RZ_REGEX_NOTBOL))) {
747  flagch = BOL;
748  i = m->g->nbol;
749  }
750  if ((c == '\n' && m->g->cflags & RZ_REGEX_NEWLINE) ||
751  (c == OUT && !(m->eflags & RZ_REGEX_NOTEOL))) {
752  flagch = (flagch == BOL) ? BOLEOL : EOL;
753  i += m->g->neol;
754  }
755  if (i != 0) {
756  for (; i > 0; i--)
757  st = step(m->g, startst, stopst, st, flagch, st);
758  SP("boleol", st, c);
759  }
760 
761  /* how about a word boundary? */
762  if ((flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
763  (c != OUT && ISWORD(c))) {
764  flagch = BOW;
765  }
766  if ((lastc != OUT && ISWORD(lastc)) &&
767  (flagch == EOL || (c != OUT && !ISWORD(c)))) {
768  flagch = EOW;
769  }
770  if (flagch == BOW || flagch == EOW) {
771  st = step(m->g, startst, stopst, st, flagch, st);
772  SP("boweow", st, c);
773  }
774 
775  /* are we done? */
776  if (ISSET(st, stopst) || p == stop)
777  break; /* NOTE BREAK OUT */
778 
779  /* no, we must deal with this character */
780  ASSIGN(tmp, st);
781  ASSIGN(st, fresh);
782  if (c == OUT) {
783  break;
784  }
785  st = step(m->g, startst, stopst, tmp, c, st);
786  SP("aft", st, c);
787  ASSIGN(tmp, st);
788  if (!EQ(step(m->g, startst, stopst, tmp, NOTHING, tmp), st)) {
789  break;
790  }
791  p++;
792  }
793 
794  if (coldp) {
795  m->coldp = coldp;
796  if (ISSET(st, stopst))
797  return (p + 1);
798  }
799  return NULL;
800 }
801 
802 /*
803  - slow - step through the string more deliberately
804  */
805 static char * /* where it ended */
806 slow(struct match *m, char *start, char *stop, sopno startst, sopno stopst) {
807  states st = m->st;
808  states empty = m->empty;
809  states tmp = m->tmp;
810  char *p = start;
811  int c = (start == m->beginp) ? OUT : *(start - 1);
812  int lastc; /* previous c */
813  int flagch;
814  int i;
815  char *matchp; /* last p at which a match ended */
816 
817  AT("slow", start, stop, startst, stopst);
818  CLEAR(st);
819  SET1(st, startst);
820  SP("sstart", st, *p);
821  st = step(m->g, startst, stopst, st, NOTHING, st);
822  matchp = NULL;
823  for (;;) {
824  /* next character */
825  lastc = c;
826  c = (p == m->endp) ? OUT : *p;
827 
828  /* is there an EOL and/or BOL between lastc and c? */
829  flagch = '\0';
830  i = 0;
831  if ((lastc == '\n' && m->g->cflags & RZ_REGEX_NEWLINE) ||
832  (lastc == OUT && !(m->eflags & RZ_REGEX_NOTBOL))) {
833  flagch = BOL;
834  i = m->g->nbol;
835  }
836  if ((c == '\n' && m->g->cflags & RZ_REGEX_NEWLINE) ||
837  (c == OUT && !(m->eflags & RZ_REGEX_NOTEOL))) {
838  flagch = (flagch == BOL) ? BOLEOL : EOL;
839  i += m->g->neol;
840  }
841  if (i != 0) {
842  for (; i > 0; i--)
843  st = step(m->g, startst, stopst, st, flagch, st);
844  SP("sboleol", st, c);
845  }
846 
847  /* how about a word boundary? */
848  if ((flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
849  (c != OUT && ISWORD(c))) {
850  flagch = BOW;
851  }
852  if ((lastc != OUT && ISWORD(lastc)) &&
853  (flagch == EOL || (c != OUT && !ISWORD(c)))) {
854  flagch = EOW;
855  }
856  if (flagch == BOW || flagch == EOW) {
857  st = step(m->g, startst, stopst, st, flagch, st);
858  SP("sboweow", st, c);
859  }
860 
861  /* are we done? */
862  if (ISSET(st, stopst))
863  matchp = p;
864  if (EQ(st, empty) || p == stop)
865  break; /* NOTE BREAK OUT */
866 
867  /* no, we must deal with this character */
868  ASSIGN(tmp, st);
869  ASSIGN(st, empty);
870  if (c == OUT) {
871  break;
872  }
873  st = step(m->g, startst, stopst, tmp, c, st);
874  SP("saft", st, c);
875  if (!EQ(step(m->g, startst, stopst, st, NOTHING, st), st)) {
876  break;
877  }
878  p++;
879  }
880 
881  return (matchp);
882 }
883 
884 /*
885  - step - map set of states reachable before char to set reachable after
886  */
887 static states
888 step(struct re_guts *g,
889  sopno start, /* start state within strip */
890  sopno stop, /* state after stop state within strip */
891  states bef, /* states reachable before */
892  int ch, /* character or NONCHAR code */
893  states aft) /* states already known reachable after */
894 {
895  cset *cs;
896  sop s;
897  sopno pc;
898  onestate here; /* note, macros know this name */
899  sopno look;
900  int i;
901 
902  for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
903  s = g->strip[pc];
904  switch (OP(s)) {
905  case OEND:
906  break;
907  case OCHAR:
908  /* only characters can match */
909  if (!NONCHAR(ch) || ch != (char)OPND(s)) {
910  if (ch == (char)OPND(s))
911  FWD(aft, bef, 1);
912  }
913  break;
914  case OBOL:
915  if (ch == BOL || ch == BOLEOL)
916  FWD(aft, bef, 1);
917  break;
918  case OEOL:
919  if (ch == EOL || ch == BOLEOL)
920  FWD(aft, bef, 1);
921  break;
922  case OBOW:
923  if (ch == BOW)
924  FWD(aft, bef, 1);
925  break;
926  case OEOW:
927  if (ch == EOW)
928  FWD(aft, bef, 1);
929  break;
930  case OANY:
931  if (!NONCHAR(ch))
932  FWD(aft, bef, 1);
933  break;
934  case OANYOF:
935  cs = &g->sets[OPND(s)];
936  if (!NONCHAR(ch) && CHIN(cs, ch))
937  FWD(aft, bef, 1);
938  break;
939  case OBACK_: /* ignored here */
940  case O_BACK:
941  FWD(aft, aft, 1);
942  break;
943  case OPLUS_: /* forward, this is just an empty */
944  FWD(aft, aft, 1);
945  break;
946  case O_PLUS: /* both forward and back */
947  FWD(aft, aft, 1);
948  i = ISSETBACK(aft, OPND(s));
949  BACK(aft, aft, OPND(s));
950  if (!i && ISSETBACK(aft, OPND(s))) {
951  /* oho, must reconsider loop body */
952  pc -= OPND(s) + 1;
953  INIT(here, pc);
954  }
955  break;
956  case OQUEST_: /* two branches, both forward */
957  FWD(aft, aft, 1);
958  FWD(aft, aft, OPND(s));
959  break;
960  case O_QUEST: /* just an empty */
961  FWD(aft, aft, 1);
962  break;
963  case OLPAREN: /* not significant here */
964  case ORPAREN:
965  FWD(aft, aft, 1);
966  break;
967  case OCH_: /* mark the first two branches */
968  FWD(aft, aft, 1);
969  if ((OP(g->strip[pc + OPND(s)]) != OOR2)) {
970  break;
971  }
972  FWD(aft, aft, OPND(s));
973  break;
974  case OOR1: /* done a branch, find the O_CH */
975  if (ISSTATEIN(aft, here)) {
976  for (look = 1;
977  OP(s = g->strip[pc + look]) != O_CH;
978  look += OPND(s)) {
979  if (OP(s) != OOR2) {
980  break;
981  }
982  }
983  FWD(aft, aft, look);
984  }
985  break;
986  case OOR2: /* propagate OCH_'s marking */
987  FWD(aft, aft, 1);
988  if (OP(g->strip[pc + OPND(s)]) != O_CH) {
989  if (OP(g->strip[pc + OPND(s)]) == OOR2) {
990  FWD(aft, aft, OPND(s));
991  }
992  }
993  break;
994  case O_CH: /* just empty */
995  FWD(aft, aft, 1);
996  break;
997  default: /* ooooops... */
998  eprintf("ops in regex.c\n");
999  break;
1000  }
1001  }
1002 
1003  return (aft);
1004 }
1005 
1006 #ifdef REDEBUG
1007 /*
1008  - print - print a set of states
1009  */
1010 static void
1011 print(struct match *m, char *caption, states st, int ch, FILE *d) {
1012  struct re_guts *g = m->g;
1013  int i;
1014  int first = 1;
1015 
1016  if (!(m->eflags & RZ_REGEX_TRACE))
1017  return;
1018 
1019  (void)fprintf(d, "%s", caption);
1020  if (ch != '\0')
1021  (void)fprintf(d, " %s", pchar(ch));
1022  for (i = 0; i < g->nstates; i++)
1023  if (ISSET(st, i)) {
1024  (void)fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
1025  first = 0;
1026  }
1027  (void)fprintf(d, "\n");
1028 }
1029 
1030 /*
1031  - at - print current situation
1032  */
1033 static void
1034 at(struct match *m, char *title, char *start, char *stop, sopno startst,
1035  sopno stopst) {
1036  if (!(m->eflags & RZ_REGEX_TRACE))
1037  return;
1038 
1039  (void)printf("%s %s-", title, pchar(*start));
1040  (void)printf("%s ", pchar(*stop));
1041  (void)printf("%ld-%ld\n", (long)startst, (long)stopst);
1042 }
1043 
1044 #ifndef PCHARDONE
1045 #define PCHARDONE /* never again */
1046 /*
1047  - pchar - make a character printable
1048  *
1049  * Is this identical to regchar() over in debug.c? Well, yes. But a
1050  * duplicate here avoids having a debugging-capable regexec.o tied to
1051  * a matching debug.o, and this is convenient. It all disappears in
1052  * the non-debug compilation anyway, so it doesn't matter much.
1053  */
1054 static char * /* -> representation */
1055 pchar(int ch) {
1056  static char pbuf[10];
1057 
1058  if (isprint((ut8)ch) || ch == ' ')
1059  (void)snprintf(pbuf, sizeof pbuf, "%c", ch);
1060  else
1061  (void)snprintf(pbuf, sizeof pbuf, "\\%o", ch);
1062  return (pbuf);
1063 }
1064 #endif
1065 #endif
1066 
1067 #undef matcher
1068 #undef fast
1069 #undef slow
1070 #undef dissect
1071 #undef backref
1072 #undef step
1073 #undef print
1074 #undef at
1075 #undef match
1076 #undef nope
size_t len
Definition: 6502dis.c:15
#define OPND(x)
Definition: aarch64-tbl.h:33
lzma_index ** i
Definition: index.h:629
static ut32 stp(ArmOp *op, int k)
Definition: armass64.c:904
#define NULL
Definition: cris-opc.c:27
_Use_decl_annotations_ int __cdecl printf(const char *const _Format,...)
Definition: cs_driver.c:93
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void static offset struct stat static buf void long static basep static whence static length const void static len static semflg const void static shmflg const struct timespec struct timespec static rem const char static group const void start
Definition: sflib.h:133
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags long
Definition: sflib.h:79
#define BOLEOL
Definition: engine.c:96
#define EOW
Definition: engine.c:99
static states step(struct re_guts *, sopno, sopno, states, int, states)
Definition: engine.c:888
static char * dissect(struct match *, char *, char *, sopno, sopno)
Definition: engine.c:318
#define BOL
Definition: engine.c:94
static char * slow(struct match *, char *, char *, sopno, sopno)
Definition: engine.c:806
#define MAX_RECURSION
Definition: engine.c:93
#define EOL
Definition: engine.c:95
static int matcher(struct re_guts *, char *, size_t, RzRegexMatch[], int)
Definition: engine.c:132
static char * fast(struct match *, char *, char *, sopno, sopno)
Definition: engine.c:717
static char * backref(struct match *, char *, char *, sopno, sopno, sopno, int)
Definition: engine.c:511
#define BOW
Definition: engine.c:98
#define NOTE(s)
Definition: engine.c:125
#define NOTHING
Definition: engine.c:97
#define NONCHAR(c)
Definition: engine.c:101
#define SP(t, s, c)
Definition: engine.c:123
#define AT(t, p1, p2, s1, s2)
Definition: engine.c:124
struct @667 g
#define OP(v, w, x, y, z)
RZ_API void Ht_() free(HtName_(Ht) *ht)
Definition: ht_inc.c:130
snprintf
Definition: kernel.h:364
uint8_t ut8
Definition: lh5801.h:11
void * p
Definition: libc.cpp:67
void * malloc(size_t size)
Definition: malloc.c:123
string FILE
Definition: benchmark.py:21
#define INC
unsigned long sop
Definition: regex2.h:62
long sopno
Definition: regex2.h:63
#define O_CH
Definition: regex2.h:89
#define OBOL
Definition: regex2.h:74
#define OCH_
Definition: regex2.h:86
#define OOR2
Definition: regex2.h:88
#define OEND
Definition: regex2.h:72
#define OCHAR
Definition: regex2.h:73
#define OQUEST_
Definition: regex2.h:82
#define OEOL
Definition: regex2.h:75
#define OLPAREN
Definition: regex2.h:84
#define OBOW
Definition: regex2.h:90
#define OPLUS_
Definition: regex2.h:80
#define O_QUEST
Definition: regex2.h:83
#define CHIN(cs, c)
Definition: regex2.h:115
#define OANYOF
Definition: regex2.h:77
#define O_PLUS
Definition: regex2.h:81
#define SOP(op, opnd)
Definition: regex2.h:69
#define O_BACK
Definition: regex2.h:79
#define OBACK_
Definition: regex2.h:78
#define OEOW
Definition: regex2.h:91
#define ORPAREN
Definition: regex2.h:85
#define ISWORD(c)
Definition: regex2.h:158
#define OUT
Definition: regex2.h:157
#define OOR1
Definition: regex2.h:87
#define OANY
Definition: regex2.h:76
#define ASSIGN(d, s)
Definition: regexec.c:109
#define onestate
Definition: regexec.c:124
#define CLEAR(v)
Definition: regexec.c:105
#define ISSTATEIN(v, o)
Definition: regexec.c:127
#define SETUP(v)
Definition: regexec.c:123
#define BACK(dst, src, n)
Definition: regexec.c:131
#define STATESETUP(m, n)
Definition: regexec.c:114
#define FWD(dst, src, n)
Definition: regexec.c:130
#define SET1(v, n)
Definition: regexec.c:107
#define ISSETBACK(v, n)
Definition: regexec.c:132
#define STATETEARDOWN(m)
Definition: regexec.c:121
#define states
Definition: regexec.c:104
#define INIT(o, n)
Definition: regexec.c:125
#define ISSET(v, n)
Definition: regexec.c:108
#define eprintf(x, y...)
Definition: rlcc.c:7
static RzSocket * s
Definition: rtr.c:28
#define EQ(x, y)
#define RZ_REGEX_ESPACE
Definition: rz_regex.h:44
#define RZ_REGEX_STARTEND
Definition: rz_regex.h:56
struct rz_regmatch_t RzRegexMatch
#define RZ_REGEX_NEWLINE
Definition: rz_regex.h:26
#define RZ_REGEX_TRACE
Definition: rz_regex.h:57
#define RZ_REGEX_NOTEOL
Definition: rz_regex.h:55
#define RZ_REGEX_NOTBOL
Definition: rz_regex.h:54
#define RZ_REGEX_NOSUB
Definition: rz_regex.h:25
#define RZ_REGEX_BACKR
Definition: rz_regex.h:59
#define RZ_REGEX_NOMATCH
Definition: rz_regex.h:33
#define RZ_REGEX_INVARG
Definition: rz_regex.h:48
#define isprint(c)
Definition: safe-ctype.h:137
#define d(i)
Definition: sha256.c:44
#define c(i)
Definition: sha256.c:43
Definition: regex2.h:105
Definition: engine.c:71
RzRegexMatch * pmatch
Definition: engine.c:74
STATEVARS
Definition: engine.c:80
char * offp
Definition: engine.c:75
states fresh
Definition: engine.c:82
char ** lastpos
Definition: engine.c:79
states empty
Definition: engine.c:84
states st
Definition: engine.c:81
char * endp
Definition: engine.c:77
states tmp
Definition: engine.c:83
struct re_guts * g
Definition: engine.c:72
char * beginp
Definition: engine.c:76
char * coldp
Definition: engine.c:78
int eflags
Definition: engine.c:73
st64 rm_so
Definition: rz_regex.h:17
st64 rm_eo
Definition: rz_regex.h:18
if(dbg->bits==RZ_SYS_BITS_64)
Definition: windows-arm64.h:4
ut64(WINAPI *w32_GetEnabledXStateFeatures)()
static int sp
Definition: z80asm.c:91