Rizin
unix-like reverse engineering framework and cli tools
engine.c File Reference

Go to the source code of this file.

Classes

struct  match
 

Macros

#define MAX_RECURSION   100
 
#define BOL   (OUT + 1)
 
#define EOL   (BOL + 1)
 
#define BOLEOL   (BOL + 2)
 
#define NOTHING   (BOL + 3)
 
#define BOW   (BOL + 4)
 
#define EOW   (BOL + 5)
 
#define CODEMAX   (BOL + 5) /* highest code used */
 
#define NONCHAR(c)   ((c) > OUT)
 
#define NNONCHAR   (CODEMAX - OUT)
 
#define SP(t, s, c)   /* nothing */
 
#define AT(t, p1, p2, s1, s2)   /* nothing */
 
#define NOTE(s)   /* nothing */
 

Functions

static int matcher (struct re_guts *, char *, size_t, RzRegexMatch[], int)
 
static char * dissect (struct match *, char *, char *, sopno, sopno)
 
static char * backref (struct match *, char *, char *, sopno, sopno, sopno, int)
 
static char * fast (struct match *, char *, char *, sopno, sopno)
 
static char * slow (struct match *, char *, char *, sopno, sopno)
 
static states step (struct re_guts *, sopno, sopno, states, int, states)
 

Macro Definition Documentation

◆ AT

#define AT (   t,
  p1,
  p2,
  s1,
  s2 
)    /* nothing */

Definition at line 124 of file engine.c.

◆ BOL

#define BOL   (OUT + 1)

Definition at line 94 of file engine.c.

◆ BOLEOL

#define BOLEOL   (BOL + 2)

Definition at line 96 of file engine.c.

◆ BOW

#define BOW   (BOL + 4)

Definition at line 98 of file engine.c.

◆ CODEMAX

#define CODEMAX   (BOL + 5) /* highest code used */

Definition at line 100 of file engine.c.

◆ EOL

#define EOL   (BOL + 1)

Definition at line 95 of file engine.c.

◆ EOW

#define EOW   (BOL + 5)

Definition at line 99 of file engine.c.

◆ MAX_RECURSION

#define MAX_RECURSION   100

Definition at line 93 of file engine.c.

◆ NNONCHAR

#define NNONCHAR   (CODEMAX - OUT)

Definition at line 102 of file engine.c.

◆ NONCHAR

#define NONCHAR (   c)    ((c) > OUT)

Definition at line 101 of file engine.c.

◆ NOTE

#define NOTE (   s)    /* nothing */

Definition at line 125 of file engine.c.

◆ NOTHING

#define NOTHING   (BOL + 3)

Definition at line 97 of file engine.c.

◆ SP

#define SP (   t,
  s,
  c 
)    /* nothing */

Definition at line 123 of file engine.c.

Function Documentation

◆ backref()

static char * backref ( struct match * m,
char *  start,
char *  stop,
sopno  startst,
sopno  stopst,
sopno  lev,
int  rec 
)
static

Definition at line 511 of file engine.c.

513 {
514  int i;
515  sopno ss; /* start sop of current subRE */
516  char *sp; /* start of string matched by it */
517  sopno ssub; /* start sop of subsubRE */
518  sopno esub; /* end sop of subsubRE */
519  char *ssp; /* start of string matched by subsubRE */
520  char *dp;
521  size_t len;
522  int hard;
523  sop s;
524  ut64 offsave;
525  cset *cs;
526 
527  AT("back", start, stop, startst, stopst);
528  sp = start;
529 
530  /* get as far as we can with easy stuff */
531  hard = 0;
532  for (ss = startst; !hard && ss < stopst; ss++)
533  switch (OP(s = m->g->strip[ss])) {
534  case OCHAR:
535  if (sp == stop || *sp++ != (char)OPND(s))
536  return (NULL);
537  break;
538  case OANY:
539  if (sp == stop)
540  return (NULL);
541  sp++;
542  break;
543  case OANYOF:
544  cs = &m->g->sets[OPND(s)];
545  if (sp == stop || !CHIN(cs, *sp++))
546  return (NULL);
547  break;
548  case OBOL:
549  if ((sp == m->beginp && !(m->eflags & RZ_REGEX_NOTBOL)) ||
550  (sp < m->endp && *(sp - 1) == '\n' &&
551  (m->g->cflags & RZ_REGEX_NEWLINE))) { /* yes */
552  } else
553  return (NULL);
554  break;
555  case OEOL:
556  if ((sp == m->endp && !(m->eflags & RZ_REGEX_NOTEOL)) ||
557  (sp < m->endp && *sp == '\n' &&
558  (m->g->cflags & RZ_REGEX_NEWLINE))) { /* yes */
559  } else
560  return (NULL);
561  break;
562  case OBOW:
563  if (((sp == m->beginp && !(m->eflags & RZ_REGEX_NOTBOL)) ||
564  (sp < m->endp && *(sp - 1) == '\n' &&
565  (m->g->cflags & RZ_REGEX_NEWLINE)) ||
566  (sp > m->beginp &&
567  !ISWORD((unsigned char)*(sp - 1)))) &&
568  (sp < m->endp && ISWORD((unsigned char)*sp))) { /* yes */
569  } else
570  return (NULL);
571  break;
572  case OEOW:
573  if (((sp == m->endp && !(m->eflags & RZ_REGEX_NOTEOL)) ||
574  (sp < m->endp && *sp == '\n' &&
575  (m->g->cflags & RZ_REGEX_NEWLINE)) ||
576  (sp < m->endp && !ISWORD((unsigned char)*sp))) &&
577  (sp > m->beginp && ISWORD((unsigned char)*(sp - 1)))) { /* yes */
578  } else
579  return (NULL);
580  break;
581  case O_QUEST:
582  break;
583  case OOR1: /* matches null but needs to skip */
584  ss++;
585  s = m->g->strip[ss];
586  do {
587  if (OP(s) == OOR2) {
588  ss += OPND(s);
589  }
590  } while (OP(s = m->g->strip[ss]) != O_CH);
591  /* note that the ss++ gets us past the O_CH */
592  break;
593  default: /* have to make a choice */
594  hard = 1;
595  break;
596  }
597  if (!hard) { /* that was it! */
598  if (sp != stop)
599  return (NULL);
600  return (sp);
601  }
602  ss--; /* adjust for the for's final increment */
603 
604  /* the hard stuff */
605  AT("hard", sp, stop, ss, stopst);
606  s = m->g->strip[ss];
607  switch (OP(s)) {
608  case OBACK_: /* the vilest depths */
609  i = OPND(s);
610  if (i > 0 && i <= m->g->nsub) {
611  if (m->pmatch[i].rm_eo == -1) {
612  return NULL;
613  }
614  }
615  if (m->pmatch[i].rm_so != -1) {
616  len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
617  if (len == 0 && rec++ > MAX_RECURSION)
618  return (NULL);
619  if (stop - m->beginp >= len) {
620  if (sp > stop - len) {
621  return (NULL); /* not enough left to match */
622  }
623  }
624  ssp = m->offp + m->pmatch[i].rm_so;
625  if (memcmp(sp, ssp, len) != 0)
626  return (NULL);
627  while (m->g->strip[ss] != SOP(O_BACK, i))
628  ss++;
629  return (backref(m, sp + len, stop, ss + 1, stopst, lev, rec));
630  }
631  break;
632  case OQUEST_: /* to null or not */
633  dp = backref(m, sp, stop, ss + 1, stopst, lev, rec);
634  if (dp != NULL)
635  return (dp); /* not */
636  return (backref(m, sp, stop, ss + OPND(s) + 1, stopst, lev, rec));
637  break;
638  case OPLUS_:
639  if (m->lastpos && (lev + 1 <= m->g->nplus)) {
640  m->lastpos[lev + 1] = sp;
641  return (backref(m, sp, stop, ss + 1, stopst, lev + 1, rec));
642  }
643  break;
644  case O_PLUS:
645  if (sp == m->lastpos[lev]) /* last pass matched null */
646  return (backref(m, sp, stop, ss + 1, stopst, lev - 1, rec));
647  /* try another pass */
648  m->lastpos[lev] = sp;
649  dp = backref(m, sp, stop, ss - OPND(s) + 1, stopst, lev, rec);
650  if (!dp)
651  return (backref(m, sp, stop, ss + 1, stopst, lev - 1, rec));
652  else
653  return (dp);
654  break;
655  case OCH_: /* find the right one, if any */
656  ssub = ss + 1;
657  esub = ss + OPND(s) - 1;
658  if (OP(m->g->strip[esub]) != OOR1) {
659  break;
660  }
661  for (;;) { /* find first matching branch */
662  dp = backref(m, sp, stop, ssub, esub, lev, rec);
663  if (dp != NULL)
664  return (dp);
665  /* that one missed, try next one */
666  if (OP(m->g->strip[esub]) == O_CH)
667  return (NULL); /* there is none */
668  esub++;
669  if (OP(m->g->strip[esub]) != OOR2) {
670  break;
671  }
672  ssub = esub + 1;
673  esub += OPND(m->g->strip[esub]);
674  if (OP(m->g->strip[esub]) == OOR2)
675  esub--;
676  else if (OP(m->g->strip[esub]) != O_CH) {
677  break;
678  }
679  }
680  break;
681  case OLPAREN: /* must undo assignment if rest fails */
682  i = OPND(s);
683  if (i > 0 && i <= m->g->nsub) {
684  offsave = m->pmatch[i].rm_so;
685  m->pmatch[i].rm_so = sp - m->offp;
686  dp = backref(m, sp, stop, ss + 1, stopst, lev, rec);
687  if (dp != NULL)
688  return (dp);
689  m->pmatch[i].rm_so = offsave;
690  return (NULL);
691  }
692  break;
693  case ORPAREN: /* must undo assignment if rest fails */
694  i = OPND(s);
695  if (i > 0 && i <= m->g->nsub) {
696  offsave = m->pmatch[i].rm_eo;
697  m->pmatch[i].rm_eo = sp - m->offp;
698  dp = backref(m, sp, stop, ss + 1, stopst, lev, rec);
699  if (dp != NULL)
700  return (dp);
701  m->pmatch[i].rm_eo = offsave;
702  return (NULL);
703  }
704  break;
705  default: /* uh oh */
706  break;
707  }
708 
709  /* NOTREACHED */
710  return NULL;
711 }
size_t len
Definition: 6502dis.c:15
#define OPND(x)
Definition: aarch64-tbl.h:33
lzma_index ** i
Definition: index.h:629
#define NULL
Definition: cris-opc.c:27
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void static offset struct stat static buf void long static basep static whence static length const void static len static semflg const void static shmflg const struct timespec struct timespec static rem const char static group const void start
Definition: sflib.h:133
#define MAX_RECURSION
Definition: engine.c:93
static char * backref(struct match *, char *, char *, sopno, sopno, sopno, int)
Definition: engine.c:511
#define AT(t, p1, p2, s1, s2)
Definition: engine.c:124
struct @667 g
#define OP(v, w, x, y, z)
unsigned long sop
Definition: regex2.h:62
long sopno
Definition: regex2.h:63
#define O_CH
Definition: regex2.h:89
#define OBOL
Definition: regex2.h:74
#define OCH_
Definition: regex2.h:86
#define OOR2
Definition: regex2.h:88
#define OCHAR
Definition: regex2.h:73
#define OQUEST_
Definition: regex2.h:82
#define OEOL
Definition: regex2.h:75
#define OLPAREN
Definition: regex2.h:84
#define OBOW
Definition: regex2.h:90
#define OPLUS_
Definition: regex2.h:80
#define O_QUEST
Definition: regex2.h:83
#define CHIN(cs, c)
Definition: regex2.h:115
#define OANYOF
Definition: regex2.h:77
#define O_PLUS
Definition: regex2.h:81
#define SOP(op, opnd)
Definition: regex2.h:69
#define O_BACK
Definition: regex2.h:79
#define OBACK_
Definition: regex2.h:78
#define OEOW
Definition: regex2.h:91
#define ORPAREN
Definition: regex2.h:85
#define ISWORD(c)
Definition: regex2.h:158
#define OOR1
Definition: regex2.h:87
#define OANY
Definition: regex2.h:76
static RzSocket * s
Definition: rtr.c:28
#define RZ_REGEX_NEWLINE
Definition: rz_regex.h:26
#define RZ_REGEX_NOTEOL
Definition: rz_regex.h:55
#define RZ_REGEX_NOTBOL
Definition: rz_regex.h:54
Definition: regex2.h:105
ut64(WINAPI *w32_GetEnabledXStateFeatures)()
static int sp
Definition: z80asm.c:91

References AT, CHIN, test_evm::cs, match::endp, g, i, ISWORD, len, regress::m, MAX_RECURSION, NULL, O_BACK, O_CH, O_PLUS, O_QUEST, OANY, OANYOF, OBACK_, OBOL, OBOW, OCH_, OCHAR, OEOL, OEOW, OLPAREN, OOR1, OOR2, OP, OPLUS_, OPND, OQUEST_, ORPAREN, RZ_REGEX_NEWLINE, RZ_REGEX_NOTBOL, RZ_REGEX_NOTEOL, s, SOP, sp, start, and ut64().

Referenced by matcher().

◆ dissect()

static char * dissect ( struct match * m,
char *  start,
char *  stop,
sopno  startst,
sopno  stopst 
)
static

Definition at line 318 of file engine.c.

318  {
319  int i;
320  sopno ss; /* start sop of current subRE */
321  sopno es; /* end sop of current subRE */
322  char *sp; /* start of string matched by it */
323  char *stp; /* string matched by it cannot pass here */
324  char *rest; /* start of rest of string */
325  char *tail; /* string unmatched by rest of RE */
326  sopno ssub; /* start sop of subsubRE */
327  sopno esub; /* end sop of subsubRE */
328  char *ssp; /* start of string matched by subsubRE */
329  char *sep; /* end of string matched by subsubRE */
330  char *oldssp; /* previous ssp */
331  char *dp;
332 
333  AT("diss", start, stop, startst, stopst);
334  sp = start;
335  for (ss = startst; ss < stopst; ss = es) {
336  /* identify end of subRE */
337  es = ss;
338  switch (OP(m->g->strip[es])) {
339  case OPLUS_:
340  case OQUEST_:
341  es += OPND(m->g->strip[es]);
342  break;
343  case OCH_:
344  while (OP(m->g->strip[es]) != O_CH)
345  es += OPND(m->g->strip[es]);
346  break;
347  }
348  es++;
349 
350  /* figure out what it matched */
351  switch (OP(m->g->strip[ss])) {
352  case OEND:
353  break;
354  case OCHAR:
355  sp++;
356  break;
357  case OBOL:
358  case OEOL:
359  case OBOW:
360  case OEOW:
361  break;
362  case OANY:
363  case OANYOF:
364  sp++;
365  break;
366  case OBACK_:
367  case O_BACK:
368  break;
369  /* cases where length of match is hard to find */
370  case OQUEST_:
371  stp = stop;
372  for (;;) {
373  /* how long could this one be? */
374  rest = slow(m, sp, stp, ss, es);
375  if (rest) { /* it did match */
376  /* could the rest match the rest? */
377  tail = slow(m, rest, stop, es, stopst);
378  if (tail == stop)
379  break; /* yes! */
380  /* no -- try a shorter match for this one */
381  stp = rest - 1;
382  }
383  }
384  ssub = ss + 1;
385  esub = es - 1;
386  /* did innards match? */
387  if (slow(m, sp, rest, ssub, esub) != NULL) {
388  dp = dissect(m, sp, rest, ssub, esub);
389  if (dp != rest)
390  return NULL;
391  } else if (sp != rest)
392  return NULL;
393  sp = rest;
394  break;
395  case OPLUS_:
396  stp = stop;
397  for (;;) {
398  /* how long could this one be? */
399  rest = slow(m, sp, stp, ss, es);
400  if (rest != NULL) { /* it did match */
401  /* could the rest match the rest? */
402  tail = slow(m, rest, stop, es, stopst);
403  if (tail == stop)
404  break; /* yes! */
405  /* no -- try a shorter match for this one */
406  stp = rest - 1;
407  }
408  }
409  ssub = ss + 1;
410  esub = es - 1;
411  ssp = sp;
412  oldssp = ssp;
413  for (;;) { /* find last match of innards */
414  sep = slow(m, ssp, rest, ssub, esub);
415  if (!sep || sep == ssp)
416  break; /* failed or matched null */
417  oldssp = ssp; /* on to next try */
418  ssp = sep;
419  }
420  if (!sep) {
421  /* last successful match */
422  sep = ssp;
423  ssp = oldssp;
424  }
425  if (sep == rest) { /* must exhaust substring */
426  if (slow(m, ssp, sep, ssub, esub) == rest) {
427  dp = dissect(m, ssp, sep, ssub, esub);
428  if (dp == sep) {
429  sp = rest;
430  }
431  }
432  }
433  break;
434  case OCH_:
435  stp = stop;
436  for (;;) {
437  /* how long could this one be? */
438  rest = slow(m, sp, stp, ss, es);
439  if (rest) { /* it did match */
440  /* could the rest match the rest? */
441  tail = slow(m, rest, stop, es, stopst);
442  if (tail == stop)
443  break; /* yes! */
444  /* no -- try a shorter match for this one */
445  stp = rest - 1;
446  }
447  }
448  ssub = ss + 1;
449  esub = ss + OPND(m->g->strip[ss]) - 1;
450  if (OP(m->g->strip[esub]) != OOR1) {
451  break;
452  }
453  for (;;) { /* find first matching branch */
454  if (slow(m, sp, rest, ssub, esub) == rest)
455  break; /* it matched all of it */
456  /* that one missed, try next one */
457  if (OP(m->g->strip[esub]) == OOR1) {
458  esub++;
459  if (OP(m->g->strip[esub]) == OOR2) {
460  ssub = esub + 1;
461  esub += OPND(m->g->strip[esub]);
462  if (OP(m->g->strip[esub]) == OOR2) {
463  esub--;
464  } else {
465  if (OP(m->g->strip[esub]) != O_CH) {
466  break;
467  }
468  }
469  }
470  }
471  }
472  dp = dissect(m, sp, rest, ssub, esub);
473  if (dp == rest) {
474  sp = rest;
475  }
476  break;
477  case O_PLUS:
478  case O_QUEST:
479  case OOR1:
480  case OOR2:
481  case O_CH:
482  break;
483  case OLPAREN:
484  i = OPND(m->g->strip[ss]);
485  if (i > 0 && i <= m->g->nsub) {
486  m->pmatch[i].rm_so = sp - m->offp;
487  }
488  break;
489  case ORPAREN:
490  i = OPND(m->g->strip[ss]);
491  if (i > 0 && i <= m->g->nsub) {
492  m->pmatch[i].rm_eo = sp - m->offp;
493  }
494  break;
495  default: /* uh oh */
496  break;
497  }
498  }
499 
500  if (sp == stop) {
501  return sp;
502  } else {
503  return NULL;
504  }
505 }
static ut32 stp(ArmOp *op, int k)
Definition: armass64.c:904
static char * dissect(struct match *, char *, char *, sopno, sopno)
Definition: engine.c:318
static char * slow(struct match *, char *, char *, sopno, sopno)
Definition: engine.c:806
#define OEND
Definition: regex2.h:72

References AT, g, i, regress::m, NULL, O_BACK, O_CH, O_PLUS, O_QUEST, OANY, OANYOF, OBACK_, OBOL, OBOW, OCH_, OCHAR, OEND, OEOL, OEOW, OLPAREN, OOR1, OOR2, OP, OPLUS_, OPND, OQUEST_, ORPAREN, slow(), sp, start, and stp().

Referenced by matcher().

◆ fast()

static char * fast ( struct match * m,
char *  start,
char *  stop,
sopno  startst,
sopno  stopst 
)
static

Definition at line 717 of file engine.c.

717  {
718  states st = m->st;
719  states fresh = m->fresh;
720  states tmp = m->tmp;
721  char *p = start;
722  int c = (start == m->beginp) ? OUT : *(start - 1);
723  int lastc; /* previous c */
724  int flagch;
725  int i;
726  char *coldp; /* last p after which no match was underway */
727 
728  CLEAR(st);
729  SET1(st, startst);
730  st = step(m->g, startst, stopst, st, NOTHING, st);
731  ASSIGN(fresh, st);
732  SP("start", st, *p);
733  coldp = NULL;
734  for (;;) {
735  /* next character */
736  lastc = c;
737  c = (p == m->endp) ? OUT : *p;
738  if (EQ(st, fresh)) {
739  coldp = p;
740  }
741 
742  /* is there an EOL and/or BOL between lastc and c? */
743  flagch = '\0';
744  i = 0;
745  if ((lastc == '\n' && m->g->cflags & RZ_REGEX_NEWLINE) ||
746  (lastc == OUT && !(m->eflags & RZ_REGEX_NOTBOL))) {
747  flagch = BOL;
748  i = m->g->nbol;
749  }
750  if ((c == '\n' && m->g->cflags & RZ_REGEX_NEWLINE) ||
751  (c == OUT && !(m->eflags & RZ_REGEX_NOTEOL))) {
752  flagch = (flagch == BOL) ? BOLEOL : EOL;
753  i += m->g->neol;
754  }
755  if (i != 0) {
756  for (; i > 0; i--)
757  st = step(m->g, startst, stopst, st, flagch, st);
758  SP("boleol", st, c);
759  }
760 
761  /* how about a word boundary? */
762  if ((flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
763  (c != OUT && ISWORD(c))) {
764  flagch = BOW;
765  }
766  if ((lastc != OUT && ISWORD(lastc)) &&
767  (flagch == EOL || (c != OUT && !ISWORD(c)))) {
768  flagch = EOW;
769  }
770  if (flagch == BOW || flagch == EOW) {
771  st = step(m->g, startst, stopst, st, flagch, st);
772  SP("boweow", st, c);
773  }
774 
775  /* are we done? */
776  if (ISSET(st, stopst) || p == stop)
777  break; /* NOTE BREAK OUT */
778 
779  /* no, we must deal with this character */
780  ASSIGN(tmp, st);
781  ASSIGN(st, fresh);
782  if (c == OUT) {
783  break;
784  }
785  st = step(m->g, startst, stopst, tmp, c, st);
786  SP("aft", st, c);
787  ASSIGN(tmp, st);
788  if (!EQ(step(m->g, startst, stopst, tmp, NOTHING, tmp), st)) {
789  break;
790  }
791  p++;
792  }
793 
794  if (coldp) {
795  m->coldp = coldp;
796  if (ISSET(st, stopst))
797  return (p + 1);
798  }
799  return NULL;
800 }
#define BOLEOL
Definition: engine.c:96
#define EOW
Definition: engine.c:99
static states step(struct re_guts *, sopno, sopno, states, int, states)
Definition: engine.c:888
#define BOL
Definition: engine.c:94
#define EOL
Definition: engine.c:95
#define BOW
Definition: engine.c:98
#define NOTHING
Definition: engine.c:97
#define SP(t, s, c)
Definition: engine.c:123
void * p
Definition: libc.cpp:67
#define OUT
Definition: regex2.h:157
#define ASSIGN(d, s)
Definition: regexec.c:109
#define CLEAR(v)
Definition: regexec.c:105
#define SET1(v, n)
Definition: regexec.c:107
#define states
Definition: regexec.c:104
#define ISSET(v, n)
Definition: regexec.c:108
#define EQ(x, y)
#define c(i)
Definition: sha256.c:43

References ASSIGN, BOL, BOLEOL, BOW, c, CLEAR, match::coldp, EOL, EOW, EQ, match::fresh, i, ISSET, ISWORD, regress::m, NOTHING, NULL, OUT, p, RZ_REGEX_NEWLINE, RZ_REGEX_NOTBOL, RZ_REGEX_NOTEOL, SET1, SP, match::st, start, states, step(), and autogen_x86imm::tmp.

Referenced by _r_list_half_split(), _sdb_list_split(), and matcher().

◆ matcher()

static int matcher ( struct re_guts * g,
char *  string,
size_t  nmatch,
RzRegexMatch  pmatch[],
int  eflags 
)
static

Definition at line 132 of file engine.c.

133  {
134  char *endp;
135  int i;
136  struct match mv;
137  struct match *m = &mv;
138  char *dp;
139  const sopno gf = g->firststate + 1; /* +1 for OEND */
140  const sopno gl = g->laststate;
141  char *start;
142  char *stop;
143 
144  /* simplify the situation where possible */
145  if (g->cflags & RZ_REGEX_NOSUB)
146  nmatch = 0;
147  if (eflags & RZ_REGEX_STARTEND) {
148  start = string + pmatch[0].rm_so;
149  stop = string + pmatch[0].rm_eo;
150  } else {
151  start = string;
152  stop = start + strlen(start);
153  }
154  if (stop < start)
155  return (RZ_REGEX_INVARG);
156 
157  /* prescreening; this does wonders for this rather slow code */
158  if (g->must != NULL) {
159  for (dp = start; dp < stop; dp++)
160  if (*dp == g->must[0] && stop - dp >= g->mlen &&
161  memcmp(dp, g->must, (size_t)g->mlen) == 0)
162  break;
163  if (dp == stop) /* we didn't find g->must */
164  return (RZ_REGEX_NOMATCH);
165  }
166 
167  /* match struct setup */
168  m->g = g;
169  m->eflags = eflags;
170  m->pmatch = NULL;
171  m->lastpos = NULL;
172  m->offp = string;
173  m->beginp = start;
174  m->endp = stop;
175 
176  if (m->g->nstates * 4 < m->g->nstates)
177  return RZ_REGEX_NOMATCH;
178  STATESETUP(m, 4);
179  SETUP(m->st);
180  SETUP(m->fresh);
181  SETUP(m->tmp);
182  SETUP(m->empty);
183  CLEAR(m->empty);
184 
185  /* this loop does only one repetition except for backrefs */
186  for (;;) {
187  endp = fast(m, start, stop, gf, gl);
188  if (!endp) { /* a miss */
189  free(m->pmatch);
190  free(m->lastpos);
191  STATETEARDOWN(m);
192  return (RZ_REGEX_NOMATCH);
193  }
194  if (nmatch == 0 && !g->backrefs)
195  break; /* no further info needed */
196 
197  /* where? */
198  if (!m->coldp) {
199  break;
200  }
201  for (;;) {
202  NOTE("finding start");
203  endp = slow(m, m->coldp, stop, gf, gl);
204  if (endp || m->coldp > m->endp) {
205  break;
206  }
207  m->coldp++;
208  }
209  if (nmatch == 1 && !g->backrefs)
210  break; /* no further info needed */
211 
212  /* oh my, he wants the subexpressions... */
213  if (!m->pmatch) {
214  if ((m->g->nsub + 1) * sizeof(RzRegexMatch) < m->g->nsub) {
215  return RZ_REGEX_ESPACE;
216  }
217  m->pmatch = (RzRegexMatch *)malloc((m->g->nsub + 1) *
218  sizeof(RzRegexMatch));
219  }
220  if (!m->pmatch) {
221  STATETEARDOWN(m);
222  return (RZ_REGEX_ESPACE);
223  }
224  for (i = 1; i <= m->g->nsub; i++)
225  m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
226  if (!g->backrefs && !(m->eflags & RZ_REGEX_BACKR)) {
227  NOTE("dissecting");
228  dp = dissect(m, m->coldp, endp, gf, gl);
229  } else {
230  if (g->nplus > 0 && !m->lastpos) {
231  if ((g->nplus + 1) * sizeof(char *) < g->nplus) {
232  free(m->pmatch);
233  STATETEARDOWN(m);
234  return RZ_REGEX_ESPACE;
235  }
236  m->lastpos = (char **)malloc((g->nplus + 1) *
237  sizeof(char *));
238  }
239  if (g->nplus > 0 && !m->lastpos) {
240  free(m->pmatch);
241  STATETEARDOWN(m);
242  return (RZ_REGEX_ESPACE);
243  }
244  NOTE("backref dissect");
245  dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
246  }
247  if (dp) {
248  break;
249  }
250  /* uh-oh... we couldn't find a subexpression-level match */
251  if (!g->backrefs) { /* must be back references doing it */
252  break;
253  }
254  if (g->nplus || !m->lastpos) {
255  break;
256  }
257  for (;;) {
258  if (dp != NULL || endp <= m->coldp)
259  break; /* defeat */
260  NOTE("backoff");
261  endp = slow(m, m->coldp, endp - 1, gf, gl);
262  if (!endp)
263  break; /* defeat */
264  /* try it on a shorter possibility */
265 #ifndef NDEBUG
266  for (i = 1; i <= m->g->nsub; i++) {
267  if (m->pmatch[i].rm_so != -1) {
268  break;
269  }
270  if (m->pmatch[i].rm_eo != -1) {
271  break;
272  }
273  }
274 #endif
275  NOTE("backoff dissect");
276  dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
277  }
278  if (dp != NULL || dp != endp) /* found a shorter one */
279  break;
280 
281  /* despite initial appearances, there is no match here */
282  NOTE("false alarm");
283  if (m->coldp == stop)
284  break;
285  start = m->coldp + 1; /* recycle starting later */
286  }
287 
288  /* fill in the details if requested */
289  if (nmatch > 0) {
290  pmatch[0].rm_so = m->coldp - m->offp;
291  pmatch[0].rm_eo = endp - m->offp;
292  }
293  if (nmatch > 1) {
294  if (m->pmatch) {
295  for (i = 1; i < nmatch; i++) {
296  if (i <= m->g->nsub) {
297  pmatch[i] = m->pmatch[i];
298  } else {
299  pmatch[i].rm_so = -1;
300  pmatch[i].rm_eo = -1;
301  }
302  }
303  }
304  }
305 
306  if (m->pmatch != NULL)
307  free((char *)m->pmatch);
308  if (m->lastpos != NULL)
309  free((char *)m->lastpos);
310  STATETEARDOWN(m);
311  return (0);
312 }
static char * fast(struct match *, char *, char *, sopno, sopno)
Definition: engine.c:717
#define NOTE(s)
Definition: engine.c:125
RZ_API void Ht_() free(HtName_(Ht) *ht)
Definition: ht_inc.c:130
void * malloc(size_t size)
Definition: malloc.c:123
#define SETUP(v)
Definition: regexec.c:123
#define STATESETUP(m, n)
Definition: regexec.c:114
#define STATETEARDOWN(m)
Definition: regexec.c:121
#define RZ_REGEX_ESPACE
Definition: rz_regex.h:44
#define RZ_REGEX_STARTEND
Definition: rz_regex.h:56
struct rz_regmatch_t RzRegexMatch
#define RZ_REGEX_NOSUB
Definition: rz_regex.h:25
#define RZ_REGEX_BACKR
Definition: rz_regex.h:59
#define RZ_REGEX_NOMATCH
Definition: rz_regex.h:33
#define RZ_REGEX_INVARG
Definition: rz_regex.h:48
Definition: engine.c:71
RzRegexMatch * pmatch
Definition: engine.c:74
char * endp
Definition: engine.c:77
char * coldp
Definition: engine.c:78
int eflags
Definition: engine.c:73
st64 rm_so
Definition: rz_regex.h:17
st64 rm_eo
Definition: rz_regex.h:18
if(dbg->bits==RZ_SYS_BITS_64)
Definition: windows-arm64.h:4

References backref(), CLEAR, match::coldp, dissect(), match::eflags, match::endp, fast(), free(), g, i, if(), regress::m, malloc(), NOTE, NULL, match::pmatch, rz_regmatch_t::rm_eo, rz_regmatch_t::rm_so, RZ_REGEX_BACKR, RZ_REGEX_ESPACE, RZ_REGEX_INVARG, RZ_REGEX_NOMATCH, RZ_REGEX_NOSUB, RZ_REGEX_STARTEND, SETUP, slow(), start, STATESETUP, and STATETEARDOWN.

◆ slow()

static char * slow ( struct match * m,
char *  start,
char *  stop,
sopno  startst,
sopno  stopst 
)
static

Definition at line 806 of file engine.c.

806  {
807  states st = m->st;
808  states empty = m->empty;
809  states tmp = m->tmp;
810  char *p = start;
811  int c = (start == m->beginp) ? OUT : *(start - 1);
812  int lastc; /* previous c */
813  int flagch;
814  int i;
815  char *matchp; /* last p at which a match ended */
816 
817  AT("slow", start, stop, startst, stopst);
818  CLEAR(st);
819  SET1(st, startst);
820  SP("sstart", st, *p);
821  st = step(m->g, startst, stopst, st, NOTHING, st);
822  matchp = NULL;
823  for (;;) {
824  /* next character */
825  lastc = c;
826  c = (p == m->endp) ? OUT : *p;
827 
828  /* is there an EOL and/or BOL between lastc and c? */
829  flagch = '\0';
830  i = 0;
831  if ((lastc == '\n' && m->g->cflags & RZ_REGEX_NEWLINE) ||
832  (lastc == OUT && !(m->eflags & RZ_REGEX_NOTBOL))) {
833  flagch = BOL;
834  i = m->g->nbol;
835  }
836  if ((c == '\n' && m->g->cflags & RZ_REGEX_NEWLINE) ||
837  (c == OUT && !(m->eflags & RZ_REGEX_NOTEOL))) {
838  flagch = (flagch == BOL) ? BOLEOL : EOL;
839  i += m->g->neol;
840  }
841  if (i != 0) {
842  for (; i > 0; i--)
843  st = step(m->g, startst, stopst, st, flagch, st);
844  SP("sboleol", st, c);
845  }
846 
847  /* how about a word boundary? */
848  if ((flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
849  (c != OUT && ISWORD(c))) {
850  flagch = BOW;
851  }
852  if ((lastc != OUT && ISWORD(lastc)) &&
853  (flagch == EOL || (c != OUT && !ISWORD(c)))) {
854  flagch = EOW;
855  }
856  if (flagch == BOW || flagch == EOW) {
857  st = step(m->g, startst, stopst, st, flagch, st);
858  SP("sboweow", st, c);
859  }
860 
861  /* are we done? */
862  if (ISSET(st, stopst))
863  matchp = p;
864  if (EQ(st, empty) || p == stop)
865  break; /* NOTE BREAK OUT */
866 
867  /* no, we must deal with this character */
868  ASSIGN(tmp, st);
869  ASSIGN(st, empty);
870  if (c == OUT) {
871  break;
872  }
873  st = step(m->g, startst, stopst, tmp, c, st);
874  SP("saft", st, c);
875  if (!EQ(step(m->g, startst, stopst, st, NOTHING, st), st)) {
876  break;
877  }
878  p++;
879  }
880 
881  return (matchp);
882 }

References ASSIGN, AT, BOL, BOLEOL, BOW, c, CLEAR, match::empty, EOL, EOW, EQ, i, ISSET, ISWORD, regress::m, NOTHING, NULL, OUT, p, RZ_REGEX_NEWLINE, RZ_REGEX_NOTBOL, RZ_REGEX_NOTEOL, SET1, SP, match::st, start, states, step(), and autogen_x86imm::tmp.

Referenced by _r_list_half_split(), _sdb_list_split(), dissect(), and matcher().

◆ step()

static states step ( struct re_guts * g,
sopno  start,
sopno  stop,
states  bef,
int  ch,
states  aft 
)
static

Definition at line 888 of file engine.c.

894 {
895  cset *cs;
896  sop s;
897  sopno pc;
898  onestate here; /* note, macros know this name */
899  sopno look;
900  int i;
901 
902  for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
903  s = g->strip[pc];
904  switch (OP(s)) {
905  case OEND:
906  break;
907  case OCHAR:
908  /* only characters can match */
909  if (!NONCHAR(ch) || ch != (char)OPND(s)) {
910  if (ch == (char)OPND(s))
911  FWD(aft, bef, 1);
912  }
913  break;
914  case OBOL:
915  if (ch == BOL || ch == BOLEOL)
916  FWD(aft, bef, 1);
917  break;
918  case OEOL:
919  if (ch == EOL || ch == BOLEOL)
920  FWD(aft, bef, 1);
921  break;
922  case OBOW:
923  if (ch == BOW)
924  FWD(aft, bef, 1);
925  break;
926  case OEOW:
927  if (ch == EOW)
928  FWD(aft, bef, 1);
929  break;
930  case OANY:
931  if (!NONCHAR(ch))
932  FWD(aft, bef, 1);
933  break;
934  case OANYOF:
935  cs = &g->sets[OPND(s)];
936  if (!NONCHAR(ch) && CHIN(cs, ch))
937  FWD(aft, bef, 1);
938  break;
939  case OBACK_: /* ignored here */
940  case O_BACK:
941  FWD(aft, aft, 1);
942  break;
943  case OPLUS_: /* forward, this is just an empty */
944  FWD(aft, aft, 1);
945  break;
946  case O_PLUS: /* both forward and back */
947  FWD(aft, aft, 1);
948  i = ISSETBACK(aft, OPND(s));
949  BACK(aft, aft, OPND(s));
950  if (!i && ISSETBACK(aft, OPND(s))) {
951  /* oho, must reconsider loop body */
952  pc -= OPND(s) + 1;
953  INIT(here, pc);
954  }
955  break;
956  case OQUEST_: /* two branches, both forward */
957  FWD(aft, aft, 1);
958  FWD(aft, aft, OPND(s));
959  break;
960  case O_QUEST: /* just an empty */
961  FWD(aft, aft, 1);
962  break;
963  case OLPAREN: /* not significant here */
964  case ORPAREN:
965  FWD(aft, aft, 1);
966  break;
967  case OCH_: /* mark the first two branches */
968  FWD(aft, aft, 1);
969  if ((OP(g->strip[pc + OPND(s)]) != OOR2)) {
970  break;
971  }
972  FWD(aft, aft, OPND(s));
973  break;
974  case OOR1: /* done a branch, find the O_CH */
975  if (ISSTATEIN(aft, here)) {
976  for (look = 1;
977  OP(s = g->strip[pc + look]) != O_CH;
978  look += OPND(s)) {
979  if (OP(s) != OOR2) {
980  break;
981  }
982  }
983  FWD(aft, aft, look);
984  }
985  break;
986  case OOR2: /* propagate OCH_'s marking */
987  FWD(aft, aft, 1);
988  if (OP(g->strip[pc + OPND(s)]) != O_CH) {
989  if (OP(g->strip[pc + OPND(s)]) == OOR2) {
990  FWD(aft, aft, OPND(s));
991  }
992  }
993  break;
994  case O_CH: /* just empty */
995  FWD(aft, aft, 1);
996  break;
997  default: /* ooooops... */
998  eprintf("ops in regex.c\n");
999  break;
1000  }
1001  }
1002 
1003  return (aft);
1004 }
#define NONCHAR(c)
Definition: engine.c:101
#define INC
#define onestate
Definition: regexec.c:124
#define ISSTATEIN(v, o)
Definition: regexec.c:127
#define BACK(dst, src, n)
Definition: regexec.c:131
#define FWD(dst, src, n)
Definition: regexec.c:130
#define ISSETBACK(v, n)
Definition: regexec.c:132
#define INIT(o, n)
Definition: regexec.c:125
#define eprintf(x, y...)
Definition: rlcc.c:7

References BACK, BOL, BOLEOL, BOW, CHIN, test_evm::cs, EOL, EOW, eprintf, FWD, g, i, INC, INIT, ISSETBACK, ISSTATEIN, NONCHAR, O_BACK, O_CH, O_PLUS, O_QUEST, OANY, OANYOF, OBACK_, OBOL, OBOW, OCH_, OCHAR, OEND, OEOL, OEOW, OLPAREN, onestate, OOR1, OOR2, OP, OPLUS_, OPND, OQUEST_, ORPAREN, pc, s, and start.

Referenced by _pointer_table(), annotated_hexdump(), BMK_findMaxMem(), cmd_print_bars(), combine_sequences(), DEFINE_HANDLE_TS_FCN_AND_SYMBOL(), fast(), LZ4_compress_generic_validated(), LZ4HC_InsertAndGetWiderMatch(), main(), qnxr_send_vcont(), rz_big_and(), rz_big_or(), rz_big_xor(), rz_cmd_help(), rz_core_analysis_get_stats(), rz_core_cmd_foreach(), rz_core_print_hexdump(), rz_core_write_seq_at(), rz_print_fill(), rz_print_hexdump_str(), rz_print_hexii(), rz_range_percent(), rz_write_op_sequence_handler(), slow(), ts_query__add_negated_fields(), ts_query__analyze_patterns(), ts_query__parse_pattern(), ts_query__step_is_fallible(), ts_query_cursor__add_state(), ts_query_cursor__advance(), ts_query_cursor__capture(), ts_query_cursor__first_in_progress_capture(), ts_query_disable_capture(), and ts_query_new().