rz-bindgen/doxygen/regcomp_8c_source.html

 /*  $OpenBSD: regcomp.c,v 1.20 2010/11/21 00:02:30 tedu Exp $ */

 /*-

  * Copyright (c) 1992, 1993, 1994 Henry Spencer.

  * Copyright (c) 1992, 1993, 1994

  *  The Regents of the University of California.  All rights reserved.

  *

  * This code is derived from software contributed to Berkeley by

  * Henry Spencer.

  *

  * Redistribution and use in source and binary forms, with or without

  * modification, are permitted provided that the following conditions

  * are met:

  * 1. Redistributions of source code must retain the above copyright

  *    notice, this list of conditions and the following disclaimer.

  * 2. Redistributions in binary form must reproduce the above copyright

  *    notice, this list of conditions and the following disclaimer in the

  *    documentation and/or other materials provided with the distribution.

  * 3. Neither the name of the University nor the names of its contributors

  *    may be used to endorse or promote products derived from this software

  *    without specific prior written permission.

  *

  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND

  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE

  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL

  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS

  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)

  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT

  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY

  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF

  * SUCH DAMAGE.

  *

  *  @(#)regcomp.c   8.5 (Berkeley) 3/20/94

  */


 #include <sys/types.h>

 #include <stdio.h>

 #include <string.h>

 #include <ctype.h>

 #include <limits.h>

 #include <stdlib.h>

 #include "rz_regex.h"

 #include "rz_util/rz_str.h"

 #include "rz_util/rz_assert.h"


 #include "utils.h"

 #include "regex2.h"


 #include "cclass.h"

 #include "cname.h"


 /*
  * parse structure, passed up and down to avoid global variables and
  * other clumsinesses.
  *
  * One instance lives on the stack of rz_regex_comp() for the duration of a
  * single compilation; every parsing routine receives a pointer to it and
  * the scanner/emitter macros below reach it through the name `p'.
  */

 struct parse {

     char *next; /* next character in RE (the scanner position) */

     char *end; /* end of string (-> NUL normally); MORE() compares next against it */

     int error; /* has an error been seen? (non-zero error code once one was recorded) */

     sop *strip; /* malloced strip: the array of operators emitted so far */

     sopno ssize; /* malloced strip size (allocated), counted in sop elements */

     sopno slen; /* malloced strip length (used); this is what HERE() returns */

     int ncsalloc; /* number of csets allocated */

     struct re_guts *g; /* the compiled-program structure being filled in */

 #define NPAREN 10 /* we need to remember () 1-9 for back refs */

     sopno pbegin[NPAREN]; /* -> ( ([0] unused): strip position of each remembered opening paren */

     sopno pend[NPAREN]; /* -> ) ([0] unused): strip position of each remembered closing paren */

 };


 /*
  * Forward declarations of the file-local helpers.
  */

 /* parsers: ERE, plain string, BRE and bracket expressions */

 static void p_ere(struct parse *, int);

 static void p_ere_exp(struct parse *);

 static void p_str(struct parse *);

 static void p_bre(struct parse *, int, int);

 static int p_simp_re(struct parse *, int);

 static int p_count(struct parse *);

 static void p_bracket(struct parse *);

 static void p_b_term(struct parse *, cset *);

 static void p_b_cclass(struct parse *, cset *);

 static void p_b_eclass(struct parse *, cset *);

 static char p_b_symbol(struct parse *);

 static char p_b_coll_elem(struct parse *, int);

 /* emitters for single characters and special forms */

 static char othercase(int);

 static void bothcases(struct parse *, int);

 static void ordinary(struct parse *, int);

 static void special(struct parse *, int);

 static void nonnewline(struct parse *);

 static void repeat(struct parse *, sopno, int, int);

 static int seterr(struct parse *, int);

 /* character-set bookkeeping */

 static cset *allocset(struct parse *);

 static void freeset(struct parse *, cset *);

 static int freezeset(struct parse *, cset *);

 static int firstch(struct parse *, cset *);

 static int nch(struct parse *, cset *);

 static void mcadd(struct parse *, cset *, char *);

 static void mcinvert(struct parse *, cset *);

 static void mccase(struct parse *, cset *);

 static int isinsets(struct re_guts *, int);

 static int samesets(struct re_guts *, int, int);

 static void categorize(struct parse *, struct re_guts *);

 /* strip manipulation and post-processing */

 static sopno dupl(struct parse *, sopno, sopno);

 static void doemit(struct parse *, sop, size_t);

 static void doinsert(struct parse *, sop, size_t, sopno);

 static void dofwd(struct parse *, sopno, sop);

 static void enlarge(struct parse *, sopno);

 static void stripsnug(struct parse *, struct re_guts *);

 static void findmust(struct parse *, struct re_guts *);

 static sopno pluscount(struct parse *, struct re_guts *);


 static char nuls[10]; /* place to point scanner in event of error */


 /*
  * macros for use with parse structure
  * BEWARE:  these know that the parse structure is named `p' !!!
  *
  * They fall into three groups: scanner access (PEEK .. GETNEXT), error
  * checking (SETERROR .. MUSTNOTSEE) and strip emission (EMIT .. DROP).
  */

 /* scanner: look at / test for / consume characters of the pattern */

 #define PEEK()           (*p->next)

 #define PEEK2()          (*(p->next + 1))

 #define MORE()           (p->next < p->end)

 #define MORE2()          (p->next + 1 < p->end)

 #define SEE(c)           (MORE() && PEEK() == (c))

 #define SEETWO(a, b)     (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))

 /* EAT/EATTWO consume the character(s) only when they match and yield 1/0 */

 #define EAT(c)           ((SEE(c)) ? (NEXT(), 1) : 0)

 #define EATTWO(a, b)     ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)

 /* unconditional advances; none of these checks MORE() itself */

 #define NEXT()           (p->next++)

 #define NEXT2()          (p->next += 2)

 #define NEXTn(n)         (p->next += (n))

 #define GETNEXT()        (*p->next++)

 /* error handling: REQUIRE records error `e' through seterr() when `co' is false */

 #define SETERROR(e)      seterr(p, (e))

 #define REQUIRE(co, e)   (void)((co) || SETERROR(e))

 #define MUSTSEE(c, e)    (REQUIRE(MORE() && PEEK() == (c), e))

 /* note: MUSTEAT consumes a character whenever MORE() holds, matching or not */

 #define MUSTEAT(c, e)    (REQUIRE(MORE() && GETNEXT() == (c), e))

 #define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e))

 /* strip emission: append, insert at `pos', and patch forward/backward offsets */

 #define EMIT(op, sopnd)  doemit(p, (sop)(op), (size_t)(sopnd))

 #define INSERT(op, pos)  doinsert(p, (sop)(op), HERE() - (pos) + 1, pos)

 #define AHEAD(pos)       dofwd(p, pos, HERE() - (pos))

 #define ASTERN(sop, pos) EMIT(sop, HERE() - (pos))

 /* strip positions: next free slot, last emitted op, the one before it */

 #define HERE()           (p->slen)

 #define THERE()          (p->slen - 1)

 #define THERETHERE()     (p->slen - 2)

 /* forget the last n emitted operators */

 #define DROP(n)          (p->slen -= (n))


 /*
  - rz_regex_match - one-shot test: does text match pattern?
  *
  * The pattern is compiled with the flags described by the `flags' string
  * (see rz_regex_flags()), executed once against `text' and released again.
  *
  * Returns 1 when rz_regex_exec() reports success (0), and 0 otherwise --
  * including the case where the pattern does not compile, which is also
  * reported on stderr.
  */
 RZ_API int rz_regex_match(const char *pattern, const char *flags, const char *text) {
     RzRegex compiled;
     const int re_flags = rz_regex_flags(flags);

     if (rz_regex_comp(&compiled, pattern, re_flags) != 0) {
         eprintf("FAIL TO COMPILE %s\n", pattern);
         return 0;
     }

     const int status = rz_regex_exec(&compiled, text, 0, 0, re_flags);
     rz_regex_fini(&compiled);

     /* exec returns 0 on a match, so invert it into a boolean */
     if (status != 0) {
         return 0;
     }
     return 1;
 }


 /*
  - rz_regex_get_match_list - collect every match of pattern found in text
  *
  * pattern - regular expression source
  * flags   - flag letters as understood by rz_regex_flags() (may be NULL)
  * text    - subject string; it is scanned left to right using
  *           RZ_REGEX_STARTEND so each search resumes after the previous match
  *
  * Returns a newly created list (created with `free' as element destructor)
  * holding one heap-allocated copy per match, or NULL when an argument is
  * missing, the pattern does not compile or the list cannot be allocated.
  * The caller owns the returned list.
  */
 RZ_API RzList *rz_regex_get_match_list(const char *pattern, const char *flags, const char *text) {
     rz_return_val_if_fail(pattern && text, NULL);
     RzRegex rx;
     RzRegexMatch match;
     int re_flags = rz_regex_flags(flags);

     /* compile first, so a bad pattern cannot leak a freshly made list */
     if (rz_regex_comp(&rx, pattern, re_flags)) {
         eprintf("Failed to compile regexp: %s\n", pattern);
         return NULL;
     }
     RzList *list = rz_list_newf(free);
     if (!list) {
         rz_regex_fini(&rx);
         return NULL;
     }

     const size_t text_len = strlen(text);

     /* Initialize the boundaries for RZ_REGEX_STARTEND */
     match.rm_so = 0;
     match.rm_eo = text_len;
     while (!rz_regex_exec(&rx, text, 1, &match, re_flags | RZ_REGEX_STARTEND)) {
         size_t entry_len = match.rm_eo - match.rm_so + 1; /* +1 for the NUL */
         char *entry = RZ_NEWS0(char, entry_len);
         if (!entry) {
             break; /* out of memory: return what was collected so far */
         }
         rz_str_ncpy(entry, text + match.rm_so, entry_len);
         rz_list_append(list, entry);

         /* Update the boundaries for RZ_REGEX_STARTEND */
         if (match.rm_eo == match.rm_so) {
             /*
              * Zero-length match: restarting at the same offset would find
              * it again forever, so step over one character (or stop at the
              * end of the text).
              */
             if ((size_t)match.rm_eo >= text_len) {
                 break;
             }
             match.rm_so = match.rm_eo + 1;
         } else {
             match.rm_so = match.rm_eo;
         }
         match.rm_eo = text_len;
     }
     rz_regex_fini(&rx);
     return list;
 }


 /*
  - rz_regex_new - compile pattern into a heap-allocated RzRegex
  *
  * pattern - regular expression source (must not be NULL)
  * flags   - flag letters as understood by rz_regex_flags() (may be NULL)
  *
  * Returns the new object, to be released with rz_regex_free(), or NULL when
  * the pattern does not compile or memory runs out.
  */
 RZ_API RzRegex *rz_regex_new(const char *pattern, const char *flags) {
     rz_return_val_if_fail(pattern, NULL);
     RzRegex *r, rx = { 0 };
     if (rz_regex_comp(&rx, pattern, rz_regex_flags(flags))) {
         return NULL;
     }
     r = RZ_NEW(RzRegex);
     if (!r) {
         /* the compiled program owned by rx would otherwise be leaked */
         rz_regex_fini(&rx);
         return NULL;
     }
     /* hand the compiled state over to the heap object */
     memcpy(r, &rx, sizeof(RzRegex));
     return r;
 }


 /*
  - rz_regex_flags - translate a string of flag letters into RZ_REGEX_* bits
  *
  * Recognised letters: e (EXTENDED), i (ICASE), s (NOSUB), n (NEWLINE),
  * N (NOSPEC), p (PEND), d (DUMP).  Unknown letters are ignored and the
  * order or repetition of letters does not matter.
  *
  * Returns the OR of the selected bits; 0 for a NULL or empty string.
  */
 RZ_API int rz_regex_flags(const char *f) {
     int bits = 0;

     if (!f) {
         return 0;
     }
     for (; *f != '\0'; f++) {
         switch (*f) {
         case 'e':
             bits |= RZ_REGEX_EXTENDED;
             break;
         case 'i':
             bits |= RZ_REGEX_ICASE;
             break;
         case 's':
             bits |= RZ_REGEX_NOSUB;
             break;
         case 'n':
             bits |= RZ_REGEX_NEWLINE;
             break;
         case 'N':
             bits |= RZ_REGEX_NOSPEC;
             break;
         case 'p':
             bits |= RZ_REGEX_PEND;
             break;
         case 'd':
             bits |= RZ_REGEX_DUMP;
             break;
         default:
             break;
         }
     }
     return bits;
 }


 /*
  - rz_regex_fini - release the compiled program held by preg
  *
  * Only the internals are freed; the RzRegex object itself is left to the
  * caller (see rz_regex_free()).  Nothing happens when preg is NULL or when
  * either magic number shows the object does not hold a valid program.
  */
 RZ_API void rz_regex_fini(RzRegex *preg) {
     if (!preg || preg->re_magic != MAGIC1) {
         /* nice to complain, but hard */
         return;
     }

     struct re_guts *guts = preg->re_g;
     if (!guts || guts->magic != MAGIC2) {
         return;
     }

     /* mark both layers invalid before tearing them down */
     preg->re_magic = 0;
     guts->magic = 0;

     free(guts->strip);
     free(guts->sets);
     free(guts->setbits);
     free(guts->must);
     free(guts);
 }


 /*
  - rz_regex_free - release a heap-allocated RzRegex and its compiled program
  *
  * Accepts NULL, in which case nothing is done.
  */
 RZ_API void rz_regex_free(RzRegex *preg) {
     if (!preg) {
         return;
     }
     rz_regex_fini(preg);
     free(preg);
 }


 /*
  - regcomp - interface for parser and compilation
  - 0 success, otherwise RZ_REGEX_something
  *
  * preg    - object that receives the compiled program (re_g, re_nsub and
  *           re_magic are filled in); with RZ_REGEX_PEND its re_endp field
  *           must already point at the end of the pattern
  * pattern - regular expression source
  * cflags  - RZ_REGEX_* compilation flags; RZ_REGEX_EXTENDED and
  *           RZ_REGEX_NOSPEC exclude each other
  *
  * RZ_REGEX_INVARG is returned for missing or contradictory arguments and
  * RZ_REGEX_ESPACE when memory runs out or the pattern is too long.  When
  * the parser records an error the partially built program is released
  * through rz_regex_fini() before the error code is returned.
  */
 RZ_API int rz_regex_comp(RzRegex *preg, const char *pattern, int cflags) {
     struct parse pa;
     struct re_guts *g;
     struct parse *p = &pa;
     int i;
     size_t len;
 #ifdef REDEBUG
 #define GOODFLAGS(f) (f)
 #else
 #define GOODFLAGS(f) ((f) & ~RZ_REGEX_DUMP)
 #endif
     cflags = GOODFLAGS(cflags);
     if (!preg || !pattern || ((cflags & RZ_REGEX_EXTENDED) && (cflags & RZ_REGEX_NOSPEC))) {
         return RZ_REGEX_INVARG;
     }
     if (cflags & RZ_REGEX_PEND) {
         /* explicit end pointer: the pattern may contain NUL bytes */
         if (preg->re_endp < pattern) {
             return RZ_REGEX_INVARG;
         }
         len = preg->re_endp - pattern;
     } else {
         len = strlen((char *)pattern);
     }
     /* do the mallocs early so failure handling is easy */
     g = calloc(1, sizeof(struct re_guts) + (NC - 1));
     if (!g) {
         return RZ_REGEX_ESPACE;
     }
     /*
      * Limit the pattern space to avoid a 32-bit overflow on buffer
      * extension.  Also avoid any signed overflow in case of conversion
      * so make the real limit based on a 31-bit overflow.
      *
      * Likely not applicable on 64-bit systems but handle the case
      * generically (who are we to stop people from using ~715MB+
      * patterns?).
      */
     size_t maxlen = ((size_t)-1 >> 1) / sizeof(sop) * 2 / 3;
     if (len >= maxlen) {
         free(g);
         return RZ_REGEX_ESPACE;
     }
     preg->re_flags = cflags;
     p->ssize = len / (size_t)2 * (size_t)3 + (size_t)1; /* ugh */
     if (p->ssize < len) {
         free(g);
         return RZ_REGEX_ESPACE;
     }

     p->strip = (sop *)calloc(p->ssize, sizeof(sop));
     if (!p->strip) {
         free(g);
         return RZ_REGEX_ESPACE;
     }
     p->slen = 0;

     /* set things up */
     p->g = g;
     p->next = (char *)pattern; /* convenience; we do not modify it */
     p->end = p->next + len;
     p->error = 0;
     p->ncsalloc = 0;
     for (i = 0; i < NPAREN; i++) {
         p->pbegin[i] = 0;
         p->pend[i] = 0;
     }
     g->csetsize = NC;
     g->sets = NULL;
     g->setbits = NULL;
     g->ncsets = 0;
     g->cflags = cflags;
     g->iflags = 0;
     g->nbol = 0;
     g->neol = 0;
     g->must = NULL;
     g->mlen = 0;
     g->nsub = 0;
     g->ncategories = 1; /* category 0 is "everything else" */
     /* bias the table so it can be indexed directly with a (signed) char */
     g->categories = &g->catspace[-(CHAR_MIN)];
     (void)memset((char *)g->catspace, 0, NC * sizeof(cat_t));
     g->backrefs = 0;

     /* do it: the program is bracketed by two OEND operators */
     EMIT(OEND, 0);
     g->firststate = THERE();
     if (cflags & RZ_REGEX_EXTENDED) {
         p_ere(p, OUT);
     } else if (cflags & RZ_REGEX_NOSPEC) {
         p_str(p);
     } else {
         p_bre(p, OUT, OUT);
     }
     EMIT(OEND, 0);
     g->laststate = THERE();

     /* tidy up loose ends and fill things in */
     categorize(p, g);
     stripsnug(p, g);
     findmust(p, g);
     g->nplus = pluscount(p, g);
     g->magic = MAGIC2;
     preg->re_nsub = g->nsub;
     preg->re_g = g;
     preg->re_magic = MAGIC1;
 #ifndef REDEBUG
     /* not debugging, so can't rely on the assert() in regexec() */
     if (g->iflags & BAD) {
         SETERROR(RZ_REGEX_ASSERT);
     }
 #endif
     /* the magic numbers are set by now, so rz_regex_fini() can clean up */
     if (p->error) {
         rz_regex_fini(preg);
     }
     return p->error;
 }


 /*
  - p_ere - ERE parser top level, concatenation and alternation
  *
  * Parses branches separated by '|' until the pattern ends or the `stop'
  * character is seen (the stop character itself is not consumed here).
  * Every branch must emit at least one operator, otherwise RZ_REGEX_EMPTY
  * is recorded.  When at least one '|' was seen the branches are wrapped as
  * OCH_ branch OOR1 OOR2 branch ... O_CH, with the offsets patched up
  * afterwards through AHEAD()/ASTERN().
  */

 static void p_ere(struct parse *p, int stop) { /* character this ERE should end at */

     bool isFirst = true; /* no '|' seen yet, OCH_ still has to be inserted */

     sopno prevback = 0; /* position the next backward (OOR1/O_CH) offset refers to */

     sopno prevfwd = 0; /* position of the forward offset still to be fixed */

     sopno conc = 0; /* start of the current branch */

     char c;


     for (;;) {

         /* do a bunch of concatenated expressions */

         conc = HERE();

         while (MORE() && (c = PEEK()) != '|' && c != stop) {

             p_ere_exp(p);

         }

         REQUIRE(HERE() != conc, RZ_REGEX_EMPTY); /* require nonempty */


         if (!EAT('|')) {

             break; /* NOTE BREAK OUT */

         }

         if (isFirst) {

             INSERT(OCH_, conc); /* offset is wrong */

             prevfwd = conc;

             prevback = conc;

             isFirst = false;

         }

         ASTERN(OOR1, prevback);

         prevback = THERE();

         AHEAD(prevfwd); /* fix previous offset */

         prevfwd = HERE();

         EMIT(OOR2, 0); /* offset is very wrong */

     }


     if (!isFirst) { /* tail-end fixups */

         AHEAD(prevfwd);

         ASTERN(O_CH, prevback);

     }

     // assert(!MORE() || SEE(stop));

 }


 /*

  - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op

  */

 static void p_ere_exp(struct parse *p) {

     char c;

     sopno pos;

     int count;

     int count2;

     sopno subno;

     int wascaret = 0;


     if (!MORE()) { /* caller should have ensured this */

         return;

     }

     c = GETNEXT();


     pos = HERE();

     switch (c) {

     case '(':

         REQUIRE(MORE(), RZ_REGEX_EPAREN);

         p->g->nsub++;

         subno = p->g->nsub;

         if (subno < NPAREN) {

             p->pbegin[subno] = HERE();

         }

         EMIT(OLPAREN, subno);

         if (!SEE(')')) {

             p_ere(p, ')');

         }

         if (subno < NPAREN) {

             p->pend[subno] = HERE();

             if (!p->pend[subno]) {

                 break;

             }

         }

         EMIT(ORPAREN, subno);

         MUSTEAT(')', RZ_REGEX_EPAREN);

         break;

     case '^':

         EMIT(OBOL, 0);

         p->g->iflags |= USEBOL;

         p->g->nbol++;

         wascaret = 1;

         break;

     case '$':

         EMIT(OEOL, 0);

         p->g->iflags |= USEEOL;

         p->g->neol++;

         break;

     case '|':

         SETERROR(RZ_REGEX_EMPTY);

         break;

     case '*':

     case '+':

     case '?':

         SETERROR(RZ_REGEX_BADRPT);

         break;

     case '.':

         if (p->g->cflags & RZ_REGEX_NEWLINE) {

             nonnewline(p);

         } else {

             EMIT(OANY, 0);

         }

         break;

     case '[':

         p_bracket(p);

         break;

     case '\\':

         REQUIRE(MORE(), RZ_REGEX_EESCAPE);

         c = GETNEXT();

         if (!isalpha(c)) {

             ordinary(p, c);

         } else {

             special(p, c);

         }

         break;

     case '{': /* okay as ordinary except if digit follows */

         REQUIRE(!MORE() || !isdigit((ut8)PEEK()), RZ_REGEX_BADRPT);

         /* FALLTHROUGH */

     default:

         ordinary(p, c);

         break;

     }


     if (!MORE()) {

         return;

     }

     c = PEEK();

     /* we call { a repetition if followed by a digit */

     if (!(c == '*' || c == '+' || c == '?' ||

             (c == '{' && MORE2() && isdigit((ut8)PEEK2())))) {

         return; /* no repetition, we're done */

     }

     NEXT();


     REQUIRE(!wascaret, RZ_REGEX_BADRPT);

     switch (c) {

     case '*': /* implemented as +? */

         /* this case does not require the (y|) trick, noKLUDGE */

         INSERT(OPLUS_, pos);

         ASTERN(O_PLUS, pos);

         INSERT(OQUEST_, pos);

         ASTERN(O_QUEST, pos);

         break;

     case '+':

         INSERT(OPLUS_, pos);

         ASTERN(O_PLUS, pos);

         break;

     case '?':

         /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */

         INSERT(OCH_, pos); /* offset slightly wrong */

         ASTERN(OOR1, pos); /* this one's right */

         AHEAD(pos); /* fix the OCH_ */

         EMIT(OOR2, 0); /* offset very wrong... */

         AHEAD(THERE()); /* ...so fix it */

         ASTERN(O_CH, THERETHERE());

         break;

     case '{':

         count = p_count(p);

         if (EAT(',')) {

             if (isdigit((ut8)PEEK())) {

                 count2 = p_count(p);

                 REQUIRE(count <= count2, RZ_REGEX_BADBR);

             } else { /* single number with comma */

                 count2 = INTFINITY;

             }

         } else { /* just a single number */

             count2 = count;

         }

         repeat(p, pos, count, count2);

         if (!EAT('}')) { /* error heuristics */

             while (MORE() && PEEK() != '}') {

                 NEXT();

             }

             REQUIRE(MORE(), RZ_REGEX_EBRACE);

             SETERROR(RZ_REGEX_BADBR);

         }

         break;

     }


     if (!MORE()) {

         return;

     }

     c = PEEK();

     if (!(c == '*' || c == '+' || c == '?' ||

             (c == '{' && MORE2() && isdigit((ut8)PEEK2())))) {

         return;

     }

     SETERROR(RZ_REGEX_BADRPT);

 }


 /*
  - p_str - string (no metacharacters) "parser"
  *
  * Every character of the pattern is emitted as an ordinary character; an
  * empty pattern is reported as RZ_REGEX_EMPTY.
  */
 static void p_str(struct parse *p) {
     REQUIRE(MORE(), RZ_REGEX_EMPTY);
     for (; MORE();) {
         const char literal = GETNEXT();
         ordinary(p, literal);
     }
 }


 /*
  - p_bre - BRE parser top level, anchoring and concatenation
  * Giving end1 as OUT essentially eliminates the end1/end2 check.
  *
  * end1, end2 - the two-character sequence that terminates this BRE
  *
  * This implementation is a bit of a kludge, in that a trailing $ is first
  * taken as an ordinary character and then revised to be an anchor.  The
  * only undesirable side effect is that '$' gets included as a character
  * category in such cases.  This is fairly harmless; not worth fixing.
  * The amount of lookahead needed to avoid this kludge is excessive.
  */
 static void p_bre(struct parse *p, int end1, int end2) {
     const sopno origin = HERE();
     int leading = 1; /* is the next simple RE the first one? */
     int trailing_dollar = 0; /* was the last simple RE a bare $? */

     /* optional leading anchor */
     if (EAT('^')) {
         EMIT(OBOL, 0);
         p->g->iflags |= USEBOL;
         p->g->nbol++;
     }

     for (; MORE() && !SEETWO(end1, end2); leading = 0) {
         trailing_dollar = p_simp_re(p, leading);
     }

     if (trailing_dollar) {
         /* oops, that was a trailing anchor: replace the ordinary '$' */
         DROP(1);
         EMIT(OEOL, 0);
         p->g->iflags |= USEEOL;
         p->g->neol++;
     }

     /* require nonempty */
     REQUIRE(HERE() != origin, RZ_REGEX_EMPTY);
 }


 /*
  - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
  *
  * The atom is one of: '.', a bracket expression, a \( \) group, a back
  * reference \1..\9 or an ordinary character.  Afterwards an optional '*'
  * or \{m,n\} repetition is applied to everything emitted since `pos'.
  *
  * Returns 1 when the simple RE was a bare (unbackslashed, unrepeated) '$',
  * which lets p_bre() turn a trailing '$' into an anchor; 0 otherwise.
  */

 static int /* was the simple RE an unbackslashed $? */

 p_simp_re(struct parse *p,

     int starordinary) /* is a leading * an ordinary character? */

 {

     int c;

     int count;

     int count2;

     sopno pos;

     int i;

     sopno subno;

     /* flag OR-ed into c to mark "this character was preceded by a backslash" */

 #define BACKSL (1 << CHAR_BIT)


     pos = HERE(); /* repetition op, if any, covers from here */


     if (!MORE()) { /* caller should have ensured this */

         return 0;

     }

     c = GETNEXT();

     if (c == '\\') {

         REQUIRE(MORE(), RZ_REGEX_EESCAPE);

         c = BACKSL | GETNEXT();

     }

     switch (c) {

     case '.':

         if (p->g->cflags & RZ_REGEX_NEWLINE) {

             nonnewline(p);

         } else {

             EMIT(OANY, 0);

         }

         break;

     case '[':

         p_bracket(p);

         break;

     case BACKSL | '{':

         /* a bound with nothing in front of it to repeat */

         SETERROR(RZ_REGEX_BADRPT);

         break;

     case BACKSL | '(':

         p->g->nsub++;

         subno = p->g->nsub;

         /* only groups below NPAREN are remembered for back references */

         if (subno < NPAREN) {

             p->pbegin[subno] = HERE();

         }

         EMIT(OLPAREN, subno);

         /* the MORE here is an error heuristic */

         if (MORE() && !SEETWO('\\', ')')) {

             p_bre(p, '\\', ')');

         }

         if (subno < NPAREN) {

             p->pend[subno] = HERE();

             if (!p->pend[subno]) {

                 break;

             }

         }

         EMIT(ORPAREN, subno);

         REQUIRE(EATTWO('\\', ')'), RZ_REGEX_EPAREN);

         break;

     case BACKSL | ')': /* should not get here -- must be user */

     case BACKSL | '}':

         SETERROR(RZ_REGEX_EPAREN);

         break;

     case BACKSL | '1':

     case BACKSL | '2':

     case BACKSL | '3':

     case BACKSL | '4':

     case BACKSL | '5':

     case BACKSL | '6':

     case BACKSL | '7':

     case BACKSL | '8':

     case BACKSL | '9':

         /* back reference: i is the group number 1..9 */

         i = (c & ~BACKSL) - '0';

         /* pend[i] == 0 means group i has not been closed (or seen) yet */

         if (p->pend[i] != 0) {

             if (i <= p->g->nsub) {

                 EMIT(OBACK_, i);

                 /* re-emit a copy of the group body between OBACK_ and O_BACK */

                 if (p->pbegin[i] != 0 && OP(p->strip[p->pbegin[i]]) == OLPAREN &&

                     OP(p->strip[p->pend[i]]) == ORPAREN) {

                     (void)dupl(p, p->pbegin[i] + 1, p->pend[i]);

                     EMIT(O_BACK, i);

                 }

             }

         } else {

             SETERROR(RZ_REGEX_ESUBREG);

         }

         p->g->backrefs = 1;

         break;

     case '*':

         /* '*' is only a literal at the start of a BRE */

         REQUIRE(starordinary, RZ_REGEX_BADRPT);

         /* FALLTHROUGH */

     default:

         /* the cast drops the BACKSL flag, leaving the escaped character */

         ordinary(p, (char)c);

         break;

     }


     if (EAT('*')) { /* implemented as +? */

         /* this case does not require the (y|) trick, noKLUDGE */

         INSERT(OPLUS_, pos);

         ASTERN(O_PLUS, pos);

         INSERT(OQUEST_, pos);

         ASTERN(O_QUEST, pos);

     } else if (EATTWO('\\', '{')) {

         count = p_count(p);

         if (EAT(',')) {

             if (MORE() && isdigit((ut8)PEEK())) {

                 count2 = p_count(p);

                 REQUIRE(count <= count2, RZ_REGEX_BADBR);

             } else { /* single number with comma */

                 count2 = INTFINITY;

             }

         } else { /* just a single number */

             count2 = count;

         }

         repeat(p, pos, count, count2);

         if (!EATTWO('\\', '}')) { /* error heuristics */

             /* skip ahead to the closing \} to decide which error to report */

             while (MORE() && !SEETWO('\\', '}')) {

                 NEXT();

             }

             REQUIRE(MORE(), RZ_REGEX_EBRACE);

             SETERROR(RZ_REGEX_BADBR);

         }

     } else if (c == '$') { /* $ (but not \$) ends it */

         return (1);

     }


     return (0);

 }


 /*
  - p_count - parse a repetition count
  *
  * Consumes decimal digits and returns their value.  RZ_REGEX_BADBR is
  * recorded when no digit is present or the value exceeds DUPMAX; scanning
  * stops as soon as the running value has passed DUPMAX.
  */
 static int p_count(struct parse *p) {
     int value = 0;
     int seen = 0; /* number of digits consumed */

     for (; MORE() && isdigit((ut8)PEEK()) && value <= DUPMAX; seen++) {
         value = value * 10 + (GETNEXT() - '0');
     }

     REQUIRE(seen > 0 && value <= DUPMAX, RZ_REGEX_BADBR);
     return value;
 }


 /*
  - p_bracket - parse a bracketed character list
  *
  * Called with the scanner just past the opening '['.  Builds a cset from
  * the list, applies case folding (RZ_REGEX_ICASE) and inversion ('^'), and
  * emits either an ordinary character (singleton set) or OANYOF.
  *
  * Note a significant property of this code:  if the allocset() did SETERROR,
  * no set operations are done.
  */

 static void p_bracket(struct parse *p) {

     cset *cs;

     int invert = 0;


     /* Dept of Truly Sickening Special-Case Kludges */

     /* "[[:<:]]" and "[[:>:]]" stand for begin-of-word / end-of-word */

     if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {

         EMIT(OBOW, 0);

         NEXTn(6);

         return;

     }

     if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {

         EMIT(OEOW, 0);

         NEXTn(6);

         return;

     }


     if (!(cs = allocset(p))) {

         /* allocset did set error status in p */

         return;

     }


     if (EAT('^')) {

         invert++; /* make note to invert set at end */

     }

     /* a ']' or '-' in first position is taken literally */

     if (EAT(']')) {

         CHadd(cs, ']');

     } else if (EAT('-')) {

         CHadd(cs, '-');

     }

     while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) {

         p_b_term(p, cs);

     }

     /* so is a '-' directly in front of the closing ']' */

     if (EAT('-')) {

         CHadd(cs, '-');

     }

     MUSTEAT(']', RZ_REGEX_EBRACK);


     if (p->error != 0) { /* don't mess things up further */

         freeset(p, cs);

         return;

     }


     if (p->g->cflags & RZ_REGEX_ICASE) {

         int i;

         int ci;


         /* add the other-case counterpart of every alphabetic member */

         for (i = p->g->csetsize - 1; i >= 0; i--) {

             if (CHIN(cs, i) && isalpha(i)) {

                 ci = othercase(i);

                 if (ci != i) {

                     CHadd(cs, ci);

                 }

             }

         }

         if (cs->multis != NULL) {

             mccase(p, cs);

         }

     }

     if (invert) {

         int i;


         /* flip the membership of every character code */

         for (i = p->g->csetsize - 1; i >= 0; i--) {

             if (CHIN(cs, i)) {

                 CHsub(cs, i);

             } else {

                 CHadd(cs, i);

             }

         }

         /* with RZ_REGEX_NEWLINE an inverted set never matches newline */

         if (p->g->cflags & RZ_REGEX_NEWLINE) {

             CHsub(cs, '\n');

         }

         if (cs->multis != NULL) {

             mcinvert(p, cs);

         }

     }


     /* NOTE(review): nothing is emitted and cs is neither frozen nor freed here -- confirm intended */

     if (cs->multis) { /* xxx */

         return;

     }


     if (nch(p, cs) == 1) { /* optimize singleton sets */

         ordinary(p, firstch(p, cs));

         freeset(p, cs);

     } else {

         EMIT(OANYOF, freezeset(p, cs));

     }

 }


 /*
  - p_b_term - parse one term of a bracketed character list
  *
  * A term is a character class "[:name:]", an equivalence class "[=x=]",
  * or a single symbol optionally extended to a range "a-z".  The characters
  * it denotes are added to cs.  A term starting with '-' is
  * RZ_REGEX_ERANGE.
  */
 static void p_b_term(struct parse *p, cset *cs) {
     char kind = '\0'; /* ':' or '=' after a '[', otherwise NUL */
     char lo = 0;
     char hi;
     int code;

     /* classify what we've got */
     if (MORE()) {
         const char lead = PEEK();
         if (lead == '-') {
             SETERROR(RZ_REGEX_ERANGE);
             return; /* NOTE RETURN */
         }
         if (lead == '[' && MORE2()) {
             kind = PEEK2();
         }
     }

     if (kind == ':') { /* character class */
         NEXT2();
         REQUIRE(MORE(), RZ_REGEX_EBRACK);
         kind = PEEK();
         REQUIRE(kind != '-' && kind != ']', RZ_REGEX_ECTYPE);
         p_b_cclass(p, cs);
         REQUIRE(MORE(), RZ_REGEX_EBRACK);
         REQUIRE(EATTWO(':', ']'), RZ_REGEX_ECTYPE);
         return;
     }

     if (kind == '=') { /* equivalence class */
         NEXT2();
         REQUIRE(MORE(), RZ_REGEX_EBRACK);
         kind = PEEK();
         REQUIRE(kind != '-' && kind != ']', RZ_REGEX_ECOLLATE);
         p_b_eclass(p, cs);
         REQUIRE(MORE(), RZ_REGEX_EBRACK);
         REQUIRE(EATTWO('=', ']'), RZ_REGEX_ECOLLATE);
         return;
     }

     /* symbol, ordinary character, or range */
     /* xxx revision needed for multichar stuff */
     lo = p_b_symbol(p);
     hi = lo;
     if (SEE('-') && MORE2() && PEEK2() != ']') {
         /* range: a second '-' is its own upper end */
         NEXT();
         if (EAT('-')) {
             hi = '-';
         } else {
             hi = p_b_symbol(p);
         }
     }
     /* xxx what about signed chars here... */
     REQUIRE(lo <= hi, RZ_REGEX_ERANGE);
     for (code = lo; code <= hi; code++) {
         CHadd(cs, code);
     }
 }


 /*
  - p_b_cclass - parse a character-class name and deal with it
  *
  * The scanner stands on the first character of the name.  The alphabetic
  * run found there is looked up in the cclasses table; the single
  * characters and multi-character members of the entry are added to cs.
  * An unknown name is RZ_REGEX_ECTYPE.
  */
 static void p_b_cclass(struct parse *p, cset *cs) {
     char *name_start = p->next;
     struct cclass *entry;
     size_t name_len;
     char *cursor;

     while (MORE() && isalpha((unsigned char)PEEK())) {
         NEXT();
     }
     name_len = p->next - name_start;

     /* exact-length match against the table, which ends with a NULL name */
     entry = cclasses;
     while (entry->name != NULL) {
         if (strncmp(entry->name, name_start, name_len) == 0 && entry->name[name_len] == '\0') {
             break;
         }
         entry++;
     }
     if (entry->name == NULL) {
         /* oops, didn't find it */
         SETERROR(RZ_REGEX_ECTYPE);
         return;
     }

     for (cursor = entry->chars; *cursor != '\0'; cursor++) {
         const char member = *cursor;
         CHadd(cs, member);
     }
     /* multis is a sequence of NUL-terminated strings closed by an empty one */
     cursor = entry->multis;
     while (*cursor != '\0') {
         MCadd(p, cs, cursor);
         cursor += strlen(cursor) + 1;
     }
 }


 /*
  - p_b_eclass - parse an equivalence-class name and deal with it
  *
  * The class is treated as the single collating element it names, which is
  * added to cs.
  *
  * This implementation is incomplete. xxx
  */
 static void p_b_eclass(struct parse *p, cset *cs) {
     const char elem = p_b_coll_elem(p, '=');

     CHadd(cs, elem);
 }


 /*
  - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
  *
  * Returns the character the symbol stands for.  RZ_REGEX_EBRACK is
  * recorded when the pattern has ended, RZ_REGEX_ECOLLATE when a "[."
  * symbol is not closed by ".]".
  */
 static char p_b_symbol(struct parse *p) {
     REQUIRE(MORE(), RZ_REGEX_EBRACK);

     if (EATTWO('[', '.')) {
         /* collating symbol */
         const char sym = p_b_coll_elem(p, '.');
         REQUIRE(EATTWO('.', ']'), RZ_REGEX_ECOLLATE);
         return sym;
     }

     /* plain character */
     return GETNEXT();
 }


 /*
  - p_b_coll_elem - parse a collating-element name and look it up
  *
  * endc - the name is terminated by the two characters endc, ']'
  *
  * The terminator itself is not consumed.  Returns the code found in the
  * cnames table, or the character itself for a one-character name.
  * Otherwise 0 is returned with RZ_REGEX_EBRACK (pattern ended first) or
  * RZ_REGEX_ECOLLATE (unknown name) recorded.
  */
 static char p_b_coll_elem(struct parse *p, int endc) {
     char *name_start = p->next;
     struct cname *entry;
     int name_len;

     /* advance to the terminator */
     for (;;) {
         if (!MORE() || SEETWO(endc, ']')) {
             break;
         }
         NEXT();
     }
     if (!MORE()) {
         SETERROR(RZ_REGEX_EBRACK);
         return 0;
     }

     name_len = p->next - name_start;
     for (entry = cnames; entry->name != NULL; entry++) {
         if (strncmp(entry->name, name_start, name_len) != 0) {
             continue;
         }
         if (entry->name[name_len] == '\0') {
             return entry->code; /* known name */
         }
     }
     if (name_len != 1) {
         SETERROR(RZ_REGEX_ECOLLATE); /* neither */
         return 0;
     }
     return *name_start; /* single character */
 }


 /*

  - othercase - return the case counterpart of an alphabetic

  */

 static char /* if no counterpart, return ch */

 othercase(int ch) {

     ch = (ut8)ch;

     if (isalpha(ch)) {

         if (isupper(ch)) {

             return ((ut8)tolower(ch));

         } else if (islower(ch)) {

             return ((ut8)toupper(ch));

         } else { /* peculiar, but could happen */

             return (ch);

         }

     }

     return ch;

 }


 /*
  - bothcases - emit a dualcase version of a two-case character
  *
  * Does nothing for a character without a case counterpart.  Otherwise the
  * scanner is pointed at a tiny synthetic bracket expression "c]" and
  * p_bracket() is run on it (which adds the other case because of
  * RZ_REGEX_ICASE).  The real scanner position is restored only when that
  * text was consumed completely.
  *
  * Boy, is this implementation ever a kludge...
  */
 static void bothcases(struct parse *p, int ch) {
     char *saved_next = p->next;
     char *saved_end = p->end;
     char fake[3];
     char *const fake_end = fake + 2;

     ch = (ut8)ch;
     if (othercase(ch) == ch) { /* p_bracket() would recurse */
         return;
     }

     fake[0] = ch;
     fake[1] = ']';
     fake[2] = '\0';
     p->next = fake;
     p->end = fake_end;
     p_bracket(p);

     if (p->next == fake_end) {
         p->next = saved_next;
         p->end = saved_end;
     }
 }


 /*

  - ordinary - emit an ordinary character

  */

 static void

 ordinary(struct parse *p, int ch) {

     cat_t *cap = p->g->categories;


     if ((p->g->cflags & RZ_REGEX_ICASE) && isalpha((ut8)ch) && othercase(ch) != ch) {

         bothcases(p, ch);

     } else {

         EMIT(OCHAR, (ut8)ch);

         if (cap[ch] == 0) {

             cap[ch] = p->g->ncategories++;

         }

     }

 }


 static void

 special(struct parse *p, int ch) {

     char *oldnext = p->next;

     char *oldend = p->end;

     char bracket[16] = { 0 };

     char digits[3] = { 0 };

     char c;

     int num = 0;

     switch (ch) {

     case 'x':

         digits[0] = GETNEXT();

         digits[1] = GETNEXT();

         c = (char)strtol(digits, NULL, 16);

         ordinary(p, c);

         return;

     case 'n':

         ordinary(p, '\n');

         return;

     case 't':

         ordinary(p, '\t');

         return;

     case 'r':

         ordinary(p, '\r');

         return;

     case 's':

         num = 5;

         memcpy(bracket, "\t\r\n ]", num);

         break;

     case 'd':

         num = 4;

         memcpy(bracket, "0-9]", num);

         break;

     case 'w':

         num = 4;

         memcpy(bracket, "a-z]", num);

         break;

     default:

         SETERROR(RZ_REGEX_INVARG);

         return;

     }


     p->next = bracket;

     p->end = bracket + num;


     p_bracket(p);


     if (p->next == bracket + num) {

         p->next = oldnext;

         p->end = oldend;

     }

 }


 /*
  - nonnewline - emit RZ_REGEX_NEWLINE version of OANY
  *
  * Runs p_bracket() over the scratch text "^\n]" in place of the real
  * pattern.  The real pattern cursor comes back only when p_bracket()
  * stopped exactly at the end of that scratch text.
  *
  * Boy, is this implementation ever a kludge...
  */
 static void
 nonnewline(struct parse *p) {
     char *const resume_next = p->next;
     char *const resume_end = p->end;
     char bracket[4] = { '^', '\n', ']', '\0' };
     char *const stop = bracket + 3;

     p->next = bracket;
     p->end = stop;
     p_bracket(p);

     if (p->next != stop) {
         return;
     }
     p->next = resume_next;
     p->end = resume_end;
 }


 /*

  - repeat - generate code for a bounded repetition, recursively if needed

  *
  * On entry the operand occupies the strip from start up to HERE().  Each
  * of from and to is reduced by MAP() to one of four classes -- 0, 1,
  * N (any other finite count) or INF (INTFINITY) -- and REP() folds the
  * two classes into one switch key.  Cases that cannot be expressed
  * directly rewrite the strip and recurse with smaller counts.
  *
  * Nothing is done when an error is already pending or when from > to.
  * The helper macros N, INF, REP and MAP are #defined inside the body and
  * are not #undef'd in this function.

  */

 static void

 repeat(struct parse *p,

     sopno start, /* operand from here to end of strip */

     int from, /* repeated from this number */

     int to) /* to this number of times (maybe INTFINITY) */

 {

     sopno finish = HERE();

     /* count classes: 0 and 1 stand for themselves, see MAP() below */
 #define N         2

 #define INF       3

     /* combine a (from-class, to-class) pair into a single case label */
 #define REP(f, t) ((f)*8 + (t))

 #define MAP(n)    (((n) <= 1) ? (n) : ((n) == INTFINITY) ? INF \

                               : N)

     sopno copy;


     if (p->error != 0) { /* head off possible runaway recursion */

         return;

     }


     /* an inverted range is dropped silently: no code and no error */
     if (from > to) {

         return;

     }


     switch (REP(MAP(from), MAP(to))) {

     case REP(0, 0): /* must be user doing this */

         DROP(finish - start); /* drop the operand */

         break;

     case REP(0, 1): /* as x{1,1}? */

     case REP(0, N): /* as x{1,n}? */

     case REP(0, INF): /* as x{1,}? */

         /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */

         INSERT(OCH_, start); /* offset is wrong... */

         repeat(p, start + 1, 1, to);

         ASTERN(OOR1, start);

         AHEAD(start); /* ... fix it */

         EMIT(OOR2, 0);

         AHEAD(THERE());

         ASTERN(O_CH, THERETHERE());

         break;

     case REP(1, 1): /* trivial case */

         /* done */

         break;

     case REP(1, N): /* as x?x{1,n-1} */

         /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */

         INSERT(OCH_, start);

         ASTERN(OOR1, start);

         AHEAD(start);

         EMIT(OOR2, 0); /* offset very wrong... */

         AHEAD(THERE()); /* ...so fix it */

         ASTERN(O_CH, THERETHERE());

         copy = dupl(p, start + 1, finish + 1);

         /* NOTE(review): finish + 4 presumably accounts for the four ops
          * added above (OCH_, OOR1, OOR2, O_CH); any other value means one
          * of them was not emitted, so the recursion is skipped -- confirm */
         if (copy == finish + 4) {

             repeat(p, copy, 1, to - 1);

         }

         break;

     case REP(1, INF): /* as x+ */

         INSERT(OPLUS_, start);

         ASTERN(O_PLUS, start);

         break;

     case REP(N, N): /* as xx{m-1,n-1} */

         copy = dupl(p, start, finish);

         repeat(p, copy, from - 1, to - 1);

         break;

     case REP(N, INF): /* as xx{n-1,INF} */

         copy = dupl(p, start, finish);

         repeat(p, copy, from - 1, to);

         break;

     default: /* "can't happen" */

         SETERROR(RZ_REGEX_ASSERT); /* just in case */

         break;

     }

 }


 /*

  - seterr - set an error condition

  */

 static int /* useless but makes type checking happy */

 seterr(struct parse *p, int e) {

     if (p->error == 0) { /* keep earliest error condition */

         p->error = e;

     }

     p->next = nuls; /* try to bring things to a halt */

     p->end = nuls;

     return (0); /* make the return value well-defined */

 }


 /*
  - allocset - allocate a set of characters for []
  *
  * Hands out set number ncsets (post-incremented).  Storage grows CHAR_BIT
  * sets at a time: the sets array is resized to ncsalloc entries and
  * setbits gains one more block of csetsize bytes.  Every block of
  * csetsize bytes is shared by CHAR_BIT sets, each owning one bit (mask)
  * of every byte.  Because realloc() may move setbits, the ptr of every
  * existing set is recomputed after growing, and the new block is zeroed.
  *
  * Returns the initialised set, or NULL after raising RZ_REGEX_ESPACE.  On
  * failure both arrays are released, and ncsets / ncsalloc are reset to 0
  * so the counters never describe storage that no longer exists.
  */
 static cset *allocset(struct parse *p) {
     int no = p->g->ncsets++;
     size_t nc;
     size_t nbytes;
     cset *cs;
     size_t css = (size_t)p->g->csetsize;
     int i;

     if (no >= p->ncsalloc) { /* need another column of space */
         void *ptr;

         p->ncsalloc += CHAR_BIT;
         nc = p->ncsalloc;
         if (nc % CHAR_BIT) {
             goto nomem;
         }
         nbytes = nc / CHAR_BIT * css;

         ptr = realloc(p->g->sets, nc * sizeof(cset));
         if (!ptr) {
             goto nomem;
         }
         p->g->sets = ptr;

         ptr = realloc(p->g->setbits, nbytes);
         if (!ptr) {
             goto nomem;
         }
         p->g->setbits = ptr;

         /* setbits may have moved: re-aim the sets handed out so far */
         for (i = 0; i < no; i++) {
             p->g->sets[i].ptr = p->g->setbits + css * (i / CHAR_BIT);
         }

         /* only the freshly added block needs clearing */
         (void)memset((char *)p->g->setbits + (nbytes - css), 0, css);
     }
     /* XXX should not happen */
     if (!p->g->sets || !p->g->setbits) {
         goto nomem;
     }

     cs = &p->g->sets[no];
     cs->ptr = p->g->setbits + css * ((no) / CHAR_BIT);
     cs->mask = 1 << ((no) % CHAR_BIT);
     cs->hash = 0;
     cs->smultis = 0;
     cs->multis = NULL;

     return (cs);
 nomem:
     RZ_FREE(p->g->sets);
     RZ_FREE(p->g->setbits);
     /* the arrays are gone, so no set (including "no") exists any more */
     p->g->ncsets = 0;
     p->ncsalloc = 0;

     SETERROR(RZ_REGEX_ESPACE);
     /* caller's responsibility not to do set ops */
     return (NULL);
 }


 /*
  - freeset - free a now-unused set
  *
  * Every possible member is removed from cs with CHsub().  The slot is
  * given back (ncsets decremented) only when cs is the most recently
  * allocated set; any other slot just stays behind, emptied.
  */
 static void freeset(struct parse *p, cset *cs) {
     cset *const top = &p->g->sets[p->g->ncsets];
     const size_t css = (size_t)p->g->csetsize;
     int i = 0;

     while (i < css) {
         CHsub(cs, i);
         i++;
     }
     if (cs + 1 == top) { /* recover only the easy case */
         p->g->ncsets--;
     }
 }


 /*
  - freezeset - final processing on a set of characters
  *
  * The main task here is merging identical sets.  This is usually a waste
  * of time (although the hash code minimizes the overhead), but can win
  * big if RZ_REGEX_ICASE is being used.  RZ_REGEX_ICASE, by the way, is why the hash
  * is done using addition rather than xor -- all ASCII [aA] sets xor to
  * the same value!
  *
  * Sets are scanned from the first one up; a candidate must be a different
  * set with the same hash and the same membership for every character.
  * When such a twin exists, cs is released with freeset() and the twin's
  * number is returned; otherwise the number of cs itself is returned.
  */
 static int /* set number */
 freezeset(struct parse *p, cset *cs) {
     const ut8 wanted = cs->hash;
     const size_t css = (size_t)p->g->csetsize;
     cset *const top = &p->g->sets[p->g->ncsets];
     cset *twin;

     for (twin = &p->g->sets[0]; twin < top; twin++) {
         int i = 0;

         if (twin == cs || twin->hash != wanted) {
             continue; /* cannot be a duplicate */
         }
         /* hashes agree: compare membership character by character */
         while (i < css && !!CHIN(twin, i) == !!CHIN(cs, i)) {
             i++;
         }
         if (i == css) { /* identical: keep the earlier one */
             freeset(p, cs);
             return ((int)(twin - p->g->sets));
         }
     }

     return ((int)(cs - p->g->sets));
 }


 /*
  - firstch - return first character in a set (which must have at least one)
  *
  * Candidates are tried in ascending order from 0 to csetsize - 1 and the
  * first member found is returned after conversion to char.  An empty set
  * yields 0.
  */
 static int /* character; there is no "none" value */
 firstch(struct parse *p, cset *cs) {
     const size_t css = (size_t)p->g->csetsize;
     int i = 0;

     while (i < css) {
         if (CHIN(cs, i)) {
             return ((char)i);
         }
         i++;
     }
     return 0; /* arbitrary */
 }


 /*
  - nch - number of characters in a set
  *
  * Counts how many of the values 0 .. csetsize - 1 are members of cs.
  */
 static int nch(struct parse *p, cset *cs) {
     const size_t css = (size_t)p->g->csetsize;
     int members = 0;
     int i;

     for (i = 0; i < css; i++) {
         members += CHIN(cs, i) ? 1 : 0;
     }
     return members;
 }


 /*
  - mcadd - add a collating element to a cset
  *
  * cs->multis holds the elements back to back, each with its own NUL,
  * followed by one more NUL that ends the list; cs->smultis is the total
  * byte count including that final NUL.  (NOTE(review): this is presumably
  * the "ab\0cd\0\0" layout intended by the cset type -- confirm.)
  *
  * A new element is written over the old list terminator, or at offset 0
  * when the list is still empty, and a fresh terminator is appended.
  *
  * On allocation failure the whole list is discarded (multis = NULL,
  * smultis = 0) and RZ_REGEX_ESPACE is raised.
  */
 static void mcadd(struct parse *p, cset *cs, char *cp) {
     const size_t len = strlen(cp);
     /* where the new element starts; an empty list has no terminator to
      * overwrite, so writing at "smultis - 1" would land before the buffer */
     const size_t at = cs->smultis ? cs->smultis - 1 : 0;
     /* element + its NUL + list terminator */
     const size_t newsize = at + len + 2;
     char *np = realloc(cs->multis, newsize);

     if (!np) {
         free(cs->multis);
         cs->multis = NULL;
         cs->smultis = 0; /* keep the size in step with the freed buffer */
         SETERROR(RZ_REGEX_ESPACE);
         return;
     }

     memcpy(np + at, cp, len + 1);
     np[newsize - 1] = '\0'; /* list terminator */
     cs->multis = np;
     cs->smultis = newsize;
 }


 /*
  - mcinvert - invert the list of collating elements in a cset
  *
  * This would have to know the set of possibilities.  Implementation
  * is deferred: the function is a deliberate no-op and touches neither
  * argument.
  */
 /* ARGSUSED */
 static void mcinvert(struct parse *p, cset *cs) {
     /* placeholder; an assertion that cs->multis is empty used to live here */
     (void)p;
     (void)cs;
 }


 /*
  - mccase - add case counterparts of the list of collating elements in a cset
  *
  * This would have to know the set of possibilities.  Implementation
  * is deferred: the function is a deliberate no-op and touches neither
  * argument.
  */
 /* ARGSUSED */
 static void mccase(struct parse *p, cset *cs) {
     /* placeholder; an assertion that cs->multis is empty used to live here */
     (void)p;
     (void)cs;
 }


 /*

  - isinsets - is this character in any sets?

  */

 static int /* predicate */

 isinsets(struct re_guts *g, int c) {

     ut8 *col;

     int i;

     int ncols = (g->ncsets + (CHAR_BIT - 1)) / CHAR_BIT;

     unsigned uc = (ut8)c;


     for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) {

         if (col[uc] != 0) {

             return (1);

         }

     }

     return (0);

 }


 /*

  - samesets - are these two characters in exactly the same sets?

  */

 static int /* predicate */

 samesets(struct re_guts *g, int c1, int c2) {

     ut8 *col;

     int i;

     int ncols = (g->ncsets + (CHAR_BIT - 1)) / CHAR_BIT;

     unsigned uc1 = (ut8)c1;

     unsigned uc2 = (ut8)c2;


     for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) {

         if (col[uc1] != col[uc2]) {

             return (0);

         }

     }

     return (1);

 }


 /*
  - categorize - sort out character categories
  *
  * Category 0 means "not assigned yet" (ordinary() treats it that way
  * too).  Every character that still has category 0 and belongs to at
  * least one set receives a fresh category number, and that number is
  * shared with every later, still uncategorized character that belongs to
  * exactly the same sets.  Characters that already carry a category are
  * left alone.
  *
  * The table is indexed with plain char values from CHAR_MIN to CHAR_MAX.
  * NOTE(review): this assumes g->categories was biased by -CHAR_MIN when it
  * was set up -- confirm.
  *
  * Does nothing when p or the category table is missing, or when an error
  * is already pending.
  */
 static void
 categorize(struct parse *p, struct re_guts *g) {
     cat_t *cats = g ? g->categories : NULL;
     int c;
     int c2;
     cat_t cat;

     /* avoid making error situations worse */
     if (!p || p->error != 0 || !cats) {
         return;
     }

     for (c = CHAR_MIN; c <= CHAR_MAX; c++) {
         /* only characters without a category are candidates */
         if (*(cats + c) == 0 && isinsets(g, c)) {
             cat = g->ncategories++;
             cats[c] = cat;
             for (c2 = c + 1; c2 <= CHAR_MAX; c2++) {
                 if (cats[c2] == 0 && samesets(g, c, c2)) {
                     cats[c2] = cat;
                 }
             }
         }
     }
 }


 /*
  - dupl - emit a duplicate of a bunch of sops
  *
  * Appends a copy of strip[start .. finish-1] to the end of the strip.
  * The return value is always the strip length on entry, i.e. where the
  * duplicate starts when one was made.  Nothing is copied when the range
  * is empty or inverted, or when the strip could not be grown enough.
  */
 static sopno /* start of duplicate */
 dupl(struct parse *p,
     sopno start, /* from here */
     sopno finish) /* to this less one */
 {
     const sopno ret = HERE();
     const sopno len = finish - start;

     if (finish < start || len == 0) {
         return ret;
     }

     enlarge(p, p->ssize + len); /* this many unexpected additions */
     if (p->ssize >= p->slen + len) {
         (void)memcpy(p->strip + p->slen, p->strip + start,
             (size_t)len * sizeof(sop));
         p->slen += len;
     }
     return ret;
 }


 /*
  - doemit - emit a strip operator
  *
  * It might seem better to implement this as a macro with a function as
  * hard-case backup, but it's just too big and messy unless there are
  * some changes to the data structures.  Maybe later.
  *
  * Appends SOP(op, opnd) to the strip, growing the strip by about half when
  * it is full.  The op is silently not emitted when an error is pending,
  * when opnd does not fit below 1 << OPSHIFT, or when growing failed.
  */
 static void
 doemit(struct parse *p, sop op, size_t opnd) {
     /* avoid making error situations worse */
     if (p->error != 0) {
         return;
     }
     /* deal with oversize operands ("can't happen", more or less) */
     if (!(opnd < 1 << OPSHIFT)) {
         return;
     }
     /* deal with undersized strip */
     if (p->slen >= p->ssize) {
         enlarge(p, (p->ssize + 1) / 2 * 3); /* +50% */
     }
     if (p->slen >= p->ssize) {
         return; /* still no room */
     }
     /* finally, it's all reduced to the easy case */
     p->strip[p->slen++] = SOP(op, opnd);
 }


 /*
  - doinsert - insert a sop into the strip
  *
  * The op is first emitted at the end of the strip (which takes care of
  * the checks and of making room), then everything from pos onwards is
  * shifted up by one slot and the new op is stored at pos.  For pos > 0
  * the recorded parenthesis positions at or beyond pos are bumped as well.
  * Nothing happens when an error is pending or the emit did not add
  * exactly one op.
  */
 static void
 doinsert(struct parse *p, sop op, size_t opnd, sopno pos) {
     sopno tail;
     sop fresh;
     int i;

     /* avoid making error situations worse */
     if (p->error != 0) {
         return;
     }

     tail = HERE();
     EMIT(op, opnd); /* do checks, ensure space */
     if (HERE() != tail + 1) {
         return;
     }
     fresh = p->strip[tail];

     /* adjust paren pointers */
     for (i = 1; pos > 0 && i < NPAREN; i++) {
         if (p->pbegin[i] >= pos) {
             p->pbegin[i]++;
         }
         if (p->pend[i] >= pos) {
             p->pend[i]++;
         }
     }

     /* open a gap at pos and drop the new op into it */
     memmove(&p->strip[pos + 1], &p->strip[pos],
         (HERE() - pos - 1) * sizeof(sop));
     p->strip[pos] = fresh;
 }


 /*
  - dofwd - complete a forward reference
  *
  * Replaces the operand of strip[pos] with value while keeping its opcode.
  * The strip is left untouched when an error is pending or when value does
  * not fit below 1 << OPSHIFT.
  */
 static void
 dofwd(struct parse *p, sopno pos, sop value) {
     /* avoid making error situations worse */
     if (p->error != 0) {
         return;
     }
     if (!(value < 1 << OPSHIFT)) {
         return;
     }
     p->strip[pos] = OP(p->strip[pos]) | value;
 }


 /*
  - enlarge - enlarge the strip
  *
  * Grows the strip to hold size ops.  A request that is not larger than
  * the current capacity is ignored.  When realloc() fails the old strip
  * and capacity are kept and RZ_REGEX_ESPACE is raised.
  */
 static void
 enlarge(struct parse *p, sopno size) {
     sop *grown;

     if (size <= p->ssize) {
         return;
     }

     grown = (sop *)realloc(p->strip, size * sizeof(sop));
     if (grown) {
         p->strip = grown;
         p->ssize = size;
         return;
     }
     SETERROR(RZ_REGEX_ESPACE);
 }


 /*

  - stripsnug - compact the strip

  */

 static void

 stripsnug(struct parse *p, struct re_guts *g) {

     g->nstates = p->slen;

     g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop));

     if (!g->strip) {

         SETERROR(RZ_REGEX_ESPACE);

         g->strip = p->strip;

     }

 }


 /*

  - findmust - fill in must and mlen with longest mandatory literal string

  *

  * This algorithm could do fancy things like analyzing the operands of |

  * for common subsequences.  Someday.  This code is simple and finds most

  * of the interesting cases.

  *

  * Note that must and mlen got initialized during setup.

  *
  * The strip is walked from index 1 up to OEND while runs of OCHAR ops are
  * tracked.  A run is not interrupted by OPLUS_, OLPAREN or ORPAREN; any
  * other op ends it.  The first run longer than everything seen so far is
  * remembered, and at the end its characters are copied into a freshly
  * malloc'd, NUL-terminated g->must, with the length in g->mlen.
  *
  * Returns without touching g->must when an error is pending or no run
  * was found.  If malloc() fails, g->mlen is reset to 0.  If a skipped
  * OQUEST_/OCH_ construct is not closed the expected way, BAD is set in
  * g->iflags and the function returns early.

  */

 static void

 findmust(struct parse *p, struct re_guts *g) {

     sop *scan;

     sop *start = NULL; /* start initialized in the default case, after that */

     sop *newstart = NULL; /* newstart was initialized in the OCHAR case */

     sopno newlen;

     sop s;

     char *cp;

     sopno i;


     /* avoid making error situations worse */

     if (p->error != 0) {

         return;

     }


     /* find the longest OCHAR sequence in strip */

     newlen = 0;

     start = scan = g->strip + 1;

     do {

         s = *scan++;

         switch (OP(s)) {

         case OCHAR: /* sequence member */

             if (newlen == 0) { /* new sequence */

                 newstart = scan - 1;

             }

             newlen++;

             break;

         case OPLUS_: /* things that don't break one */

         case OLPAREN:

         case ORPAREN:

             break;

         case OQUEST_: /* things that must be skipped */

         case OCH_:

             /* back up onto the opening op, then hop forward by each
              * op's operand until the closing O_QUEST / O_CH is reached */
             scan--;

             do {

                 scan += OPND(s);

                 s = *scan;

                 /* asert() interferes w debug printouts */

                 if (OP(s) != O_QUEST && OP(s) != O_CH &&

                     OP(s) != OOR2) {

                     g->iflags |= BAD;

                     return;

                 }

             } while (OP(s) != O_QUEST && OP(s) != O_CH);

             /* fallthrough */

         default: /* things that break a sequence */

             /* strictly longer only: on a tie the earlier run is kept */
             if (newlen > g->mlen) { /* ends one */

                 start = newstart;

                 g->mlen = newlen;

             }

             newlen = 0;

             break;

         }

     } while (OP(s) != OEND);


     if (g->mlen == 0) { /* there isn't one */

         return;

     }


     /* turn it into a character string */

     g->must = malloc((size_t)g->mlen + 1);

     if (!g->must) { /* argh; just forget it */

         g->mlen = 0;

         return;

     }

     cp = g->must;

     scan = start;

     for (i = g->mlen; i > 0; i--) {

         /* ops that did not break the run (e.g. parens) are skipped here */
         while (OP(s = *scan++) != OCHAR) {

             continue;

         }

         if (cp < g->must + g->mlen) {

             *cp++ = (char)OPND(s);

         }

     }

     if (cp == g->must + g->mlen) {

         *cp++ = '\0'; /* just on general principles */

     }

 }


 /*
  - pluscount - count + nesting
  *
  * Walks the strip from index 1 up to OEND.  Each OPLUS_ opens a level and
  * each O_PLUS closes one; the deepest level seen at a closing op is the
  * result.  If openers and closers do not balance, BAD is set in
  * g->iflags.  Returns 0 straight away when an error is pending.
  */
 static sopno /* nesting depth */
 pluscount(struct parse *p, struct re_guts *g) {
     sop *scan;
     sop s;
     sopno depth = 0;
     sopno deepest = 0;

     if (p->error != 0) {
         return 0; /* there may not be an OEND */
     }

     for (scan = g->strip + 1;;) {
         s = *scan++;
         if (OP(s) == OPLUS_) {
             depth++;
         } else if (OP(s) == O_PLUS) {
             if (depth > deepest) {
                 deepest = depth;
             }
             depth--;
         }
         if (OP(s) == OEND) {
             break;
         }
     }

     if (depth != 0) {
         g->iflags |= BAD;
     }
     return deepest;
 }

len
size_t len
Definition: 6502dis.c:15

OPND
#define OPND(x)
Definition: aarch64-tbl.h:33

invert
static unsigned invert(unsigned x)
Definition: aesdata.c:73

e
#define e(frag)
Definition: analysis_8051.c:218

finish
static bool finish(void *user)
Definition: analysis_pyc.c:133

i
lzma_index ** i
Definition: index.h:629

cset
static RzILOpEffect * cset(cs_insn *insn)
Definition: arm_il64.c:899

c1
lsl lsr asr ror lsl lsr asr ror lsl lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror c1
Definition: basic-arm-instructions.s.cs:286

c2
lsl lsr asr ror lsl lsr asr ror lsl lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror lsl lsr asr ror c2
Definition: basic-arm-instructions.s.cs:285

CHAR_BIT
#define CHAR_BIT
Definition: readbits.h:99

cclass.h

cclasses
static struct cclass cclasses[]

value
static int value
Definition: cmd_api.c:93

cname.h

cnames
static struct cname cnames[]

RZ_API
#define RZ_API
Definition: core_plugin_example.c:36

NULL
#define NULL
Definition: cris-opc.c:27

r
#define r
Definition: crypto_rc6.c:12

nbytes
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void static offset struct stat static buf void nbytes
Definition: sflib.h:113

count
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void count
Definition: sflib.h:98

start
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void static offset struct stat static buf void long static basep static whence static length const void static len static semflg const void static shmflg const struct timespec struct timespec static rem const char static group const void start
Definition: sflib.h:133

ut8
#define ut8
Definition: dcpu16.h:8

g
struct @667 g

OP
#define OP(v, w, x, y, z)

match
unsigned char match[65280+2]
Definition: gun.c:165

free
RZ_API void Ht_() free(HtName_(Ht) *ht)
Definition: ht_inc.c:130

size
voidpf void uLong size
Definition: ioapi.h:138

ut8
uint8_t ut8
Definition: lh5801.h:11

memset
return memset(p, 0, total)

p
void * p
Definition: libc.cpp:67

memcpy
memcpy(mem, inblock.get(), min(CONTAINING_RECORD(inblock.get(), MEMBLOCK, data) ->size, size))

list
static void list(RzEgg *egg)
Definition: rz-gg.c:52

rz_list_newf
RZ_API RZ_OWN RzList * rz_list_newf(RzListFree f)
Returns a new initialized RzList pointer and sets the free method.
Definition: list.c:248

rz_list_append
RZ_API RZ_BORROW RzListIter * rz_list_append(RZ_NONNULL RzList *list, void *data)
Appends at the end of the list a new element.
Definition: list.c:288

NC
#define NC
Definition: utils.h:42

STRLCPY
#define STRLCPY(x, y, z)
Definition: utils.h:44

DUPMAX
#define DUPMAX
Definition: utils.h:39

INTFINITY
#define INTFINITY
Definition: utils.h:41

realloc
void * realloc(void *ptr, size_t size)
Definition: malloc.c:144

malloc
void * malloc(size_t size)
Definition: malloc.c:123

calloc
void * calloc(size_t number, size_t size)
Definition: malloc.c:102

num
static static fork const void static count static fd const char const char static newpath char char char static envp time_t static t const char static mode static whence const char static dir time_t static t unsigned static seconds const char struct utimbuf static buf static inc static sig const char static mode static oldfd struct tms static buf static getgid static geteuid const char static filename static arg static mask struct ustat static ubuf static getppid static setsid static egid sigset_t static set struct timeval struct timezone static tz fd_set fd_set fd_set struct timeval static timeout const char char static bufsiz const char static swapflags void static offset const char static length static mode static who const char struct statfs static buf unsigned unsigned num
Definition: sflib.h:126

n
int n
Definition: mipsasm.c:19

create_tags_rz.text
string text
Definition: create_tags_rz.py:13

test_evm.cs
cs
Definition: test_evm.py:8

mccase
static void mccase(struct parse *, cset *)
Definition: regcomp.c:1429

MAP
#define MAP(n)

N
#define N

samesets
static int samesets(struct re_guts *, int, int)
Definition: regcomp.c:1456

stripsnug
static void stripsnug(struct parse *, struct re_guts *)
Definition: regcomp.c:1630

NEXT2
#define NEXT2()
Definition: regcomp.c:125

HERE
#define HERE()
Definition: regcomp.c:137

rz_regex_flags
RZ_API int rz_regex_flags(const char *f)
Definition: regcomp.c:197

INF
#define INF

p_b_coll_elem
static char p_b_coll_elem(struct parse *, int)
Definition: regcomp.c:988

SEETWO
#define SEETWO(a, b)
Definition: regcomp.c:121

mcadd
static void mcadd(struct parse *, cset *, char *)
Definition: regcomp.c:1391

GETNEXT
#define GETNEXT()
Definition: regcomp.c:127

p_ere_exp
static void p_ere_exp(struct parse *)
Definition: regcomp.c:422

dupl
static sopno dupl(struct parse *, sopno, sopno)
Definition: regcomp.c:1503

DROP
#define DROP(n)
Definition: regcomp.c:140

othercase
static char othercase(int)
Definition: regcomp.c:1019

p_b_cclass
static void p_b_cclass(struct parse *, cset *)
Definition: regcomp.c:923

doemit
static void doemit(struct parse *, sop, size_t)
Definition: regcomp.c:1533

NPAREN
#define NPAREN
Definition: regcomp.c:66

rz_regex_new
RZ_API RzRegex * rz_regex_new(const char *pattern, const char *flags)
Definition: regcomp.c:183

nonnewline
static void nonnewline(struct parse *)
Definition: regcomp.c:1133

p_b_eclass
static void p_b_eclass(struct parse *, cset *)
Definition: regcomp.c:959

freezeset
static int freezeset(struct parse *, cset *)
Definition: regcomp.c:1326

rz_regex_free
RZ_API void rz_regex_free(RzRegex *preg)
Definition: regcomp.c:249

BACKSL
#define BACKSL

ASTERN
#define ASTERN(sop, pos)
Definition: regcomp.c:136

AHEAD
#define AHEAD(pos)
Definition: regcomp.c:135

ordinary
static void ordinary(struct parse *, int)
Definition: regcomp.c:1062

MUSTEAT
#define MUSTEAT(c, e)
Definition: regcomp.c:131

SETERROR
#define SETERROR(e)
Definition: regcomp.c:128

PEEK2
#define PEEK2()
Definition: regcomp.c:117

categorize
static void categorize(struct parse *, struct re_guts *)
Definition: regcomp.c:1475

p_str
static void p_str(struct parse *)
Definition: regcomp.c:573

GOODFLAGS
#define GOODFLAGS(f)

bothcases
static void bothcases(struct parse *, int)
Definition: regcomp.c:1038

INSERT
#define INSERT(op, pos)
Definition: regcomp.c:134

nuls
static char nuls[10]
Definition: regcomp.c:110

repeat
static void repeat(struct parse *, sopno, int, int)
Definition: regcomp.c:1155

p_b_term
static void p_b_term(struct parse *, cset *)
Definition: regcomp.c:859

isinsets
static int isinsets(struct re_guts *, int)
Definition: regcomp.c:1438

NEXTn
#define NEXTn(n)
Definition: regcomp.c:126

PEEK
#define PEEK()
Definition: regcomp.c:116

SEE
#define SEE(c)
Definition: regcomp.c:120

rz_regex_get_match_list
RZ_API RzList * rz_regex_get_match_list(const char *pattern, const char *flags, const char *text)
Definition: regcomp.c:155

p_bre
static void p_bre(struct parse *, int, int)
Definition: regcomp.c:590

THERE
#define THERE()
Definition: regcomp.c:138

nch
static int nch(struct parse *, cset *)
Definition: regcomp.c:1375

doinsert
static void doinsert(struct parse *, sop, size_t, sopno)
Definition: regcomp.c:1557

pluscount
static sopno pluscount(struct parse *, struct re_guts *)
Definition: regcomp.c:1732

p_ere
static void p_ere(struct parse *, int)
Definition: regcomp.c:381

rz_regex_comp
RZ_API int rz_regex_comp(RzRegex *preg, const char *pattern, int cflags)
Definition: regcomp.c:258

p_b_symbol
static char p_b_symbol(struct parse *)
Definition: regcomp.c:970

MORE
#define MORE()
Definition: regcomp.c:118

findmust
static void findmust(struct parse *, struct re_guts *)
Definition: regcomp.c:1649

REP
#define REP(f, t)

allocset
static cset * allocset(struct parse *)
Definition: regcomp.c:1242

rz_regex_match
RZ_API int rz_regex_match(const char *pattern, const char *flags, const char *text)
Definition: regcomp.c:142

p_bracket
static void p_bracket(struct parse *)
Definition: regcomp.c:768

dofwd
static void dofwd(struct parse *, sopno, sop)
Definition: regcomp.c:1595

p_count
static int p_count(struct parse *)
Definition: regcomp.c:749

enlarge
static void enlarge(struct parse *, sopno)
Definition: regcomp.c:1610

rz_regex_fini
RZ_API void rz_regex_fini(RzRegex *preg)
Definition: regcomp.c:226

MORE2
#define MORE2()
Definition: regcomp.c:119

REQUIRE
#define REQUIRE(co, e)
Definition: regcomp.c:129

EAT
#define EAT(c)
Definition: regcomp.c:122

EMIT
#define EMIT(op, sopnd)
Definition: regcomp.c:133

THERETHERE
#define THERETHERE()
Definition: regcomp.c:139

p_simp_re
static int p_simp_re(struct parse *, int)
Definition: regcomp.c:621

freeset
static void freeset(struct parse *, cset *)
Definition: regcomp.c:1303

mcinvert
static void mcinvert(struct parse *, cset *)
Definition: regcomp.c:1417

seterr
static int seterr(struct parse *, int)
Definition: regcomp.c:1230

NEXT
#define NEXT()
Definition: regcomp.c:124

EATTWO
#define EATTWO(a, b)
Definition: regcomp.c:123

special
static void special(struct parse *, int)
Definition: regcomp.c:1076

firstch
static int firstch(struct parse *, cset *)
Definition: regcomp.c:1360

regex2.h

sop
unsigned long sop
Definition: regex2.h:62

CHsub
#define CHsub(cs, c)
Definition: regex2.h:114

sopno
long sopno
Definition: regex2.h:63

O_CH
#define O_CH
Definition: regex2.h:89

OBOL
#define OBOL
Definition: regex2.h:74

OCH_
#define OCH_
Definition: regex2.h:86

OOR2
#define OOR2
Definition: regex2.h:88

OEND
#define OEND
Definition: regex2.h:72

OCHAR
#define OCHAR
Definition: regex2.h:73

OQUEST_
#define OQUEST_
Definition: regex2.h:82

OEOL
#define OEOL
Definition: regex2.h:75

OLPAREN
#define OLPAREN
Definition: regex2.h:84

OBOW
#define OBOW
Definition: regex2.h:90

CHadd
#define CHadd(cs, c)
Definition: regex2.h:113

OPLUS_
#define OPLUS_
Definition: regex2.h:80

USEEOL
#define USEEOL
Definition: regex2.h:140

USEBOL
#define USEBOL
Definition: regex2.h:139

O_QUEST
#define O_QUEST
Definition: regex2.h:83

CHIN
#define CHIN(cs, c)
Definition: regex2.h:115

OANYOF
#define OANYOF
Definition: regex2.h:77

O_PLUS
#define O_PLUS
Definition: regex2.h:81

SOP
#define SOP(op, opnd)
Definition: regex2.h:69

O_BACK
#define O_BACK
Definition: regex2.h:79

MCadd
#define MCadd(p, cs, cp)
Definition: regex2.h:116

OBACK_
#define OBACK_
Definition: regex2.h:78

OPSHIFT
#define OPSHIFT
Definition: regex2.h:66

OEOW
#define OEOW
Definition: regex2.h:91

MAGIC1
#define MAGIC1
Definition: regex2.h:41

ORPAREN
#define ORPAREN
Definition: regex2.h:85

MAGIC2
#define MAGIC2
Definition: regex2.h:128

OUT
#define OUT
Definition: regex2.h:157

cat_t
unsigned char cat_t
Definition: regex2.h:121

OOR1
#define OOR1
Definition: regex2.h:87

OANY
#define OANY
Definition: regex2.h:76

BAD
#define BAD
Definition: regex2.h:141

eprintf
#define eprintf(x, y...)
Definition: rlcc.c:7

s
static RzSocket * s
Definition: rtr.c:28

rz_assert.h

rz_return_val_if_fail
#define rz_return_val_if_fail(expr, val)
Definition: rz_assert.h:108

rz_regex.h

RZ_REGEX_PEND
#define RZ_REGEX_PEND
Definition: rz_regex.h:28

RZ_REGEX_ESPACE
#define RZ_REGEX_ESPACE
Definition: rz_regex.h:44

rz_regex_exec
RZ_API int rz_regex_exec(const RzRegex *preg, const char *string, size_t nmatch, RzRegexMatch __pmatch[], int eflags)
Definition: regexec.c:149

RZ_REGEX_STARTEND
#define RZ_REGEX_STARTEND
Definition: rz_regex.h:56

RZ_REGEX_ICASE
#define RZ_REGEX_ICASE
Definition: rz_regex.h:24

RZ_REGEX_NEWLINE
#define RZ_REGEX_NEWLINE
Definition: rz_regex.h:26

RZ_REGEX_BADBR
#define RZ_REGEX_BADBR
Definition: rz_regex.h:42

RZ_REGEX_EXTENDED
#define RZ_REGEX_EXTENDED
Definition: rz_regex.h:23

RZ_REGEX_ECTYPE
#define RZ_REGEX_ECTYPE
Definition: rz_regex.h:36

RZ_REGEX_EBRACK
#define RZ_REGEX_EBRACK
Definition: rz_regex.h:39

RZ_REGEX_NOSPEC
#define RZ_REGEX_NOSPEC
Definition: rz_regex.h:27

RZ_REGEX_ECOLLATE
#define RZ_REGEX_ECOLLATE
Definition: rz_regex.h:35

RZ_REGEX_ERANGE
#define RZ_REGEX_ERANGE
Definition: rz_regex.h:43

RZ_REGEX_ESUBREG
#define RZ_REGEX_ESUBREG
Definition: rz_regex.h:38

RZ_REGEX_EBRACE
#define RZ_REGEX_EBRACE
Definition: rz_regex.h:41

RZ_REGEX_EMPTY
#define RZ_REGEX_EMPTY
Definition: rz_regex.h:46

RZ_REGEX_NOSUB
#define RZ_REGEX_NOSUB
Definition: rz_regex.h:25

RZ_REGEX_BADRPT
#define RZ_REGEX_BADRPT
Definition: rz_regex.h:45

RZ_REGEX_DUMP
#define RZ_REGEX_DUMP
Definition: rz_regex.h:29

RZ_REGEX_INVARG
#define RZ_REGEX_INVARG
Definition: rz_regex.h:48

RZ_REGEX_EPAREN
#define RZ_REGEX_EPAREN
Definition: rz_regex.h:40

RZ_REGEX_EESCAPE
#define RZ_REGEX_EESCAPE
Definition: rz_regex.h:37

RZ_REGEX_ASSERT
#define RZ_REGEX_ASSERT
Definition: rz_regex.h:47

rz_str.h

rz_str_ncpy
RZ_API size_t rz_str_ncpy(char *dst, const char *src, size_t n)
Secure string copy with null terminator.
Definition: str.c:923

RZ_NEW
#define RZ_NEW(x)
Definition: rz_types.h:285

RZ_NEWS0
#define RZ_NEWS0(x, y)
Definition: rz_types.h:282

RZ_FREE
#define RZ_FREE(x)
Definition: rz_types.h:369

islower
#define islower(c)
Definition: safe-ctype.h:135

tolower
#define tolower(c)
Definition: safe-ctype.h:149

isalpha
#define isalpha(c)
Definition: safe-ctype.h:125

isdigit
#define isdigit(c)
Definition: safe-ctype.h:131

isupper
#define isupper(c)
Definition: safe-ctype.h:143

toupper
#define toupper(c)
Definition: safe-ctype.h:147

from
static struct sockaddr static addrlen static backlog const void static flags void struct sockaddr from
Definition: sfsocketcall.h:123

to
static struct sockaddr static addrlen static backlog const void static flags void struct sockaddr socklen_t static fromlen const void const struct sockaddr to
Definition: sfsocketcall.h:125

flags
static struct sockaddr static addrlen static backlog const void static flags void flags
Definition: sfsocketcall.h:123

size_t
int size_t
Definition: sftypes.h:40

f
#define f(i)
Definition: sha256.c:46

c
#define c(i)
Definition: sha256.c:43

h
#define h(i)
Definition: sha256.c:48

cclass
Definition: cclass.h:39

cclass::multis
char * multis
Definition: cclass.h:42

cclass::name
char * name
Definition: cclass.h:40

cclass::chars
char * chars
Definition: cclass.h:41

cname
Definition: cname.h:39

cname::code
char code
Definition: cname.h:41

cname::name
char * name
Definition: cname.h:40

cset
Definition: regex2.h:105

cset::hash
ut8 hash
Definition: regex2.h:108

entry
Definition: zipcmp.c:77

match
Definition: engine.c:71

parse
Definition: regcomp.c:57

parse::g
struct re_guts * g
Definition: regcomp.c:65

parse::pend
sopno pend[NPAREN]
Definition: regcomp.c:68

parse::next
char * next
Definition: regcomp.c:58

parse::slen
sopno slen
Definition: regcomp.c:63

parse::ssize
sopno ssize
Definition: regcomp.c:62

parse::error
int error
Definition: regcomp.c:60

parse::pbegin
sopno pbegin[NPAREN]
Definition: regcomp.c:67

parse::strip
sop * strip
Definition: regcomp.c:61

parse::ncsalloc
int ncsalloc
Definition: regcomp.c:64

parse::end
char * end
Definition: regcomp.c:59

re_guts
Definition: regex2.h:126

re_guts::cflags
int cflags
Definition: regex2.h:134

rz_list_t
Definition: rz_list.h:18

rz_regex_t
Definition: rz_regex.h:8

rz_regex_t::re_endp
const char * re_endp
Definition: rz_regex.h:11

rz_regex_t::re_flags
int re_flags
Definition: rz_regex.h:13

rz_regex_t::re_nsub
size_t re_nsub
Definition: rz_regex.h:10

rz_regex_t::re_magic
int re_magic
Definition: rz_regex.h:9

rz_regex_t::re_g
struct re_guts * re_g
Definition: rz_regex.h:12

rz_regmatch_t
Definition: rz_regex.h:16

pos
int pos
Definition: main.c:11

maxlen
ut64 maxlen
Definition: core.c:76

utils.h

op
Definition: dis.c:32

if
if(dbg->bits==RZ_SYS_BITS_64)
Definition: windows-arm64.h:4

sp
static int sp
Definition: z80asm.c:91

cat
static int cat(char *argv[])
Definition: ziptool.c:170