Rizin
unix-like reverse engineering framework and cli tools
regexec.c
Go to the documentation of this file.
1 /* $OpenBSD: regexec.c,v 1.11 2005/08/05 13:03:00 espie Exp $ */
2 /*-
3  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
4  * Copyright (c) 1992, 1993, 1994
5  * The Regents of the University of California. All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Henry Spencer.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  * notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in the
17  * documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  * may be used to endorse or promote products derived from this software
20  * without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * @(#)regexec.c 8.3 (Berkeley) 3/20/94
35  */
36 
37 /*
38  * the outer shell of regexec()
39  *
40  * This file includes engine.c *twice*, after muchos fiddling with the
41  * macros that code uses. This lets the same code operate on two different
42  * representations for state sets.
43  */
44 #include <sys/types.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include <limits.h>
49 #include <ctype.h>
50 #include <rz_regex.h>
51 
52 #include "utils.h"
53 #include "regex2.h"
54 
55 /* macros for manipulating states, small version */
/*
 * Small representation: a whole state set is held in one integer of type
 * `states` (an alias for `states1`, i.e. long long), with state n stored
 * in bit n. All shifts are performed on `unsigned states` so the bit
 * arithmetic is done on an unsigned value.
 */
56 #define states1 long long /* for later use in regexec() decision */
57 #define states states1
/* CLEAR empties the set; SET0/SET1 clear/set bit n; ISSET tests bit n. */
58 #define CLEAR(v) ((v) = 0)
59 #define SET0(v, n) ((v) &= ~((unsigned states)1 << (n)))
60 #define SET1(v, n) ((v) |= (unsigned states)1 << (n))
61 #define ISSET(v, n) (((v) & ((unsigned states)1 << (n))) != 0)
/* Sets are plain integers here, so copy and compare are = and ==. */
62 #define ASSIGN(d, s) ((d) = (s))
63 #define EQ(a, b) ((a) == (b))
/*
 * No backing storage is needed for this representation: STATEVARS only
 * declares a placeholder, STATESETUP/STATETEARDOWN expand to nothing, and
 * SETUP just zeroes the set.
 * NOTE(review): STATEVARS is presumably expanded inside a structure
 * declared by engine.c -- confirm there.
 */
64 #define STATEVARS states dummy /* dummy version */
65 #define STATESETUP(m, n) /* nothing */
66 #define STATETEARDOWN(m) /* nothing */
67 #define SETUP(v) ((v) = 0)
/*
 * A single state (`onestate`) is a one-bit mask: INIT builds the mask for
 * state n, INC shifts it to the next state, ISSTATEIN tests it against a set.
 */
68 #define onestate states
69 #define INIT(o, n) ((o) = (unsigned states)1 << (n))
70 #define INC(o) ((o) <<= 1)
71 #define ISSTATEIN(v, o) (((v) & (o)) != 0)
72 /* some abbreviations; note that some of these know variable names! */
73 /* do "if I'm here, I can also be there" etc without branches */
/*
 * These use a variable named `here` taken from the expansion site and treat
 * it as a bit mask. FWD/BACK OR into dst the bits of src selected by `here`,
 * shifted n positions left/right; ISSETBACK tests v against `here` shifted
 * right by n.
 */
74 #define FWD(dst, src, n) ((dst) |= ((unsigned states)(src) & (here)) << (n))
75 #define BACK(dst, src, n) ((dst) |= ((unsigned states)(src) & (here)) >> (n))
76 #define ISSETBACK(v, n) (((v) & ((unsigned states)here >> (n))) != 0)
77 /* function names */
/* Defined empty, as a flag only; how it is consumed is up to engine.c. */
78 #define SNAMES /* engine.c looks after details */
79 
80 #include "engine.c"
81 
82 /* now undo things */
/*
 * Remove every small-version macro so the large version can redefine the
 * same names. `states1` is deliberately NOT undefined: it is still used
 * by the large-version macros (STATEVARS, onestate) and by the size test in
 * rz_regex_exec().
 */
83 #undef states
84 #undef CLEAR
85 #undef SET0
86 #undef SET1
87 #undef ISSET
88 #undef ASSIGN
89 #undef EQ
90 #undef STATEVARS
91 #undef STATESETUP
92 #undef STATETEARDOWN
93 #undef SETUP
94 #undef onestate
95 #undef INIT
96 #undef INC
97 #undef ISSTATEIN
98 #undef FWD
99 #undef BACK
100 #undef ISSETBACK
101 #undef SNAMES
102 
103 /* macros for manipulating states, large version */
/*
 * Large representation: a state set is a char buffer with one byte per
 * state. Whole-set operations (CLEAR/ASSIGN/EQ) work on m->g->nstates bytes,
 * so they rely on a variable named `m` being in scope where they expand.
 */
104 #define states char *
105 #define CLEAR(v) memset(v, 0, m->g->nstates)
/* SET0/SET1 store 0/1 in byte n; ISSET yields the stored byte. */
106 #define SET0(v, n) ((v)[n] = 0)
107 #define SET1(v, n) ((v)[n] = 1)
108 #define ISSET(v, n) ((v)[n])
109 #define ASSIGN(d, s) memcpy(d, s, m->g->nstates)
110 #define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0)
/*
 * Extra fields needed by this representation: `space` is the single
 * allocation that backs all state sets, `vn` counts how many sets SETUP has
 * handed out so far.
 */
111 #define STATEVARS \
112  states1 vn; \
113  char *space
/*
 * Allocate room for nv state sets of nstates bytes each and reset the
 * hand-out counter. On allocation failure this executes `return
 * RZ_REGEX_ESPACE` in the function where the macro is expanded.
 * NOTE(review): the size product is not checked for overflow here.
 */
114 #define STATESETUP(m, nv) \
115  { \
116  (m)->space = malloc((nv) * (m)->g->nstates); \
117  if (!(m)->space) \
118  return RZ_REGEX_ESPACE; \
119  (m)->vn = 0; \
120  }
/* Release the buffer obtained by STATESETUP. */
121 #define STATETEARDOWN(m) \
122  { free((m)->space); }
/*
 * Point v at the next unused nstates-byte slice of m->space and advance the
 * counter. No check is made here against the nv passed to STATESETUP.
 */
123 #define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates])
/*
 * A single state (`onestate`) is an integer index into the buffer: INIT
 * stores the index, INC steps to the next one, ISSTATEIN reads that byte.
 */
124 #define onestate states1
125 #define INIT(o, n) ((o) = (n))
126 #define INC(o) ((o)++)
127 #define ISSTATEIN(v, o) ((v)[o])
128 /* some abbreviations; note that some of these know variable names! */
129 /* do "if I'm here, I can also be there" etc without branches */
/*
 * These use a variable named `here` taken from the expansion site and treat
 * it as an index. FWD/BACK OR src[here] into the dst entry n positions
 * after/before `here`; ISSETBACK reads the entry n positions before `here`.
 */
130 #define FWD(dst, src, n) ((dst)[here + (n)] |= (src)[here])
131 #define BACK(dst, src, n) ((dst)[here - (n)] |= (src)[here])
132 #define ISSETBACK(v, n) ((v)[here - (n)])
133 /* function names */
/* Defined empty, as a flag only; how it is consumed is up to engine.c. */
134 #define LNAMES /* flag */
135 
136 #include "engine.c"
137 
138 RZ_API bool rz_regex_check(const RzRegex *rr, const char *str) {
139  return rz_regex_exec(rr, str, 0, NULL, rr->re_flags);
140 }
141 /*
142  - regexec - interface for matching
143  *
144  * We put this here so we can exploit knowledge of the state representation
145  * when choosing which matcher to call. Also, by this point the matchers
146  * have been prototyped.
147  */
148 /* 0 success, RZ_REGEX_NOMATCH failure */
149 RZ_API int rz_regex_exec(const RzRegex *preg, const char *string, size_t nmatch,
150  RzRegexMatch pmatch[], int eflags) {
151  struct re_guts *g;
152 #ifdef REDEBUG
153 #define GOODFLAGS(f) (f)
154 #else
155 #define GOODFLAGS(f) ((f) & (RZ_REGEX_NOTBOL | RZ_REGEX_NOTEOL | RZ_REGEX_STARTEND | RZ_REGEX_LARGE))
156 #endif
157  if (!preg || !string) {
158  return RZ_REGEX_ASSERT;
159  }
160 
161  g = preg->re_g;
162  if (preg->re_magic != MAGIC1 || g->magic != MAGIC2) {
163  return (RZ_REGEX_BADPAT);
164  }
165  if (g->iflags & BAD) { /* backstop for no-debug case */
166  return (RZ_REGEX_BADPAT);
167  }
168  eflags = GOODFLAGS(eflags);
169  if (g->nstates <= CHAR_BIT * sizeof(states1) && !(eflags & RZ_REGEX_LARGE)) {
170  return (smatcher(g, (char *)string, nmatch, pmatch, eflags));
171  } else {
172  return (lmatcher(g, (char *)string, nmatch, pmatch, eflags));
173  }
174 }
#define CHAR_BIT
Definition: readbits.h:99
#define RZ_API
#define NULL
Definition: cris-opc.c:27
struct @667 g
#define MAGIC1
Definition: regex2.h:41
#define MAGIC2
Definition: regex2.h:128
#define BAD
Definition: regex2.h:141
RZ_API int rz_regex_exec(const RzRegex *preg, const char *string, size_t nmatch, RzRegexMatch pmatch[], int eflags)
Definition: regexec.c:149
#define states1
Definition: regexec.c:56
#define GOODFLAGS(f)
RZ_API bool rz_regex_check(const RzRegex *rr, const char *str)
Definition: regexec.c:138
#define RZ_REGEX_BADPAT
Definition: rz_regex.h:34
#define RZ_REGEX_LARGE
Definition: rz_regex.h:58
#define RZ_REGEX_ASSERT
Definition: rz_regex.h:47
int re_flags
Definition: rz_regex.h:13
int re_magic
Definition: rz_regex.h:9
struct re_guts * re_g
Definition: rz_regex.h:12