1*c9945492SAndroid Build Coastguard Worker /*
2*c9945492SAndroid Build Coastguard Worker regcomp.c - TRE POSIX compatible regex compilation functions.
3*c9945492SAndroid Build Coastguard Worker
4*c9945492SAndroid Build Coastguard Worker Copyright (c) 2001-2009 Ville Laurikari <[email protected]>
5*c9945492SAndroid Build Coastguard Worker All rights reserved.
6*c9945492SAndroid Build Coastguard Worker
7*c9945492SAndroid Build Coastguard Worker Redistribution and use in source and binary forms, with or without
8*c9945492SAndroid Build Coastguard Worker modification, are permitted provided that the following conditions
9*c9945492SAndroid Build Coastguard Worker are met:
10*c9945492SAndroid Build Coastguard Worker
11*c9945492SAndroid Build Coastguard Worker 1. Redistributions of source code must retain the above copyright
12*c9945492SAndroid Build Coastguard Worker notice, this list of conditions and the following disclaimer.
13*c9945492SAndroid Build Coastguard Worker
14*c9945492SAndroid Build Coastguard Worker 2. Redistributions in binary form must reproduce the above copyright
15*c9945492SAndroid Build Coastguard Worker notice, this list of conditions and the following disclaimer in the
16*c9945492SAndroid Build Coastguard Worker documentation and/or other materials provided with the distribution.
17*c9945492SAndroid Build Coastguard Worker
18*c9945492SAndroid Build Coastguard Worker THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
19*c9945492SAndroid Build Coastguard Worker ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20*c9945492SAndroid Build Coastguard Worker LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21*c9945492SAndroid Build Coastguard Worker A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22*c9945492SAndroid Build Coastguard Worker HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23*c9945492SAndroid Build Coastguard Worker SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24*c9945492SAndroid Build Coastguard Worker LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25*c9945492SAndroid Build Coastguard Worker DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26*c9945492SAndroid Build Coastguard Worker THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27*c9945492SAndroid Build Coastguard Worker (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28*c9945492SAndroid Build Coastguard Worker OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*c9945492SAndroid Build Coastguard Worker
30*c9945492SAndroid Build Coastguard Worker */
31*c9945492SAndroid Build Coastguard Worker
32*c9945492SAndroid Build Coastguard Worker #include <string.h>
33*c9945492SAndroid Build Coastguard Worker #include <stdlib.h>
34*c9945492SAndroid Build Coastguard Worker #include <regex.h>
35*c9945492SAndroid Build Coastguard Worker #include <limits.h>
36*c9945492SAndroid Build Coastguard Worker #include <stdint.h>
37*c9945492SAndroid Build Coastguard Worker #include <ctype.h>
38*c9945492SAndroid Build Coastguard Worker
39*c9945492SAndroid Build Coastguard Worker #include "tre.h"
40*c9945492SAndroid Build Coastguard Worker
41*c9945492SAndroid Build Coastguard Worker #include <assert.h>
42*c9945492SAndroid Build Coastguard Worker
43*c9945492SAndroid Build Coastguard Worker /***********************************************************************
44*c9945492SAndroid Build Coastguard Worker from tre-compile.h
45*c9945492SAndroid Build Coastguard Worker ***********************************************************************/
46*c9945492SAndroid Build Coastguard Worker
47*c9945492SAndroid Build Coastguard Worker typedef struct {
48*c9945492SAndroid Build Coastguard Worker int position;
49*c9945492SAndroid Build Coastguard Worker int code_min;
50*c9945492SAndroid Build Coastguard Worker int code_max;
51*c9945492SAndroid Build Coastguard Worker int *tags;
52*c9945492SAndroid Build Coastguard Worker int assertions;
53*c9945492SAndroid Build Coastguard Worker tre_ctype_t class;
54*c9945492SAndroid Build Coastguard Worker tre_ctype_t *neg_classes;
55*c9945492SAndroid Build Coastguard Worker int backref;
56*c9945492SAndroid Build Coastguard Worker } tre_pos_and_tags_t;
57*c9945492SAndroid Build Coastguard Worker
58*c9945492SAndroid Build Coastguard Worker
59*c9945492SAndroid Build Coastguard Worker /***********************************************************************
60*c9945492SAndroid Build Coastguard Worker from tre-ast.c and tre-ast.h
61*c9945492SAndroid Build Coastguard Worker ***********************************************************************/
62*c9945492SAndroid Build Coastguard Worker
63*c9945492SAndroid Build Coastguard Worker /* The different AST node types. */
64*c9945492SAndroid Build Coastguard Worker typedef enum {
65*c9945492SAndroid Build Coastguard Worker LITERAL,
66*c9945492SAndroid Build Coastguard Worker CATENATION,
67*c9945492SAndroid Build Coastguard Worker ITERATION,
68*c9945492SAndroid Build Coastguard Worker UNION
69*c9945492SAndroid Build Coastguard Worker } tre_ast_type_t;
70*c9945492SAndroid Build Coastguard Worker
71*c9945492SAndroid Build Coastguard Worker /* Special subtypes of TRE_LITERAL. */
72*c9945492SAndroid Build Coastguard Worker #define EMPTY -1 /* Empty leaf (denotes empty string). */
73*c9945492SAndroid Build Coastguard Worker #define ASSERTION -2 /* Assertion leaf. */
74*c9945492SAndroid Build Coastguard Worker #define TAG -3 /* Tag leaf. */
75*c9945492SAndroid Build Coastguard Worker #define BACKREF -4 /* Back reference leaf. */
76*c9945492SAndroid Build Coastguard Worker
77*c9945492SAndroid Build Coastguard Worker #define IS_SPECIAL(x) ((x)->code_min < 0)
78*c9945492SAndroid Build Coastguard Worker #define IS_EMPTY(x) ((x)->code_min == EMPTY)
79*c9945492SAndroid Build Coastguard Worker #define IS_ASSERTION(x) ((x)->code_min == ASSERTION)
80*c9945492SAndroid Build Coastguard Worker #define IS_TAG(x) ((x)->code_min == TAG)
81*c9945492SAndroid Build Coastguard Worker #define IS_BACKREF(x) ((x)->code_min == BACKREF)
82*c9945492SAndroid Build Coastguard Worker
83*c9945492SAndroid Build Coastguard Worker
84*c9945492SAndroid Build Coastguard Worker /* A generic AST node. All AST nodes consist of this node on the top
85*c9945492SAndroid Build Coastguard Worker level with `obj' pointing to the actual content. */
86*c9945492SAndroid Build Coastguard Worker typedef struct {
87*c9945492SAndroid Build Coastguard Worker tre_ast_type_t type; /* Type of the node. */
88*c9945492SAndroid Build Coastguard Worker void *obj; /* Pointer to actual node. */
89*c9945492SAndroid Build Coastguard Worker int nullable;
90*c9945492SAndroid Build Coastguard Worker int submatch_id;
91*c9945492SAndroid Build Coastguard Worker int num_submatches;
92*c9945492SAndroid Build Coastguard Worker int num_tags;
93*c9945492SAndroid Build Coastguard Worker tre_pos_and_tags_t *firstpos;
94*c9945492SAndroid Build Coastguard Worker tre_pos_and_tags_t *lastpos;
95*c9945492SAndroid Build Coastguard Worker } tre_ast_node_t;
96*c9945492SAndroid Build Coastguard Worker
97*c9945492SAndroid Build Coastguard Worker
98*c9945492SAndroid Build Coastguard Worker /* A "literal" node. These are created for assertions, back references,
99*c9945492SAndroid Build Coastguard Worker tags, matching parameter settings, and all expressions that match one
100*c9945492SAndroid Build Coastguard Worker character. */
101*c9945492SAndroid Build Coastguard Worker typedef struct {
102*c9945492SAndroid Build Coastguard Worker long code_min;
103*c9945492SAndroid Build Coastguard Worker long code_max;
104*c9945492SAndroid Build Coastguard Worker int position;
105*c9945492SAndroid Build Coastguard Worker tre_ctype_t class;
106*c9945492SAndroid Build Coastguard Worker tre_ctype_t *neg_classes;
107*c9945492SAndroid Build Coastguard Worker } tre_literal_t;
108*c9945492SAndroid Build Coastguard Worker
109*c9945492SAndroid Build Coastguard Worker /* A "catenation" node. These are created when two regexps are concatenated.
110*c9945492SAndroid Build Coastguard Worker If there are more than one subexpressions in sequence, the `left' part
111*c9945492SAndroid Build Coastguard Worker holds all but the last, and `right' part holds the last subexpression
112*c9945492SAndroid Build Coastguard Worker (catenation is left associative). */
113*c9945492SAndroid Build Coastguard Worker typedef struct {
114*c9945492SAndroid Build Coastguard Worker tre_ast_node_t *left;
115*c9945492SAndroid Build Coastguard Worker tre_ast_node_t *right;
116*c9945492SAndroid Build Coastguard Worker } tre_catenation_t;
117*c9945492SAndroid Build Coastguard Worker
118*c9945492SAndroid Build Coastguard Worker /* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}"
119*c9945492SAndroid Build Coastguard Worker operators. */
120*c9945492SAndroid Build Coastguard Worker typedef struct {
121*c9945492SAndroid Build Coastguard Worker /* Subexpression to match. */
122*c9945492SAndroid Build Coastguard Worker tre_ast_node_t *arg;
123*c9945492SAndroid Build Coastguard Worker /* Minimum number of consecutive matches. */
124*c9945492SAndroid Build Coastguard Worker int min;
125*c9945492SAndroid Build Coastguard Worker /* Maximum number of consecutive matches. */
126*c9945492SAndroid Build Coastguard Worker int max;
127*c9945492SAndroid Build Coastguard Worker /* If 0, match as many characters as possible, if 1 match as few as
128*c9945492SAndroid Build Coastguard Worker possible. Note that this does not always mean the same thing as
129*c9945492SAndroid Build Coastguard Worker matching as many/few repetitions as possible. */
130*c9945492SAndroid Build Coastguard Worker unsigned int minimal:1;
131*c9945492SAndroid Build Coastguard Worker } tre_iteration_t;
132*c9945492SAndroid Build Coastguard Worker
133*c9945492SAndroid Build Coastguard Worker /* An "union" node. These are created for the "|" operator. */
134*c9945492SAndroid Build Coastguard Worker typedef struct {
135*c9945492SAndroid Build Coastguard Worker tre_ast_node_t *left;
136*c9945492SAndroid Build Coastguard Worker tre_ast_node_t *right;
137*c9945492SAndroid Build Coastguard Worker } tre_union_t;
138*c9945492SAndroid Build Coastguard Worker
139*c9945492SAndroid Build Coastguard Worker
140*c9945492SAndroid Build Coastguard Worker static tre_ast_node_t *
tre_ast_new_node(tre_mem_t mem,int type,void * obj)141*c9945492SAndroid Build Coastguard Worker tre_ast_new_node(tre_mem_t mem, int type, void *obj)
142*c9945492SAndroid Build Coastguard Worker {
143*c9945492SAndroid Build Coastguard Worker tre_ast_node_t *node = tre_mem_calloc(mem, sizeof *node);
144*c9945492SAndroid Build Coastguard Worker if (!node || !obj)
145*c9945492SAndroid Build Coastguard Worker return 0;
146*c9945492SAndroid Build Coastguard Worker node->obj = obj;
147*c9945492SAndroid Build Coastguard Worker node->type = type;
148*c9945492SAndroid Build Coastguard Worker node->nullable = -1;
149*c9945492SAndroid Build Coastguard Worker node->submatch_id = -1;
150*c9945492SAndroid Build Coastguard Worker return node;
151*c9945492SAndroid Build Coastguard Worker }
152*c9945492SAndroid Build Coastguard Worker
153*c9945492SAndroid Build Coastguard Worker static tre_ast_node_t *
tre_ast_new_literal(tre_mem_t mem,int code_min,int code_max,int position)154*c9945492SAndroid Build Coastguard Worker tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
155*c9945492SAndroid Build Coastguard Worker {
156*c9945492SAndroid Build Coastguard Worker tre_ast_node_t *node;
157*c9945492SAndroid Build Coastguard Worker tre_literal_t *lit;
158*c9945492SAndroid Build Coastguard Worker
159*c9945492SAndroid Build Coastguard Worker lit = tre_mem_calloc(mem, sizeof *lit);
160*c9945492SAndroid Build Coastguard Worker node = tre_ast_new_node(mem, LITERAL, lit);
161*c9945492SAndroid Build Coastguard Worker if (!node)
162*c9945492SAndroid Build Coastguard Worker return 0;
163*c9945492SAndroid Build Coastguard Worker lit->code_min = code_min;
164*c9945492SAndroid Build Coastguard Worker lit->code_max = code_max;
165*c9945492SAndroid Build Coastguard Worker lit->position = position;
166*c9945492SAndroid Build Coastguard Worker return node;
167*c9945492SAndroid Build Coastguard Worker }
168*c9945492SAndroid Build Coastguard Worker
169*c9945492SAndroid Build Coastguard Worker static tre_ast_node_t *
tre_ast_new_iter(tre_mem_t mem,tre_ast_node_t * arg,int min,int max,int minimal)170*c9945492SAndroid Build Coastguard Worker tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max, int minimal)
171*c9945492SAndroid Build Coastguard Worker {
172*c9945492SAndroid Build Coastguard Worker tre_ast_node_t *node;
173*c9945492SAndroid Build Coastguard Worker tre_iteration_t *iter;
174*c9945492SAndroid Build Coastguard Worker
175*c9945492SAndroid Build Coastguard Worker iter = tre_mem_calloc(mem, sizeof *iter);
176*c9945492SAndroid Build Coastguard Worker node = tre_ast_new_node(mem, ITERATION, iter);
177*c9945492SAndroid Build Coastguard Worker if (!node)
178*c9945492SAndroid Build Coastguard Worker return 0;
179*c9945492SAndroid Build Coastguard Worker iter->arg = arg;
180*c9945492SAndroid Build Coastguard Worker iter->min = min;
181*c9945492SAndroid Build Coastguard Worker iter->max = max;
182*c9945492SAndroid Build Coastguard Worker iter->minimal = minimal;
183*c9945492SAndroid Build Coastguard Worker node->num_submatches = arg->num_submatches;
184*c9945492SAndroid Build Coastguard Worker return node;
185*c9945492SAndroid Build Coastguard Worker }
186*c9945492SAndroid Build Coastguard Worker
187*c9945492SAndroid Build Coastguard Worker static tre_ast_node_t *
tre_ast_new_union(tre_mem_t mem,tre_ast_node_t * left,tre_ast_node_t * right)188*c9945492SAndroid Build Coastguard Worker tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
189*c9945492SAndroid Build Coastguard Worker {
190*c9945492SAndroid Build Coastguard Worker tre_ast_node_t *node;
191*c9945492SAndroid Build Coastguard Worker tre_union_t *un;
192*c9945492SAndroid Build Coastguard Worker
193*c9945492SAndroid Build Coastguard Worker if (!left)
194*c9945492SAndroid Build Coastguard Worker return right;
195*c9945492SAndroid Build Coastguard Worker un = tre_mem_calloc(mem, sizeof *un);
196*c9945492SAndroid Build Coastguard Worker node = tre_ast_new_node(mem, UNION, un);
197*c9945492SAndroid Build Coastguard Worker if (!node || !right)
198*c9945492SAndroid Build Coastguard Worker return 0;
199*c9945492SAndroid Build Coastguard Worker un->left = left;
200*c9945492SAndroid Build Coastguard Worker un->right = right;
201*c9945492SAndroid Build Coastguard Worker node->num_submatches = left->num_submatches + right->num_submatches;
202*c9945492SAndroid Build Coastguard Worker return node;
203*c9945492SAndroid Build Coastguard Worker }
204*c9945492SAndroid Build Coastguard Worker
205*c9945492SAndroid Build Coastguard Worker static tre_ast_node_t *
tre_ast_new_catenation(tre_mem_t mem,tre_ast_node_t * left,tre_ast_node_t * right)206*c9945492SAndroid Build Coastguard Worker tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
207*c9945492SAndroid Build Coastguard Worker {
208*c9945492SAndroid Build Coastguard Worker tre_ast_node_t *node;
209*c9945492SAndroid Build Coastguard Worker tre_catenation_t *cat;
210*c9945492SAndroid Build Coastguard Worker
211*c9945492SAndroid Build Coastguard Worker if (!left)
212*c9945492SAndroid Build Coastguard Worker return right;
213*c9945492SAndroid Build Coastguard Worker cat = tre_mem_calloc(mem, sizeof *cat);
214*c9945492SAndroid Build Coastguard Worker node = tre_ast_new_node(mem, CATENATION, cat);
215*c9945492SAndroid Build Coastguard Worker if (!node)
216*c9945492SAndroid Build Coastguard Worker return 0;
217*c9945492SAndroid Build Coastguard Worker cat->left = left;
218*c9945492SAndroid Build Coastguard Worker cat->right = right;
219*c9945492SAndroid Build Coastguard Worker node->num_submatches = left->num_submatches + right->num_submatches;
220*c9945492SAndroid Build Coastguard Worker return node;
221*c9945492SAndroid Build Coastguard Worker }
222*c9945492SAndroid Build Coastguard Worker
223*c9945492SAndroid Build Coastguard Worker
224*c9945492SAndroid Build Coastguard Worker /***********************************************************************
225*c9945492SAndroid Build Coastguard Worker from tre-stack.c and tre-stack.h
226*c9945492SAndroid Build Coastguard Worker ***********************************************************************/
227*c9945492SAndroid Build Coastguard Worker
228*c9945492SAndroid Build Coastguard Worker typedef struct tre_stack_rec tre_stack_t;
229*c9945492SAndroid Build Coastguard Worker
230*c9945492SAndroid Build Coastguard Worker /* Creates a new stack object. `size' is initial size in bytes, `max_size'
231*c9945492SAndroid Build Coastguard Worker is maximum size, and `increment' specifies how much more space will be
232*c9945492SAndroid Build Coastguard Worker allocated with realloc() if all space gets used up. Returns the stack
233*c9945492SAndroid Build Coastguard Worker object or NULL if out of memory. */
234*c9945492SAndroid Build Coastguard Worker static tre_stack_t *
235*c9945492SAndroid Build Coastguard Worker tre_stack_new(int size, int max_size, int increment);
236*c9945492SAndroid Build Coastguard Worker
237*c9945492SAndroid Build Coastguard Worker /* Frees the stack object. */
238*c9945492SAndroid Build Coastguard Worker static void
239*c9945492SAndroid Build Coastguard Worker tre_stack_destroy(tre_stack_t *s);
240*c9945492SAndroid Build Coastguard Worker
241*c9945492SAndroid Build Coastguard Worker /* Returns the current number of objects in the stack. */
242*c9945492SAndroid Build Coastguard Worker static int
243*c9945492SAndroid Build Coastguard Worker tre_stack_num_objects(tre_stack_t *s);
244*c9945492SAndroid Build Coastguard Worker
245*c9945492SAndroid Build Coastguard Worker /* Each tre_stack_push_*(tre_stack_t *s, <type> value) function pushes
246*c9945492SAndroid Build Coastguard Worker `value' on top of stack `s'. Returns REG_ESPACE if out of memory.
247*c9945492SAndroid Build Coastguard Worker This tries to realloc() more space before failing if maximum size
248*c9945492SAndroid Build Coastguard Worker has not yet been reached. Returns REG_OK if successful. */
249*c9945492SAndroid Build Coastguard Worker #define declare_pushf(typetag, type) \
250*c9945492SAndroid Build Coastguard Worker static reg_errcode_t tre_stack_push_ ## typetag(tre_stack_t *s, type value)
251*c9945492SAndroid Build Coastguard Worker
252*c9945492SAndroid Build Coastguard Worker declare_pushf(voidptr, void *);
253*c9945492SAndroid Build Coastguard Worker declare_pushf(int, int);
254*c9945492SAndroid Build Coastguard Worker
255*c9945492SAndroid Build Coastguard Worker /* Each tre_stack_pop_*(tre_stack_t *s) function pops the topmost
256*c9945492SAndroid Build Coastguard Worker element off of stack `s' and returns it. The stack must not be
257*c9945492SAndroid Build Coastguard Worker empty. */
258*c9945492SAndroid Build Coastguard Worker #define declare_popf(typetag, type) \
259*c9945492SAndroid Build Coastguard Worker static type tre_stack_pop_ ## typetag(tre_stack_t *s)
260*c9945492SAndroid Build Coastguard Worker
261*c9945492SAndroid Build Coastguard Worker declare_popf(voidptr, void *);
262*c9945492SAndroid Build Coastguard Worker declare_popf(int, int);
263*c9945492SAndroid Build Coastguard Worker
264*c9945492SAndroid Build Coastguard Worker /* Just to save some typing. */
265*c9945492SAndroid Build Coastguard Worker #define STACK_PUSH(s, typetag, value) \
266*c9945492SAndroid Build Coastguard Worker do \
267*c9945492SAndroid Build Coastguard Worker { \
268*c9945492SAndroid Build Coastguard Worker status = tre_stack_push_ ## typetag(s, value); \
269*c9945492SAndroid Build Coastguard Worker } \
270*c9945492SAndroid Build Coastguard Worker while (/*CONSTCOND*/0)
271*c9945492SAndroid Build Coastguard Worker
272*c9945492SAndroid Build Coastguard Worker #define STACK_PUSHX(s, typetag, value) \
273*c9945492SAndroid Build Coastguard Worker { \
274*c9945492SAndroid Build Coastguard Worker status = tre_stack_push_ ## typetag(s, value); \
275*c9945492SAndroid Build Coastguard Worker if (status != REG_OK) \
276*c9945492SAndroid Build Coastguard Worker break; \
277*c9945492SAndroid Build Coastguard Worker }
278*c9945492SAndroid Build Coastguard Worker
279*c9945492SAndroid Build Coastguard Worker #define STACK_PUSHR(s, typetag, value) \
280*c9945492SAndroid Build Coastguard Worker { \
281*c9945492SAndroid Build Coastguard Worker reg_errcode_t _status; \
282*c9945492SAndroid Build Coastguard Worker _status = tre_stack_push_ ## typetag(s, value); \
283*c9945492SAndroid Build Coastguard Worker if (_status != REG_OK) \
284*c9945492SAndroid Build Coastguard Worker return _status; \
285*c9945492SAndroid Build Coastguard Worker }
286*c9945492SAndroid Build Coastguard Worker
287*c9945492SAndroid Build Coastguard Worker union tre_stack_item {
288*c9945492SAndroid Build Coastguard Worker void *voidptr_value;
289*c9945492SAndroid Build Coastguard Worker int int_value;
290*c9945492SAndroid Build Coastguard Worker };
291*c9945492SAndroid Build Coastguard Worker
292*c9945492SAndroid Build Coastguard Worker struct tre_stack_rec {
293*c9945492SAndroid Build Coastguard Worker int size;
294*c9945492SAndroid Build Coastguard Worker int max_size;
295*c9945492SAndroid Build Coastguard Worker int increment;
296*c9945492SAndroid Build Coastguard Worker int ptr;
297*c9945492SAndroid Build Coastguard Worker union tre_stack_item *stack;
298*c9945492SAndroid Build Coastguard Worker };
299*c9945492SAndroid Build Coastguard Worker
300*c9945492SAndroid Build Coastguard Worker
301*c9945492SAndroid Build Coastguard Worker static tre_stack_t *
tre_stack_new(int size,int max_size,int increment)302*c9945492SAndroid Build Coastguard Worker tre_stack_new(int size, int max_size, int increment)
303*c9945492SAndroid Build Coastguard Worker {
304*c9945492SAndroid Build Coastguard Worker tre_stack_t *s;
305*c9945492SAndroid Build Coastguard Worker
306*c9945492SAndroid Build Coastguard Worker s = xmalloc(sizeof(*s));
307*c9945492SAndroid Build Coastguard Worker if (s != NULL)
308*c9945492SAndroid Build Coastguard Worker {
309*c9945492SAndroid Build Coastguard Worker s->stack = xmalloc(sizeof(*s->stack) * size);
310*c9945492SAndroid Build Coastguard Worker if (s->stack == NULL)
311*c9945492SAndroid Build Coastguard Worker {
312*c9945492SAndroid Build Coastguard Worker xfree(s);
313*c9945492SAndroid Build Coastguard Worker return NULL;
314*c9945492SAndroid Build Coastguard Worker }
315*c9945492SAndroid Build Coastguard Worker s->size = size;
316*c9945492SAndroid Build Coastguard Worker s->max_size = max_size;
317*c9945492SAndroid Build Coastguard Worker s->increment = increment;
318*c9945492SAndroid Build Coastguard Worker s->ptr = 0;
319*c9945492SAndroid Build Coastguard Worker }
320*c9945492SAndroid Build Coastguard Worker return s;
321*c9945492SAndroid Build Coastguard Worker }
322*c9945492SAndroid Build Coastguard Worker
323*c9945492SAndroid Build Coastguard Worker static void
tre_stack_destroy(tre_stack_t * s)324*c9945492SAndroid Build Coastguard Worker tre_stack_destroy(tre_stack_t *s)
325*c9945492SAndroid Build Coastguard Worker {
326*c9945492SAndroid Build Coastguard Worker xfree(s->stack);
327*c9945492SAndroid Build Coastguard Worker xfree(s);
328*c9945492SAndroid Build Coastguard Worker }
329*c9945492SAndroid Build Coastguard Worker
330*c9945492SAndroid Build Coastguard Worker static int
tre_stack_num_objects(tre_stack_t * s)331*c9945492SAndroid Build Coastguard Worker tre_stack_num_objects(tre_stack_t *s)
332*c9945492SAndroid Build Coastguard Worker {
333*c9945492SAndroid Build Coastguard Worker return s->ptr;
334*c9945492SAndroid Build Coastguard Worker }
335*c9945492SAndroid Build Coastguard Worker
336*c9945492SAndroid Build Coastguard Worker static reg_errcode_t
tre_stack_push(tre_stack_t * s,union tre_stack_item value)337*c9945492SAndroid Build Coastguard Worker tre_stack_push(tre_stack_t *s, union tre_stack_item value)
338*c9945492SAndroid Build Coastguard Worker {
339*c9945492SAndroid Build Coastguard Worker if (s->ptr < s->size)
340*c9945492SAndroid Build Coastguard Worker {
341*c9945492SAndroid Build Coastguard Worker s->stack[s->ptr] = value;
342*c9945492SAndroid Build Coastguard Worker s->ptr++;
343*c9945492SAndroid Build Coastguard Worker }
344*c9945492SAndroid Build Coastguard Worker else
345*c9945492SAndroid Build Coastguard Worker {
346*c9945492SAndroid Build Coastguard Worker if (s->size >= s->max_size)
347*c9945492SAndroid Build Coastguard Worker {
348*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
349*c9945492SAndroid Build Coastguard Worker }
350*c9945492SAndroid Build Coastguard Worker else
351*c9945492SAndroid Build Coastguard Worker {
352*c9945492SAndroid Build Coastguard Worker union tre_stack_item *new_buffer;
353*c9945492SAndroid Build Coastguard Worker int new_size;
354*c9945492SAndroid Build Coastguard Worker new_size = s->size + s->increment;
355*c9945492SAndroid Build Coastguard Worker if (new_size > s->max_size)
356*c9945492SAndroid Build Coastguard Worker new_size = s->max_size;
357*c9945492SAndroid Build Coastguard Worker new_buffer = xrealloc(s->stack, sizeof(*new_buffer) * new_size);
358*c9945492SAndroid Build Coastguard Worker if (new_buffer == NULL)
359*c9945492SAndroid Build Coastguard Worker {
360*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
361*c9945492SAndroid Build Coastguard Worker }
362*c9945492SAndroid Build Coastguard Worker assert(new_size > s->size);
363*c9945492SAndroid Build Coastguard Worker s->size = new_size;
364*c9945492SAndroid Build Coastguard Worker s->stack = new_buffer;
365*c9945492SAndroid Build Coastguard Worker tre_stack_push(s, value);
366*c9945492SAndroid Build Coastguard Worker }
367*c9945492SAndroid Build Coastguard Worker }
368*c9945492SAndroid Build Coastguard Worker return REG_OK;
369*c9945492SAndroid Build Coastguard Worker }
370*c9945492SAndroid Build Coastguard Worker
371*c9945492SAndroid Build Coastguard Worker #define define_pushf(typetag, type) \
372*c9945492SAndroid Build Coastguard Worker declare_pushf(typetag, type) { \
373*c9945492SAndroid Build Coastguard Worker union tre_stack_item item; \
374*c9945492SAndroid Build Coastguard Worker item.typetag ## _value = value; \
375*c9945492SAndroid Build Coastguard Worker return tre_stack_push(s, item); \
376*c9945492SAndroid Build Coastguard Worker }
377*c9945492SAndroid Build Coastguard Worker
378*c9945492SAndroid Build Coastguard Worker define_pushf(int, int)
379*c9945492SAndroid Build Coastguard Worker define_pushf(voidptr, void *)
380*c9945492SAndroid Build Coastguard Worker
381*c9945492SAndroid Build Coastguard Worker #define define_popf(typetag, type) \
382*c9945492SAndroid Build Coastguard Worker declare_popf(typetag, type) { \
383*c9945492SAndroid Build Coastguard Worker return s->stack[--s->ptr].typetag ## _value; \
384*c9945492SAndroid Build Coastguard Worker }
385*c9945492SAndroid Build Coastguard Worker
386*c9945492SAndroid Build Coastguard Worker define_popf(int, int)
387*c9945492SAndroid Build Coastguard Worker define_popf(voidptr, void *)
388*c9945492SAndroid Build Coastguard Worker
389*c9945492SAndroid Build Coastguard Worker
390*c9945492SAndroid Build Coastguard Worker /***********************************************************************
391*c9945492SAndroid Build Coastguard Worker from tre-parse.c and tre-parse.h
392*c9945492SAndroid Build Coastguard Worker ***********************************************************************/
393*c9945492SAndroid Build Coastguard Worker
394*c9945492SAndroid Build Coastguard Worker /* Parse context. */
395*c9945492SAndroid Build Coastguard Worker typedef struct {
396*c9945492SAndroid Build Coastguard Worker /* Memory allocator. The AST is allocated using this. */
397*c9945492SAndroid Build Coastguard Worker tre_mem_t mem;
398*c9945492SAndroid Build Coastguard Worker /* Stack used for keeping track of regexp syntax. */
399*c9945492SAndroid Build Coastguard Worker tre_stack_t *stack;
400*c9945492SAndroid Build Coastguard Worker /* The parsed node after a parse function returns. */
401*c9945492SAndroid Build Coastguard Worker tre_ast_node_t *n;
402*c9945492SAndroid Build Coastguard Worker /* Position in the regexp pattern after a parse function returns. */
403*c9945492SAndroid Build Coastguard Worker const char *s;
404*c9945492SAndroid Build Coastguard Worker /* The first character of the last subexpression parsed. */
405*c9945492SAndroid Build Coastguard Worker const char *start;
406*c9945492SAndroid Build Coastguard Worker /* Current submatch ID. */
407*c9945492SAndroid Build Coastguard Worker int submatch_id;
408*c9945492SAndroid Build Coastguard Worker /* Current position (number of literal). */
409*c9945492SAndroid Build Coastguard Worker int position;
410*c9945492SAndroid Build Coastguard Worker /* The highest back reference or -1 if none seen so far. */
411*c9945492SAndroid Build Coastguard Worker int max_backref;
412*c9945492SAndroid Build Coastguard Worker /* Compilation flags. */
413*c9945492SAndroid Build Coastguard Worker int cflags;
414*c9945492SAndroid Build Coastguard Worker } tre_parse_ctx_t;
415*c9945492SAndroid Build Coastguard Worker
416*c9945492SAndroid Build Coastguard Worker /* Some macros for expanding \w, \s, etc. */
417*c9945492SAndroid Build Coastguard Worker static const struct {
418*c9945492SAndroid Build Coastguard Worker char c;
419*c9945492SAndroid Build Coastguard Worker const char *expansion;
420*c9945492SAndroid Build Coastguard Worker } tre_macros[] = {
421*c9945492SAndroid Build Coastguard Worker {'t', "\t"}, {'n', "\n"}, {'r', "\r"},
422*c9945492SAndroid Build Coastguard Worker {'f', "\f"}, {'a', "\a"}, {'e', "\033"},
423*c9945492SAndroid Build Coastguard Worker {'w', "[[:alnum:]_]"}, {'W', "[^[:alnum:]_]"}, {'s', "[[:space:]]"},
424*c9945492SAndroid Build Coastguard Worker {'S', "[^[:space:]]"}, {'d', "[[:digit:]]"}, {'D', "[^[:digit:]]"},
425*c9945492SAndroid Build Coastguard Worker { 0, 0 }
426*c9945492SAndroid Build Coastguard Worker };
427*c9945492SAndroid Build Coastguard Worker
428*c9945492SAndroid Build Coastguard Worker /* Expands a macro delimited by `regex' and `regex_end' to `buf', which
429*c9945492SAndroid Build Coastguard Worker must have at least `len' items. Sets buf[0] to zero if the there
430*c9945492SAndroid Build Coastguard Worker is no match in `tre_macros'. */
tre_expand_macro(const char * s)431*c9945492SAndroid Build Coastguard Worker static const char *tre_expand_macro(const char *s)
432*c9945492SAndroid Build Coastguard Worker {
433*c9945492SAndroid Build Coastguard Worker int i;
434*c9945492SAndroid Build Coastguard Worker for (i = 0; tre_macros[i].c && tre_macros[i].c != *s; i++);
435*c9945492SAndroid Build Coastguard Worker return tre_macros[i].expansion;
436*c9945492SAndroid Build Coastguard Worker }
437*c9945492SAndroid Build Coastguard Worker
438*c9945492SAndroid Build Coastguard Worker static int
tre_compare_lit(const void * a,const void * b)439*c9945492SAndroid Build Coastguard Worker tre_compare_lit(const void *a, const void *b)
440*c9945492SAndroid Build Coastguard Worker {
441*c9945492SAndroid Build Coastguard Worker const tre_literal_t *const *la = a;
442*c9945492SAndroid Build Coastguard Worker const tre_literal_t *const *lb = b;
443*c9945492SAndroid Build Coastguard Worker /* assumes the range of valid code_min is < INT_MAX */
444*c9945492SAndroid Build Coastguard Worker return la[0]->code_min - lb[0]->code_min;
445*c9945492SAndroid Build Coastguard Worker }
446*c9945492SAndroid Build Coastguard Worker
447*c9945492SAndroid Build Coastguard Worker struct literals {
448*c9945492SAndroid Build Coastguard Worker tre_mem_t mem;
449*c9945492SAndroid Build Coastguard Worker tre_literal_t **a;
450*c9945492SAndroid Build Coastguard Worker int len;
451*c9945492SAndroid Build Coastguard Worker int cap;
452*c9945492SAndroid Build Coastguard Worker };
453*c9945492SAndroid Build Coastguard Worker
tre_new_lit(struct literals * p)454*c9945492SAndroid Build Coastguard Worker static tre_literal_t *tre_new_lit(struct literals *p)
455*c9945492SAndroid Build Coastguard Worker {
456*c9945492SAndroid Build Coastguard Worker tre_literal_t **a;
457*c9945492SAndroid Build Coastguard Worker if (p->len >= p->cap) {
458*c9945492SAndroid Build Coastguard Worker if (p->cap >= 1<<15)
459*c9945492SAndroid Build Coastguard Worker return 0;
460*c9945492SAndroid Build Coastguard Worker p->cap *= 2;
461*c9945492SAndroid Build Coastguard Worker a = xrealloc(p->a, p->cap * sizeof *p->a);
462*c9945492SAndroid Build Coastguard Worker if (!a)
463*c9945492SAndroid Build Coastguard Worker return 0;
464*c9945492SAndroid Build Coastguard Worker p->a = a;
465*c9945492SAndroid Build Coastguard Worker }
466*c9945492SAndroid Build Coastguard Worker a = p->a + p->len++;
467*c9945492SAndroid Build Coastguard Worker *a = tre_mem_calloc(p->mem, sizeof **a);
468*c9945492SAndroid Build Coastguard Worker return *a;
469*c9945492SAndroid Build Coastguard Worker }
470*c9945492SAndroid Build Coastguard Worker
add_icase_literals(struct literals * ls,int min,int max)471*c9945492SAndroid Build Coastguard Worker static int add_icase_literals(struct literals *ls, int min, int max)
472*c9945492SAndroid Build Coastguard Worker {
473*c9945492SAndroid Build Coastguard Worker tre_literal_t *lit;
474*c9945492SAndroid Build Coastguard Worker int b, e, c;
475*c9945492SAndroid Build Coastguard Worker for (c=min; c<=max; ) {
476*c9945492SAndroid Build Coastguard Worker /* assumes islower(c) and isupper(c) are exclusive
477*c9945492SAndroid Build Coastguard Worker and toupper(c)!=c if islower(c).
478*c9945492SAndroid Build Coastguard Worker multiple opposite case characters are not supported */
479*c9945492SAndroid Build Coastguard Worker if (tre_islower(c)) {
480*c9945492SAndroid Build Coastguard Worker b = e = tre_toupper(c);
481*c9945492SAndroid Build Coastguard Worker for (c++, e++; c<=max; c++, e++)
482*c9945492SAndroid Build Coastguard Worker if (tre_toupper(c) != e) break;
483*c9945492SAndroid Build Coastguard Worker } else if (tre_isupper(c)) {
484*c9945492SAndroid Build Coastguard Worker b = e = tre_tolower(c);
485*c9945492SAndroid Build Coastguard Worker for (c++, e++; c<=max; c++, e++)
486*c9945492SAndroid Build Coastguard Worker if (tre_tolower(c) != e) break;
487*c9945492SAndroid Build Coastguard Worker } else {
488*c9945492SAndroid Build Coastguard Worker c++;
489*c9945492SAndroid Build Coastguard Worker continue;
490*c9945492SAndroid Build Coastguard Worker }
491*c9945492SAndroid Build Coastguard Worker lit = tre_new_lit(ls);
492*c9945492SAndroid Build Coastguard Worker if (!lit)
493*c9945492SAndroid Build Coastguard Worker return -1;
494*c9945492SAndroid Build Coastguard Worker lit->code_min = b;
495*c9945492SAndroid Build Coastguard Worker lit->code_max = e-1;
496*c9945492SAndroid Build Coastguard Worker lit->position = -1;
497*c9945492SAndroid Build Coastguard Worker }
498*c9945492SAndroid Build Coastguard Worker return 0;
499*c9945492SAndroid Build Coastguard Worker }
500*c9945492SAndroid Build Coastguard Worker
501*c9945492SAndroid Build Coastguard Worker
502*c9945492SAndroid Build Coastguard Worker /* Maximum number of character classes in a negated bracket expression. */
503*c9945492SAndroid Build Coastguard Worker #define MAX_NEG_CLASSES 64
504*c9945492SAndroid Build Coastguard Worker
505*c9945492SAndroid Build Coastguard Worker struct neg {
506*c9945492SAndroid Build Coastguard Worker int negate;
507*c9945492SAndroid Build Coastguard Worker int len;
508*c9945492SAndroid Build Coastguard Worker tre_ctype_t a[MAX_NEG_CLASSES];
509*c9945492SAndroid Build Coastguard Worker };
510*c9945492SAndroid Build Coastguard Worker
511*c9945492SAndroid Build Coastguard Worker // TODO: parse bracket into a set of non-overlapping [lo,hi] ranges
512*c9945492SAndroid Build Coastguard Worker
513*c9945492SAndroid Build Coastguard Worker /*
514*c9945492SAndroid Build Coastguard Worker bracket grammar:
515*c9945492SAndroid Build Coastguard Worker Bracket = '[' List ']' | '[^' List ']'
516*c9945492SAndroid Build Coastguard Worker List = Term | List Term
517*c9945492SAndroid Build Coastguard Worker Term = Char | Range | Chclass | Eqclass
518*c9945492SAndroid Build Coastguard Worker Range = Char '-' Char | Char '-' '-'
519*c9945492SAndroid Build Coastguard Worker Char = Coll | coll_single
520*c9945492SAndroid Build Coastguard Worker Meta = ']' | '-'
521*c9945492SAndroid Build Coastguard Worker Coll = '[.' coll_single '.]' | '[.' coll_multi '.]' | '[.' Meta '.]'
522*c9945492SAndroid Build Coastguard Worker Eqclass = '[=' coll_single '=]' | '[=' coll_multi '=]'
523*c9945492SAndroid Build Coastguard Worker Chclass = '[:' class ':]'
524*c9945492SAndroid Build Coastguard Worker
525*c9945492SAndroid Build Coastguard Worker coll_single is a single char collating element but it can be
526*c9945492SAndroid Build Coastguard Worker '-' only at the beginning or end of a List and
527*c9945492SAndroid Build Coastguard Worker ']' only at the beginning of a List and
528*c9945492SAndroid Build Coastguard Worker '^' anywhere except after the openning '['
529*c9945492SAndroid Build Coastguard Worker */
530*c9945492SAndroid Build Coastguard Worker
parse_bracket_terms(tre_parse_ctx_t * ctx,const char * s,struct literals * ls,struct neg * neg)531*c9945492SAndroid Build Coastguard Worker static reg_errcode_t parse_bracket_terms(tre_parse_ctx_t *ctx, const char *s, struct literals *ls, struct neg *neg)
532*c9945492SAndroid Build Coastguard Worker {
533*c9945492SAndroid Build Coastguard Worker const char *start = s;
534*c9945492SAndroid Build Coastguard Worker tre_ctype_t class;
535*c9945492SAndroid Build Coastguard Worker int min, max;
536*c9945492SAndroid Build Coastguard Worker wchar_t wc;
537*c9945492SAndroid Build Coastguard Worker int len;
538*c9945492SAndroid Build Coastguard Worker
539*c9945492SAndroid Build Coastguard Worker for (;;) {
540*c9945492SAndroid Build Coastguard Worker class = 0;
541*c9945492SAndroid Build Coastguard Worker len = mbtowc(&wc, s, -1);
542*c9945492SAndroid Build Coastguard Worker if (len <= 0)
543*c9945492SAndroid Build Coastguard Worker return *s ? REG_BADPAT : REG_EBRACK;
544*c9945492SAndroid Build Coastguard Worker if (*s == ']' && s != start) {
545*c9945492SAndroid Build Coastguard Worker ctx->s = s+1;
546*c9945492SAndroid Build Coastguard Worker return REG_OK;
547*c9945492SAndroid Build Coastguard Worker }
548*c9945492SAndroid Build Coastguard Worker if (*s == '-' && s != start && s[1] != ']' &&
549*c9945492SAndroid Build Coastguard Worker /* extension: [a-z--@] is accepted as [a-z]|[--@] */
550*c9945492SAndroid Build Coastguard Worker (s[1] != '-' || s[2] == ']'))
551*c9945492SAndroid Build Coastguard Worker return REG_ERANGE;
552*c9945492SAndroid Build Coastguard Worker if (*s == '[' && (s[1] == '.' || s[1] == '='))
553*c9945492SAndroid Build Coastguard Worker /* collating symbols and equivalence classes are not supported */
554*c9945492SAndroid Build Coastguard Worker return REG_ECOLLATE;
555*c9945492SAndroid Build Coastguard Worker if (*s == '[' && s[1] == ':') {
556*c9945492SAndroid Build Coastguard Worker char tmp[CHARCLASS_NAME_MAX+1];
557*c9945492SAndroid Build Coastguard Worker s += 2;
558*c9945492SAndroid Build Coastguard Worker for (len=0; len < CHARCLASS_NAME_MAX && s[len]; len++) {
559*c9945492SAndroid Build Coastguard Worker if (s[len] == ':') {
560*c9945492SAndroid Build Coastguard Worker memcpy(tmp, s, len);
561*c9945492SAndroid Build Coastguard Worker tmp[len] = 0;
562*c9945492SAndroid Build Coastguard Worker class = tre_ctype(tmp);
563*c9945492SAndroid Build Coastguard Worker break;
564*c9945492SAndroid Build Coastguard Worker }
565*c9945492SAndroid Build Coastguard Worker }
566*c9945492SAndroid Build Coastguard Worker if (!class || s[len+1] != ']')
567*c9945492SAndroid Build Coastguard Worker return REG_ECTYPE;
568*c9945492SAndroid Build Coastguard Worker min = 0;
569*c9945492SAndroid Build Coastguard Worker max = TRE_CHAR_MAX;
570*c9945492SAndroid Build Coastguard Worker s += len+2;
571*c9945492SAndroid Build Coastguard Worker } else {
572*c9945492SAndroid Build Coastguard Worker min = max = wc;
573*c9945492SAndroid Build Coastguard Worker s += len;
574*c9945492SAndroid Build Coastguard Worker if (*s == '-' && s[1] != ']') {
575*c9945492SAndroid Build Coastguard Worker s++;
576*c9945492SAndroid Build Coastguard Worker len = mbtowc(&wc, s, -1);
577*c9945492SAndroid Build Coastguard Worker max = wc;
578*c9945492SAndroid Build Coastguard Worker /* XXX - Should use collation order instead of
579*c9945492SAndroid Build Coastguard Worker encoding values in character ranges. */
580*c9945492SAndroid Build Coastguard Worker if (len <= 0 || min > max)
581*c9945492SAndroid Build Coastguard Worker return REG_ERANGE;
582*c9945492SAndroid Build Coastguard Worker s += len;
583*c9945492SAndroid Build Coastguard Worker }
584*c9945492SAndroid Build Coastguard Worker }
585*c9945492SAndroid Build Coastguard Worker
586*c9945492SAndroid Build Coastguard Worker if (class && neg->negate) {
587*c9945492SAndroid Build Coastguard Worker if (neg->len >= MAX_NEG_CLASSES)
588*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
589*c9945492SAndroid Build Coastguard Worker neg->a[neg->len++] = class;
590*c9945492SAndroid Build Coastguard Worker } else {
591*c9945492SAndroid Build Coastguard Worker tre_literal_t *lit = tre_new_lit(ls);
592*c9945492SAndroid Build Coastguard Worker if (!lit)
593*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
594*c9945492SAndroid Build Coastguard Worker lit->code_min = min;
595*c9945492SAndroid Build Coastguard Worker lit->code_max = max;
596*c9945492SAndroid Build Coastguard Worker lit->class = class;
597*c9945492SAndroid Build Coastguard Worker lit->position = -1;
598*c9945492SAndroid Build Coastguard Worker
599*c9945492SAndroid Build Coastguard Worker /* Add opposite-case codepoints if REG_ICASE is present.
600*c9945492SAndroid Build Coastguard Worker It seems that POSIX requires that bracket negation
601*c9945492SAndroid Build Coastguard Worker should happen before case-folding, but most practical
602*c9945492SAndroid Build Coastguard Worker implementations do it the other way around. Changing
603*c9945492SAndroid Build Coastguard Worker the order would need efficient representation of
604*c9945492SAndroid Build Coastguard Worker case-fold ranges and bracket range sets even with
605*c9945492SAndroid Build Coastguard Worker simple patterns so this is ok for now. */
606*c9945492SAndroid Build Coastguard Worker if (ctx->cflags & REG_ICASE && !class)
607*c9945492SAndroid Build Coastguard Worker if (add_icase_literals(ls, min, max))
608*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
609*c9945492SAndroid Build Coastguard Worker }
610*c9945492SAndroid Build Coastguard Worker }
611*c9945492SAndroid Build Coastguard Worker }
612*c9945492SAndroid Build Coastguard Worker
parse_bracket(tre_parse_ctx_t * ctx,const char * s)613*c9945492SAndroid Build Coastguard Worker static reg_errcode_t parse_bracket(tre_parse_ctx_t *ctx, const char *s)
614*c9945492SAndroid Build Coastguard Worker {
615*c9945492SAndroid Build Coastguard Worker int i, max, min, negmax, negmin;
616*c9945492SAndroid Build Coastguard Worker tre_ast_node_t *node = 0, *n;
617*c9945492SAndroid Build Coastguard Worker tre_ctype_t *nc = 0;
618*c9945492SAndroid Build Coastguard Worker tre_literal_t *lit;
619*c9945492SAndroid Build Coastguard Worker struct literals ls;
620*c9945492SAndroid Build Coastguard Worker struct neg neg;
621*c9945492SAndroid Build Coastguard Worker reg_errcode_t err;
622*c9945492SAndroid Build Coastguard Worker
623*c9945492SAndroid Build Coastguard Worker ls.mem = ctx->mem;
624*c9945492SAndroid Build Coastguard Worker ls.len = 0;
625*c9945492SAndroid Build Coastguard Worker ls.cap = 32;
626*c9945492SAndroid Build Coastguard Worker ls.a = xmalloc(ls.cap * sizeof *ls.a);
627*c9945492SAndroid Build Coastguard Worker if (!ls.a)
628*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
629*c9945492SAndroid Build Coastguard Worker neg.len = 0;
630*c9945492SAndroid Build Coastguard Worker neg.negate = *s == '^';
631*c9945492SAndroid Build Coastguard Worker if (neg.negate)
632*c9945492SAndroid Build Coastguard Worker s++;
633*c9945492SAndroid Build Coastguard Worker
634*c9945492SAndroid Build Coastguard Worker err = parse_bracket_terms(ctx, s, &ls, &neg);
635*c9945492SAndroid Build Coastguard Worker if (err != REG_OK)
636*c9945492SAndroid Build Coastguard Worker goto parse_bracket_done;
637*c9945492SAndroid Build Coastguard Worker
638*c9945492SAndroid Build Coastguard Worker if (neg.negate) {
639*c9945492SAndroid Build Coastguard Worker /*
640*c9945492SAndroid Build Coastguard Worker * With REG_NEWLINE, POSIX requires that newlines are not matched by
641*c9945492SAndroid Build Coastguard Worker * any form of a non-matching list.
642*c9945492SAndroid Build Coastguard Worker */
643*c9945492SAndroid Build Coastguard Worker if (ctx->cflags & REG_NEWLINE) {
644*c9945492SAndroid Build Coastguard Worker lit = tre_new_lit(&ls);
645*c9945492SAndroid Build Coastguard Worker if (!lit) {
646*c9945492SAndroid Build Coastguard Worker err = REG_ESPACE;
647*c9945492SAndroid Build Coastguard Worker goto parse_bracket_done;
648*c9945492SAndroid Build Coastguard Worker }
649*c9945492SAndroid Build Coastguard Worker lit->code_min = '\n';
650*c9945492SAndroid Build Coastguard Worker lit->code_max = '\n';
651*c9945492SAndroid Build Coastguard Worker lit->position = -1;
652*c9945492SAndroid Build Coastguard Worker }
653*c9945492SAndroid Build Coastguard Worker /* Sort the array if we need to negate it. */
654*c9945492SAndroid Build Coastguard Worker qsort(ls.a, ls.len, sizeof *ls.a, tre_compare_lit);
655*c9945492SAndroid Build Coastguard Worker /* extra lit for the last negated range */
656*c9945492SAndroid Build Coastguard Worker lit = tre_new_lit(&ls);
657*c9945492SAndroid Build Coastguard Worker if (!lit) {
658*c9945492SAndroid Build Coastguard Worker err = REG_ESPACE;
659*c9945492SAndroid Build Coastguard Worker goto parse_bracket_done;
660*c9945492SAndroid Build Coastguard Worker }
661*c9945492SAndroid Build Coastguard Worker lit->code_min = TRE_CHAR_MAX+1;
662*c9945492SAndroid Build Coastguard Worker lit->code_max = TRE_CHAR_MAX+1;
663*c9945492SAndroid Build Coastguard Worker lit->position = -1;
664*c9945492SAndroid Build Coastguard Worker /* negated classes */
665*c9945492SAndroid Build Coastguard Worker if (neg.len) {
666*c9945492SAndroid Build Coastguard Worker nc = tre_mem_alloc(ctx->mem, (neg.len+1)*sizeof *neg.a);
667*c9945492SAndroid Build Coastguard Worker if (!nc) {
668*c9945492SAndroid Build Coastguard Worker err = REG_ESPACE;
669*c9945492SAndroid Build Coastguard Worker goto parse_bracket_done;
670*c9945492SAndroid Build Coastguard Worker }
671*c9945492SAndroid Build Coastguard Worker memcpy(nc, neg.a, neg.len*sizeof *neg.a);
672*c9945492SAndroid Build Coastguard Worker nc[neg.len] = 0;
673*c9945492SAndroid Build Coastguard Worker }
674*c9945492SAndroid Build Coastguard Worker }
675*c9945492SAndroid Build Coastguard Worker
676*c9945492SAndroid Build Coastguard Worker /* Build a union of the items in the array, negated if necessary. */
677*c9945492SAndroid Build Coastguard Worker negmax = negmin = 0;
678*c9945492SAndroid Build Coastguard Worker for (i = 0; i < ls.len; i++) {
679*c9945492SAndroid Build Coastguard Worker lit = ls.a[i];
680*c9945492SAndroid Build Coastguard Worker min = lit->code_min;
681*c9945492SAndroid Build Coastguard Worker max = lit->code_max;
682*c9945492SAndroid Build Coastguard Worker if (neg.negate) {
683*c9945492SAndroid Build Coastguard Worker if (min <= negmin) {
684*c9945492SAndroid Build Coastguard Worker /* Overlap. */
685*c9945492SAndroid Build Coastguard Worker negmin = MAX(max + 1, negmin);
686*c9945492SAndroid Build Coastguard Worker continue;
687*c9945492SAndroid Build Coastguard Worker }
688*c9945492SAndroid Build Coastguard Worker negmax = min - 1;
689*c9945492SAndroid Build Coastguard Worker lit->code_min = negmin;
690*c9945492SAndroid Build Coastguard Worker lit->code_max = negmax;
691*c9945492SAndroid Build Coastguard Worker negmin = max + 1;
692*c9945492SAndroid Build Coastguard Worker }
693*c9945492SAndroid Build Coastguard Worker lit->position = ctx->position;
694*c9945492SAndroid Build Coastguard Worker lit->neg_classes = nc;
695*c9945492SAndroid Build Coastguard Worker n = tre_ast_new_node(ctx->mem, LITERAL, lit);
696*c9945492SAndroid Build Coastguard Worker node = tre_ast_new_union(ctx->mem, node, n);
697*c9945492SAndroid Build Coastguard Worker if (!node) {
698*c9945492SAndroid Build Coastguard Worker err = REG_ESPACE;
699*c9945492SAndroid Build Coastguard Worker break;
700*c9945492SAndroid Build Coastguard Worker }
701*c9945492SAndroid Build Coastguard Worker }
702*c9945492SAndroid Build Coastguard Worker
703*c9945492SAndroid Build Coastguard Worker parse_bracket_done:
704*c9945492SAndroid Build Coastguard Worker xfree(ls.a);
705*c9945492SAndroid Build Coastguard Worker ctx->position++;
706*c9945492SAndroid Build Coastguard Worker ctx->n = node;
707*c9945492SAndroid Build Coastguard Worker return err;
708*c9945492SAndroid Build Coastguard Worker }
709*c9945492SAndroid Build Coastguard Worker
parse_dup_count(const char * s,int * n)710*c9945492SAndroid Build Coastguard Worker static const char *parse_dup_count(const char *s, int *n)
711*c9945492SAndroid Build Coastguard Worker {
712*c9945492SAndroid Build Coastguard Worker *n = -1;
713*c9945492SAndroid Build Coastguard Worker if (!isdigit(*s))
714*c9945492SAndroid Build Coastguard Worker return s;
715*c9945492SAndroid Build Coastguard Worker *n = 0;
716*c9945492SAndroid Build Coastguard Worker for (;;) {
717*c9945492SAndroid Build Coastguard Worker *n = 10 * *n + (*s - '0');
718*c9945492SAndroid Build Coastguard Worker s++;
719*c9945492SAndroid Build Coastguard Worker if (!isdigit(*s) || *n > RE_DUP_MAX)
720*c9945492SAndroid Build Coastguard Worker break;
721*c9945492SAndroid Build Coastguard Worker }
722*c9945492SAndroid Build Coastguard Worker return s;
723*c9945492SAndroid Build Coastguard Worker }
724*c9945492SAndroid Build Coastguard Worker
parse_dup(const char * s,int ere,int * pmin,int * pmax)725*c9945492SAndroid Build Coastguard Worker static const char *parse_dup(const char *s, int ere, int *pmin, int *pmax)
726*c9945492SAndroid Build Coastguard Worker {
727*c9945492SAndroid Build Coastguard Worker int min, max;
728*c9945492SAndroid Build Coastguard Worker
729*c9945492SAndroid Build Coastguard Worker s = parse_dup_count(s, &min);
730*c9945492SAndroid Build Coastguard Worker if (*s == ',')
731*c9945492SAndroid Build Coastguard Worker s = parse_dup_count(s+1, &max);
732*c9945492SAndroid Build Coastguard Worker else
733*c9945492SAndroid Build Coastguard Worker max = min;
734*c9945492SAndroid Build Coastguard Worker
735*c9945492SAndroid Build Coastguard Worker if (
736*c9945492SAndroid Build Coastguard Worker (max < min && max >= 0) ||
737*c9945492SAndroid Build Coastguard Worker max > RE_DUP_MAX ||
738*c9945492SAndroid Build Coastguard Worker min > RE_DUP_MAX ||
739*c9945492SAndroid Build Coastguard Worker min < 0 ||
740*c9945492SAndroid Build Coastguard Worker (!ere && *s++ != '\\') ||
741*c9945492SAndroid Build Coastguard Worker *s++ != '}'
742*c9945492SAndroid Build Coastguard Worker )
743*c9945492SAndroid Build Coastguard Worker return 0;
744*c9945492SAndroid Build Coastguard Worker *pmin = min;
745*c9945492SAndroid Build Coastguard Worker *pmax = max;
746*c9945492SAndroid Build Coastguard Worker return s;
747*c9945492SAndroid Build Coastguard Worker }
748*c9945492SAndroid Build Coastguard Worker
hexval(unsigned c)749*c9945492SAndroid Build Coastguard Worker static int hexval(unsigned c)
750*c9945492SAndroid Build Coastguard Worker {
751*c9945492SAndroid Build Coastguard Worker if (c-'0'<10) return c-'0';
752*c9945492SAndroid Build Coastguard Worker c |= 32;
753*c9945492SAndroid Build Coastguard Worker if (c-'a'<6) return c-'a'+10;
754*c9945492SAndroid Build Coastguard Worker return -1;
755*c9945492SAndroid Build Coastguard Worker }
756*c9945492SAndroid Build Coastguard Worker
marksub(tre_parse_ctx_t * ctx,tre_ast_node_t * node,int subid)757*c9945492SAndroid Build Coastguard Worker static reg_errcode_t marksub(tre_parse_ctx_t *ctx, tre_ast_node_t *node, int subid)
758*c9945492SAndroid Build Coastguard Worker {
759*c9945492SAndroid Build Coastguard Worker if (node->submatch_id >= 0) {
760*c9945492SAndroid Build Coastguard Worker tre_ast_node_t *n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
761*c9945492SAndroid Build Coastguard Worker if (!n)
762*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
763*c9945492SAndroid Build Coastguard Worker n = tre_ast_new_catenation(ctx->mem, n, node);
764*c9945492SAndroid Build Coastguard Worker if (!n)
765*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
766*c9945492SAndroid Build Coastguard Worker n->num_submatches = node->num_submatches;
767*c9945492SAndroid Build Coastguard Worker node = n;
768*c9945492SAndroid Build Coastguard Worker }
769*c9945492SAndroid Build Coastguard Worker node->submatch_id = subid;
770*c9945492SAndroid Build Coastguard Worker node->num_submatches++;
771*c9945492SAndroid Build Coastguard Worker ctx->n = node;
772*c9945492SAndroid Build Coastguard Worker return REG_OK;
773*c9945492SAndroid Build Coastguard Worker }
774*c9945492SAndroid Build Coastguard Worker
775*c9945492SAndroid Build Coastguard Worker /*
776*c9945492SAndroid Build Coastguard Worker BRE grammar:
777*c9945492SAndroid Build Coastguard Worker Regex = Branch | '^' | '$' | '^$' | '^' Branch | Branch '$' | '^' Branch '$'
778*c9945492SAndroid Build Coastguard Worker Branch = Atom | Branch Atom
779*c9945492SAndroid Build Coastguard Worker Atom = char | quoted_char | '.' | Bracket | Atom Dup | '\(' Branch '\)' | back_ref
780*c9945492SAndroid Build Coastguard Worker Dup = '*' | '\{' Count '\}' | '\{' Count ',\}' | '\{' Count ',' Count '\}'
781*c9945492SAndroid Build Coastguard Worker
782*c9945492SAndroid Build Coastguard Worker (leading ^ and trailing $ in a sub expr may be an anchor or literal as well)
783*c9945492SAndroid Build Coastguard Worker
784*c9945492SAndroid Build Coastguard Worker ERE grammar:
785*c9945492SAndroid Build Coastguard Worker Regex = Branch | Regex '|' Branch
786*c9945492SAndroid Build Coastguard Worker Branch = Atom | Branch Atom
787*c9945492SAndroid Build Coastguard Worker Atom = char | quoted_char | '.' | Bracket | Atom Dup | '(' Regex ')' | '^' | '$'
788*c9945492SAndroid Build Coastguard Worker Dup = '*' | '+' | '?' | '{' Count '}' | '{' Count ',}' | '{' Count ',' Count '}'
789*c9945492SAndroid Build Coastguard Worker
790*c9945492SAndroid Build Coastguard Worker (a*+?, ^*, $+, \X, {, (|a) are unspecified)
791*c9945492SAndroid Build Coastguard Worker */
792*c9945492SAndroid Build Coastguard Worker
parse_atom(tre_parse_ctx_t * ctx,const char * s)793*c9945492SAndroid Build Coastguard Worker static reg_errcode_t parse_atom(tre_parse_ctx_t *ctx, const char *s)
794*c9945492SAndroid Build Coastguard Worker {
795*c9945492SAndroid Build Coastguard Worker int len, ere = ctx->cflags & REG_EXTENDED;
796*c9945492SAndroid Build Coastguard Worker const char *p;
797*c9945492SAndroid Build Coastguard Worker tre_ast_node_t *node;
798*c9945492SAndroid Build Coastguard Worker wchar_t wc;
799*c9945492SAndroid Build Coastguard Worker switch (*s) {
800*c9945492SAndroid Build Coastguard Worker case '[':
801*c9945492SAndroid Build Coastguard Worker return parse_bracket(ctx, s+1);
802*c9945492SAndroid Build Coastguard Worker case '\\':
803*c9945492SAndroid Build Coastguard Worker p = tre_expand_macro(s+1);
804*c9945492SAndroid Build Coastguard Worker if (p) {
805*c9945492SAndroid Build Coastguard Worker /* assume \X expansion is a single atom */
806*c9945492SAndroid Build Coastguard Worker reg_errcode_t err = parse_atom(ctx, p);
807*c9945492SAndroid Build Coastguard Worker ctx->s = s+2;
808*c9945492SAndroid Build Coastguard Worker return err;
809*c9945492SAndroid Build Coastguard Worker }
810*c9945492SAndroid Build Coastguard Worker /* extensions: \b, \B, \<, \>, \xHH \x{HHHH} */
811*c9945492SAndroid Build Coastguard Worker switch (*++s) {
812*c9945492SAndroid Build Coastguard Worker case 0:
813*c9945492SAndroid Build Coastguard Worker return REG_EESCAPE;
814*c9945492SAndroid Build Coastguard Worker case 'b':
815*c9945492SAndroid Build Coastguard Worker node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB, -1);
816*c9945492SAndroid Build Coastguard Worker break;
817*c9945492SAndroid Build Coastguard Worker case 'B':
818*c9945492SAndroid Build Coastguard Worker node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB_NEG, -1);
819*c9945492SAndroid Build Coastguard Worker break;
820*c9945492SAndroid Build Coastguard Worker case '<':
821*c9945492SAndroid Build Coastguard Worker node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOW, -1);
822*c9945492SAndroid Build Coastguard Worker break;
823*c9945492SAndroid Build Coastguard Worker case '>':
824*c9945492SAndroid Build Coastguard Worker node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOW, -1);
825*c9945492SAndroid Build Coastguard Worker break;
826*c9945492SAndroid Build Coastguard Worker case 'x':
827*c9945492SAndroid Build Coastguard Worker s++;
828*c9945492SAndroid Build Coastguard Worker int i, v = 0, c;
829*c9945492SAndroid Build Coastguard Worker len = 2;
830*c9945492SAndroid Build Coastguard Worker if (*s == '{') {
831*c9945492SAndroid Build Coastguard Worker len = 8;
832*c9945492SAndroid Build Coastguard Worker s++;
833*c9945492SAndroid Build Coastguard Worker }
834*c9945492SAndroid Build Coastguard Worker for (i=0; i<len && v<0x110000; i++) {
835*c9945492SAndroid Build Coastguard Worker c = hexval(s[i]);
836*c9945492SAndroid Build Coastguard Worker if (c < 0) break;
837*c9945492SAndroid Build Coastguard Worker v = 16*v + c;
838*c9945492SAndroid Build Coastguard Worker }
839*c9945492SAndroid Build Coastguard Worker s += i;
840*c9945492SAndroid Build Coastguard Worker if (len == 8) {
841*c9945492SAndroid Build Coastguard Worker if (*s != '}')
842*c9945492SAndroid Build Coastguard Worker return REG_EBRACE;
843*c9945492SAndroid Build Coastguard Worker s++;
844*c9945492SAndroid Build Coastguard Worker }
845*c9945492SAndroid Build Coastguard Worker node = tre_ast_new_literal(ctx->mem, v, v, ctx->position++);
846*c9945492SAndroid Build Coastguard Worker s--;
847*c9945492SAndroid Build Coastguard Worker break;
848*c9945492SAndroid Build Coastguard Worker case '{':
849*c9945492SAndroid Build Coastguard Worker case '+':
850*c9945492SAndroid Build Coastguard Worker case '?':
851*c9945492SAndroid Build Coastguard Worker /* extension: treat \+, \? as repetitions in BRE */
852*c9945492SAndroid Build Coastguard Worker /* reject repetitions after empty expression in BRE */
853*c9945492SAndroid Build Coastguard Worker if (!ere)
854*c9945492SAndroid Build Coastguard Worker return REG_BADRPT;
855*c9945492SAndroid Build Coastguard Worker case '|':
856*c9945492SAndroid Build Coastguard Worker /* extension: treat \| as alternation in BRE */
857*c9945492SAndroid Build Coastguard Worker if (!ere) {
858*c9945492SAndroid Build Coastguard Worker node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
859*c9945492SAndroid Build Coastguard Worker s--;
860*c9945492SAndroid Build Coastguard Worker goto end;
861*c9945492SAndroid Build Coastguard Worker }
862*c9945492SAndroid Build Coastguard Worker /* fallthrough */
863*c9945492SAndroid Build Coastguard Worker default:
864*c9945492SAndroid Build Coastguard Worker if (!ere && (unsigned)*s-'1' < 9) {
865*c9945492SAndroid Build Coastguard Worker /* back reference */
866*c9945492SAndroid Build Coastguard Worker int val = *s - '0';
867*c9945492SAndroid Build Coastguard Worker node = tre_ast_new_literal(ctx->mem, BACKREF, val, ctx->position++);
868*c9945492SAndroid Build Coastguard Worker ctx->max_backref = MAX(val, ctx->max_backref);
869*c9945492SAndroid Build Coastguard Worker } else {
870*c9945492SAndroid Build Coastguard Worker /* extension: accept unknown escaped char
871*c9945492SAndroid Build Coastguard Worker as a literal */
872*c9945492SAndroid Build Coastguard Worker goto parse_literal;
873*c9945492SAndroid Build Coastguard Worker }
874*c9945492SAndroid Build Coastguard Worker }
875*c9945492SAndroid Build Coastguard Worker s++;
876*c9945492SAndroid Build Coastguard Worker break;
877*c9945492SAndroid Build Coastguard Worker case '.':
878*c9945492SAndroid Build Coastguard Worker if (ctx->cflags & REG_NEWLINE) {
879*c9945492SAndroid Build Coastguard Worker tre_ast_node_t *tmp1, *tmp2;
880*c9945492SAndroid Build Coastguard Worker tmp1 = tre_ast_new_literal(ctx->mem, 0, '\n'-1, ctx->position++);
881*c9945492SAndroid Build Coastguard Worker tmp2 = tre_ast_new_literal(ctx->mem, '\n'+1, TRE_CHAR_MAX, ctx->position++);
882*c9945492SAndroid Build Coastguard Worker if (tmp1 && tmp2)
883*c9945492SAndroid Build Coastguard Worker node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
884*c9945492SAndroid Build Coastguard Worker else
885*c9945492SAndroid Build Coastguard Worker node = 0;
886*c9945492SAndroid Build Coastguard Worker } else {
887*c9945492SAndroid Build Coastguard Worker node = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX, ctx->position++);
888*c9945492SAndroid Build Coastguard Worker }
889*c9945492SAndroid Build Coastguard Worker s++;
890*c9945492SAndroid Build Coastguard Worker break;
891*c9945492SAndroid Build Coastguard Worker case '^':
892*c9945492SAndroid Build Coastguard Worker /* '^' has a special meaning everywhere in EREs, and at beginning of BRE. */
893*c9945492SAndroid Build Coastguard Worker if (!ere && s != ctx->start)
894*c9945492SAndroid Build Coastguard Worker goto parse_literal;
895*c9945492SAndroid Build Coastguard Worker node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOL, -1);
896*c9945492SAndroid Build Coastguard Worker s++;
897*c9945492SAndroid Build Coastguard Worker break;
898*c9945492SAndroid Build Coastguard Worker case '$':
899*c9945492SAndroid Build Coastguard Worker /* '$' is special everywhere in EREs, and at the end of a BRE subexpression. */
900*c9945492SAndroid Build Coastguard Worker if (!ere && s[1] && (s[1]!='\\'|| (s[2]!=')' && s[2]!='|')))
901*c9945492SAndroid Build Coastguard Worker goto parse_literal;
902*c9945492SAndroid Build Coastguard Worker node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOL, -1);
903*c9945492SAndroid Build Coastguard Worker s++;
904*c9945492SAndroid Build Coastguard Worker break;
905*c9945492SAndroid Build Coastguard Worker case '*':
906*c9945492SAndroid Build Coastguard Worker case '{':
907*c9945492SAndroid Build Coastguard Worker case '+':
908*c9945492SAndroid Build Coastguard Worker case '?':
909*c9945492SAndroid Build Coastguard Worker /* reject repetitions after empty expression in ERE */
910*c9945492SAndroid Build Coastguard Worker if (ere)
911*c9945492SAndroid Build Coastguard Worker return REG_BADRPT;
912*c9945492SAndroid Build Coastguard Worker case '|':
913*c9945492SAndroid Build Coastguard Worker if (!ere)
914*c9945492SAndroid Build Coastguard Worker goto parse_literal;
915*c9945492SAndroid Build Coastguard Worker case 0:
916*c9945492SAndroid Build Coastguard Worker node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
917*c9945492SAndroid Build Coastguard Worker break;
918*c9945492SAndroid Build Coastguard Worker default:
919*c9945492SAndroid Build Coastguard Worker parse_literal:
920*c9945492SAndroid Build Coastguard Worker len = mbtowc(&wc, s, -1);
921*c9945492SAndroid Build Coastguard Worker if (len < 0)
922*c9945492SAndroid Build Coastguard Worker return REG_BADPAT;
923*c9945492SAndroid Build Coastguard Worker if (ctx->cflags & REG_ICASE && (tre_isupper(wc) || tre_islower(wc))) {
924*c9945492SAndroid Build Coastguard Worker tre_ast_node_t *tmp1, *tmp2;
925*c9945492SAndroid Build Coastguard Worker /* multiple opposite case characters are not supported */
926*c9945492SAndroid Build Coastguard Worker tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(wc), tre_toupper(wc), ctx->position);
927*c9945492SAndroid Build Coastguard Worker tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(wc), tre_tolower(wc), ctx->position);
928*c9945492SAndroid Build Coastguard Worker if (tmp1 && tmp2)
929*c9945492SAndroid Build Coastguard Worker node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
930*c9945492SAndroid Build Coastguard Worker else
931*c9945492SAndroid Build Coastguard Worker node = 0;
932*c9945492SAndroid Build Coastguard Worker } else {
933*c9945492SAndroid Build Coastguard Worker node = tre_ast_new_literal(ctx->mem, wc, wc, ctx->position);
934*c9945492SAndroid Build Coastguard Worker }
935*c9945492SAndroid Build Coastguard Worker ctx->position++;
936*c9945492SAndroid Build Coastguard Worker s += len;
937*c9945492SAndroid Build Coastguard Worker break;
938*c9945492SAndroid Build Coastguard Worker }
939*c9945492SAndroid Build Coastguard Worker end:
940*c9945492SAndroid Build Coastguard Worker if (!node)
941*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
942*c9945492SAndroid Build Coastguard Worker ctx->n = node;
943*c9945492SAndroid Build Coastguard Worker ctx->s = s;
944*c9945492SAndroid Build Coastguard Worker return REG_OK;
945*c9945492SAndroid Build Coastguard Worker }
946*c9945492SAndroid Build Coastguard Worker
947*c9945492SAndroid Build Coastguard Worker #define PUSHPTR(err, s, v) do { \
948*c9945492SAndroid Build Coastguard Worker if ((err = tre_stack_push_voidptr(s, v)) != REG_OK) \
949*c9945492SAndroid Build Coastguard Worker return err; \
950*c9945492SAndroid Build Coastguard Worker } while(0)
951*c9945492SAndroid Build Coastguard Worker
952*c9945492SAndroid Build Coastguard Worker #define PUSHINT(err, s, v) do { \
953*c9945492SAndroid Build Coastguard Worker if ((err = tre_stack_push_int(s, v)) != REG_OK) \
954*c9945492SAndroid Build Coastguard Worker return err; \
955*c9945492SAndroid Build Coastguard Worker } while(0)
956*c9945492SAndroid Build Coastguard Worker
tre_parse(tre_parse_ctx_t * ctx)957*c9945492SAndroid Build Coastguard Worker static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
958*c9945492SAndroid Build Coastguard Worker {
959*c9945492SAndroid Build Coastguard Worker tre_ast_node_t *nbranch=0, *nunion=0;
960*c9945492SAndroid Build Coastguard Worker int ere = ctx->cflags & REG_EXTENDED;
961*c9945492SAndroid Build Coastguard Worker const char *s = ctx->start;
962*c9945492SAndroid Build Coastguard Worker int subid = 0;
963*c9945492SAndroid Build Coastguard Worker int depth = 0;
964*c9945492SAndroid Build Coastguard Worker reg_errcode_t err;
965*c9945492SAndroid Build Coastguard Worker tre_stack_t *stack = ctx->stack;
966*c9945492SAndroid Build Coastguard Worker
967*c9945492SAndroid Build Coastguard Worker PUSHINT(err, stack, subid++);
968*c9945492SAndroid Build Coastguard Worker for (;;) {
969*c9945492SAndroid Build Coastguard Worker if ((!ere && *s == '\\' && s[1] == '(') ||
970*c9945492SAndroid Build Coastguard Worker (ere && *s == '(')) {
971*c9945492SAndroid Build Coastguard Worker PUSHPTR(err, stack, nunion);
972*c9945492SAndroid Build Coastguard Worker PUSHPTR(err, stack, nbranch);
973*c9945492SAndroid Build Coastguard Worker PUSHINT(err, stack, subid++);
974*c9945492SAndroid Build Coastguard Worker s++;
975*c9945492SAndroid Build Coastguard Worker if (!ere)
976*c9945492SAndroid Build Coastguard Worker s++;
977*c9945492SAndroid Build Coastguard Worker depth++;
978*c9945492SAndroid Build Coastguard Worker nbranch = nunion = 0;
979*c9945492SAndroid Build Coastguard Worker ctx->start = s;
980*c9945492SAndroid Build Coastguard Worker continue;
981*c9945492SAndroid Build Coastguard Worker }
982*c9945492SAndroid Build Coastguard Worker if ((!ere && *s == '\\' && s[1] == ')') ||
983*c9945492SAndroid Build Coastguard Worker (ere && *s == ')' && depth)) {
984*c9945492SAndroid Build Coastguard Worker ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
985*c9945492SAndroid Build Coastguard Worker if (!ctx->n)
986*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
987*c9945492SAndroid Build Coastguard Worker } else {
988*c9945492SAndroid Build Coastguard Worker err = parse_atom(ctx, s);
989*c9945492SAndroid Build Coastguard Worker if (err != REG_OK)
990*c9945492SAndroid Build Coastguard Worker return err;
991*c9945492SAndroid Build Coastguard Worker s = ctx->s;
992*c9945492SAndroid Build Coastguard Worker }
993*c9945492SAndroid Build Coastguard Worker
994*c9945492SAndroid Build Coastguard Worker parse_iter:
995*c9945492SAndroid Build Coastguard Worker for (;;) {
996*c9945492SAndroid Build Coastguard Worker int min, max;
997*c9945492SAndroid Build Coastguard Worker
998*c9945492SAndroid Build Coastguard Worker if (*s!='\\' && *s!='*') {
999*c9945492SAndroid Build Coastguard Worker if (!ere)
1000*c9945492SAndroid Build Coastguard Worker break;
1001*c9945492SAndroid Build Coastguard Worker if (*s!='+' && *s!='?' && *s!='{')
1002*c9945492SAndroid Build Coastguard Worker break;
1003*c9945492SAndroid Build Coastguard Worker }
1004*c9945492SAndroid Build Coastguard Worker if (*s=='\\' && ere)
1005*c9945492SAndroid Build Coastguard Worker break;
1006*c9945492SAndroid Build Coastguard Worker /* extension: treat \+, \? as repetitions in BRE */
1007*c9945492SAndroid Build Coastguard Worker if (*s=='\\' && s[1]!='+' && s[1]!='?' && s[1]!='{')
1008*c9945492SAndroid Build Coastguard Worker break;
1009*c9945492SAndroid Build Coastguard Worker if (*s=='\\')
1010*c9945492SAndroid Build Coastguard Worker s++;
1011*c9945492SAndroid Build Coastguard Worker
1012*c9945492SAndroid Build Coastguard Worker /* handle ^* at the start of a BRE. */
1013*c9945492SAndroid Build Coastguard Worker if (!ere && s==ctx->start+1 && s[-1]=='^')
1014*c9945492SAndroid Build Coastguard Worker break;
1015*c9945492SAndroid Build Coastguard Worker
1016*c9945492SAndroid Build Coastguard Worker /* extension: multiple consecutive *+?{,} is unspecified,
1017*c9945492SAndroid Build Coastguard Worker but (a+)+ has to be supported so accepting a++ makes
1018*c9945492SAndroid Build Coastguard Worker sense, note however that the RE_DUP_MAX limit can be
1019*c9945492SAndroid Build Coastguard Worker circumvented: (a{255}){255} uses a lot of memory.. */
1020*c9945492SAndroid Build Coastguard Worker if (*s=='{') {
1021*c9945492SAndroid Build Coastguard Worker s = parse_dup(s+1, ere, &min, &max);
1022*c9945492SAndroid Build Coastguard Worker if (!s)
1023*c9945492SAndroid Build Coastguard Worker return REG_BADBR;
1024*c9945492SAndroid Build Coastguard Worker } else {
1025*c9945492SAndroid Build Coastguard Worker min=0;
1026*c9945492SAndroid Build Coastguard Worker max=-1;
1027*c9945492SAndroid Build Coastguard Worker if (*s == '+')
1028*c9945492SAndroid Build Coastguard Worker min = 1;
1029*c9945492SAndroid Build Coastguard Worker if (*s == '?')
1030*c9945492SAndroid Build Coastguard Worker max = 1;
1031*c9945492SAndroid Build Coastguard Worker s++;
1032*c9945492SAndroid Build Coastguard Worker }
1033*c9945492SAndroid Build Coastguard Worker if (max == 0)
1034*c9945492SAndroid Build Coastguard Worker ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
1035*c9945492SAndroid Build Coastguard Worker else
1036*c9945492SAndroid Build Coastguard Worker ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);
1037*c9945492SAndroid Build Coastguard Worker if (!ctx->n)
1038*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
1039*c9945492SAndroid Build Coastguard Worker }
1040*c9945492SAndroid Build Coastguard Worker
1041*c9945492SAndroid Build Coastguard Worker nbranch = tre_ast_new_catenation(ctx->mem, nbranch, ctx->n);
1042*c9945492SAndroid Build Coastguard Worker if ((ere && *s == '|') ||
1043*c9945492SAndroid Build Coastguard Worker (ere && *s == ')' && depth) ||
1044*c9945492SAndroid Build Coastguard Worker (!ere && *s == '\\' && s[1] == ')') ||
1045*c9945492SAndroid Build Coastguard Worker /* extension: treat \| as alternation in BRE */
1046*c9945492SAndroid Build Coastguard Worker (!ere && *s == '\\' && s[1] == '|') ||
1047*c9945492SAndroid Build Coastguard Worker !*s) {
1048*c9945492SAndroid Build Coastguard Worker /* extension: empty branch is unspecified (), (|a), (a|)
1049*c9945492SAndroid Build Coastguard Worker here they are not rejected but match on empty string */
1050*c9945492SAndroid Build Coastguard Worker int c = *s;
1051*c9945492SAndroid Build Coastguard Worker nunion = tre_ast_new_union(ctx->mem, nunion, nbranch);
1052*c9945492SAndroid Build Coastguard Worker nbranch = 0;
1053*c9945492SAndroid Build Coastguard Worker
1054*c9945492SAndroid Build Coastguard Worker if (c == '\\' && s[1] == '|') {
1055*c9945492SAndroid Build Coastguard Worker s+=2;
1056*c9945492SAndroid Build Coastguard Worker ctx->start = s;
1057*c9945492SAndroid Build Coastguard Worker } else if (c == '|') {
1058*c9945492SAndroid Build Coastguard Worker s++;
1059*c9945492SAndroid Build Coastguard Worker ctx->start = s;
1060*c9945492SAndroid Build Coastguard Worker } else {
1061*c9945492SAndroid Build Coastguard Worker if (c == '\\') {
1062*c9945492SAndroid Build Coastguard Worker if (!depth) return REG_EPAREN;
1063*c9945492SAndroid Build Coastguard Worker s+=2;
1064*c9945492SAndroid Build Coastguard Worker } else if (c == ')')
1065*c9945492SAndroid Build Coastguard Worker s++;
1066*c9945492SAndroid Build Coastguard Worker depth--;
1067*c9945492SAndroid Build Coastguard Worker err = marksub(ctx, nunion, tre_stack_pop_int(stack));
1068*c9945492SAndroid Build Coastguard Worker if (err != REG_OK)
1069*c9945492SAndroid Build Coastguard Worker return err;
1070*c9945492SAndroid Build Coastguard Worker if (!c && depth<0) {
1071*c9945492SAndroid Build Coastguard Worker ctx->submatch_id = subid;
1072*c9945492SAndroid Build Coastguard Worker return REG_OK;
1073*c9945492SAndroid Build Coastguard Worker }
1074*c9945492SAndroid Build Coastguard Worker if (!c || depth<0)
1075*c9945492SAndroid Build Coastguard Worker return REG_EPAREN;
1076*c9945492SAndroid Build Coastguard Worker nbranch = tre_stack_pop_voidptr(stack);
1077*c9945492SAndroid Build Coastguard Worker nunion = tre_stack_pop_voidptr(stack);
1078*c9945492SAndroid Build Coastguard Worker goto parse_iter;
1079*c9945492SAndroid Build Coastguard Worker }
1080*c9945492SAndroid Build Coastguard Worker }
1081*c9945492SAndroid Build Coastguard Worker }
1082*c9945492SAndroid Build Coastguard Worker }
1083*c9945492SAndroid Build Coastguard Worker
1084*c9945492SAndroid Build Coastguard Worker
1085*c9945492SAndroid Build Coastguard Worker /***********************************************************************
1086*c9945492SAndroid Build Coastguard Worker from tre-compile.c
1087*c9945492SAndroid Build Coastguard Worker ***********************************************************************/
1088*c9945492SAndroid Build Coastguard Worker
1089*c9945492SAndroid Build Coastguard Worker
1090*c9945492SAndroid Build Coastguard Worker /*
1091*c9945492SAndroid Build Coastguard Worker TODO:
1092*c9945492SAndroid Build Coastguard Worker - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
1093*c9945492SAndroid Build Coastguard Worker function calls.
1094*c9945492SAndroid Build Coastguard Worker */
1095*c9945492SAndroid Build Coastguard Worker
1096*c9945492SAndroid Build Coastguard Worker /*
1097*c9945492SAndroid Build Coastguard Worker Algorithms to setup tags so that submatch addressing can be done.
1098*c9945492SAndroid Build Coastguard Worker */
1099*c9945492SAndroid Build Coastguard Worker
1100*c9945492SAndroid Build Coastguard Worker
1101*c9945492SAndroid Build Coastguard Worker /* Inserts a catenation node to the root of the tree given in `node'.
1102*c9945492SAndroid Build Coastguard Worker As the left child a new tag with number `tag_id' to `node' is added,
1103*c9945492SAndroid Build Coastguard Worker and the right child is the old root. */
1104*c9945492SAndroid Build Coastguard Worker static reg_errcode_t
tre_add_tag_left(tre_mem_t mem,tre_ast_node_t * node,int tag_id)1105*c9945492SAndroid Build Coastguard Worker tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
1106*c9945492SAndroid Build Coastguard Worker {
1107*c9945492SAndroid Build Coastguard Worker tre_catenation_t *c;
1108*c9945492SAndroid Build Coastguard Worker
1109*c9945492SAndroid Build Coastguard Worker c = tre_mem_alloc(mem, sizeof(*c));
1110*c9945492SAndroid Build Coastguard Worker if (c == NULL)
1111*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
1112*c9945492SAndroid Build Coastguard Worker c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
1113*c9945492SAndroid Build Coastguard Worker if (c->left == NULL)
1114*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
1115*c9945492SAndroid Build Coastguard Worker c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
1116*c9945492SAndroid Build Coastguard Worker if (c->right == NULL)
1117*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
1118*c9945492SAndroid Build Coastguard Worker
1119*c9945492SAndroid Build Coastguard Worker c->right->obj = node->obj;
1120*c9945492SAndroid Build Coastguard Worker c->right->type = node->type;
1121*c9945492SAndroid Build Coastguard Worker c->right->nullable = -1;
1122*c9945492SAndroid Build Coastguard Worker c->right->submatch_id = -1;
1123*c9945492SAndroid Build Coastguard Worker c->right->firstpos = NULL;
1124*c9945492SAndroid Build Coastguard Worker c->right->lastpos = NULL;
1125*c9945492SAndroid Build Coastguard Worker c->right->num_tags = 0;
1126*c9945492SAndroid Build Coastguard Worker c->right->num_submatches = 0;
1127*c9945492SAndroid Build Coastguard Worker node->obj = c;
1128*c9945492SAndroid Build Coastguard Worker node->type = CATENATION;
1129*c9945492SAndroid Build Coastguard Worker return REG_OK;
1130*c9945492SAndroid Build Coastguard Worker }
1131*c9945492SAndroid Build Coastguard Worker
1132*c9945492SAndroid Build Coastguard Worker /* Inserts a catenation node to the root of the tree given in `node'.
1133*c9945492SAndroid Build Coastguard Worker As the right child a new tag with number `tag_id' to `node' is added,
1134*c9945492SAndroid Build Coastguard Worker and the left child is the old root. */
1135*c9945492SAndroid Build Coastguard Worker static reg_errcode_t
tre_add_tag_right(tre_mem_t mem,tre_ast_node_t * node,int tag_id)1136*c9945492SAndroid Build Coastguard Worker tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
1137*c9945492SAndroid Build Coastguard Worker {
1138*c9945492SAndroid Build Coastguard Worker tre_catenation_t *c;
1139*c9945492SAndroid Build Coastguard Worker
1140*c9945492SAndroid Build Coastguard Worker c = tre_mem_alloc(mem, sizeof(*c));
1141*c9945492SAndroid Build Coastguard Worker if (c == NULL)
1142*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
1143*c9945492SAndroid Build Coastguard Worker c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);
1144*c9945492SAndroid Build Coastguard Worker if (c->right == NULL)
1145*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
1146*c9945492SAndroid Build Coastguard Worker c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
1147*c9945492SAndroid Build Coastguard Worker if (c->left == NULL)
1148*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
1149*c9945492SAndroid Build Coastguard Worker
1150*c9945492SAndroid Build Coastguard Worker c->left->obj = node->obj;
1151*c9945492SAndroid Build Coastguard Worker c->left->type = node->type;
1152*c9945492SAndroid Build Coastguard Worker c->left->nullable = -1;
1153*c9945492SAndroid Build Coastguard Worker c->left->submatch_id = -1;
1154*c9945492SAndroid Build Coastguard Worker c->left->firstpos = NULL;
1155*c9945492SAndroid Build Coastguard Worker c->left->lastpos = NULL;
1156*c9945492SAndroid Build Coastguard Worker c->left->num_tags = 0;
1157*c9945492SAndroid Build Coastguard Worker c->left->num_submatches = 0;
1158*c9945492SAndroid Build Coastguard Worker node->obj = c;
1159*c9945492SAndroid Build Coastguard Worker node->type = CATENATION;
1160*c9945492SAndroid Build Coastguard Worker return REG_OK;
1161*c9945492SAndroid Build Coastguard Worker }
1162*c9945492SAndroid Build Coastguard Worker
1163*c9945492SAndroid Build Coastguard Worker typedef enum {
1164*c9945492SAndroid Build Coastguard Worker ADDTAGS_RECURSE,
1165*c9945492SAndroid Build Coastguard Worker ADDTAGS_AFTER_ITERATION,
1166*c9945492SAndroid Build Coastguard Worker ADDTAGS_AFTER_UNION_LEFT,
1167*c9945492SAndroid Build Coastguard Worker ADDTAGS_AFTER_UNION_RIGHT,
1168*c9945492SAndroid Build Coastguard Worker ADDTAGS_AFTER_CAT_LEFT,
1169*c9945492SAndroid Build Coastguard Worker ADDTAGS_AFTER_CAT_RIGHT,
1170*c9945492SAndroid Build Coastguard Worker ADDTAGS_SET_SUBMATCH_END
1171*c9945492SAndroid Build Coastguard Worker } tre_addtags_symbol_t;
1172*c9945492SAndroid Build Coastguard Worker
1173*c9945492SAndroid Build Coastguard Worker
1174*c9945492SAndroid Build Coastguard Worker typedef struct {
1175*c9945492SAndroid Build Coastguard Worker int tag;
1176*c9945492SAndroid Build Coastguard Worker int next_tag;
1177*c9945492SAndroid Build Coastguard Worker } tre_tag_states_t;
1178*c9945492SAndroid Build Coastguard Worker
1179*c9945492SAndroid Build Coastguard Worker
1180*c9945492SAndroid Build Coastguard Worker /* Go through `regset' and set submatch data for submatches that are
1181*c9945492SAndroid Build Coastguard Worker using this tag. */
1182*c9945492SAndroid Build Coastguard Worker static void
tre_purge_regset(int * regset,tre_tnfa_t * tnfa,int tag)1183*c9945492SAndroid Build Coastguard Worker tre_purge_regset(int *regset, tre_tnfa_t *tnfa, int tag)
1184*c9945492SAndroid Build Coastguard Worker {
1185*c9945492SAndroid Build Coastguard Worker int i;
1186*c9945492SAndroid Build Coastguard Worker
1187*c9945492SAndroid Build Coastguard Worker for (i = 0; regset[i] >= 0; i++)
1188*c9945492SAndroid Build Coastguard Worker {
1189*c9945492SAndroid Build Coastguard Worker int id = regset[i] / 2;
1190*c9945492SAndroid Build Coastguard Worker int start = !(regset[i] % 2);
1191*c9945492SAndroid Build Coastguard Worker if (start)
1192*c9945492SAndroid Build Coastguard Worker tnfa->submatch_data[id].so_tag = tag;
1193*c9945492SAndroid Build Coastguard Worker else
1194*c9945492SAndroid Build Coastguard Worker tnfa->submatch_data[id].eo_tag = tag;
1195*c9945492SAndroid Build Coastguard Worker }
1196*c9945492SAndroid Build Coastguard Worker regset[0] = -1;
1197*c9945492SAndroid Build Coastguard Worker }
1198*c9945492SAndroid Build Coastguard Worker
1199*c9945492SAndroid Build Coastguard Worker
1200*c9945492SAndroid Build Coastguard Worker /* Adds tags to appropriate locations in the parse tree in `tree', so that
1201*c9945492SAndroid Build Coastguard Worker subexpressions marked for submatch addressing can be traced. */
1202*c9945492SAndroid Build Coastguard Worker static reg_errcode_t
tre_add_tags(tre_mem_t mem,tre_stack_t * stack,tre_ast_node_t * tree,tre_tnfa_t * tnfa)1203*c9945492SAndroid Build Coastguard Worker tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
1204*c9945492SAndroid Build Coastguard Worker tre_tnfa_t *tnfa)
1205*c9945492SAndroid Build Coastguard Worker {
1206*c9945492SAndroid Build Coastguard Worker reg_errcode_t status = REG_OK;
1207*c9945492SAndroid Build Coastguard Worker tre_addtags_symbol_t symbol;
1208*c9945492SAndroid Build Coastguard Worker tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */
1209*c9945492SAndroid Build Coastguard Worker int bottom = tre_stack_num_objects(stack);
1210*c9945492SAndroid Build Coastguard Worker /* True for first pass (counting number of needed tags) */
1211*c9945492SAndroid Build Coastguard Worker int first_pass = (mem == NULL || tnfa == NULL);
1212*c9945492SAndroid Build Coastguard Worker int *regset, *orig_regset;
1213*c9945492SAndroid Build Coastguard Worker int num_tags = 0; /* Total number of tags. */
1214*c9945492SAndroid Build Coastguard Worker int num_minimals = 0; /* Number of special minimal tags. */
1215*c9945492SAndroid Build Coastguard Worker int tag = 0; /* The tag that is to be added next. */
1216*c9945492SAndroid Build Coastguard Worker int next_tag = 1; /* Next tag to use after this one. */
1217*c9945492SAndroid Build Coastguard Worker int *parents; /* Stack of submatches the current submatch is
1218*c9945492SAndroid Build Coastguard Worker contained in. */
1219*c9945492SAndroid Build Coastguard Worker int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
1220*c9945492SAndroid Build Coastguard Worker tre_tag_states_t *saved_states;
1221*c9945492SAndroid Build Coastguard Worker
1222*c9945492SAndroid Build Coastguard Worker tre_tag_direction_t direction = TRE_TAG_MINIMIZE;
1223*c9945492SAndroid Build Coastguard Worker if (!first_pass)
1224*c9945492SAndroid Build Coastguard Worker {
1225*c9945492SAndroid Build Coastguard Worker tnfa->end_tag = 0;
1226*c9945492SAndroid Build Coastguard Worker tnfa->minimal_tags[0] = -1;
1227*c9945492SAndroid Build Coastguard Worker }
1228*c9945492SAndroid Build Coastguard Worker
1229*c9945492SAndroid Build Coastguard Worker regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches + 1) * 2));
1230*c9945492SAndroid Build Coastguard Worker if (regset == NULL)
1231*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
1232*c9945492SAndroid Build Coastguard Worker regset[0] = -1;
1233*c9945492SAndroid Build Coastguard Worker orig_regset = regset;
1234*c9945492SAndroid Build Coastguard Worker
1235*c9945492SAndroid Build Coastguard Worker parents = xmalloc(sizeof(*parents) * (tnfa->num_submatches + 1));
1236*c9945492SAndroid Build Coastguard Worker if (parents == NULL)
1237*c9945492SAndroid Build Coastguard Worker {
1238*c9945492SAndroid Build Coastguard Worker xfree(regset);
1239*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
1240*c9945492SAndroid Build Coastguard Worker }
1241*c9945492SAndroid Build Coastguard Worker parents[0] = -1;
1242*c9945492SAndroid Build Coastguard Worker
1243*c9945492SAndroid Build Coastguard Worker saved_states = xmalloc(sizeof(*saved_states) * (tnfa->num_submatches + 1));
1244*c9945492SAndroid Build Coastguard Worker if (saved_states == NULL)
1245*c9945492SAndroid Build Coastguard Worker {
1246*c9945492SAndroid Build Coastguard Worker xfree(regset);
1247*c9945492SAndroid Build Coastguard Worker xfree(parents);
1248*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
1249*c9945492SAndroid Build Coastguard Worker }
1250*c9945492SAndroid Build Coastguard Worker else
1251*c9945492SAndroid Build Coastguard Worker {
1252*c9945492SAndroid Build Coastguard Worker unsigned int i;
1253*c9945492SAndroid Build Coastguard Worker for (i = 0; i <= tnfa->num_submatches; i++)
1254*c9945492SAndroid Build Coastguard Worker saved_states[i].tag = -1;
1255*c9945492SAndroid Build Coastguard Worker }
1256*c9945492SAndroid Build Coastguard Worker
1257*c9945492SAndroid Build Coastguard Worker STACK_PUSH(stack, voidptr, node);
1258*c9945492SAndroid Build Coastguard Worker STACK_PUSH(stack, int, ADDTAGS_RECURSE);
1259*c9945492SAndroid Build Coastguard Worker
1260*c9945492SAndroid Build Coastguard Worker while (tre_stack_num_objects(stack) > bottom)
1261*c9945492SAndroid Build Coastguard Worker {
1262*c9945492SAndroid Build Coastguard Worker if (status != REG_OK)
1263*c9945492SAndroid Build Coastguard Worker break;
1264*c9945492SAndroid Build Coastguard Worker
1265*c9945492SAndroid Build Coastguard Worker symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
1266*c9945492SAndroid Build Coastguard Worker switch (symbol)
1267*c9945492SAndroid Build Coastguard Worker {
1268*c9945492SAndroid Build Coastguard Worker
1269*c9945492SAndroid Build Coastguard Worker case ADDTAGS_SET_SUBMATCH_END:
1270*c9945492SAndroid Build Coastguard Worker {
1271*c9945492SAndroid Build Coastguard Worker int id = tre_stack_pop_int(stack);
1272*c9945492SAndroid Build Coastguard Worker int i;
1273*c9945492SAndroid Build Coastguard Worker
1274*c9945492SAndroid Build Coastguard Worker /* Add end of this submatch to regset. */
1275*c9945492SAndroid Build Coastguard Worker for (i = 0; regset[i] >= 0; i++);
1276*c9945492SAndroid Build Coastguard Worker regset[i] = id * 2 + 1;
1277*c9945492SAndroid Build Coastguard Worker regset[i + 1] = -1;
1278*c9945492SAndroid Build Coastguard Worker
1279*c9945492SAndroid Build Coastguard Worker /* Pop this submatch from the parents stack. */
1280*c9945492SAndroid Build Coastguard Worker for (i = 0; parents[i] >= 0; i++);
1281*c9945492SAndroid Build Coastguard Worker parents[i - 1] = -1;
1282*c9945492SAndroid Build Coastguard Worker break;
1283*c9945492SAndroid Build Coastguard Worker }
1284*c9945492SAndroid Build Coastguard Worker
1285*c9945492SAndroid Build Coastguard Worker case ADDTAGS_RECURSE:
1286*c9945492SAndroid Build Coastguard Worker node = tre_stack_pop_voidptr(stack);
1287*c9945492SAndroid Build Coastguard Worker
1288*c9945492SAndroid Build Coastguard Worker if (node->submatch_id >= 0)
1289*c9945492SAndroid Build Coastguard Worker {
1290*c9945492SAndroid Build Coastguard Worker int id = node->submatch_id;
1291*c9945492SAndroid Build Coastguard Worker int i;
1292*c9945492SAndroid Build Coastguard Worker
1293*c9945492SAndroid Build Coastguard Worker
1294*c9945492SAndroid Build Coastguard Worker /* Add start of this submatch to regset. */
1295*c9945492SAndroid Build Coastguard Worker for (i = 0; regset[i] >= 0; i++);
1296*c9945492SAndroid Build Coastguard Worker regset[i] = id * 2;
1297*c9945492SAndroid Build Coastguard Worker regset[i + 1] = -1;
1298*c9945492SAndroid Build Coastguard Worker
1299*c9945492SAndroid Build Coastguard Worker if (!first_pass)
1300*c9945492SAndroid Build Coastguard Worker {
1301*c9945492SAndroid Build Coastguard Worker for (i = 0; parents[i] >= 0; i++);
1302*c9945492SAndroid Build Coastguard Worker tnfa->submatch_data[id].parents = NULL;
1303*c9945492SAndroid Build Coastguard Worker if (i > 0)
1304*c9945492SAndroid Build Coastguard Worker {
1305*c9945492SAndroid Build Coastguard Worker int *p = xmalloc(sizeof(*p) * (i + 1));
1306*c9945492SAndroid Build Coastguard Worker if (p == NULL)
1307*c9945492SAndroid Build Coastguard Worker {
1308*c9945492SAndroid Build Coastguard Worker status = REG_ESPACE;
1309*c9945492SAndroid Build Coastguard Worker break;
1310*c9945492SAndroid Build Coastguard Worker }
1311*c9945492SAndroid Build Coastguard Worker assert(tnfa->submatch_data[id].parents == NULL);
1312*c9945492SAndroid Build Coastguard Worker tnfa->submatch_data[id].parents = p;
1313*c9945492SAndroid Build Coastguard Worker for (i = 0; parents[i] >= 0; i++)
1314*c9945492SAndroid Build Coastguard Worker p[i] = parents[i];
1315*c9945492SAndroid Build Coastguard Worker p[i] = -1;
1316*c9945492SAndroid Build Coastguard Worker }
1317*c9945492SAndroid Build Coastguard Worker }
1318*c9945492SAndroid Build Coastguard Worker
1319*c9945492SAndroid Build Coastguard Worker /* Add end of this submatch to regset after processing this
1320*c9945492SAndroid Build Coastguard Worker node. */
1321*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, int, node->submatch_id);
1322*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END);
1323*c9945492SAndroid Build Coastguard Worker }
1324*c9945492SAndroid Build Coastguard Worker
1325*c9945492SAndroid Build Coastguard Worker switch (node->type)
1326*c9945492SAndroid Build Coastguard Worker {
1327*c9945492SAndroid Build Coastguard Worker case LITERAL:
1328*c9945492SAndroid Build Coastguard Worker {
1329*c9945492SAndroid Build Coastguard Worker tre_literal_t *lit = node->obj;
1330*c9945492SAndroid Build Coastguard Worker
1331*c9945492SAndroid Build Coastguard Worker if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1332*c9945492SAndroid Build Coastguard Worker {
1333*c9945492SAndroid Build Coastguard Worker int i;
1334*c9945492SAndroid Build Coastguard Worker if (regset[0] >= 0)
1335*c9945492SAndroid Build Coastguard Worker {
1336*c9945492SAndroid Build Coastguard Worker /* Regset is not empty, so add a tag before the
1337*c9945492SAndroid Build Coastguard Worker literal or backref. */
1338*c9945492SAndroid Build Coastguard Worker if (!first_pass)
1339*c9945492SAndroid Build Coastguard Worker {
1340*c9945492SAndroid Build Coastguard Worker status = tre_add_tag_left(mem, node, tag);
1341*c9945492SAndroid Build Coastguard Worker tnfa->tag_directions[tag] = direction;
1342*c9945492SAndroid Build Coastguard Worker if (minimal_tag >= 0)
1343*c9945492SAndroid Build Coastguard Worker {
1344*c9945492SAndroid Build Coastguard Worker for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1345*c9945492SAndroid Build Coastguard Worker tnfa->minimal_tags[i] = tag;
1346*c9945492SAndroid Build Coastguard Worker tnfa->minimal_tags[i + 1] = minimal_tag;
1347*c9945492SAndroid Build Coastguard Worker tnfa->minimal_tags[i + 2] = -1;
1348*c9945492SAndroid Build Coastguard Worker minimal_tag = -1;
1349*c9945492SAndroid Build Coastguard Worker num_minimals++;
1350*c9945492SAndroid Build Coastguard Worker }
1351*c9945492SAndroid Build Coastguard Worker tre_purge_regset(regset, tnfa, tag);
1352*c9945492SAndroid Build Coastguard Worker }
1353*c9945492SAndroid Build Coastguard Worker else
1354*c9945492SAndroid Build Coastguard Worker {
1355*c9945492SAndroid Build Coastguard Worker node->num_tags = 1;
1356*c9945492SAndroid Build Coastguard Worker }
1357*c9945492SAndroid Build Coastguard Worker
1358*c9945492SAndroid Build Coastguard Worker regset[0] = -1;
1359*c9945492SAndroid Build Coastguard Worker tag = next_tag;
1360*c9945492SAndroid Build Coastguard Worker num_tags++;
1361*c9945492SAndroid Build Coastguard Worker next_tag++;
1362*c9945492SAndroid Build Coastguard Worker }
1363*c9945492SAndroid Build Coastguard Worker }
1364*c9945492SAndroid Build Coastguard Worker else
1365*c9945492SAndroid Build Coastguard Worker {
1366*c9945492SAndroid Build Coastguard Worker assert(!IS_TAG(lit));
1367*c9945492SAndroid Build Coastguard Worker }
1368*c9945492SAndroid Build Coastguard Worker break;
1369*c9945492SAndroid Build Coastguard Worker }
1370*c9945492SAndroid Build Coastguard Worker case CATENATION:
1371*c9945492SAndroid Build Coastguard Worker {
1372*c9945492SAndroid Build Coastguard Worker tre_catenation_t *cat = node->obj;
1373*c9945492SAndroid Build Coastguard Worker tre_ast_node_t *left = cat->left;
1374*c9945492SAndroid Build Coastguard Worker tre_ast_node_t *right = cat->right;
1375*c9945492SAndroid Build Coastguard Worker int reserved_tag = -1;
1376*c9945492SAndroid Build Coastguard Worker
1377*c9945492SAndroid Build Coastguard Worker
1378*c9945492SAndroid Build Coastguard Worker /* After processing right child. */
1379*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, voidptr, node);
1380*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT);
1381*c9945492SAndroid Build Coastguard Worker
1382*c9945492SAndroid Build Coastguard Worker /* Process right child. */
1383*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, voidptr, right);
1384*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1385*c9945492SAndroid Build Coastguard Worker
1386*c9945492SAndroid Build Coastguard Worker /* After processing left child. */
1387*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, int, next_tag + left->num_tags);
1388*c9945492SAndroid Build Coastguard Worker if (left->num_tags > 0 && right->num_tags > 0)
1389*c9945492SAndroid Build Coastguard Worker {
1390*c9945492SAndroid Build Coastguard Worker /* Reserve the next tag to the right child. */
1391*c9945492SAndroid Build Coastguard Worker reserved_tag = next_tag;
1392*c9945492SAndroid Build Coastguard Worker next_tag++;
1393*c9945492SAndroid Build Coastguard Worker }
1394*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, int, reserved_tag);
1395*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT);
1396*c9945492SAndroid Build Coastguard Worker
1397*c9945492SAndroid Build Coastguard Worker /* Process left child. */
1398*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, voidptr, left);
1399*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1400*c9945492SAndroid Build Coastguard Worker
1401*c9945492SAndroid Build Coastguard Worker }
1402*c9945492SAndroid Build Coastguard Worker break;
1403*c9945492SAndroid Build Coastguard Worker case ITERATION:
1404*c9945492SAndroid Build Coastguard Worker {
1405*c9945492SAndroid Build Coastguard Worker tre_iteration_t *iter = node->obj;
1406*c9945492SAndroid Build Coastguard Worker
1407*c9945492SAndroid Build Coastguard Worker if (first_pass)
1408*c9945492SAndroid Build Coastguard Worker {
1409*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, int, regset[0] >= 0 || iter->minimal);
1410*c9945492SAndroid Build Coastguard Worker }
1411*c9945492SAndroid Build Coastguard Worker else
1412*c9945492SAndroid Build Coastguard Worker {
1413*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, int, tag);
1414*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, int, iter->minimal);
1415*c9945492SAndroid Build Coastguard Worker }
1416*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, voidptr, node);
1417*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION);
1418*c9945492SAndroid Build Coastguard Worker
1419*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, voidptr, iter->arg);
1420*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1421*c9945492SAndroid Build Coastguard Worker
1422*c9945492SAndroid Build Coastguard Worker /* Regset is not empty, so add a tag here. */
1423*c9945492SAndroid Build Coastguard Worker if (regset[0] >= 0 || iter->minimal)
1424*c9945492SAndroid Build Coastguard Worker {
1425*c9945492SAndroid Build Coastguard Worker if (!first_pass)
1426*c9945492SAndroid Build Coastguard Worker {
1427*c9945492SAndroid Build Coastguard Worker int i;
1428*c9945492SAndroid Build Coastguard Worker status = tre_add_tag_left(mem, node, tag);
1429*c9945492SAndroid Build Coastguard Worker if (iter->minimal)
1430*c9945492SAndroid Build Coastguard Worker tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE;
1431*c9945492SAndroid Build Coastguard Worker else
1432*c9945492SAndroid Build Coastguard Worker tnfa->tag_directions[tag] = direction;
1433*c9945492SAndroid Build Coastguard Worker if (minimal_tag >= 0)
1434*c9945492SAndroid Build Coastguard Worker {
1435*c9945492SAndroid Build Coastguard Worker for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1436*c9945492SAndroid Build Coastguard Worker tnfa->minimal_tags[i] = tag;
1437*c9945492SAndroid Build Coastguard Worker tnfa->minimal_tags[i + 1] = minimal_tag;
1438*c9945492SAndroid Build Coastguard Worker tnfa->minimal_tags[i + 2] = -1;
1439*c9945492SAndroid Build Coastguard Worker minimal_tag = -1;
1440*c9945492SAndroid Build Coastguard Worker num_minimals++;
1441*c9945492SAndroid Build Coastguard Worker }
1442*c9945492SAndroid Build Coastguard Worker tre_purge_regset(regset, tnfa, tag);
1443*c9945492SAndroid Build Coastguard Worker }
1444*c9945492SAndroid Build Coastguard Worker
1445*c9945492SAndroid Build Coastguard Worker regset[0] = -1;
1446*c9945492SAndroid Build Coastguard Worker tag = next_tag;
1447*c9945492SAndroid Build Coastguard Worker num_tags++;
1448*c9945492SAndroid Build Coastguard Worker next_tag++;
1449*c9945492SAndroid Build Coastguard Worker }
1450*c9945492SAndroid Build Coastguard Worker direction = TRE_TAG_MINIMIZE;
1451*c9945492SAndroid Build Coastguard Worker }
1452*c9945492SAndroid Build Coastguard Worker break;
1453*c9945492SAndroid Build Coastguard Worker case UNION:
1454*c9945492SAndroid Build Coastguard Worker {
1455*c9945492SAndroid Build Coastguard Worker tre_union_t *uni = node->obj;
1456*c9945492SAndroid Build Coastguard Worker tre_ast_node_t *left = uni->left;
1457*c9945492SAndroid Build Coastguard Worker tre_ast_node_t *right = uni->right;
1458*c9945492SAndroid Build Coastguard Worker int left_tag;
1459*c9945492SAndroid Build Coastguard Worker int right_tag;
1460*c9945492SAndroid Build Coastguard Worker
1461*c9945492SAndroid Build Coastguard Worker if (regset[0] >= 0)
1462*c9945492SAndroid Build Coastguard Worker {
1463*c9945492SAndroid Build Coastguard Worker left_tag = next_tag;
1464*c9945492SAndroid Build Coastguard Worker right_tag = next_tag + 1;
1465*c9945492SAndroid Build Coastguard Worker }
1466*c9945492SAndroid Build Coastguard Worker else
1467*c9945492SAndroid Build Coastguard Worker {
1468*c9945492SAndroid Build Coastguard Worker left_tag = tag;
1469*c9945492SAndroid Build Coastguard Worker right_tag = next_tag;
1470*c9945492SAndroid Build Coastguard Worker }
1471*c9945492SAndroid Build Coastguard Worker
1472*c9945492SAndroid Build Coastguard Worker /* After processing right child. */
1473*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, int, right_tag);
1474*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, int, left_tag);
1475*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, voidptr, regset);
1476*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, int, regset[0] >= 0);
1477*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, voidptr, node);
1478*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, voidptr, right);
1479*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, voidptr, left);
1480*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT);
1481*c9945492SAndroid Build Coastguard Worker
1482*c9945492SAndroid Build Coastguard Worker /* Process right child. */
1483*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, voidptr, right);
1484*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1485*c9945492SAndroid Build Coastguard Worker
1486*c9945492SAndroid Build Coastguard Worker /* After processing left child. */
1487*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT);
1488*c9945492SAndroid Build Coastguard Worker
1489*c9945492SAndroid Build Coastguard Worker /* Process left child. */
1490*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, voidptr, left);
1491*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1492*c9945492SAndroid Build Coastguard Worker
1493*c9945492SAndroid Build Coastguard Worker /* Regset is not empty, so add a tag here. */
1494*c9945492SAndroid Build Coastguard Worker if (regset[0] >= 0)
1495*c9945492SAndroid Build Coastguard Worker {
1496*c9945492SAndroid Build Coastguard Worker if (!first_pass)
1497*c9945492SAndroid Build Coastguard Worker {
1498*c9945492SAndroid Build Coastguard Worker int i;
1499*c9945492SAndroid Build Coastguard Worker status = tre_add_tag_left(mem, node, tag);
1500*c9945492SAndroid Build Coastguard Worker tnfa->tag_directions[tag] = direction;
1501*c9945492SAndroid Build Coastguard Worker if (minimal_tag >= 0)
1502*c9945492SAndroid Build Coastguard Worker {
1503*c9945492SAndroid Build Coastguard Worker for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1504*c9945492SAndroid Build Coastguard Worker tnfa->minimal_tags[i] = tag;
1505*c9945492SAndroid Build Coastguard Worker tnfa->minimal_tags[i + 1] = minimal_tag;
1506*c9945492SAndroid Build Coastguard Worker tnfa->minimal_tags[i + 2] = -1;
1507*c9945492SAndroid Build Coastguard Worker minimal_tag = -1;
1508*c9945492SAndroid Build Coastguard Worker num_minimals++;
1509*c9945492SAndroid Build Coastguard Worker }
1510*c9945492SAndroid Build Coastguard Worker tre_purge_regset(regset, tnfa, tag);
1511*c9945492SAndroid Build Coastguard Worker }
1512*c9945492SAndroid Build Coastguard Worker
1513*c9945492SAndroid Build Coastguard Worker regset[0] = -1;
1514*c9945492SAndroid Build Coastguard Worker tag = next_tag;
1515*c9945492SAndroid Build Coastguard Worker num_tags++;
1516*c9945492SAndroid Build Coastguard Worker next_tag++;
1517*c9945492SAndroid Build Coastguard Worker }
1518*c9945492SAndroid Build Coastguard Worker
1519*c9945492SAndroid Build Coastguard Worker if (node->num_submatches > 0)
1520*c9945492SAndroid Build Coastguard Worker {
1521*c9945492SAndroid Build Coastguard Worker /* The next two tags are reserved for markers. */
1522*c9945492SAndroid Build Coastguard Worker next_tag++;
1523*c9945492SAndroid Build Coastguard Worker tag = next_tag;
1524*c9945492SAndroid Build Coastguard Worker next_tag++;
1525*c9945492SAndroid Build Coastguard Worker }
1526*c9945492SAndroid Build Coastguard Worker
1527*c9945492SAndroid Build Coastguard Worker break;
1528*c9945492SAndroid Build Coastguard Worker }
1529*c9945492SAndroid Build Coastguard Worker }
1530*c9945492SAndroid Build Coastguard Worker
1531*c9945492SAndroid Build Coastguard Worker if (node->submatch_id >= 0)
1532*c9945492SAndroid Build Coastguard Worker {
1533*c9945492SAndroid Build Coastguard Worker int i;
1534*c9945492SAndroid Build Coastguard Worker /* Push this submatch on the parents stack. */
1535*c9945492SAndroid Build Coastguard Worker for (i = 0; parents[i] >= 0; i++);
1536*c9945492SAndroid Build Coastguard Worker parents[i] = node->submatch_id;
1537*c9945492SAndroid Build Coastguard Worker parents[i + 1] = -1;
1538*c9945492SAndroid Build Coastguard Worker }
1539*c9945492SAndroid Build Coastguard Worker
1540*c9945492SAndroid Build Coastguard Worker break; /* end case: ADDTAGS_RECURSE */
1541*c9945492SAndroid Build Coastguard Worker
1542*c9945492SAndroid Build Coastguard Worker case ADDTAGS_AFTER_ITERATION:
1543*c9945492SAndroid Build Coastguard Worker {
1544*c9945492SAndroid Build Coastguard Worker int minimal = 0;
1545*c9945492SAndroid Build Coastguard Worker int enter_tag;
1546*c9945492SAndroid Build Coastguard Worker node = tre_stack_pop_voidptr(stack);
1547*c9945492SAndroid Build Coastguard Worker if (first_pass)
1548*c9945492SAndroid Build Coastguard Worker {
1549*c9945492SAndroid Build Coastguard Worker node->num_tags = ((tre_iteration_t *)node->obj)->arg->num_tags
1550*c9945492SAndroid Build Coastguard Worker + tre_stack_pop_int(stack);
1551*c9945492SAndroid Build Coastguard Worker minimal_tag = -1;
1552*c9945492SAndroid Build Coastguard Worker }
1553*c9945492SAndroid Build Coastguard Worker else
1554*c9945492SAndroid Build Coastguard Worker {
1555*c9945492SAndroid Build Coastguard Worker minimal = tre_stack_pop_int(stack);
1556*c9945492SAndroid Build Coastguard Worker enter_tag = tre_stack_pop_int(stack);
1557*c9945492SAndroid Build Coastguard Worker if (minimal)
1558*c9945492SAndroid Build Coastguard Worker minimal_tag = enter_tag;
1559*c9945492SAndroid Build Coastguard Worker }
1560*c9945492SAndroid Build Coastguard Worker
1561*c9945492SAndroid Build Coastguard Worker if (!first_pass)
1562*c9945492SAndroid Build Coastguard Worker {
1563*c9945492SAndroid Build Coastguard Worker if (minimal)
1564*c9945492SAndroid Build Coastguard Worker direction = TRE_TAG_MINIMIZE;
1565*c9945492SAndroid Build Coastguard Worker else
1566*c9945492SAndroid Build Coastguard Worker direction = TRE_TAG_MAXIMIZE;
1567*c9945492SAndroid Build Coastguard Worker }
1568*c9945492SAndroid Build Coastguard Worker break;
1569*c9945492SAndroid Build Coastguard Worker }
1570*c9945492SAndroid Build Coastguard Worker
1571*c9945492SAndroid Build Coastguard Worker case ADDTAGS_AFTER_CAT_LEFT:
1572*c9945492SAndroid Build Coastguard Worker {
1573*c9945492SAndroid Build Coastguard Worker int new_tag = tre_stack_pop_int(stack);
1574*c9945492SAndroid Build Coastguard Worker next_tag = tre_stack_pop_int(stack);
1575*c9945492SAndroid Build Coastguard Worker if (new_tag >= 0)
1576*c9945492SAndroid Build Coastguard Worker {
1577*c9945492SAndroid Build Coastguard Worker tag = new_tag;
1578*c9945492SAndroid Build Coastguard Worker }
1579*c9945492SAndroid Build Coastguard Worker break;
1580*c9945492SAndroid Build Coastguard Worker }
1581*c9945492SAndroid Build Coastguard Worker
1582*c9945492SAndroid Build Coastguard Worker case ADDTAGS_AFTER_CAT_RIGHT:
1583*c9945492SAndroid Build Coastguard Worker node = tre_stack_pop_voidptr(stack);
1584*c9945492SAndroid Build Coastguard Worker if (first_pass)
1585*c9945492SAndroid Build Coastguard Worker node->num_tags = ((tre_catenation_t *)node->obj)->left->num_tags
1586*c9945492SAndroid Build Coastguard Worker + ((tre_catenation_t *)node->obj)->right->num_tags;
1587*c9945492SAndroid Build Coastguard Worker break;
1588*c9945492SAndroid Build Coastguard Worker
1589*c9945492SAndroid Build Coastguard Worker case ADDTAGS_AFTER_UNION_LEFT:
1590*c9945492SAndroid Build Coastguard Worker /* Lift the bottom of the `regset' array so that when processing
1591*c9945492SAndroid Build Coastguard Worker the right operand the items currently in the array are
1592*c9945492SAndroid Build Coastguard Worker invisible. The original bottom was saved at ADDTAGS_UNION and
1593*c9945492SAndroid Build Coastguard Worker will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
1594*c9945492SAndroid Build Coastguard Worker while (*regset >= 0)
1595*c9945492SAndroid Build Coastguard Worker regset++;
1596*c9945492SAndroid Build Coastguard Worker break;
1597*c9945492SAndroid Build Coastguard Worker
1598*c9945492SAndroid Build Coastguard Worker case ADDTAGS_AFTER_UNION_RIGHT:
1599*c9945492SAndroid Build Coastguard Worker {
1600*c9945492SAndroid Build Coastguard Worker int added_tags, tag_left, tag_right;
1601*c9945492SAndroid Build Coastguard Worker tre_ast_node_t *left = tre_stack_pop_voidptr(stack);
1602*c9945492SAndroid Build Coastguard Worker tre_ast_node_t *right = tre_stack_pop_voidptr(stack);
1603*c9945492SAndroid Build Coastguard Worker node = tre_stack_pop_voidptr(stack);
1604*c9945492SAndroid Build Coastguard Worker added_tags = tre_stack_pop_int(stack);
1605*c9945492SAndroid Build Coastguard Worker if (first_pass)
1606*c9945492SAndroid Build Coastguard Worker {
1607*c9945492SAndroid Build Coastguard Worker node->num_tags = ((tre_union_t *)node->obj)->left->num_tags
1608*c9945492SAndroid Build Coastguard Worker + ((tre_union_t *)node->obj)->right->num_tags + added_tags
1609*c9945492SAndroid Build Coastguard Worker + ((node->num_submatches > 0) ? 2 : 0);
1610*c9945492SAndroid Build Coastguard Worker }
1611*c9945492SAndroid Build Coastguard Worker regset = tre_stack_pop_voidptr(stack);
1612*c9945492SAndroid Build Coastguard Worker tag_left = tre_stack_pop_int(stack);
1613*c9945492SAndroid Build Coastguard Worker tag_right = tre_stack_pop_int(stack);
1614*c9945492SAndroid Build Coastguard Worker
1615*c9945492SAndroid Build Coastguard Worker /* Add tags after both children, the left child gets a smaller
1616*c9945492SAndroid Build Coastguard Worker tag than the right child. This guarantees that we prefer
1617*c9945492SAndroid Build Coastguard Worker the left child over the right child. */
1618*c9945492SAndroid Build Coastguard Worker /* XXX - This is not always necessary (if the children have
1619*c9945492SAndroid Build Coastguard Worker tags which must be seen for every match of that child). */
1620*c9945492SAndroid Build Coastguard Worker /* XXX - Check if this is the only place where tre_add_tag_right
1621*c9945492SAndroid Build Coastguard Worker is used. If so, use tre_add_tag_left (putting the tag before
1622*c9945492SAndroid Build Coastguard Worker the child as opposed after the child) and throw away
1623*c9945492SAndroid Build Coastguard Worker tre_add_tag_right. */
1624*c9945492SAndroid Build Coastguard Worker if (node->num_submatches > 0)
1625*c9945492SAndroid Build Coastguard Worker {
1626*c9945492SAndroid Build Coastguard Worker if (!first_pass)
1627*c9945492SAndroid Build Coastguard Worker {
1628*c9945492SAndroid Build Coastguard Worker status = tre_add_tag_right(mem, left, tag_left);
1629*c9945492SAndroid Build Coastguard Worker tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE;
1630*c9945492SAndroid Build Coastguard Worker if (status == REG_OK)
1631*c9945492SAndroid Build Coastguard Worker status = tre_add_tag_right(mem, right, tag_right);
1632*c9945492SAndroid Build Coastguard Worker tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE;
1633*c9945492SAndroid Build Coastguard Worker }
1634*c9945492SAndroid Build Coastguard Worker num_tags += 2;
1635*c9945492SAndroid Build Coastguard Worker }
1636*c9945492SAndroid Build Coastguard Worker direction = TRE_TAG_MAXIMIZE;
1637*c9945492SAndroid Build Coastguard Worker break;
1638*c9945492SAndroid Build Coastguard Worker }
1639*c9945492SAndroid Build Coastguard Worker
1640*c9945492SAndroid Build Coastguard Worker default:
1641*c9945492SAndroid Build Coastguard Worker assert(0);
1642*c9945492SAndroid Build Coastguard Worker break;
1643*c9945492SAndroid Build Coastguard Worker
1644*c9945492SAndroid Build Coastguard Worker } /* end switch(symbol) */
1645*c9945492SAndroid Build Coastguard Worker } /* end while(tre_stack_num_objects(stack) > bottom) */
1646*c9945492SAndroid Build Coastguard Worker
1647*c9945492SAndroid Build Coastguard Worker if (!first_pass)
1648*c9945492SAndroid Build Coastguard Worker tre_purge_regset(regset, tnfa, tag);
1649*c9945492SAndroid Build Coastguard Worker
1650*c9945492SAndroid Build Coastguard Worker if (!first_pass && minimal_tag >= 0)
1651*c9945492SAndroid Build Coastguard Worker {
1652*c9945492SAndroid Build Coastguard Worker int i;
1653*c9945492SAndroid Build Coastguard Worker for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1654*c9945492SAndroid Build Coastguard Worker tnfa->minimal_tags[i] = tag;
1655*c9945492SAndroid Build Coastguard Worker tnfa->minimal_tags[i + 1] = minimal_tag;
1656*c9945492SAndroid Build Coastguard Worker tnfa->minimal_tags[i + 2] = -1;
1657*c9945492SAndroid Build Coastguard Worker minimal_tag = -1;
1658*c9945492SAndroid Build Coastguard Worker num_minimals++;
1659*c9945492SAndroid Build Coastguard Worker }
1660*c9945492SAndroid Build Coastguard Worker
1661*c9945492SAndroid Build Coastguard Worker assert(tree->num_tags == num_tags);
1662*c9945492SAndroid Build Coastguard Worker tnfa->end_tag = num_tags;
1663*c9945492SAndroid Build Coastguard Worker tnfa->num_tags = num_tags;
1664*c9945492SAndroid Build Coastguard Worker tnfa->num_minimals = num_minimals;
1665*c9945492SAndroid Build Coastguard Worker xfree(orig_regset);
1666*c9945492SAndroid Build Coastguard Worker xfree(parents);
1667*c9945492SAndroid Build Coastguard Worker xfree(saved_states);
1668*c9945492SAndroid Build Coastguard Worker return status;
1669*c9945492SAndroid Build Coastguard Worker }
1670*c9945492SAndroid Build Coastguard Worker
1671*c9945492SAndroid Build Coastguard Worker
1672*c9945492SAndroid Build Coastguard Worker
1673*c9945492SAndroid Build Coastguard Worker /*
1674*c9945492SAndroid Build Coastguard Worker AST to TNFA compilation routines.
1675*c9945492SAndroid Build Coastguard Worker */
1676*c9945492SAndroid Build Coastguard Worker
1677*c9945492SAndroid Build Coastguard Worker typedef enum {
1678*c9945492SAndroid Build Coastguard Worker COPY_RECURSE,
1679*c9945492SAndroid Build Coastguard Worker COPY_SET_RESULT_PTR
1680*c9945492SAndroid Build Coastguard Worker } tre_copyast_symbol_t;
1681*c9945492SAndroid Build Coastguard Worker
1682*c9945492SAndroid Build Coastguard Worker /* Flags for tre_copy_ast(). */
1683*c9945492SAndroid Build Coastguard Worker #define COPY_REMOVE_TAGS 1
1684*c9945492SAndroid Build Coastguard Worker #define COPY_MAXIMIZE_FIRST_TAG 2
1685*c9945492SAndroid Build Coastguard Worker
1686*c9945492SAndroid Build Coastguard Worker static reg_errcode_t
tre_copy_ast(tre_mem_t mem,tre_stack_t * stack,tre_ast_node_t * ast,int flags,int * pos_add,tre_tag_direction_t * tag_directions,tre_ast_node_t ** copy,int * max_pos)1687*c9945492SAndroid Build Coastguard Worker tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1688*c9945492SAndroid Build Coastguard Worker int flags, int *pos_add, tre_tag_direction_t *tag_directions,
1689*c9945492SAndroid Build Coastguard Worker tre_ast_node_t **copy, int *max_pos)
1690*c9945492SAndroid Build Coastguard Worker {
1691*c9945492SAndroid Build Coastguard Worker reg_errcode_t status = REG_OK;
1692*c9945492SAndroid Build Coastguard Worker int bottom = tre_stack_num_objects(stack);
1693*c9945492SAndroid Build Coastguard Worker int num_copied = 0;
1694*c9945492SAndroid Build Coastguard Worker int first_tag = 1;
1695*c9945492SAndroid Build Coastguard Worker tre_ast_node_t **result = copy;
1696*c9945492SAndroid Build Coastguard Worker tre_copyast_symbol_t symbol;
1697*c9945492SAndroid Build Coastguard Worker
1698*c9945492SAndroid Build Coastguard Worker STACK_PUSH(stack, voidptr, ast);
1699*c9945492SAndroid Build Coastguard Worker STACK_PUSH(stack, int, COPY_RECURSE);
1700*c9945492SAndroid Build Coastguard Worker
1701*c9945492SAndroid Build Coastguard Worker while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1702*c9945492SAndroid Build Coastguard Worker {
1703*c9945492SAndroid Build Coastguard Worker tre_ast_node_t *node;
1704*c9945492SAndroid Build Coastguard Worker if (status != REG_OK)
1705*c9945492SAndroid Build Coastguard Worker break;
1706*c9945492SAndroid Build Coastguard Worker
1707*c9945492SAndroid Build Coastguard Worker symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);
1708*c9945492SAndroid Build Coastguard Worker switch (symbol)
1709*c9945492SAndroid Build Coastguard Worker {
1710*c9945492SAndroid Build Coastguard Worker case COPY_SET_RESULT_PTR:
1711*c9945492SAndroid Build Coastguard Worker result = tre_stack_pop_voidptr(stack);
1712*c9945492SAndroid Build Coastguard Worker break;
1713*c9945492SAndroid Build Coastguard Worker case COPY_RECURSE:
1714*c9945492SAndroid Build Coastguard Worker node = tre_stack_pop_voidptr(stack);
1715*c9945492SAndroid Build Coastguard Worker switch (node->type)
1716*c9945492SAndroid Build Coastguard Worker {
1717*c9945492SAndroid Build Coastguard Worker case LITERAL:
1718*c9945492SAndroid Build Coastguard Worker {
1719*c9945492SAndroid Build Coastguard Worker tre_literal_t *lit = node->obj;
1720*c9945492SAndroid Build Coastguard Worker int pos = lit->position;
1721*c9945492SAndroid Build Coastguard Worker int min = lit->code_min;
1722*c9945492SAndroid Build Coastguard Worker int max = lit->code_max;
1723*c9945492SAndroid Build Coastguard Worker if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1724*c9945492SAndroid Build Coastguard Worker {
1725*c9945492SAndroid Build Coastguard Worker /* XXX - e.g. [ab] has only one position but two
1726*c9945492SAndroid Build Coastguard Worker nodes, so we are creating holes in the state space
1727*c9945492SAndroid Build Coastguard Worker here. Not fatal, just wastes memory. */
1728*c9945492SAndroid Build Coastguard Worker pos += *pos_add;
1729*c9945492SAndroid Build Coastguard Worker num_copied++;
1730*c9945492SAndroid Build Coastguard Worker }
1731*c9945492SAndroid Build Coastguard Worker else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS))
1732*c9945492SAndroid Build Coastguard Worker {
1733*c9945492SAndroid Build Coastguard Worker /* Change this tag to empty. */
1734*c9945492SAndroid Build Coastguard Worker min = EMPTY;
1735*c9945492SAndroid Build Coastguard Worker max = pos = -1;
1736*c9945492SAndroid Build Coastguard Worker }
1737*c9945492SAndroid Build Coastguard Worker else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG)
1738*c9945492SAndroid Build Coastguard Worker && first_tag)
1739*c9945492SAndroid Build Coastguard Worker {
1740*c9945492SAndroid Build Coastguard Worker /* Maximize the first tag. */
1741*c9945492SAndroid Build Coastguard Worker tag_directions[max] = TRE_TAG_MAXIMIZE;
1742*c9945492SAndroid Build Coastguard Worker first_tag = 0;
1743*c9945492SAndroid Build Coastguard Worker }
1744*c9945492SAndroid Build Coastguard Worker *result = tre_ast_new_literal(mem, min, max, pos);
1745*c9945492SAndroid Build Coastguard Worker if (*result == NULL)
1746*c9945492SAndroid Build Coastguard Worker status = REG_ESPACE;
1747*c9945492SAndroid Build Coastguard Worker else {
1748*c9945492SAndroid Build Coastguard Worker tre_literal_t *p = (*result)->obj;
1749*c9945492SAndroid Build Coastguard Worker p->class = lit->class;
1750*c9945492SAndroid Build Coastguard Worker p->neg_classes = lit->neg_classes;
1751*c9945492SAndroid Build Coastguard Worker }
1752*c9945492SAndroid Build Coastguard Worker
1753*c9945492SAndroid Build Coastguard Worker if (pos > *max_pos)
1754*c9945492SAndroid Build Coastguard Worker *max_pos = pos;
1755*c9945492SAndroid Build Coastguard Worker break;
1756*c9945492SAndroid Build Coastguard Worker }
1757*c9945492SAndroid Build Coastguard Worker case UNION:
1758*c9945492SAndroid Build Coastguard Worker {
1759*c9945492SAndroid Build Coastguard Worker tre_union_t *uni = node->obj;
1760*c9945492SAndroid Build Coastguard Worker tre_union_t *tmp;
1761*c9945492SAndroid Build Coastguard Worker *result = tre_ast_new_union(mem, uni->left, uni->right);
1762*c9945492SAndroid Build Coastguard Worker if (*result == NULL)
1763*c9945492SAndroid Build Coastguard Worker {
1764*c9945492SAndroid Build Coastguard Worker status = REG_ESPACE;
1765*c9945492SAndroid Build Coastguard Worker break;
1766*c9945492SAndroid Build Coastguard Worker }
1767*c9945492SAndroid Build Coastguard Worker tmp = (*result)->obj;
1768*c9945492SAndroid Build Coastguard Worker result = &tmp->left;
1769*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, voidptr, uni->right);
1770*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, int, COPY_RECURSE);
1771*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, voidptr, &tmp->right);
1772*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1773*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, voidptr, uni->left);
1774*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, int, COPY_RECURSE);
1775*c9945492SAndroid Build Coastguard Worker break;
1776*c9945492SAndroid Build Coastguard Worker }
1777*c9945492SAndroid Build Coastguard Worker case CATENATION:
1778*c9945492SAndroid Build Coastguard Worker {
1779*c9945492SAndroid Build Coastguard Worker tre_catenation_t *cat = node->obj;
1780*c9945492SAndroid Build Coastguard Worker tre_catenation_t *tmp;
1781*c9945492SAndroid Build Coastguard Worker *result = tre_ast_new_catenation(mem, cat->left, cat->right);
1782*c9945492SAndroid Build Coastguard Worker if (*result == NULL)
1783*c9945492SAndroid Build Coastguard Worker {
1784*c9945492SAndroid Build Coastguard Worker status = REG_ESPACE;
1785*c9945492SAndroid Build Coastguard Worker break;
1786*c9945492SAndroid Build Coastguard Worker }
1787*c9945492SAndroid Build Coastguard Worker tmp = (*result)->obj;
1788*c9945492SAndroid Build Coastguard Worker tmp->left = NULL;
1789*c9945492SAndroid Build Coastguard Worker tmp->right = NULL;
1790*c9945492SAndroid Build Coastguard Worker result = &tmp->left;
1791*c9945492SAndroid Build Coastguard Worker
1792*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, voidptr, cat->right);
1793*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, int, COPY_RECURSE);
1794*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, voidptr, &tmp->right);
1795*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1796*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, voidptr, cat->left);
1797*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, int, COPY_RECURSE);
1798*c9945492SAndroid Build Coastguard Worker break;
1799*c9945492SAndroid Build Coastguard Worker }
1800*c9945492SAndroid Build Coastguard Worker case ITERATION:
1801*c9945492SAndroid Build Coastguard Worker {
1802*c9945492SAndroid Build Coastguard Worker tre_iteration_t *iter = node->obj;
1803*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, voidptr, iter->arg);
1804*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, int, COPY_RECURSE);
1805*c9945492SAndroid Build Coastguard Worker *result = tre_ast_new_iter(mem, iter->arg, iter->min,
1806*c9945492SAndroid Build Coastguard Worker iter->max, iter->minimal);
1807*c9945492SAndroid Build Coastguard Worker if (*result == NULL)
1808*c9945492SAndroid Build Coastguard Worker {
1809*c9945492SAndroid Build Coastguard Worker status = REG_ESPACE;
1810*c9945492SAndroid Build Coastguard Worker break;
1811*c9945492SAndroid Build Coastguard Worker }
1812*c9945492SAndroid Build Coastguard Worker iter = (*result)->obj;
1813*c9945492SAndroid Build Coastguard Worker result = &iter->arg;
1814*c9945492SAndroid Build Coastguard Worker break;
1815*c9945492SAndroid Build Coastguard Worker }
1816*c9945492SAndroid Build Coastguard Worker default:
1817*c9945492SAndroid Build Coastguard Worker assert(0);
1818*c9945492SAndroid Build Coastguard Worker break;
1819*c9945492SAndroid Build Coastguard Worker }
1820*c9945492SAndroid Build Coastguard Worker break;
1821*c9945492SAndroid Build Coastguard Worker }
1822*c9945492SAndroid Build Coastguard Worker }
1823*c9945492SAndroid Build Coastguard Worker *pos_add += num_copied;
1824*c9945492SAndroid Build Coastguard Worker return status;
1825*c9945492SAndroid Build Coastguard Worker }
1826*c9945492SAndroid Build Coastguard Worker
1827*c9945492SAndroid Build Coastguard Worker typedef enum {
1828*c9945492SAndroid Build Coastguard Worker EXPAND_RECURSE,
1829*c9945492SAndroid Build Coastguard Worker EXPAND_AFTER_ITER
1830*c9945492SAndroid Build Coastguard Worker } tre_expand_ast_symbol_t;
1831*c9945492SAndroid Build Coastguard Worker
1832*c9945492SAndroid Build Coastguard Worker /* Expands each iteration node that has a finite nonzero minimum or maximum
1833*c9945492SAndroid Build Coastguard Worker iteration count to a catenated sequence of copies of the node. */
1834*c9945492SAndroid Build Coastguard Worker static reg_errcode_t
tre_expand_ast(tre_mem_t mem,tre_stack_t * stack,tre_ast_node_t * ast,int * position,tre_tag_direction_t * tag_directions)1835*c9945492SAndroid Build Coastguard Worker tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1836*c9945492SAndroid Build Coastguard Worker int *position, tre_tag_direction_t *tag_directions)
1837*c9945492SAndroid Build Coastguard Worker {
1838*c9945492SAndroid Build Coastguard Worker reg_errcode_t status = REG_OK;
1839*c9945492SAndroid Build Coastguard Worker int bottom = tre_stack_num_objects(stack);
1840*c9945492SAndroid Build Coastguard Worker int pos_add = 0;
1841*c9945492SAndroid Build Coastguard Worker int pos_add_total = 0;
1842*c9945492SAndroid Build Coastguard Worker int max_pos = 0;
1843*c9945492SAndroid Build Coastguard Worker int iter_depth = 0;
1844*c9945492SAndroid Build Coastguard Worker
1845*c9945492SAndroid Build Coastguard Worker STACK_PUSHR(stack, voidptr, ast);
1846*c9945492SAndroid Build Coastguard Worker STACK_PUSHR(stack, int, EXPAND_RECURSE);
1847*c9945492SAndroid Build Coastguard Worker while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1848*c9945492SAndroid Build Coastguard Worker {
1849*c9945492SAndroid Build Coastguard Worker tre_ast_node_t *node;
1850*c9945492SAndroid Build Coastguard Worker tre_expand_ast_symbol_t symbol;
1851*c9945492SAndroid Build Coastguard Worker
1852*c9945492SAndroid Build Coastguard Worker if (status != REG_OK)
1853*c9945492SAndroid Build Coastguard Worker break;
1854*c9945492SAndroid Build Coastguard Worker
1855*c9945492SAndroid Build Coastguard Worker symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);
1856*c9945492SAndroid Build Coastguard Worker node = tre_stack_pop_voidptr(stack);
1857*c9945492SAndroid Build Coastguard Worker switch (symbol)
1858*c9945492SAndroid Build Coastguard Worker {
1859*c9945492SAndroid Build Coastguard Worker case EXPAND_RECURSE:
1860*c9945492SAndroid Build Coastguard Worker switch (node->type)
1861*c9945492SAndroid Build Coastguard Worker {
1862*c9945492SAndroid Build Coastguard Worker case LITERAL:
1863*c9945492SAndroid Build Coastguard Worker {
1864*c9945492SAndroid Build Coastguard Worker tre_literal_t *lit= node->obj;
1865*c9945492SAndroid Build Coastguard Worker if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1866*c9945492SAndroid Build Coastguard Worker {
1867*c9945492SAndroid Build Coastguard Worker lit->position += pos_add;
1868*c9945492SAndroid Build Coastguard Worker if (lit->position > max_pos)
1869*c9945492SAndroid Build Coastguard Worker max_pos = lit->position;
1870*c9945492SAndroid Build Coastguard Worker }
1871*c9945492SAndroid Build Coastguard Worker break;
1872*c9945492SAndroid Build Coastguard Worker }
1873*c9945492SAndroid Build Coastguard Worker case UNION:
1874*c9945492SAndroid Build Coastguard Worker {
1875*c9945492SAndroid Build Coastguard Worker tre_union_t *uni = node->obj;
1876*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, voidptr, uni->right);
1877*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, int, EXPAND_RECURSE);
1878*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, voidptr, uni->left);
1879*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, int, EXPAND_RECURSE);
1880*c9945492SAndroid Build Coastguard Worker break;
1881*c9945492SAndroid Build Coastguard Worker }
1882*c9945492SAndroid Build Coastguard Worker case CATENATION:
1883*c9945492SAndroid Build Coastguard Worker {
1884*c9945492SAndroid Build Coastguard Worker tre_catenation_t *cat = node->obj;
1885*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, voidptr, cat->right);
1886*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, int, EXPAND_RECURSE);
1887*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, voidptr, cat->left);
1888*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, int, EXPAND_RECURSE);
1889*c9945492SAndroid Build Coastguard Worker break;
1890*c9945492SAndroid Build Coastguard Worker }
1891*c9945492SAndroid Build Coastguard Worker case ITERATION:
1892*c9945492SAndroid Build Coastguard Worker {
1893*c9945492SAndroid Build Coastguard Worker tre_iteration_t *iter = node->obj;
1894*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, int, pos_add);
1895*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, voidptr, node);
1896*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, int, EXPAND_AFTER_ITER);
1897*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, voidptr, iter->arg);
1898*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, int, EXPAND_RECURSE);
1899*c9945492SAndroid Build Coastguard Worker /* If we are going to expand this node at EXPAND_AFTER_ITER
1900*c9945492SAndroid Build Coastguard Worker then don't increase the `pos' fields of the nodes now, it
1901*c9945492SAndroid Build Coastguard Worker will get done when expanding. */
1902*c9945492SAndroid Build Coastguard Worker if (iter->min > 1 || iter->max > 1)
1903*c9945492SAndroid Build Coastguard Worker pos_add = 0;
1904*c9945492SAndroid Build Coastguard Worker iter_depth++;
1905*c9945492SAndroid Build Coastguard Worker break;
1906*c9945492SAndroid Build Coastguard Worker }
1907*c9945492SAndroid Build Coastguard Worker default:
1908*c9945492SAndroid Build Coastguard Worker assert(0);
1909*c9945492SAndroid Build Coastguard Worker break;
1910*c9945492SAndroid Build Coastguard Worker }
1911*c9945492SAndroid Build Coastguard Worker break;
1912*c9945492SAndroid Build Coastguard Worker case EXPAND_AFTER_ITER:
1913*c9945492SAndroid Build Coastguard Worker {
1914*c9945492SAndroid Build Coastguard Worker tre_iteration_t *iter = node->obj;
1915*c9945492SAndroid Build Coastguard Worker int pos_add_last;
1916*c9945492SAndroid Build Coastguard Worker pos_add = tre_stack_pop_int(stack);
1917*c9945492SAndroid Build Coastguard Worker pos_add_last = pos_add;
1918*c9945492SAndroid Build Coastguard Worker if (iter->min > 1 || iter->max > 1)
1919*c9945492SAndroid Build Coastguard Worker {
1920*c9945492SAndroid Build Coastguard Worker tre_ast_node_t *seq1 = NULL, *seq2 = NULL;
1921*c9945492SAndroid Build Coastguard Worker int j;
1922*c9945492SAndroid Build Coastguard Worker int pos_add_save = pos_add;
1923*c9945492SAndroid Build Coastguard Worker
1924*c9945492SAndroid Build Coastguard Worker /* Create a catenated sequence of copies of the node. */
1925*c9945492SAndroid Build Coastguard Worker for (j = 0; j < iter->min; j++)
1926*c9945492SAndroid Build Coastguard Worker {
1927*c9945492SAndroid Build Coastguard Worker tre_ast_node_t *copy;
1928*c9945492SAndroid Build Coastguard Worker /* Remove tags from all but the last copy. */
1929*c9945492SAndroid Build Coastguard Worker int flags = ((j + 1 < iter->min)
1930*c9945492SAndroid Build Coastguard Worker ? COPY_REMOVE_TAGS
1931*c9945492SAndroid Build Coastguard Worker : COPY_MAXIMIZE_FIRST_TAG);
1932*c9945492SAndroid Build Coastguard Worker pos_add_save = pos_add;
1933*c9945492SAndroid Build Coastguard Worker status = tre_copy_ast(mem, stack, iter->arg, flags,
1934*c9945492SAndroid Build Coastguard Worker &pos_add, tag_directions, ©,
1935*c9945492SAndroid Build Coastguard Worker &max_pos);
1936*c9945492SAndroid Build Coastguard Worker if (status != REG_OK)
1937*c9945492SAndroid Build Coastguard Worker return status;
1938*c9945492SAndroid Build Coastguard Worker if (seq1 != NULL)
1939*c9945492SAndroid Build Coastguard Worker seq1 = tre_ast_new_catenation(mem, seq1, copy);
1940*c9945492SAndroid Build Coastguard Worker else
1941*c9945492SAndroid Build Coastguard Worker seq1 = copy;
1942*c9945492SAndroid Build Coastguard Worker if (seq1 == NULL)
1943*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
1944*c9945492SAndroid Build Coastguard Worker }
1945*c9945492SAndroid Build Coastguard Worker
1946*c9945492SAndroid Build Coastguard Worker if (iter->max == -1)
1947*c9945492SAndroid Build Coastguard Worker {
1948*c9945492SAndroid Build Coastguard Worker /* No upper limit. */
1949*c9945492SAndroid Build Coastguard Worker pos_add_save = pos_add;
1950*c9945492SAndroid Build Coastguard Worker status = tre_copy_ast(mem, stack, iter->arg, 0,
1951*c9945492SAndroid Build Coastguard Worker &pos_add, NULL, &seq2, &max_pos);
1952*c9945492SAndroid Build Coastguard Worker if (status != REG_OK)
1953*c9945492SAndroid Build Coastguard Worker return status;
1954*c9945492SAndroid Build Coastguard Worker seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
1955*c9945492SAndroid Build Coastguard Worker if (seq2 == NULL)
1956*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
1957*c9945492SAndroid Build Coastguard Worker }
1958*c9945492SAndroid Build Coastguard Worker else
1959*c9945492SAndroid Build Coastguard Worker {
1960*c9945492SAndroid Build Coastguard Worker for (j = iter->min; j < iter->max; j++)
1961*c9945492SAndroid Build Coastguard Worker {
1962*c9945492SAndroid Build Coastguard Worker tre_ast_node_t *tmp, *copy;
1963*c9945492SAndroid Build Coastguard Worker pos_add_save = pos_add;
1964*c9945492SAndroid Build Coastguard Worker status = tre_copy_ast(mem, stack, iter->arg, 0,
1965*c9945492SAndroid Build Coastguard Worker &pos_add, NULL, ©, &max_pos);
1966*c9945492SAndroid Build Coastguard Worker if (status != REG_OK)
1967*c9945492SAndroid Build Coastguard Worker return status;
1968*c9945492SAndroid Build Coastguard Worker if (seq2 != NULL)
1969*c9945492SAndroid Build Coastguard Worker seq2 = tre_ast_new_catenation(mem, copy, seq2);
1970*c9945492SAndroid Build Coastguard Worker else
1971*c9945492SAndroid Build Coastguard Worker seq2 = copy;
1972*c9945492SAndroid Build Coastguard Worker if (seq2 == NULL)
1973*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
1974*c9945492SAndroid Build Coastguard Worker tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);
1975*c9945492SAndroid Build Coastguard Worker if (tmp == NULL)
1976*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
1977*c9945492SAndroid Build Coastguard Worker seq2 = tre_ast_new_union(mem, tmp, seq2);
1978*c9945492SAndroid Build Coastguard Worker if (seq2 == NULL)
1979*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
1980*c9945492SAndroid Build Coastguard Worker }
1981*c9945492SAndroid Build Coastguard Worker }
1982*c9945492SAndroid Build Coastguard Worker
1983*c9945492SAndroid Build Coastguard Worker pos_add = pos_add_save;
1984*c9945492SAndroid Build Coastguard Worker if (seq1 == NULL)
1985*c9945492SAndroid Build Coastguard Worker seq1 = seq2;
1986*c9945492SAndroid Build Coastguard Worker else if (seq2 != NULL)
1987*c9945492SAndroid Build Coastguard Worker seq1 = tre_ast_new_catenation(mem, seq1, seq2);
1988*c9945492SAndroid Build Coastguard Worker if (seq1 == NULL)
1989*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
1990*c9945492SAndroid Build Coastguard Worker node->obj = seq1->obj;
1991*c9945492SAndroid Build Coastguard Worker node->type = seq1->type;
1992*c9945492SAndroid Build Coastguard Worker }
1993*c9945492SAndroid Build Coastguard Worker
1994*c9945492SAndroid Build Coastguard Worker iter_depth--;
1995*c9945492SAndroid Build Coastguard Worker pos_add_total += pos_add - pos_add_last;
1996*c9945492SAndroid Build Coastguard Worker if (iter_depth == 0)
1997*c9945492SAndroid Build Coastguard Worker pos_add = pos_add_total;
1998*c9945492SAndroid Build Coastguard Worker
1999*c9945492SAndroid Build Coastguard Worker break;
2000*c9945492SAndroid Build Coastguard Worker }
2001*c9945492SAndroid Build Coastguard Worker default:
2002*c9945492SAndroid Build Coastguard Worker assert(0);
2003*c9945492SAndroid Build Coastguard Worker break;
2004*c9945492SAndroid Build Coastguard Worker }
2005*c9945492SAndroid Build Coastguard Worker }
2006*c9945492SAndroid Build Coastguard Worker
2007*c9945492SAndroid Build Coastguard Worker *position += pos_add_total;
2008*c9945492SAndroid Build Coastguard Worker
2009*c9945492SAndroid Build Coastguard Worker /* `max_pos' should never be larger than `*position' if the above
2010*c9945492SAndroid Build Coastguard Worker code works, but just an extra safeguard let's make sure
2011*c9945492SAndroid Build Coastguard Worker `*position' is set large enough so enough memory will be
2012*c9945492SAndroid Build Coastguard Worker allocated for the transition table. */
2013*c9945492SAndroid Build Coastguard Worker if (max_pos > *position)
2014*c9945492SAndroid Build Coastguard Worker *position = max_pos;
2015*c9945492SAndroid Build Coastguard Worker
2016*c9945492SAndroid Build Coastguard Worker return status;
2017*c9945492SAndroid Build Coastguard Worker }
2018*c9945492SAndroid Build Coastguard Worker
2019*c9945492SAndroid Build Coastguard Worker static tre_pos_and_tags_t *
tre_set_empty(tre_mem_t mem)2020*c9945492SAndroid Build Coastguard Worker tre_set_empty(tre_mem_t mem)
2021*c9945492SAndroid Build Coastguard Worker {
2022*c9945492SAndroid Build Coastguard Worker tre_pos_and_tags_t *new_set;
2023*c9945492SAndroid Build Coastguard Worker
2024*c9945492SAndroid Build Coastguard Worker new_set = tre_mem_calloc(mem, sizeof(*new_set));
2025*c9945492SAndroid Build Coastguard Worker if (new_set == NULL)
2026*c9945492SAndroid Build Coastguard Worker return NULL;
2027*c9945492SAndroid Build Coastguard Worker
2028*c9945492SAndroid Build Coastguard Worker new_set[0].position = -1;
2029*c9945492SAndroid Build Coastguard Worker new_set[0].code_min = -1;
2030*c9945492SAndroid Build Coastguard Worker new_set[0].code_max = -1;
2031*c9945492SAndroid Build Coastguard Worker
2032*c9945492SAndroid Build Coastguard Worker return new_set;
2033*c9945492SAndroid Build Coastguard Worker }
2034*c9945492SAndroid Build Coastguard Worker
2035*c9945492SAndroid Build Coastguard Worker static tre_pos_and_tags_t *
tre_set_one(tre_mem_t mem,int position,int code_min,int code_max,tre_ctype_t class,tre_ctype_t * neg_classes,int backref)2036*c9945492SAndroid Build Coastguard Worker tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
2037*c9945492SAndroid Build Coastguard Worker tre_ctype_t class, tre_ctype_t *neg_classes, int backref)
2038*c9945492SAndroid Build Coastguard Worker {
2039*c9945492SAndroid Build Coastguard Worker tre_pos_and_tags_t *new_set;
2040*c9945492SAndroid Build Coastguard Worker
2041*c9945492SAndroid Build Coastguard Worker new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
2042*c9945492SAndroid Build Coastguard Worker if (new_set == NULL)
2043*c9945492SAndroid Build Coastguard Worker return NULL;
2044*c9945492SAndroid Build Coastguard Worker
2045*c9945492SAndroid Build Coastguard Worker new_set[0].position = position;
2046*c9945492SAndroid Build Coastguard Worker new_set[0].code_min = code_min;
2047*c9945492SAndroid Build Coastguard Worker new_set[0].code_max = code_max;
2048*c9945492SAndroid Build Coastguard Worker new_set[0].class = class;
2049*c9945492SAndroid Build Coastguard Worker new_set[0].neg_classes = neg_classes;
2050*c9945492SAndroid Build Coastguard Worker new_set[0].backref = backref;
2051*c9945492SAndroid Build Coastguard Worker new_set[1].position = -1;
2052*c9945492SAndroid Build Coastguard Worker new_set[1].code_min = -1;
2053*c9945492SAndroid Build Coastguard Worker new_set[1].code_max = -1;
2054*c9945492SAndroid Build Coastguard Worker
2055*c9945492SAndroid Build Coastguard Worker return new_set;
2056*c9945492SAndroid Build Coastguard Worker }
2057*c9945492SAndroid Build Coastguard Worker
2058*c9945492SAndroid Build Coastguard Worker static tre_pos_and_tags_t *
tre_set_union(tre_mem_t mem,tre_pos_and_tags_t * set1,tre_pos_and_tags_t * set2,int * tags,int assertions)2059*c9945492SAndroid Build Coastguard Worker tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
2060*c9945492SAndroid Build Coastguard Worker int *tags, int assertions)
2061*c9945492SAndroid Build Coastguard Worker {
2062*c9945492SAndroid Build Coastguard Worker int s1, s2, i, j;
2063*c9945492SAndroid Build Coastguard Worker tre_pos_and_tags_t *new_set;
2064*c9945492SAndroid Build Coastguard Worker int *new_tags;
2065*c9945492SAndroid Build Coastguard Worker int num_tags;
2066*c9945492SAndroid Build Coastguard Worker
2067*c9945492SAndroid Build Coastguard Worker for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++);
2068*c9945492SAndroid Build Coastguard Worker for (s1 = 0; set1[s1].position >= 0; s1++);
2069*c9945492SAndroid Build Coastguard Worker for (s2 = 0; set2[s2].position >= 0; s2++);
2070*c9945492SAndroid Build Coastguard Worker new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
2071*c9945492SAndroid Build Coastguard Worker if (!new_set )
2072*c9945492SAndroid Build Coastguard Worker return NULL;
2073*c9945492SAndroid Build Coastguard Worker
2074*c9945492SAndroid Build Coastguard Worker for (s1 = 0; set1[s1].position >= 0; s1++)
2075*c9945492SAndroid Build Coastguard Worker {
2076*c9945492SAndroid Build Coastguard Worker new_set[s1].position = set1[s1].position;
2077*c9945492SAndroid Build Coastguard Worker new_set[s1].code_min = set1[s1].code_min;
2078*c9945492SAndroid Build Coastguard Worker new_set[s1].code_max = set1[s1].code_max;
2079*c9945492SAndroid Build Coastguard Worker new_set[s1].assertions = set1[s1].assertions | assertions;
2080*c9945492SAndroid Build Coastguard Worker new_set[s1].class = set1[s1].class;
2081*c9945492SAndroid Build Coastguard Worker new_set[s1].neg_classes = set1[s1].neg_classes;
2082*c9945492SAndroid Build Coastguard Worker new_set[s1].backref = set1[s1].backref;
2083*c9945492SAndroid Build Coastguard Worker if (set1[s1].tags == NULL && tags == NULL)
2084*c9945492SAndroid Build Coastguard Worker new_set[s1].tags = NULL;
2085*c9945492SAndroid Build Coastguard Worker else
2086*c9945492SAndroid Build Coastguard Worker {
2087*c9945492SAndroid Build Coastguard Worker for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++);
2088*c9945492SAndroid Build Coastguard Worker new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)
2089*c9945492SAndroid Build Coastguard Worker * (i + num_tags + 1)));
2090*c9945492SAndroid Build Coastguard Worker if (new_tags == NULL)
2091*c9945492SAndroid Build Coastguard Worker return NULL;
2092*c9945492SAndroid Build Coastguard Worker for (j = 0; j < i; j++)
2093*c9945492SAndroid Build Coastguard Worker new_tags[j] = set1[s1].tags[j];
2094*c9945492SAndroid Build Coastguard Worker for (i = 0; i < num_tags; i++)
2095*c9945492SAndroid Build Coastguard Worker new_tags[j + i] = tags[i];
2096*c9945492SAndroid Build Coastguard Worker new_tags[j + i] = -1;
2097*c9945492SAndroid Build Coastguard Worker new_set[s1].tags = new_tags;
2098*c9945492SAndroid Build Coastguard Worker }
2099*c9945492SAndroid Build Coastguard Worker }
2100*c9945492SAndroid Build Coastguard Worker
2101*c9945492SAndroid Build Coastguard Worker for (s2 = 0; set2[s2].position >= 0; s2++)
2102*c9945492SAndroid Build Coastguard Worker {
2103*c9945492SAndroid Build Coastguard Worker new_set[s1 + s2].position = set2[s2].position;
2104*c9945492SAndroid Build Coastguard Worker new_set[s1 + s2].code_min = set2[s2].code_min;
2105*c9945492SAndroid Build Coastguard Worker new_set[s1 + s2].code_max = set2[s2].code_max;
2106*c9945492SAndroid Build Coastguard Worker /* XXX - why not | assertions here as well? */
2107*c9945492SAndroid Build Coastguard Worker new_set[s1 + s2].assertions = set2[s2].assertions;
2108*c9945492SAndroid Build Coastguard Worker new_set[s1 + s2].class = set2[s2].class;
2109*c9945492SAndroid Build Coastguard Worker new_set[s1 + s2].neg_classes = set2[s2].neg_classes;
2110*c9945492SAndroid Build Coastguard Worker new_set[s1 + s2].backref = set2[s2].backref;
2111*c9945492SAndroid Build Coastguard Worker if (set2[s2].tags == NULL)
2112*c9945492SAndroid Build Coastguard Worker new_set[s1 + s2].tags = NULL;
2113*c9945492SAndroid Build Coastguard Worker else
2114*c9945492SAndroid Build Coastguard Worker {
2115*c9945492SAndroid Build Coastguard Worker for (i = 0; set2[s2].tags[i] >= 0; i++);
2116*c9945492SAndroid Build Coastguard Worker new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1));
2117*c9945492SAndroid Build Coastguard Worker if (new_tags == NULL)
2118*c9945492SAndroid Build Coastguard Worker return NULL;
2119*c9945492SAndroid Build Coastguard Worker for (j = 0; j < i; j++)
2120*c9945492SAndroid Build Coastguard Worker new_tags[j] = set2[s2].tags[j];
2121*c9945492SAndroid Build Coastguard Worker new_tags[j] = -1;
2122*c9945492SAndroid Build Coastguard Worker new_set[s1 + s2].tags = new_tags;
2123*c9945492SAndroid Build Coastguard Worker }
2124*c9945492SAndroid Build Coastguard Worker }
2125*c9945492SAndroid Build Coastguard Worker new_set[s1 + s2].position = -1;
2126*c9945492SAndroid Build Coastguard Worker return new_set;
2127*c9945492SAndroid Build Coastguard Worker }
2128*c9945492SAndroid Build Coastguard Worker
2129*c9945492SAndroid Build Coastguard Worker /* Finds the empty path through `node' which is the one that should be
2130*c9945492SAndroid Build Coastguard Worker taken according to POSIX.2 rules, and adds the tags on that path to
2131*c9945492SAndroid Build Coastguard Worker `tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is
2132*c9945492SAndroid Build Coastguard Worker set to the number of tags seen on the path. */
2133*c9945492SAndroid Build Coastguard Worker static reg_errcode_t
tre_match_empty(tre_stack_t * stack,tre_ast_node_t * node,int * tags,int * assertions,int * num_tags_seen)2134*c9945492SAndroid Build Coastguard Worker tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,
2135*c9945492SAndroid Build Coastguard Worker int *assertions, int *num_tags_seen)
2136*c9945492SAndroid Build Coastguard Worker {
2137*c9945492SAndroid Build Coastguard Worker tre_literal_t *lit;
2138*c9945492SAndroid Build Coastguard Worker tre_union_t *uni;
2139*c9945492SAndroid Build Coastguard Worker tre_catenation_t *cat;
2140*c9945492SAndroid Build Coastguard Worker tre_iteration_t *iter;
2141*c9945492SAndroid Build Coastguard Worker int i;
2142*c9945492SAndroid Build Coastguard Worker int bottom = tre_stack_num_objects(stack);
2143*c9945492SAndroid Build Coastguard Worker reg_errcode_t status = REG_OK;
2144*c9945492SAndroid Build Coastguard Worker if (num_tags_seen)
2145*c9945492SAndroid Build Coastguard Worker *num_tags_seen = 0;
2146*c9945492SAndroid Build Coastguard Worker
2147*c9945492SAndroid Build Coastguard Worker status = tre_stack_push_voidptr(stack, node);
2148*c9945492SAndroid Build Coastguard Worker
2149*c9945492SAndroid Build Coastguard Worker /* Walk through the tree recursively. */
2150*c9945492SAndroid Build Coastguard Worker while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
2151*c9945492SAndroid Build Coastguard Worker {
2152*c9945492SAndroid Build Coastguard Worker node = tre_stack_pop_voidptr(stack);
2153*c9945492SAndroid Build Coastguard Worker
2154*c9945492SAndroid Build Coastguard Worker switch (node->type)
2155*c9945492SAndroid Build Coastguard Worker {
2156*c9945492SAndroid Build Coastguard Worker case LITERAL:
2157*c9945492SAndroid Build Coastguard Worker lit = (tre_literal_t *)node->obj;
2158*c9945492SAndroid Build Coastguard Worker switch (lit->code_min)
2159*c9945492SAndroid Build Coastguard Worker {
2160*c9945492SAndroid Build Coastguard Worker case TAG:
2161*c9945492SAndroid Build Coastguard Worker if (lit->code_max >= 0)
2162*c9945492SAndroid Build Coastguard Worker {
2163*c9945492SAndroid Build Coastguard Worker if (tags != NULL)
2164*c9945492SAndroid Build Coastguard Worker {
2165*c9945492SAndroid Build Coastguard Worker /* Add the tag to `tags'. */
2166*c9945492SAndroid Build Coastguard Worker for (i = 0; tags[i] >= 0; i++)
2167*c9945492SAndroid Build Coastguard Worker if (tags[i] == lit->code_max)
2168*c9945492SAndroid Build Coastguard Worker break;
2169*c9945492SAndroid Build Coastguard Worker if (tags[i] < 0)
2170*c9945492SAndroid Build Coastguard Worker {
2171*c9945492SAndroid Build Coastguard Worker tags[i] = lit->code_max;
2172*c9945492SAndroid Build Coastguard Worker tags[i + 1] = -1;
2173*c9945492SAndroid Build Coastguard Worker }
2174*c9945492SAndroid Build Coastguard Worker }
2175*c9945492SAndroid Build Coastguard Worker if (num_tags_seen)
2176*c9945492SAndroid Build Coastguard Worker (*num_tags_seen)++;
2177*c9945492SAndroid Build Coastguard Worker }
2178*c9945492SAndroid Build Coastguard Worker break;
2179*c9945492SAndroid Build Coastguard Worker case ASSERTION:
2180*c9945492SAndroid Build Coastguard Worker assert(lit->code_max >= 1
2181*c9945492SAndroid Build Coastguard Worker || lit->code_max <= ASSERT_LAST);
2182*c9945492SAndroid Build Coastguard Worker if (assertions != NULL)
2183*c9945492SAndroid Build Coastguard Worker *assertions |= lit->code_max;
2184*c9945492SAndroid Build Coastguard Worker break;
2185*c9945492SAndroid Build Coastguard Worker case EMPTY:
2186*c9945492SAndroid Build Coastguard Worker break;
2187*c9945492SAndroid Build Coastguard Worker default:
2188*c9945492SAndroid Build Coastguard Worker assert(0);
2189*c9945492SAndroid Build Coastguard Worker break;
2190*c9945492SAndroid Build Coastguard Worker }
2191*c9945492SAndroid Build Coastguard Worker break;
2192*c9945492SAndroid Build Coastguard Worker
2193*c9945492SAndroid Build Coastguard Worker case UNION:
2194*c9945492SAndroid Build Coastguard Worker /* Subexpressions starting earlier take priority over ones
2195*c9945492SAndroid Build Coastguard Worker starting later, so we prefer the left subexpression over the
2196*c9945492SAndroid Build Coastguard Worker right subexpression. */
2197*c9945492SAndroid Build Coastguard Worker uni = (tre_union_t *)node->obj;
2198*c9945492SAndroid Build Coastguard Worker if (uni->left->nullable)
2199*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, voidptr, uni->left)
2200*c9945492SAndroid Build Coastguard Worker else if (uni->right->nullable)
2201*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, voidptr, uni->right)
2202*c9945492SAndroid Build Coastguard Worker else
2203*c9945492SAndroid Build Coastguard Worker assert(0);
2204*c9945492SAndroid Build Coastguard Worker break;
2205*c9945492SAndroid Build Coastguard Worker
2206*c9945492SAndroid Build Coastguard Worker case CATENATION:
2207*c9945492SAndroid Build Coastguard Worker /* The path must go through both children. */
2208*c9945492SAndroid Build Coastguard Worker cat = (tre_catenation_t *)node->obj;
2209*c9945492SAndroid Build Coastguard Worker assert(cat->left->nullable);
2210*c9945492SAndroid Build Coastguard Worker assert(cat->right->nullable);
2211*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, voidptr, cat->left);
2212*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, voidptr, cat->right);
2213*c9945492SAndroid Build Coastguard Worker break;
2214*c9945492SAndroid Build Coastguard Worker
2215*c9945492SAndroid Build Coastguard Worker case ITERATION:
2216*c9945492SAndroid Build Coastguard Worker /* A match with an empty string is preferred over no match at
2217*c9945492SAndroid Build Coastguard Worker all, so we go through the argument if possible. */
2218*c9945492SAndroid Build Coastguard Worker iter = (tre_iteration_t *)node->obj;
2219*c9945492SAndroid Build Coastguard Worker if (iter->arg->nullable)
2220*c9945492SAndroid Build Coastguard Worker STACK_PUSHX(stack, voidptr, iter->arg);
2221*c9945492SAndroid Build Coastguard Worker break;
2222*c9945492SAndroid Build Coastguard Worker
2223*c9945492SAndroid Build Coastguard Worker default:
2224*c9945492SAndroid Build Coastguard Worker assert(0);
2225*c9945492SAndroid Build Coastguard Worker break;
2226*c9945492SAndroid Build Coastguard Worker }
2227*c9945492SAndroid Build Coastguard Worker }
2228*c9945492SAndroid Build Coastguard Worker
2229*c9945492SAndroid Build Coastguard Worker return status;
2230*c9945492SAndroid Build Coastguard Worker }
2231*c9945492SAndroid Build Coastguard Worker
2232*c9945492SAndroid Build Coastguard Worker
2233*c9945492SAndroid Build Coastguard Worker typedef enum {
2234*c9945492SAndroid Build Coastguard Worker NFL_RECURSE,
2235*c9945492SAndroid Build Coastguard Worker NFL_POST_UNION,
2236*c9945492SAndroid Build Coastguard Worker NFL_POST_CATENATION,
2237*c9945492SAndroid Build Coastguard Worker NFL_POST_ITERATION
2238*c9945492SAndroid Build Coastguard Worker } tre_nfl_stack_symbol_t;
2239*c9945492SAndroid Build Coastguard Worker
2240*c9945492SAndroid Build Coastguard Worker
2241*c9945492SAndroid Build Coastguard Worker /* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
2242*c9945492SAndroid Build Coastguard Worker the nodes of the AST `tree'. */
2243*c9945492SAndroid Build Coastguard Worker static reg_errcode_t
tre_compute_nfl(tre_mem_t mem,tre_stack_t * stack,tre_ast_node_t * tree)2244*c9945492SAndroid Build Coastguard Worker tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
2245*c9945492SAndroid Build Coastguard Worker {
2246*c9945492SAndroid Build Coastguard Worker int bottom = tre_stack_num_objects(stack);
2247*c9945492SAndroid Build Coastguard Worker
2248*c9945492SAndroid Build Coastguard Worker STACK_PUSHR(stack, voidptr, tree);
2249*c9945492SAndroid Build Coastguard Worker STACK_PUSHR(stack, int, NFL_RECURSE);
2250*c9945492SAndroid Build Coastguard Worker
2251*c9945492SAndroid Build Coastguard Worker while (tre_stack_num_objects(stack) > bottom)
2252*c9945492SAndroid Build Coastguard Worker {
2253*c9945492SAndroid Build Coastguard Worker tre_nfl_stack_symbol_t symbol;
2254*c9945492SAndroid Build Coastguard Worker tre_ast_node_t *node;
2255*c9945492SAndroid Build Coastguard Worker
2256*c9945492SAndroid Build Coastguard Worker symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);
2257*c9945492SAndroid Build Coastguard Worker node = tre_stack_pop_voidptr(stack);
2258*c9945492SAndroid Build Coastguard Worker switch (symbol)
2259*c9945492SAndroid Build Coastguard Worker {
2260*c9945492SAndroid Build Coastguard Worker case NFL_RECURSE:
2261*c9945492SAndroid Build Coastguard Worker switch (node->type)
2262*c9945492SAndroid Build Coastguard Worker {
2263*c9945492SAndroid Build Coastguard Worker case LITERAL:
2264*c9945492SAndroid Build Coastguard Worker {
2265*c9945492SAndroid Build Coastguard Worker tre_literal_t *lit = (tre_literal_t *)node->obj;
2266*c9945492SAndroid Build Coastguard Worker if (IS_BACKREF(lit))
2267*c9945492SAndroid Build Coastguard Worker {
2268*c9945492SAndroid Build Coastguard Worker /* Back references: nullable = false, firstpos = {i},
2269*c9945492SAndroid Build Coastguard Worker lastpos = {i}. */
2270*c9945492SAndroid Build Coastguard Worker node->nullable = 0;
2271*c9945492SAndroid Build Coastguard Worker node->firstpos = tre_set_one(mem, lit->position, 0,
2272*c9945492SAndroid Build Coastguard Worker TRE_CHAR_MAX, 0, NULL, -1);
2273*c9945492SAndroid Build Coastguard Worker if (!node->firstpos)
2274*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
2275*c9945492SAndroid Build Coastguard Worker node->lastpos = tre_set_one(mem, lit->position, 0,
2276*c9945492SAndroid Build Coastguard Worker TRE_CHAR_MAX, 0, NULL,
2277*c9945492SAndroid Build Coastguard Worker (int)lit->code_max);
2278*c9945492SAndroid Build Coastguard Worker if (!node->lastpos)
2279*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
2280*c9945492SAndroid Build Coastguard Worker }
2281*c9945492SAndroid Build Coastguard Worker else if (lit->code_min < 0)
2282*c9945492SAndroid Build Coastguard Worker {
2283*c9945492SAndroid Build Coastguard Worker /* Tags, empty strings, params, and zero width assertions:
2284*c9945492SAndroid Build Coastguard Worker nullable = true, firstpos = {}, and lastpos = {}. */
2285*c9945492SAndroid Build Coastguard Worker node->nullable = 1;
2286*c9945492SAndroid Build Coastguard Worker node->firstpos = tre_set_empty(mem);
2287*c9945492SAndroid Build Coastguard Worker if (!node->firstpos)
2288*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
2289*c9945492SAndroid Build Coastguard Worker node->lastpos = tre_set_empty(mem);
2290*c9945492SAndroid Build Coastguard Worker if (!node->lastpos)
2291*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
2292*c9945492SAndroid Build Coastguard Worker }
2293*c9945492SAndroid Build Coastguard Worker else
2294*c9945492SAndroid Build Coastguard Worker {
2295*c9945492SAndroid Build Coastguard Worker /* Literal at position i: nullable = false, firstpos = {i},
2296*c9945492SAndroid Build Coastguard Worker lastpos = {i}. */
2297*c9945492SAndroid Build Coastguard Worker node->nullable = 0;
2298*c9945492SAndroid Build Coastguard Worker node->firstpos =
2299*c9945492SAndroid Build Coastguard Worker tre_set_one(mem, lit->position, (int)lit->code_min,
2300*c9945492SAndroid Build Coastguard Worker (int)lit->code_max, 0, NULL, -1);
2301*c9945492SAndroid Build Coastguard Worker if (!node->firstpos)
2302*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
2303*c9945492SAndroid Build Coastguard Worker node->lastpos = tre_set_one(mem, lit->position,
2304*c9945492SAndroid Build Coastguard Worker (int)lit->code_min,
2305*c9945492SAndroid Build Coastguard Worker (int)lit->code_max,
2306*c9945492SAndroid Build Coastguard Worker lit->class, lit->neg_classes,
2307*c9945492SAndroid Build Coastguard Worker -1);
2308*c9945492SAndroid Build Coastguard Worker if (!node->lastpos)
2309*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
2310*c9945492SAndroid Build Coastguard Worker }
2311*c9945492SAndroid Build Coastguard Worker break;
2312*c9945492SAndroid Build Coastguard Worker }
2313*c9945492SAndroid Build Coastguard Worker
2314*c9945492SAndroid Build Coastguard Worker case UNION:
2315*c9945492SAndroid Build Coastguard Worker /* Compute the attributes for the two subtrees, and after that
2316*c9945492SAndroid Build Coastguard Worker for this node. */
2317*c9945492SAndroid Build Coastguard Worker STACK_PUSHR(stack, voidptr, node);
2318*c9945492SAndroid Build Coastguard Worker STACK_PUSHR(stack, int, NFL_POST_UNION);
2319*c9945492SAndroid Build Coastguard Worker STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right);
2320*c9945492SAndroid Build Coastguard Worker STACK_PUSHR(stack, int, NFL_RECURSE);
2321*c9945492SAndroid Build Coastguard Worker STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left);
2322*c9945492SAndroid Build Coastguard Worker STACK_PUSHR(stack, int, NFL_RECURSE);
2323*c9945492SAndroid Build Coastguard Worker break;
2324*c9945492SAndroid Build Coastguard Worker
2325*c9945492SAndroid Build Coastguard Worker case CATENATION:
2326*c9945492SAndroid Build Coastguard Worker /* Compute the attributes for the two subtrees, and after that
2327*c9945492SAndroid Build Coastguard Worker for this node. */
2328*c9945492SAndroid Build Coastguard Worker STACK_PUSHR(stack, voidptr, node);
2329*c9945492SAndroid Build Coastguard Worker STACK_PUSHR(stack, int, NFL_POST_CATENATION);
2330*c9945492SAndroid Build Coastguard Worker STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right);
2331*c9945492SAndroid Build Coastguard Worker STACK_PUSHR(stack, int, NFL_RECURSE);
2332*c9945492SAndroid Build Coastguard Worker STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left);
2333*c9945492SAndroid Build Coastguard Worker STACK_PUSHR(stack, int, NFL_RECURSE);
2334*c9945492SAndroid Build Coastguard Worker break;
2335*c9945492SAndroid Build Coastguard Worker
2336*c9945492SAndroid Build Coastguard Worker case ITERATION:
2337*c9945492SAndroid Build Coastguard Worker /* Compute the attributes for the subtree, and after that for
2338*c9945492SAndroid Build Coastguard Worker this node. */
2339*c9945492SAndroid Build Coastguard Worker STACK_PUSHR(stack, voidptr, node);
2340*c9945492SAndroid Build Coastguard Worker STACK_PUSHR(stack, int, NFL_POST_ITERATION);
2341*c9945492SAndroid Build Coastguard Worker STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg);
2342*c9945492SAndroid Build Coastguard Worker STACK_PUSHR(stack, int, NFL_RECURSE);
2343*c9945492SAndroid Build Coastguard Worker break;
2344*c9945492SAndroid Build Coastguard Worker }
2345*c9945492SAndroid Build Coastguard Worker break; /* end case: NFL_RECURSE */
2346*c9945492SAndroid Build Coastguard Worker
2347*c9945492SAndroid Build Coastguard Worker case NFL_POST_UNION:
2348*c9945492SAndroid Build Coastguard Worker {
2349*c9945492SAndroid Build Coastguard Worker tre_union_t *uni = (tre_union_t *)node->obj;
2350*c9945492SAndroid Build Coastguard Worker node->nullable = uni->left->nullable || uni->right->nullable;
2351*c9945492SAndroid Build Coastguard Worker node->firstpos = tre_set_union(mem, uni->left->firstpos,
2352*c9945492SAndroid Build Coastguard Worker uni->right->firstpos, NULL, 0);
2353*c9945492SAndroid Build Coastguard Worker if (!node->firstpos)
2354*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
2355*c9945492SAndroid Build Coastguard Worker node->lastpos = tre_set_union(mem, uni->left->lastpos,
2356*c9945492SAndroid Build Coastguard Worker uni->right->lastpos, NULL, 0);
2357*c9945492SAndroid Build Coastguard Worker if (!node->lastpos)
2358*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
2359*c9945492SAndroid Build Coastguard Worker break;
2360*c9945492SAndroid Build Coastguard Worker }
2361*c9945492SAndroid Build Coastguard Worker
2362*c9945492SAndroid Build Coastguard Worker case NFL_POST_ITERATION:
2363*c9945492SAndroid Build Coastguard Worker {
2364*c9945492SAndroid Build Coastguard Worker tre_iteration_t *iter = (tre_iteration_t *)node->obj;
2365*c9945492SAndroid Build Coastguard Worker
2366*c9945492SAndroid Build Coastguard Worker if (iter->min == 0 || iter->arg->nullable)
2367*c9945492SAndroid Build Coastguard Worker node->nullable = 1;
2368*c9945492SAndroid Build Coastguard Worker else
2369*c9945492SAndroid Build Coastguard Worker node->nullable = 0;
2370*c9945492SAndroid Build Coastguard Worker node->firstpos = iter->arg->firstpos;
2371*c9945492SAndroid Build Coastguard Worker node->lastpos = iter->arg->lastpos;
2372*c9945492SAndroid Build Coastguard Worker break;
2373*c9945492SAndroid Build Coastguard Worker }
2374*c9945492SAndroid Build Coastguard Worker
2375*c9945492SAndroid Build Coastguard Worker case NFL_POST_CATENATION:
2376*c9945492SAndroid Build Coastguard Worker {
2377*c9945492SAndroid Build Coastguard Worker int num_tags, *tags, assertions;
2378*c9945492SAndroid Build Coastguard Worker reg_errcode_t status;
2379*c9945492SAndroid Build Coastguard Worker tre_catenation_t *cat = node->obj;
2380*c9945492SAndroid Build Coastguard Worker node->nullable = cat->left->nullable && cat->right->nullable;
2381*c9945492SAndroid Build Coastguard Worker
2382*c9945492SAndroid Build Coastguard Worker /* Compute firstpos. */
2383*c9945492SAndroid Build Coastguard Worker if (cat->left->nullable)
2384*c9945492SAndroid Build Coastguard Worker {
2385*c9945492SAndroid Build Coastguard Worker /* The left side matches the empty string. Make a first pass
2386*c9945492SAndroid Build Coastguard Worker with tre_match_empty() to get the number of tags and
2387*c9945492SAndroid Build Coastguard Worker parameters. */
2388*c9945492SAndroid Build Coastguard Worker status = tre_match_empty(stack, cat->left,
2389*c9945492SAndroid Build Coastguard Worker NULL, NULL, &num_tags);
2390*c9945492SAndroid Build Coastguard Worker if (status != REG_OK)
2391*c9945492SAndroid Build Coastguard Worker return status;
2392*c9945492SAndroid Build Coastguard Worker /* Allocate arrays for the tags and parameters. */
2393*c9945492SAndroid Build Coastguard Worker tags = xmalloc(sizeof(*tags) * (num_tags + 1));
2394*c9945492SAndroid Build Coastguard Worker if (!tags)
2395*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
2396*c9945492SAndroid Build Coastguard Worker tags[0] = -1;
2397*c9945492SAndroid Build Coastguard Worker assertions = 0;
2398*c9945492SAndroid Build Coastguard Worker /* Second pass with tre_mach_empty() to get the list of
2399*c9945492SAndroid Build Coastguard Worker tags and parameters. */
2400*c9945492SAndroid Build Coastguard Worker status = tre_match_empty(stack, cat->left, tags,
2401*c9945492SAndroid Build Coastguard Worker &assertions, NULL);
2402*c9945492SAndroid Build Coastguard Worker if (status != REG_OK)
2403*c9945492SAndroid Build Coastguard Worker {
2404*c9945492SAndroid Build Coastguard Worker xfree(tags);
2405*c9945492SAndroid Build Coastguard Worker return status;
2406*c9945492SAndroid Build Coastguard Worker }
2407*c9945492SAndroid Build Coastguard Worker node->firstpos =
2408*c9945492SAndroid Build Coastguard Worker tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
2409*c9945492SAndroid Build Coastguard Worker tags, assertions);
2410*c9945492SAndroid Build Coastguard Worker xfree(tags);
2411*c9945492SAndroid Build Coastguard Worker if (!node->firstpos)
2412*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
2413*c9945492SAndroid Build Coastguard Worker }
2414*c9945492SAndroid Build Coastguard Worker else
2415*c9945492SAndroid Build Coastguard Worker {
2416*c9945492SAndroid Build Coastguard Worker node->firstpos = cat->left->firstpos;
2417*c9945492SAndroid Build Coastguard Worker }
2418*c9945492SAndroid Build Coastguard Worker
2419*c9945492SAndroid Build Coastguard Worker /* Compute lastpos. */
2420*c9945492SAndroid Build Coastguard Worker if (cat->right->nullable)
2421*c9945492SAndroid Build Coastguard Worker {
2422*c9945492SAndroid Build Coastguard Worker /* The right side matches the empty string. Make a first pass
2423*c9945492SAndroid Build Coastguard Worker with tre_match_empty() to get the number of tags and
2424*c9945492SAndroid Build Coastguard Worker parameters. */
2425*c9945492SAndroid Build Coastguard Worker status = tre_match_empty(stack, cat->right,
2426*c9945492SAndroid Build Coastguard Worker NULL, NULL, &num_tags);
2427*c9945492SAndroid Build Coastguard Worker if (status != REG_OK)
2428*c9945492SAndroid Build Coastguard Worker return status;
2429*c9945492SAndroid Build Coastguard Worker /* Allocate arrays for the tags and parameters. */
2430*c9945492SAndroid Build Coastguard Worker tags = xmalloc(sizeof(int) * (num_tags + 1));
2431*c9945492SAndroid Build Coastguard Worker if (!tags)
2432*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
2433*c9945492SAndroid Build Coastguard Worker tags[0] = -1;
2434*c9945492SAndroid Build Coastguard Worker assertions = 0;
2435*c9945492SAndroid Build Coastguard Worker /* Second pass with tre_mach_empty() to get the list of
2436*c9945492SAndroid Build Coastguard Worker tags and parameters. */
2437*c9945492SAndroid Build Coastguard Worker status = tre_match_empty(stack, cat->right, tags,
2438*c9945492SAndroid Build Coastguard Worker &assertions, NULL);
2439*c9945492SAndroid Build Coastguard Worker if (status != REG_OK)
2440*c9945492SAndroid Build Coastguard Worker {
2441*c9945492SAndroid Build Coastguard Worker xfree(tags);
2442*c9945492SAndroid Build Coastguard Worker return status;
2443*c9945492SAndroid Build Coastguard Worker }
2444*c9945492SAndroid Build Coastguard Worker node->lastpos =
2445*c9945492SAndroid Build Coastguard Worker tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
2446*c9945492SAndroid Build Coastguard Worker tags, assertions);
2447*c9945492SAndroid Build Coastguard Worker xfree(tags);
2448*c9945492SAndroid Build Coastguard Worker if (!node->lastpos)
2449*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
2450*c9945492SAndroid Build Coastguard Worker }
2451*c9945492SAndroid Build Coastguard Worker else
2452*c9945492SAndroid Build Coastguard Worker {
2453*c9945492SAndroid Build Coastguard Worker node->lastpos = cat->right->lastpos;
2454*c9945492SAndroid Build Coastguard Worker }
2455*c9945492SAndroid Build Coastguard Worker break;
2456*c9945492SAndroid Build Coastguard Worker }
2457*c9945492SAndroid Build Coastguard Worker
2458*c9945492SAndroid Build Coastguard Worker default:
2459*c9945492SAndroid Build Coastguard Worker assert(0);
2460*c9945492SAndroid Build Coastguard Worker break;
2461*c9945492SAndroid Build Coastguard Worker }
2462*c9945492SAndroid Build Coastguard Worker }
2463*c9945492SAndroid Build Coastguard Worker
2464*c9945492SAndroid Build Coastguard Worker return REG_OK;
2465*c9945492SAndroid Build Coastguard Worker }
2466*c9945492SAndroid Build Coastguard Worker
2467*c9945492SAndroid Build Coastguard Worker
2468*c9945492SAndroid Build Coastguard Worker /* Adds a transition from each position in `p1' to each position in `p2'. */
2469*c9945492SAndroid Build Coastguard Worker static reg_errcode_t
tre_make_trans(tre_pos_and_tags_t * p1,tre_pos_and_tags_t * p2,tre_tnfa_transition_t * transitions,int * counts,int * offs)2470*c9945492SAndroid Build Coastguard Worker tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
2471*c9945492SAndroid Build Coastguard Worker tre_tnfa_transition_t *transitions,
2472*c9945492SAndroid Build Coastguard Worker int *counts, int *offs)
2473*c9945492SAndroid Build Coastguard Worker {
2474*c9945492SAndroid Build Coastguard Worker tre_pos_and_tags_t *orig_p2 = p2;
2475*c9945492SAndroid Build Coastguard Worker tre_tnfa_transition_t *trans;
2476*c9945492SAndroid Build Coastguard Worker int i, j, k, l, dup, prev_p2_pos;
2477*c9945492SAndroid Build Coastguard Worker
2478*c9945492SAndroid Build Coastguard Worker if (transitions != NULL)
2479*c9945492SAndroid Build Coastguard Worker while (p1->position >= 0)
2480*c9945492SAndroid Build Coastguard Worker {
2481*c9945492SAndroid Build Coastguard Worker p2 = orig_p2;
2482*c9945492SAndroid Build Coastguard Worker prev_p2_pos = -1;
2483*c9945492SAndroid Build Coastguard Worker while (p2->position >= 0)
2484*c9945492SAndroid Build Coastguard Worker {
2485*c9945492SAndroid Build Coastguard Worker /* Optimization: if this position was already handled, skip it. */
2486*c9945492SAndroid Build Coastguard Worker if (p2->position == prev_p2_pos)
2487*c9945492SAndroid Build Coastguard Worker {
2488*c9945492SAndroid Build Coastguard Worker p2++;
2489*c9945492SAndroid Build Coastguard Worker continue;
2490*c9945492SAndroid Build Coastguard Worker }
2491*c9945492SAndroid Build Coastguard Worker prev_p2_pos = p2->position;
2492*c9945492SAndroid Build Coastguard Worker /* Set `trans' to point to the next unused transition from
2493*c9945492SAndroid Build Coastguard Worker position `p1->position'. */
2494*c9945492SAndroid Build Coastguard Worker trans = transitions + offs[p1->position];
2495*c9945492SAndroid Build Coastguard Worker while (trans->state != NULL)
2496*c9945492SAndroid Build Coastguard Worker {
2497*c9945492SAndroid Build Coastguard Worker #if 0
2498*c9945492SAndroid Build Coastguard Worker /* If we find a previous transition from `p1->position' to
2499*c9945492SAndroid Build Coastguard Worker `p2->position', it is overwritten. This can happen only
2500*c9945492SAndroid Build Coastguard Worker if there are nested loops in the regexp, like in "((a)*)*".
2501*c9945492SAndroid Build Coastguard Worker In POSIX.2 repetition using the outer loop is always
2502*c9945492SAndroid Build Coastguard Worker preferred over using the inner loop. Therefore the
2503*c9945492SAndroid Build Coastguard Worker transition for the inner loop is useless and can be thrown
2504*c9945492SAndroid Build Coastguard Worker away. */
2505*c9945492SAndroid Build Coastguard Worker /* XXX - The same position is used for all nodes in a bracket
2506*c9945492SAndroid Build Coastguard Worker expression, so this optimization cannot be used (it will
2507*c9945492SAndroid Build Coastguard Worker break bracket expressions) unless I figure out a way to
2508*c9945492SAndroid Build Coastguard Worker detect it here. */
2509*c9945492SAndroid Build Coastguard Worker if (trans->state_id == p2->position)
2510*c9945492SAndroid Build Coastguard Worker {
2511*c9945492SAndroid Build Coastguard Worker break;
2512*c9945492SAndroid Build Coastguard Worker }
2513*c9945492SAndroid Build Coastguard Worker #endif
2514*c9945492SAndroid Build Coastguard Worker trans++;
2515*c9945492SAndroid Build Coastguard Worker }
2516*c9945492SAndroid Build Coastguard Worker
2517*c9945492SAndroid Build Coastguard Worker if (trans->state == NULL)
2518*c9945492SAndroid Build Coastguard Worker (trans + 1)->state = NULL;
2519*c9945492SAndroid Build Coastguard Worker /* Use the character ranges, assertions, etc. from `p1' for
2520*c9945492SAndroid Build Coastguard Worker the transition from `p1' to `p2'. */
2521*c9945492SAndroid Build Coastguard Worker trans->code_min = p1->code_min;
2522*c9945492SAndroid Build Coastguard Worker trans->code_max = p1->code_max;
2523*c9945492SAndroid Build Coastguard Worker trans->state = transitions + offs[p2->position];
2524*c9945492SAndroid Build Coastguard Worker trans->state_id = p2->position;
2525*c9945492SAndroid Build Coastguard Worker trans->assertions = p1->assertions | p2->assertions
2526*c9945492SAndroid Build Coastguard Worker | (p1->class ? ASSERT_CHAR_CLASS : 0)
2527*c9945492SAndroid Build Coastguard Worker | (p1->neg_classes != NULL ? ASSERT_CHAR_CLASS_NEG : 0);
2528*c9945492SAndroid Build Coastguard Worker if (p1->backref >= 0)
2529*c9945492SAndroid Build Coastguard Worker {
2530*c9945492SAndroid Build Coastguard Worker assert((trans->assertions & ASSERT_CHAR_CLASS) == 0);
2531*c9945492SAndroid Build Coastguard Worker assert(p2->backref < 0);
2532*c9945492SAndroid Build Coastguard Worker trans->u.backref = p1->backref;
2533*c9945492SAndroid Build Coastguard Worker trans->assertions |= ASSERT_BACKREF;
2534*c9945492SAndroid Build Coastguard Worker }
2535*c9945492SAndroid Build Coastguard Worker else
2536*c9945492SAndroid Build Coastguard Worker trans->u.class = p1->class;
2537*c9945492SAndroid Build Coastguard Worker if (p1->neg_classes != NULL)
2538*c9945492SAndroid Build Coastguard Worker {
2539*c9945492SAndroid Build Coastguard Worker for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++);
2540*c9945492SAndroid Build Coastguard Worker trans->neg_classes =
2541*c9945492SAndroid Build Coastguard Worker xmalloc(sizeof(*trans->neg_classes) * (i + 1));
2542*c9945492SAndroid Build Coastguard Worker if (trans->neg_classes == NULL)
2543*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
2544*c9945492SAndroid Build Coastguard Worker for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++)
2545*c9945492SAndroid Build Coastguard Worker trans->neg_classes[i] = p1->neg_classes[i];
2546*c9945492SAndroid Build Coastguard Worker trans->neg_classes[i] = (tre_ctype_t)0;
2547*c9945492SAndroid Build Coastguard Worker }
2548*c9945492SAndroid Build Coastguard Worker else
2549*c9945492SAndroid Build Coastguard Worker trans->neg_classes = NULL;
2550*c9945492SAndroid Build Coastguard Worker
2551*c9945492SAndroid Build Coastguard Worker /* Find out how many tags this transition has. */
2552*c9945492SAndroid Build Coastguard Worker i = 0;
2553*c9945492SAndroid Build Coastguard Worker if (p1->tags != NULL)
2554*c9945492SAndroid Build Coastguard Worker while(p1->tags[i] >= 0)
2555*c9945492SAndroid Build Coastguard Worker i++;
2556*c9945492SAndroid Build Coastguard Worker j = 0;
2557*c9945492SAndroid Build Coastguard Worker if (p2->tags != NULL)
2558*c9945492SAndroid Build Coastguard Worker while(p2->tags[j] >= 0)
2559*c9945492SAndroid Build Coastguard Worker j++;
2560*c9945492SAndroid Build Coastguard Worker
2561*c9945492SAndroid Build Coastguard Worker /* If we are overwriting a transition, free the old tag array. */
2562*c9945492SAndroid Build Coastguard Worker if (trans->tags != NULL)
2563*c9945492SAndroid Build Coastguard Worker xfree(trans->tags);
2564*c9945492SAndroid Build Coastguard Worker trans->tags = NULL;
2565*c9945492SAndroid Build Coastguard Worker
2566*c9945492SAndroid Build Coastguard Worker /* If there were any tags, allocate an array and fill it. */
2567*c9945492SAndroid Build Coastguard Worker if (i + j > 0)
2568*c9945492SAndroid Build Coastguard Worker {
2569*c9945492SAndroid Build Coastguard Worker trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1));
2570*c9945492SAndroid Build Coastguard Worker if (!trans->tags)
2571*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
2572*c9945492SAndroid Build Coastguard Worker i = 0;
2573*c9945492SAndroid Build Coastguard Worker if (p1->tags != NULL)
2574*c9945492SAndroid Build Coastguard Worker while(p1->tags[i] >= 0)
2575*c9945492SAndroid Build Coastguard Worker {
2576*c9945492SAndroid Build Coastguard Worker trans->tags[i] = p1->tags[i];
2577*c9945492SAndroid Build Coastguard Worker i++;
2578*c9945492SAndroid Build Coastguard Worker }
2579*c9945492SAndroid Build Coastguard Worker l = i;
2580*c9945492SAndroid Build Coastguard Worker j = 0;
2581*c9945492SAndroid Build Coastguard Worker if (p2->tags != NULL)
2582*c9945492SAndroid Build Coastguard Worker while (p2->tags[j] >= 0)
2583*c9945492SAndroid Build Coastguard Worker {
2584*c9945492SAndroid Build Coastguard Worker /* Don't add duplicates. */
2585*c9945492SAndroid Build Coastguard Worker dup = 0;
2586*c9945492SAndroid Build Coastguard Worker for (k = 0; k < i; k++)
2587*c9945492SAndroid Build Coastguard Worker if (trans->tags[k] == p2->tags[j])
2588*c9945492SAndroid Build Coastguard Worker {
2589*c9945492SAndroid Build Coastguard Worker dup = 1;
2590*c9945492SAndroid Build Coastguard Worker break;
2591*c9945492SAndroid Build Coastguard Worker }
2592*c9945492SAndroid Build Coastguard Worker if (!dup)
2593*c9945492SAndroid Build Coastguard Worker trans->tags[l++] = p2->tags[j];
2594*c9945492SAndroid Build Coastguard Worker j++;
2595*c9945492SAndroid Build Coastguard Worker }
2596*c9945492SAndroid Build Coastguard Worker trans->tags[l] = -1;
2597*c9945492SAndroid Build Coastguard Worker }
2598*c9945492SAndroid Build Coastguard Worker
2599*c9945492SAndroid Build Coastguard Worker p2++;
2600*c9945492SAndroid Build Coastguard Worker }
2601*c9945492SAndroid Build Coastguard Worker p1++;
2602*c9945492SAndroid Build Coastguard Worker }
2603*c9945492SAndroid Build Coastguard Worker else
2604*c9945492SAndroid Build Coastguard Worker /* Compute a maximum limit for the number of transitions leaving
2605*c9945492SAndroid Build Coastguard Worker from each state. */
2606*c9945492SAndroid Build Coastguard Worker while (p1->position >= 0)
2607*c9945492SAndroid Build Coastguard Worker {
2608*c9945492SAndroid Build Coastguard Worker p2 = orig_p2;
2609*c9945492SAndroid Build Coastguard Worker while (p2->position >= 0)
2610*c9945492SAndroid Build Coastguard Worker {
2611*c9945492SAndroid Build Coastguard Worker counts[p1->position]++;
2612*c9945492SAndroid Build Coastguard Worker p2++;
2613*c9945492SAndroid Build Coastguard Worker }
2614*c9945492SAndroid Build Coastguard Worker p1++;
2615*c9945492SAndroid Build Coastguard Worker }
2616*c9945492SAndroid Build Coastguard Worker return REG_OK;
2617*c9945492SAndroid Build Coastguard Worker }
2618*c9945492SAndroid Build Coastguard Worker
2619*c9945492SAndroid Build Coastguard Worker /* Converts the syntax tree to a TNFA. All the transitions in the TNFA are
2620*c9945492SAndroid Build Coastguard Worker labelled with one character range (there are no transitions on empty
2621*c9945492SAndroid Build Coastguard Worker strings). The TNFA takes O(n^2) space in the worst case, `n' is size of
2622*c9945492SAndroid Build Coastguard Worker the regexp. */
2623*c9945492SAndroid Build Coastguard Worker static reg_errcode_t
tre_ast_to_tnfa(tre_ast_node_t * node,tre_tnfa_transition_t * transitions,int * counts,int * offs)2624*c9945492SAndroid Build Coastguard Worker tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
2625*c9945492SAndroid Build Coastguard Worker int *counts, int *offs)
2626*c9945492SAndroid Build Coastguard Worker {
2627*c9945492SAndroid Build Coastguard Worker tre_union_t *uni;
2628*c9945492SAndroid Build Coastguard Worker tre_catenation_t *cat;
2629*c9945492SAndroid Build Coastguard Worker tre_iteration_t *iter;
2630*c9945492SAndroid Build Coastguard Worker reg_errcode_t errcode = REG_OK;
2631*c9945492SAndroid Build Coastguard Worker
2632*c9945492SAndroid Build Coastguard Worker /* XXX - recurse using a stack!. */
2633*c9945492SAndroid Build Coastguard Worker switch (node->type)
2634*c9945492SAndroid Build Coastguard Worker {
2635*c9945492SAndroid Build Coastguard Worker case LITERAL:
2636*c9945492SAndroid Build Coastguard Worker break;
2637*c9945492SAndroid Build Coastguard Worker case UNION:
2638*c9945492SAndroid Build Coastguard Worker uni = (tre_union_t *)node->obj;
2639*c9945492SAndroid Build Coastguard Worker errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
2640*c9945492SAndroid Build Coastguard Worker if (errcode != REG_OK)
2641*c9945492SAndroid Build Coastguard Worker return errcode;
2642*c9945492SAndroid Build Coastguard Worker errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
2643*c9945492SAndroid Build Coastguard Worker break;
2644*c9945492SAndroid Build Coastguard Worker
2645*c9945492SAndroid Build Coastguard Worker case CATENATION:
2646*c9945492SAndroid Build Coastguard Worker cat = (tre_catenation_t *)node->obj;
2647*c9945492SAndroid Build Coastguard Worker /* Add a transition from each position in cat->left->lastpos
2648*c9945492SAndroid Build Coastguard Worker to each position in cat->right->firstpos. */
2649*c9945492SAndroid Build Coastguard Worker errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
2650*c9945492SAndroid Build Coastguard Worker transitions, counts, offs);
2651*c9945492SAndroid Build Coastguard Worker if (errcode != REG_OK)
2652*c9945492SAndroid Build Coastguard Worker return errcode;
2653*c9945492SAndroid Build Coastguard Worker errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
2654*c9945492SAndroid Build Coastguard Worker if (errcode != REG_OK)
2655*c9945492SAndroid Build Coastguard Worker return errcode;
2656*c9945492SAndroid Build Coastguard Worker errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
2657*c9945492SAndroid Build Coastguard Worker break;
2658*c9945492SAndroid Build Coastguard Worker
2659*c9945492SAndroid Build Coastguard Worker case ITERATION:
2660*c9945492SAndroid Build Coastguard Worker iter = (tre_iteration_t *)node->obj;
2661*c9945492SAndroid Build Coastguard Worker assert(iter->max == -1 || iter->max == 1);
2662*c9945492SAndroid Build Coastguard Worker
2663*c9945492SAndroid Build Coastguard Worker if (iter->max == -1)
2664*c9945492SAndroid Build Coastguard Worker {
2665*c9945492SAndroid Build Coastguard Worker assert(iter->min == 0 || iter->min == 1);
2666*c9945492SAndroid Build Coastguard Worker /* Add a transition from each last position in the iterated
2667*c9945492SAndroid Build Coastguard Worker expression to each first position. */
2668*c9945492SAndroid Build Coastguard Worker errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
2669*c9945492SAndroid Build Coastguard Worker transitions, counts, offs);
2670*c9945492SAndroid Build Coastguard Worker if (errcode != REG_OK)
2671*c9945492SAndroid Build Coastguard Worker return errcode;
2672*c9945492SAndroid Build Coastguard Worker }
2673*c9945492SAndroid Build Coastguard Worker errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
2674*c9945492SAndroid Build Coastguard Worker break;
2675*c9945492SAndroid Build Coastguard Worker }
2676*c9945492SAndroid Build Coastguard Worker return errcode;
2677*c9945492SAndroid Build Coastguard Worker }
2678*c9945492SAndroid Build Coastguard Worker
2679*c9945492SAndroid Build Coastguard Worker
2680*c9945492SAndroid Build Coastguard Worker #define ERROR_EXIT(err) \
2681*c9945492SAndroid Build Coastguard Worker do \
2682*c9945492SAndroid Build Coastguard Worker { \
2683*c9945492SAndroid Build Coastguard Worker errcode = err; \
2684*c9945492SAndroid Build Coastguard Worker if (/*CONSTCOND*/1) \
2685*c9945492SAndroid Build Coastguard Worker goto error_exit; \
2686*c9945492SAndroid Build Coastguard Worker } \
2687*c9945492SAndroid Build Coastguard Worker while (/*CONSTCOND*/0)
2688*c9945492SAndroid Build Coastguard Worker
2689*c9945492SAndroid Build Coastguard Worker
2690*c9945492SAndroid Build Coastguard Worker int
regcomp(regex_t * restrict preg,const char * restrict regex,int cflags)2691*c9945492SAndroid Build Coastguard Worker regcomp(regex_t *restrict preg, const char *restrict regex, int cflags)
2692*c9945492SAndroid Build Coastguard Worker {
2693*c9945492SAndroid Build Coastguard Worker tre_stack_t *stack;
2694*c9945492SAndroid Build Coastguard Worker tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
2695*c9945492SAndroid Build Coastguard Worker tre_pos_and_tags_t *p;
2696*c9945492SAndroid Build Coastguard Worker int *counts = NULL, *offs = NULL;
2697*c9945492SAndroid Build Coastguard Worker int i, add = 0;
2698*c9945492SAndroid Build Coastguard Worker tre_tnfa_transition_t *transitions, *initial;
2699*c9945492SAndroid Build Coastguard Worker tre_tnfa_t *tnfa = NULL;
2700*c9945492SAndroid Build Coastguard Worker tre_submatch_data_t *submatch_data;
2701*c9945492SAndroid Build Coastguard Worker tre_tag_direction_t *tag_directions = NULL;
2702*c9945492SAndroid Build Coastguard Worker reg_errcode_t errcode;
2703*c9945492SAndroid Build Coastguard Worker tre_mem_t mem;
2704*c9945492SAndroid Build Coastguard Worker
2705*c9945492SAndroid Build Coastguard Worker /* Parse context. */
2706*c9945492SAndroid Build Coastguard Worker tre_parse_ctx_t parse_ctx;
2707*c9945492SAndroid Build Coastguard Worker
2708*c9945492SAndroid Build Coastguard Worker /* Allocate a stack used throughout the compilation process for various
2709*c9945492SAndroid Build Coastguard Worker purposes. */
2710*c9945492SAndroid Build Coastguard Worker stack = tre_stack_new(512, 1024000, 128);
2711*c9945492SAndroid Build Coastguard Worker if (!stack)
2712*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
2713*c9945492SAndroid Build Coastguard Worker /* Allocate a fast memory allocator. */
2714*c9945492SAndroid Build Coastguard Worker mem = tre_mem_new();
2715*c9945492SAndroid Build Coastguard Worker if (!mem)
2716*c9945492SAndroid Build Coastguard Worker {
2717*c9945492SAndroid Build Coastguard Worker tre_stack_destroy(stack);
2718*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
2719*c9945492SAndroid Build Coastguard Worker }
2720*c9945492SAndroid Build Coastguard Worker
2721*c9945492SAndroid Build Coastguard Worker /* Parse the regexp. */
2722*c9945492SAndroid Build Coastguard Worker memset(&parse_ctx, 0, sizeof(parse_ctx));
2723*c9945492SAndroid Build Coastguard Worker parse_ctx.mem = mem;
2724*c9945492SAndroid Build Coastguard Worker parse_ctx.stack = stack;
2725*c9945492SAndroid Build Coastguard Worker parse_ctx.start = regex;
2726*c9945492SAndroid Build Coastguard Worker parse_ctx.cflags = cflags;
2727*c9945492SAndroid Build Coastguard Worker parse_ctx.max_backref = -1;
2728*c9945492SAndroid Build Coastguard Worker errcode = tre_parse(&parse_ctx);
2729*c9945492SAndroid Build Coastguard Worker if (errcode != REG_OK)
2730*c9945492SAndroid Build Coastguard Worker ERROR_EXIT(errcode);
2731*c9945492SAndroid Build Coastguard Worker preg->re_nsub = parse_ctx.submatch_id - 1;
2732*c9945492SAndroid Build Coastguard Worker tree = parse_ctx.n;
2733*c9945492SAndroid Build Coastguard Worker
2734*c9945492SAndroid Build Coastguard Worker #ifdef TRE_DEBUG
2735*c9945492SAndroid Build Coastguard Worker tre_ast_print(tree);
2736*c9945492SAndroid Build Coastguard Worker #endif /* TRE_DEBUG */
2737*c9945492SAndroid Build Coastguard Worker
2738*c9945492SAndroid Build Coastguard Worker /* Referring to nonexistent subexpressions is illegal. */
2739*c9945492SAndroid Build Coastguard Worker if (parse_ctx.max_backref > (int)preg->re_nsub)
2740*c9945492SAndroid Build Coastguard Worker ERROR_EXIT(REG_ESUBREG);
2741*c9945492SAndroid Build Coastguard Worker
2742*c9945492SAndroid Build Coastguard Worker /* Allocate the TNFA struct. */
2743*c9945492SAndroid Build Coastguard Worker tnfa = xcalloc(1, sizeof(tre_tnfa_t));
2744*c9945492SAndroid Build Coastguard Worker if (tnfa == NULL)
2745*c9945492SAndroid Build Coastguard Worker ERROR_EXIT(REG_ESPACE);
2746*c9945492SAndroid Build Coastguard Worker tnfa->have_backrefs = parse_ctx.max_backref >= 0;
2747*c9945492SAndroid Build Coastguard Worker tnfa->have_approx = 0;
2748*c9945492SAndroid Build Coastguard Worker tnfa->num_submatches = parse_ctx.submatch_id;
2749*c9945492SAndroid Build Coastguard Worker
2750*c9945492SAndroid Build Coastguard Worker /* Set up tags for submatch addressing. If REG_NOSUB is set and the
2751*c9945492SAndroid Build Coastguard Worker regexp does not have back references, this can be skipped. */
2752*c9945492SAndroid Build Coastguard Worker if (tnfa->have_backrefs || !(cflags & REG_NOSUB))
2753*c9945492SAndroid Build Coastguard Worker {
2754*c9945492SAndroid Build Coastguard Worker
2755*c9945492SAndroid Build Coastguard Worker /* Figure out how many tags we will need. */
2756*c9945492SAndroid Build Coastguard Worker errcode = tre_add_tags(NULL, stack, tree, tnfa);
2757*c9945492SAndroid Build Coastguard Worker if (errcode != REG_OK)
2758*c9945492SAndroid Build Coastguard Worker ERROR_EXIT(errcode);
2759*c9945492SAndroid Build Coastguard Worker
2760*c9945492SAndroid Build Coastguard Worker if (tnfa->num_tags > 0)
2761*c9945492SAndroid Build Coastguard Worker {
2762*c9945492SAndroid Build Coastguard Worker tag_directions = xmalloc(sizeof(*tag_directions)
2763*c9945492SAndroid Build Coastguard Worker * (tnfa->num_tags + 1));
2764*c9945492SAndroid Build Coastguard Worker if (tag_directions == NULL)
2765*c9945492SAndroid Build Coastguard Worker ERROR_EXIT(REG_ESPACE);
2766*c9945492SAndroid Build Coastguard Worker tnfa->tag_directions = tag_directions;
2767*c9945492SAndroid Build Coastguard Worker memset(tag_directions, -1,
2768*c9945492SAndroid Build Coastguard Worker sizeof(*tag_directions) * (tnfa->num_tags + 1));
2769*c9945492SAndroid Build Coastguard Worker }
2770*c9945492SAndroid Build Coastguard Worker tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 1,
2771*c9945492SAndroid Build Coastguard Worker sizeof(*tnfa->minimal_tags));
2772*c9945492SAndroid Build Coastguard Worker if (tnfa->minimal_tags == NULL)
2773*c9945492SAndroid Build Coastguard Worker ERROR_EXIT(REG_ESPACE);
2774*c9945492SAndroid Build Coastguard Worker
2775*c9945492SAndroid Build Coastguard Worker submatch_data = xcalloc((unsigned)parse_ctx.submatch_id,
2776*c9945492SAndroid Build Coastguard Worker sizeof(*submatch_data));
2777*c9945492SAndroid Build Coastguard Worker if (submatch_data == NULL)
2778*c9945492SAndroid Build Coastguard Worker ERROR_EXIT(REG_ESPACE);
2779*c9945492SAndroid Build Coastguard Worker tnfa->submatch_data = submatch_data;
2780*c9945492SAndroid Build Coastguard Worker
2781*c9945492SAndroid Build Coastguard Worker errcode = tre_add_tags(mem, stack, tree, tnfa);
2782*c9945492SAndroid Build Coastguard Worker if (errcode != REG_OK)
2783*c9945492SAndroid Build Coastguard Worker ERROR_EXIT(errcode);
2784*c9945492SAndroid Build Coastguard Worker
2785*c9945492SAndroid Build Coastguard Worker }
2786*c9945492SAndroid Build Coastguard Worker
2787*c9945492SAndroid Build Coastguard Worker /* Expand iteration nodes. */
2788*c9945492SAndroid Build Coastguard Worker errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
2789*c9945492SAndroid Build Coastguard Worker tag_directions);
2790*c9945492SAndroid Build Coastguard Worker if (errcode != REG_OK)
2791*c9945492SAndroid Build Coastguard Worker ERROR_EXIT(errcode);
2792*c9945492SAndroid Build Coastguard Worker
2793*c9945492SAndroid Build Coastguard Worker /* Add a dummy node for the final state.
2794*c9945492SAndroid Build Coastguard Worker XXX - For certain patterns this dummy node can be optimized away,
2795*c9945492SAndroid Build Coastguard Worker for example "a*" or "ab*". Figure out a simple way to detect
2796*c9945492SAndroid Build Coastguard Worker this possibility. */
2797*c9945492SAndroid Build Coastguard Worker tmp_ast_l = tree;
2798*c9945492SAndroid Build Coastguard Worker tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
2799*c9945492SAndroid Build Coastguard Worker if (tmp_ast_r == NULL)
2800*c9945492SAndroid Build Coastguard Worker ERROR_EXIT(REG_ESPACE);
2801*c9945492SAndroid Build Coastguard Worker
2802*c9945492SAndroid Build Coastguard Worker tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
2803*c9945492SAndroid Build Coastguard Worker if (tree == NULL)
2804*c9945492SAndroid Build Coastguard Worker ERROR_EXIT(REG_ESPACE);
2805*c9945492SAndroid Build Coastguard Worker
2806*c9945492SAndroid Build Coastguard Worker errcode = tre_compute_nfl(mem, stack, tree);
2807*c9945492SAndroid Build Coastguard Worker if (errcode != REG_OK)
2808*c9945492SAndroid Build Coastguard Worker ERROR_EXIT(errcode);
2809*c9945492SAndroid Build Coastguard Worker
2810*c9945492SAndroid Build Coastguard Worker counts = xmalloc(sizeof(int) * parse_ctx.position);
2811*c9945492SAndroid Build Coastguard Worker if (counts == NULL)
2812*c9945492SAndroid Build Coastguard Worker ERROR_EXIT(REG_ESPACE);
2813*c9945492SAndroid Build Coastguard Worker
2814*c9945492SAndroid Build Coastguard Worker offs = xmalloc(sizeof(int) * parse_ctx.position);
2815*c9945492SAndroid Build Coastguard Worker if (offs == NULL)
2816*c9945492SAndroid Build Coastguard Worker ERROR_EXIT(REG_ESPACE);
2817*c9945492SAndroid Build Coastguard Worker
2818*c9945492SAndroid Build Coastguard Worker for (i = 0; i < parse_ctx.position; i++)
2819*c9945492SAndroid Build Coastguard Worker counts[i] = 0;
2820*c9945492SAndroid Build Coastguard Worker tre_ast_to_tnfa(tree, NULL, counts, NULL);
2821*c9945492SAndroid Build Coastguard Worker
2822*c9945492SAndroid Build Coastguard Worker add = 0;
2823*c9945492SAndroid Build Coastguard Worker for (i = 0; i < parse_ctx.position; i++)
2824*c9945492SAndroid Build Coastguard Worker {
2825*c9945492SAndroid Build Coastguard Worker offs[i] = add;
2826*c9945492SAndroid Build Coastguard Worker add += counts[i] + 1;
2827*c9945492SAndroid Build Coastguard Worker counts[i] = 0;
2828*c9945492SAndroid Build Coastguard Worker }
2829*c9945492SAndroid Build Coastguard Worker transitions = xcalloc((unsigned)add + 1, sizeof(*transitions));
2830*c9945492SAndroid Build Coastguard Worker if (transitions == NULL)
2831*c9945492SAndroid Build Coastguard Worker ERROR_EXIT(REG_ESPACE);
2832*c9945492SAndroid Build Coastguard Worker tnfa->transitions = transitions;
2833*c9945492SAndroid Build Coastguard Worker tnfa->num_transitions = add;
2834*c9945492SAndroid Build Coastguard Worker
2835*c9945492SAndroid Build Coastguard Worker errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
2836*c9945492SAndroid Build Coastguard Worker if (errcode != REG_OK)
2837*c9945492SAndroid Build Coastguard Worker ERROR_EXIT(errcode);
2838*c9945492SAndroid Build Coastguard Worker
2839*c9945492SAndroid Build Coastguard Worker tnfa->firstpos_chars = NULL;
2840*c9945492SAndroid Build Coastguard Worker
2841*c9945492SAndroid Build Coastguard Worker p = tree->firstpos;
2842*c9945492SAndroid Build Coastguard Worker i = 0;
2843*c9945492SAndroid Build Coastguard Worker while (p->position >= 0)
2844*c9945492SAndroid Build Coastguard Worker {
2845*c9945492SAndroid Build Coastguard Worker i++;
2846*c9945492SAndroid Build Coastguard Worker p++;
2847*c9945492SAndroid Build Coastguard Worker }
2848*c9945492SAndroid Build Coastguard Worker
2849*c9945492SAndroid Build Coastguard Worker initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
2850*c9945492SAndroid Build Coastguard Worker if (initial == NULL)
2851*c9945492SAndroid Build Coastguard Worker ERROR_EXIT(REG_ESPACE);
2852*c9945492SAndroid Build Coastguard Worker tnfa->initial = initial;
2853*c9945492SAndroid Build Coastguard Worker
2854*c9945492SAndroid Build Coastguard Worker i = 0;
2855*c9945492SAndroid Build Coastguard Worker for (p = tree->firstpos; p->position >= 0; p++)
2856*c9945492SAndroid Build Coastguard Worker {
2857*c9945492SAndroid Build Coastguard Worker initial[i].state = transitions + offs[p->position];
2858*c9945492SAndroid Build Coastguard Worker initial[i].state_id = p->position;
2859*c9945492SAndroid Build Coastguard Worker initial[i].tags = NULL;
2860*c9945492SAndroid Build Coastguard Worker /* Copy the arrays p->tags, and p->params, they are allocated
2861*c9945492SAndroid Build Coastguard Worker from a tre_mem object. */
2862*c9945492SAndroid Build Coastguard Worker if (p->tags)
2863*c9945492SAndroid Build Coastguard Worker {
2864*c9945492SAndroid Build Coastguard Worker int j;
2865*c9945492SAndroid Build Coastguard Worker for (j = 0; p->tags[j] >= 0; j++);
2866*c9945492SAndroid Build Coastguard Worker initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1));
2867*c9945492SAndroid Build Coastguard Worker if (!initial[i].tags)
2868*c9945492SAndroid Build Coastguard Worker ERROR_EXIT(REG_ESPACE);
2869*c9945492SAndroid Build Coastguard Worker memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
2870*c9945492SAndroid Build Coastguard Worker }
2871*c9945492SAndroid Build Coastguard Worker initial[i].assertions = p->assertions;
2872*c9945492SAndroid Build Coastguard Worker i++;
2873*c9945492SAndroid Build Coastguard Worker }
2874*c9945492SAndroid Build Coastguard Worker initial[i].state = NULL;
2875*c9945492SAndroid Build Coastguard Worker
2876*c9945492SAndroid Build Coastguard Worker tnfa->num_transitions = add;
2877*c9945492SAndroid Build Coastguard Worker tnfa->final = transitions + offs[tree->lastpos[0].position];
2878*c9945492SAndroid Build Coastguard Worker tnfa->num_states = parse_ctx.position;
2879*c9945492SAndroid Build Coastguard Worker tnfa->cflags = cflags;
2880*c9945492SAndroid Build Coastguard Worker
2881*c9945492SAndroid Build Coastguard Worker tre_mem_destroy(mem);
2882*c9945492SAndroid Build Coastguard Worker tre_stack_destroy(stack);
2883*c9945492SAndroid Build Coastguard Worker xfree(counts);
2884*c9945492SAndroid Build Coastguard Worker xfree(offs);
2885*c9945492SAndroid Build Coastguard Worker
2886*c9945492SAndroid Build Coastguard Worker preg->TRE_REGEX_T_FIELD = (void *)tnfa;
2887*c9945492SAndroid Build Coastguard Worker return REG_OK;
2888*c9945492SAndroid Build Coastguard Worker
2889*c9945492SAndroid Build Coastguard Worker error_exit:
2890*c9945492SAndroid Build Coastguard Worker /* Free everything that was allocated and return the error code. */
2891*c9945492SAndroid Build Coastguard Worker tre_mem_destroy(mem);
2892*c9945492SAndroid Build Coastguard Worker if (stack != NULL)
2893*c9945492SAndroid Build Coastguard Worker tre_stack_destroy(stack);
2894*c9945492SAndroid Build Coastguard Worker if (counts != NULL)
2895*c9945492SAndroid Build Coastguard Worker xfree(counts);
2896*c9945492SAndroid Build Coastguard Worker if (offs != NULL)
2897*c9945492SAndroid Build Coastguard Worker xfree(offs);
2898*c9945492SAndroid Build Coastguard Worker preg->TRE_REGEX_T_FIELD = (void *)tnfa;
2899*c9945492SAndroid Build Coastguard Worker regfree(preg);
2900*c9945492SAndroid Build Coastguard Worker return errcode;
2901*c9945492SAndroid Build Coastguard Worker }
2902*c9945492SAndroid Build Coastguard Worker
2903*c9945492SAndroid Build Coastguard Worker
2904*c9945492SAndroid Build Coastguard Worker
2905*c9945492SAndroid Build Coastguard Worker
2906*c9945492SAndroid Build Coastguard Worker void
regfree(regex_t * preg)2907*c9945492SAndroid Build Coastguard Worker regfree(regex_t *preg)
2908*c9945492SAndroid Build Coastguard Worker {
2909*c9945492SAndroid Build Coastguard Worker tre_tnfa_t *tnfa;
2910*c9945492SAndroid Build Coastguard Worker unsigned int i;
2911*c9945492SAndroid Build Coastguard Worker tre_tnfa_transition_t *trans;
2912*c9945492SAndroid Build Coastguard Worker
2913*c9945492SAndroid Build Coastguard Worker tnfa = (void *)preg->TRE_REGEX_T_FIELD;
2914*c9945492SAndroid Build Coastguard Worker if (!tnfa)
2915*c9945492SAndroid Build Coastguard Worker return;
2916*c9945492SAndroid Build Coastguard Worker
2917*c9945492SAndroid Build Coastguard Worker for (i = 0; i < tnfa->num_transitions; i++)
2918*c9945492SAndroid Build Coastguard Worker if (tnfa->transitions[i].state)
2919*c9945492SAndroid Build Coastguard Worker {
2920*c9945492SAndroid Build Coastguard Worker if (tnfa->transitions[i].tags)
2921*c9945492SAndroid Build Coastguard Worker xfree(tnfa->transitions[i].tags);
2922*c9945492SAndroid Build Coastguard Worker if (tnfa->transitions[i].neg_classes)
2923*c9945492SAndroid Build Coastguard Worker xfree(tnfa->transitions[i].neg_classes);
2924*c9945492SAndroid Build Coastguard Worker }
2925*c9945492SAndroid Build Coastguard Worker if (tnfa->transitions)
2926*c9945492SAndroid Build Coastguard Worker xfree(tnfa->transitions);
2927*c9945492SAndroid Build Coastguard Worker
2928*c9945492SAndroid Build Coastguard Worker if (tnfa->initial)
2929*c9945492SAndroid Build Coastguard Worker {
2930*c9945492SAndroid Build Coastguard Worker for (trans = tnfa->initial; trans->state; trans++)
2931*c9945492SAndroid Build Coastguard Worker {
2932*c9945492SAndroid Build Coastguard Worker if (trans->tags)
2933*c9945492SAndroid Build Coastguard Worker xfree(trans->tags);
2934*c9945492SAndroid Build Coastguard Worker }
2935*c9945492SAndroid Build Coastguard Worker xfree(tnfa->initial);
2936*c9945492SAndroid Build Coastguard Worker }
2937*c9945492SAndroid Build Coastguard Worker
2938*c9945492SAndroid Build Coastguard Worker if (tnfa->submatch_data)
2939*c9945492SAndroid Build Coastguard Worker {
2940*c9945492SAndroid Build Coastguard Worker for (i = 0; i < tnfa->num_submatches; i++)
2941*c9945492SAndroid Build Coastguard Worker if (tnfa->submatch_data[i].parents)
2942*c9945492SAndroid Build Coastguard Worker xfree(tnfa->submatch_data[i].parents);
2943*c9945492SAndroid Build Coastguard Worker xfree(tnfa->submatch_data);
2944*c9945492SAndroid Build Coastguard Worker }
2945*c9945492SAndroid Build Coastguard Worker
2946*c9945492SAndroid Build Coastguard Worker if (tnfa->tag_directions)
2947*c9945492SAndroid Build Coastguard Worker xfree(tnfa->tag_directions);
2948*c9945492SAndroid Build Coastguard Worker if (tnfa->firstpos_chars)
2949*c9945492SAndroid Build Coastguard Worker xfree(tnfa->firstpos_chars);
2950*c9945492SAndroid Build Coastguard Worker if (tnfa->minimal_tags)
2951*c9945492SAndroid Build Coastguard Worker xfree(tnfa->minimal_tags);
2952*c9945492SAndroid Build Coastguard Worker xfree(tnfa);
2953*c9945492SAndroid Build Coastguard Worker }
2954