xref: /aosp_15_r20/external/igt-gpu-tools/lib/uwildmat/uwildmat.c (revision d83cc019efdc2edc6c4b16e9034a3ceb8d35d77c)
1*d83cc019SAndroid Build Coastguard Worker /* uwildmat.c is reused from libinn - https://launchpad.net/ubuntu/+source/inn2/2.5.4-1
2*d83cc019SAndroid Build Coastguard Worker 
3*d83cc019SAndroid Build Coastguard Worker    This provides wild card matching originally used in InterNetNews and is
4*d83cc019SAndroid Build Coastguard Worker    described in https://tools.ietf.org/html/rfc3977#section-4
5*d83cc019SAndroid Build Coastguard Worker 
6*d83cc019SAndroid Build Coastguard Worker    INN licence:
7*d83cc019SAndroid Build Coastguard Worker    INN as a whole and all code contained in it not otherwise marked with
8*d83cc019SAndroid Build Coastguard Worker    different licenses and/or copyrights is covered by the following copyright
9*d83cc019SAndroid Build Coastguard Worker    and license:
10*d83cc019SAndroid Build Coastguard Worker 
11*d83cc019SAndroid Build Coastguard Worker    Copyright (c) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
12*d83cc019SAndroid Build Coastguard Worker    2013, 2014 by Internet Systems Consortium, Inc. ("ISC")
13*d83cc019SAndroid Build Coastguard Worker    Copyright (c) 1991, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
14*d83cc019SAndroid Build Coastguard Worker    2002, 2003 by The Internet Software Consortium and Rich Salz
15*d83cc019SAndroid Build Coastguard Worker 
16*d83cc019SAndroid Build Coastguard Worker    This code is derived from software contributed to the Internet Software
17*d83cc019SAndroid Build Coastguard Worker    Consortium by Rich Salz.
18*d83cc019SAndroid Build Coastguard Worker 
19*d83cc019SAndroid Build Coastguard Worker    Permission to use, copy, modify, and distribute this software for any
20*d83cc019SAndroid Build Coastguard Worker    purpose with or without fee is hereby granted, provided that the above
21*d83cc019SAndroid Build Coastguard Worker    copyright notice and this permission notice appear in all copies.
22*d83cc019SAndroid Build Coastguard Worker 
23*d83cc019SAndroid Build Coastguard Worker    THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
24*d83cc019SAndroid Build Coastguard Worker    REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
25*d83cc019SAndroid Build Coastguard Worker    MERCHANTABILITY AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY
26*d83cc019SAndroid Build Coastguard Worker    SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
27*d83cc019SAndroid Build Coastguard Worker    WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
28*d83cc019SAndroid Build Coastguard Worker    ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
29*d83cc019SAndroid Build Coastguard Worker    OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
30*d83cc019SAndroid Build Coastguard Worker 
31*d83cc019SAndroid Build Coastguard Worker */
32*d83cc019SAndroid Build Coastguard Worker 
33*d83cc019SAndroid Build Coastguard Worker /*  $Id: uwildmat.c 8918 2010-01-22 23:28:28Z iulius $
34*d83cc019SAndroid Build Coastguard Worker  **
35*d83cc019SAndroid Build Coastguard Worker  **  wildmat pattern matching with Unicode UTF-8 extensions.
36*d83cc019SAndroid Build Coastguard Worker  **
37*d83cc019SAndroid Build Coastguard Worker  **  Do shell-style pattern matching for ?, \, [], and * characters.  Might not
38*d83cc019SAndroid Build Coastguard Worker  **  be robust in face of malformed patterns; e.g., "foo[a-" could cause a
39*d83cc019SAndroid Build Coastguard Worker  **  segmentation violation.  It is 8-bit clean.  (Robustness hopefully fixed
40*d83cc019SAndroid Build Coastguard Worker  **  July 2000; all malformed patterns should now just fail to match anything.)
41*d83cc019SAndroid Build Coastguard Worker  **
42*d83cc019SAndroid Build Coastguard Worker  **  Original by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
43*d83cc019SAndroid Build Coastguard Worker  **  Rich $alz is now <[email protected]>.
44*d83cc019SAndroid Build Coastguard Worker  **
45*d83cc019SAndroid Build Coastguard Worker  **  April, 1991:  Replaced mutually-recursive calls with in-line code for the
46*d83cc019SAndroid Build Coastguard Worker  **  star character.
47*d83cc019SAndroid Build Coastguard Worker  **
48*d83cc019SAndroid Build Coastguard Worker  **  Special thanks to Lars Mathiesen <[email protected]> for the ABORT code.
49*d83cc019SAndroid Build Coastguard Worker  **  This can greatly speed up failing wildcard patterns.  For example:
50*d83cc019SAndroid Build Coastguard Worker  **
51*d83cc019SAndroid Build Coastguard Worker  **	pattern: -*-*-*-*-*-*-12-*-*-*-m-*-*-*
52*d83cc019SAndroid Build Coastguard Worker  **	text 1:	 -adobe-courier-bold-o-normal--12-120-75-75-m-70-iso8859-1
53*d83cc019SAndroid Build Coastguard Worker  **	text 2:	 -adobe-courier-bold-o-normal--12-120-75-75-X-70-iso8859-1
54*d83cc019SAndroid Build Coastguard Worker  **
55*d83cc019SAndroid Build Coastguard Worker  **  Text 1 matches with 51 calls, while text 2 fails with 54 calls.  Without
56*d83cc019SAndroid Build Coastguard Worker  **  the ABORT code, it takes 22310 calls to fail.  Ugh.  The following
57*d83cc019SAndroid Build Coastguard Worker  **  explanation is from Lars:
58*d83cc019SAndroid Build Coastguard Worker  **
59*d83cc019SAndroid Build Coastguard Worker  **  The precondition that must be fulfilled is that DoMatch will consume at
 **  least one character in text.  This is true if *p is neither '*' nor '\0'.
61*d83cc019SAndroid Build Coastguard Worker  **  The last return has ABORT instead of false to avoid quadratic behaviour in
62*d83cc019SAndroid Build Coastguard Worker  **  cases like pattern "*a*b*c*d" with text "abcxxxxx".  With false, each
63*d83cc019SAndroid Build Coastguard Worker  **  star-loop has to run to the end of the text; with ABORT only the last one
64*d83cc019SAndroid Build Coastguard Worker  **  does.
65*d83cc019SAndroid Build Coastguard Worker  **
66*d83cc019SAndroid Build Coastguard Worker  **  Once the control of one instance of DoMatch enters the star-loop, that
67*d83cc019SAndroid Build Coastguard Worker  **  instance will return either true or ABORT, and any calling instance will
68*d83cc019SAndroid Build Coastguard Worker  **  therefore return immediately after (without calling recursively again).
69*d83cc019SAndroid Build Coastguard Worker  **  In effect, only one star-loop is ever active.  It would be possible to
70*d83cc019SAndroid Build Coastguard Worker  **  modify the code to maintain this context explicitly, eliminating all
71*d83cc019SAndroid Build Coastguard Worker  **  recursive calls at the cost of some complication and loss of clarity (and
72*d83cc019SAndroid Build Coastguard Worker  **  the ABORT stuff seems to be unclear enough by itself).  I think it would
73*d83cc019SAndroid Build Coastguard Worker  **  be unwise to try to get this into a released version unless you have a
74*d83cc019SAndroid Build Coastguard Worker  **  good test data base to try it out on.
75*d83cc019SAndroid Build Coastguard Worker  **
76*d83cc019SAndroid Build Coastguard Worker  **  June, 1991:  Robert Elz <[email protected]> added minus and close bracket
77*d83cc019SAndroid Build Coastguard Worker  **  handling for character sets.
78*d83cc019SAndroid Build Coastguard Worker  **
79*d83cc019SAndroid Build Coastguard Worker  **  July, 2000:  Largely rewritten by Russ Allbery <[email protected]> to add
80*d83cc019SAndroid Build Coastguard Worker  **  support for ',', '!', and optionally '@' to the core wildmat routine.
81*d83cc019SAndroid Build Coastguard Worker  **  Broke the character class matching into a separate function for clarity
82*d83cc019SAndroid Build Coastguard Worker  **  since it's infrequently used in practice, and added some simple lookahead
83*d83cc019SAndroid Build Coastguard Worker  **  to significantly decrease the recursive calls in the '*' matching code.
84*d83cc019SAndroid Build Coastguard Worker  **  Added support for UTF-8 as the default character set for any high-bit
85*d83cc019SAndroid Build Coastguard Worker  **  characters.
86*d83cc019SAndroid Build Coastguard Worker  **
87*d83cc019SAndroid Build Coastguard Worker  **  For more information on UTF-8, see RFC 3629.
88*d83cc019SAndroid Build Coastguard Worker  **
89*d83cc019SAndroid Build Coastguard Worker  **  Please note that this file is intentionally written so that conditionally
90*d83cc019SAndroid Build Coastguard Worker  **  executed expressions are on separate lines from the condition to
91*d83cc019SAndroid Build Coastguard Worker  **  facilitate analysis of the coverage of the test suite using purecov.
92*d83cc019SAndroid Build Coastguard Worker  **  Please preserve this.  As of March 11, 2001, purecov reports that the
93*d83cc019SAndroid Build Coastguard Worker  **  accompanying test suite achieves 100% coverage of this file.
94*d83cc019SAndroid Build Coastguard Worker  */
95*d83cc019SAndroid Build Coastguard Worker 
96*d83cc019SAndroid Build Coastguard Worker #include <ctype.h>
97*d83cc019SAndroid Build Coastguard Worker #include <string.h>
98*d83cc019SAndroid Build Coastguard Worker #include <stdint.h>
99*d83cc019SAndroid Build Coastguard Worker #include "uwildmat/uwildmat.h"
100*d83cc019SAndroid Build Coastguard Worker 
101*d83cc019SAndroid Build Coastguard Worker #define ABORT -1
102*d83cc019SAndroid Build Coastguard Worker 
103*d83cc019SAndroid Build Coastguard Worker /* Whether or not an octet looks like the start of a UTF-8 character. */
104*d83cc019SAndroid Build Coastguard Worker #define ISUTF8(c)       (((c) & 0xc0) == 0xc0)
105*d83cc019SAndroid Build Coastguard Worker 
106*d83cc019SAndroid Build Coastguard Worker 
/*
 **  Return the number of octets occupied by the character beginning at start,
 **  for use when stepping a pointer over it.  end points at the last octet of
 **  the string, or is NULL when the string is nul-terminated.  Anything that
 **  does not look like a complete, well-formed multi-octet UTF-8 sequence
 **  (including a plain seven-bit octet) yields 1, so that the caller treats
 **  it as a single eight-bit character.
 */
static int
utf8_length(const unsigned char *start, const unsigned char *end)
{
	const unsigned char lead = *start;
	int length = 0;
	int i;

	/* The number of leading one bits in the first octet is the length the
	   sequence claims to have. */
	while (length < 8 && (lead & (0x80 >> length)) != 0)
		length++;

	/* Zero or one leading bits is not a lead octet; more than six is not a
	   length this code accepts. */
	if (length < 2 || length > 6)
		return 1;

	/* With an explicit end, refuse sequences that would run past it. */
	if (end != NULL && (end - start + 1) < length)
		return 1;

	/* Every following octet must look like 10xxxxxx.  A terminating nul
	   fails this test, which is what bounds the scan when end is NULL. */
	for (i = 1; i < length; i++) {
		if ((start[i] & 0xc0) != 0x80)
			return 1;
	}
	return length;
}
133*d83cc019SAndroid Build Coastguard Worker 
134*d83cc019SAndroid Build Coastguard Worker 
/*
 **  Report whether a nul-terminated string consists solely of seven-bit
 **  octets and structurally well-formed multi-octet UTF-8 sequences (a lead
 **  octet announcing two to six octets, followed by the right number of
 **  10xxxxxx continuation octets).  Only the structure is checked; the
 **  decoded values are not examined.
 */
bool
is_valid_utf8(const char *text)
{
	const unsigned char *cursor = (const unsigned char *) text;

	while (*cursor != '\0') {
		const unsigned char lead = *cursor++;
		int ones = 0;
		int remaining;

		/* Count the leading one bits of the first octet. */
		while (ones < 8 && (lead & (0x80 >> ones)) != 0)
			ones++;

		/* High bit clear: plain ASCII, nothing more to check. */
		if (ones == 0)
			continue;

		/* A lone continuation octet, or an impossible length. */
		if (ones < 2 || ones > 6)
			return false;

		/* Consume the continuation octets; stop early on anything that
		   is not 10xxxxxx (the terminating nul included). */
		remaining = ones - 1;
		while (remaining > 0 && (*cursor & 0xc0) == 0x80) {
			cursor++;
			remaining--;
		}
		if (remaining > 0)
			return false;
	}

	return true;
}
175*d83cc019SAndroid Build Coastguard Worker 
176*d83cc019SAndroid Build Coastguard Worker 
/*
 **  Decode the UTF-8 character beginning at start into a UCS-4 value stored
 **  through result, and return how many octets it occupied.  end points at
 **  the last octet of the string, or is NULL for a nul-terminated string.
 **  When utf8_length() does not recognise a multi-octet sequence, the first
 **  octet is stored as the value and 1 is returned; that is not a real
 **  decoding, but it lets the rest of the matcher cope with non-UTF-8 input.
 */
static int
utf8_decode(const unsigned char *start, const unsigned char *end,
	    uint32_t *result)
{
	const int length = utf8_length(start, end);
	uint32_t value;
	int i;

	if (length < 2) {
		*result = *start;
		return 1;
	}

	/* The lead octet contributes its low (7 - length) bits. */
	value = start[0] & ((1u << (7 - length)) - 1);

	/* Each continuation octet contributes its low six bits. */
	for (i = 1; i < length; i++)
		value = (value << 6) | (start[i] & 0x3f);

	*result = value;
	return length;
}
211*d83cc019SAndroid Build Coastguard Worker 
212*d83cc019SAndroid Build Coastguard Worker 
/*
 **  Match a character class against text, a UCS-4 character.  start is a
 **  pointer to the first character of the character class (just after the
 **  opening bracket), end a pointer to the last (just before the closing
 **  bracket).  Returns whether the class matches that character.
 **
 **  A member of the class matches if it equals either text itself or the
 **  tolower() form of text.  Case folding is only attempted for values up to
 **  0xff; larger code points are compared as they are.
 */
static bool
match_class(uint32_t text, const unsigned char *start,
	    const unsigned char *end)
{
	bool reversed, allowrange;
	const unsigned char *p = start;
	uint32_t first = 0;
	uint32_t last;
	uint32_t lc = text;

	/* tolower() is only defined for arguments representable as unsigned
	   char (or EOF), so never hand it a wider code point. */
	if (text <= 0xff)
		lc = (uint32_t) tolower((int) text);

	/* Check for an inverted character class (starting with ^).  If the
	   character matches the character class, we return !reversed; that way,
	   we return true if it's a regular character class and false if it's a
	   reversed one.  If the character doesn't match, we return reversed. */
	reversed = (*p == '^');
	if (reversed)
		p++;

	/* Walk through the character class until we reach the end or find a
	   match, handling character ranges as we go.  Only permit a range to
	   start when allowrange is true; this allows - to be treated like a
	   normal character as the first character of the class and catches
	   malformed ranges like a-e-n.  We treat the character at the beginning
	   of a range as both a regular member of the class and the beginning of
	   the range; this is harmless (although it means that malformed ranges
	   like m-a will match m and nothing else). */
	allowrange = false;
	while (p <= end) {
		if (allowrange && *p == '-' && p < end) {
			/* "first-last": first was decoded on the previous pass. */
			p++;
			p += utf8_decode(p, end, &last);
			if ((text >= first && text <= last) ||
			    (lc >= first && lc <= last))
				return !reversed;
			allowrange = false;
		} else {
			p += utf8_decode(p, end, &first);
			if (text == first || lc == first)
				return !reversed;
			allowrange = true;
		}
	}
	return reversed;
}
262*d83cc019SAndroid Build Coastguard Worker 
263*d83cc019SAndroid Build Coastguard Worker 
264*d83cc019SAndroid Build Coastguard Worker /*
265*d83cc019SAndroid Build Coastguard Worker  **  Match the text against the pattern between start and end.  This is a
266*d83cc019SAndroid Build Coastguard Worker  **  single pattern; a leading ! or @ must already be taken care of, and
267*d83cc019SAndroid Build Coastguard Worker  **  commas must be dealt with outside of this routine.
268*d83cc019SAndroid Build Coastguard Worker  */
269*d83cc019SAndroid Build Coastguard Worker static int
match_pattern(const unsigned char * text,const unsigned char * start,const unsigned char * end)270*d83cc019SAndroid Build Coastguard Worker match_pattern(const unsigned char *text, const unsigned char *start,
271*d83cc019SAndroid Build Coastguard Worker 	      const unsigned char *end)
272*d83cc019SAndroid Build Coastguard Worker {
273*d83cc019SAndroid Build Coastguard Worker 	const unsigned char *q, *endclass;
274*d83cc019SAndroid Build Coastguard Worker 	const unsigned char *p = start;
275*d83cc019SAndroid Build Coastguard Worker 	bool ismeta;
276*d83cc019SAndroid Build Coastguard Worker 	int matched, width;
277*d83cc019SAndroid Build Coastguard Worker 	uint32_t c;
278*d83cc019SAndroid Build Coastguard Worker 	unsigned char lc;
279*d83cc019SAndroid Build Coastguard Worker 
280*d83cc019SAndroid Build Coastguard Worker 	for (; p <= end; p++) {
281*d83cc019SAndroid Build Coastguard Worker 		if (!*text && *p != '*')
282*d83cc019SAndroid Build Coastguard Worker 			return ABORT;
283*d83cc019SAndroid Build Coastguard Worker 
284*d83cc019SAndroid Build Coastguard Worker 		switch (*p) {
285*d83cc019SAndroid Build Coastguard Worker 		case '\\':
286*d83cc019SAndroid Build Coastguard Worker 			if (!*++p)
287*d83cc019SAndroid Build Coastguard Worker 				return ABORT;
288*d83cc019SAndroid Build Coastguard Worker 			/* Fall through. */
289*d83cc019SAndroid Build Coastguard Worker 
290*d83cc019SAndroid Build Coastguard Worker 		default:
291*d83cc019SAndroid Build Coastguard Worker 			lc = tolower(*text);
292*d83cc019SAndroid Build Coastguard Worker 			if (*text++ != *p && lc != *p)
293*d83cc019SAndroid Build Coastguard Worker 				return false;
294*d83cc019SAndroid Build Coastguard Worker 			break;
295*d83cc019SAndroid Build Coastguard Worker 
296*d83cc019SAndroid Build Coastguard Worker 		case '?':
297*d83cc019SAndroid Build Coastguard Worker 			text += ISUTF8(*text) ? utf8_length(text, NULL) : 1;
298*d83cc019SAndroid Build Coastguard Worker 			break;
299*d83cc019SAndroid Build Coastguard Worker 
300*d83cc019SAndroid Build Coastguard Worker 		case '*':
301*d83cc019SAndroid Build Coastguard Worker 			/* Consecutive stars are equivalent to one.  Advance pattern to
302*d83cc019SAndroid Build Coastguard Worker 			   the character after the star. */
303*d83cc019SAndroid Build Coastguard Worker 			for (++p; *p == '*'; p++)
304*d83cc019SAndroid Build Coastguard Worker 				;
305*d83cc019SAndroid Build Coastguard Worker 
306*d83cc019SAndroid Build Coastguard Worker 			/* A trailing star will match anything. */
307*d83cc019SAndroid Build Coastguard Worker 			if (p > end)
308*d83cc019SAndroid Build Coastguard Worker 				return true;
309*d83cc019SAndroid Build Coastguard Worker 
310*d83cc019SAndroid Build Coastguard Worker 			/* Basic algorithm: Recurse at each point where the * could
311*d83cc019SAndroid Build Coastguard Worker 			   possibly match.  If the match succeeds or aborts, return
312*d83cc019SAndroid Build Coastguard Worker 			   immediately; otherwise, try the next position.
313*d83cc019SAndroid Build Coastguard Worker 
314*d83cc019SAndroid Build Coastguard Worker Optimization: If the character after the * in the pattern
315*d83cc019SAndroid Build Coastguard Worker isn't a metacharacter (the common case), then the * has to
316*d83cc019SAndroid Build Coastguard Worker consume characters at least up to the next occurrence of that
317*d83cc019SAndroid Build Coastguard Worker character in the text.  Scan forward for those points rather
318*d83cc019SAndroid Build Coastguard Worker than recursing at every possible point to save the extra
319*d83cc019SAndroid Build Coastguard Worker function call overhead. */
320*d83cc019SAndroid Build Coastguard Worker 			ismeta = (*p == '[' || *p == '?' || *p == '\\');
321*d83cc019SAndroid Build Coastguard Worker 			while (*text) {
322*d83cc019SAndroid Build Coastguard Worker 				width = ISUTF8(*text) ? utf8_length(text, NULL) : 1;
323*d83cc019SAndroid Build Coastguard Worker 				if (ismeta) {
324*d83cc019SAndroid Build Coastguard Worker 					matched = match_pattern(text, p, end);
325*d83cc019SAndroid Build Coastguard Worker 					text += width;
326*d83cc019SAndroid Build Coastguard Worker 				} else {
327*d83cc019SAndroid Build Coastguard Worker 					while (*text && *text != *p) {
328*d83cc019SAndroid Build Coastguard Worker 						text += width;
329*d83cc019SAndroid Build Coastguard Worker 						width = ISUTF8(*text) ? utf8_length(text, NULL) : 1;
330*d83cc019SAndroid Build Coastguard Worker 					}
331*d83cc019SAndroid Build Coastguard Worker 					if (!*text)
332*d83cc019SAndroid Build Coastguard Worker 						return ABORT;
333*d83cc019SAndroid Build Coastguard Worker 					matched = match_pattern(++text, p + 1, end);
334*d83cc019SAndroid Build Coastguard Worker 				}
335*d83cc019SAndroid Build Coastguard Worker 				if (matched != false)
336*d83cc019SAndroid Build Coastguard Worker 					return matched;
337*d83cc019SAndroid Build Coastguard Worker 			}
338*d83cc019SAndroid Build Coastguard Worker 			return ABORT;
339*d83cc019SAndroid Build Coastguard Worker 
340*d83cc019SAndroid Build Coastguard Worker 		case '[':
341*d83cc019SAndroid Build Coastguard Worker 			/* Find the end of the character class, making sure not to pick
342*d83cc019SAndroid Build Coastguard Worker 			   up a close bracket at the beginning of the class. */
343*d83cc019SAndroid Build Coastguard Worker 			p++;
344*d83cc019SAndroid Build Coastguard Worker 			q = p + (*p == '^') + 1;
345*d83cc019SAndroid Build Coastguard Worker 			if (q > end)
346*d83cc019SAndroid Build Coastguard Worker 				return ABORT;
347*d83cc019SAndroid Build Coastguard Worker 			endclass = memchr(q, ']', (size_t) (end - q + 1));
348*d83cc019SAndroid Build Coastguard Worker 			if (!endclass)
349*d83cc019SAndroid Build Coastguard Worker 				return ABORT;
350*d83cc019SAndroid Build Coastguard Worker 
351*d83cc019SAndroid Build Coastguard Worker 			/* Do the heavy lifting in another function for clarity, since
352*d83cc019SAndroid Build Coastguard Worker 			   character classes are an uncommon case. */
353*d83cc019SAndroid Build Coastguard Worker 			text += utf8_decode(text, NULL, &c);
354*d83cc019SAndroid Build Coastguard Worker 			if (!match_class(c, p, endclass - 1))
355*d83cc019SAndroid Build Coastguard Worker 				return false;
356*d83cc019SAndroid Build Coastguard Worker 			p = endclass;
357*d83cc019SAndroid Build Coastguard Worker 			break;
358*d83cc019SAndroid Build Coastguard Worker 		}
359*d83cc019SAndroid Build Coastguard Worker 	}
360*d83cc019SAndroid Build Coastguard Worker 
361*d83cc019SAndroid Build Coastguard Worker 	return (*text == '\0');
362*d83cc019SAndroid Build Coastguard Worker }
363*d83cc019SAndroid Build Coastguard Worker 
364*d83cc019SAndroid Build Coastguard Worker 
365*d83cc019SAndroid Build Coastguard Worker /*
366*d83cc019SAndroid Build Coastguard Worker  **  Takes text and a wildmat expression; a wildmat expression is a
367*d83cc019SAndroid Build Coastguard Worker  **  comma-separated list of wildmat patterns, optionally preceded by ! to
368*d83cc019SAndroid Build Coastguard Worker  **  invert the sense of the expression.  Returns UWILDMAT_MATCH if that
369*d83cc019SAndroid Build Coastguard Worker  **  expression matches the text, UWILDMAT_FAIL otherwise.  If allowpoison is
370*d83cc019SAndroid Build Coastguard Worker  **  set, allow @ to introduce a poison expression (the same as !, but if it
371*d83cc019SAndroid Build Coastguard Worker  **  triggers the failed match the routine returns UWILDMAT_POISON instead).
372*d83cc019SAndroid Build Coastguard Worker  */
373*d83cc019SAndroid Build Coastguard Worker static enum uwildmat
match_expression(const unsigned char * text,const unsigned char * start,bool allowpoison)374*d83cc019SAndroid Build Coastguard Worker match_expression(const unsigned char *text, const unsigned char *start,
375*d83cc019SAndroid Build Coastguard Worker 		 bool allowpoison)
376*d83cc019SAndroid Build Coastguard Worker {
377*d83cc019SAndroid Build Coastguard Worker 	const unsigned char *end, *split;
378*d83cc019SAndroid Build Coastguard Worker 	const unsigned char *p = start;
379*d83cc019SAndroid Build Coastguard Worker 	bool reverse, escaped;
380*d83cc019SAndroid Build Coastguard Worker 	bool match = false;
381*d83cc019SAndroid Build Coastguard Worker 	bool poison = false;
382*d83cc019SAndroid Build Coastguard Worker 	bool poisoned = false;
383*d83cc019SAndroid Build Coastguard Worker 
384*d83cc019SAndroid Build Coastguard Worker 	/* Handle the empty expression separately, since otherwise end will be
385*d83cc019SAndroid Build Coastguard Worker 	   set to an invalid pointer. */
386*d83cc019SAndroid Build Coastguard Worker 	if (!*p)
387*d83cc019SAndroid Build Coastguard Worker 		return !*text ? UWILDMAT_MATCH : UWILDMAT_FAIL;
388*d83cc019SAndroid Build Coastguard Worker 	end = start + strlen((const char *) start) - 1;
389*d83cc019SAndroid Build Coastguard Worker 
390*d83cc019SAndroid Build Coastguard Worker 	/* Main match loop.  Find each comma that separates patterns, and attempt
391*d83cc019SAndroid Build Coastguard Worker 	   to match the text with each pattern in order.  The last matching
392*d83cc019SAndroid Build Coastguard Worker 	   pattern determines whether the whole expression matches. */
393*d83cc019SAndroid Build Coastguard Worker 	for (; p <= end + 1; p = split + 1) {
394*d83cc019SAndroid Build Coastguard Worker 		if (allowpoison)
395*d83cc019SAndroid Build Coastguard Worker 			poison = (*p == '@');
396*d83cc019SAndroid Build Coastguard Worker 		reverse = (*p == '!') || poison;
397*d83cc019SAndroid Build Coastguard Worker 		if (reverse)
398*d83cc019SAndroid Build Coastguard Worker 			p++;
399*d83cc019SAndroid Build Coastguard Worker 
400*d83cc019SAndroid Build Coastguard Worker 		/* Find the first unescaped comma, if any.  If there is none, split
401*d83cc019SAndroid Build Coastguard Worker 		   will be one greater than end and point at the nul at the end of
402*d83cc019SAndroid Build Coastguard Worker 		   the string. */
403*d83cc019SAndroid Build Coastguard Worker 		for (escaped = false, split = p; split <= end; split++) {
404*d83cc019SAndroid Build Coastguard Worker 			if (*split == '[') {
405*d83cc019SAndroid Build Coastguard Worker 				split++;
406*d83cc019SAndroid Build Coastguard Worker 				if (*split == ']')
407*d83cc019SAndroid Build Coastguard Worker 					split++;
408*d83cc019SAndroid Build Coastguard Worker 				while (split <= end && *split != ']')
409*d83cc019SAndroid Build Coastguard Worker 					split++;
410*d83cc019SAndroid Build Coastguard Worker 			}
411*d83cc019SAndroid Build Coastguard Worker 			if (*split == ',' && !escaped)
412*d83cc019SAndroid Build Coastguard Worker 				break;
413*d83cc019SAndroid Build Coastguard Worker 			escaped = (*split == '\\') ? !escaped : false;
414*d83cc019SAndroid Build Coastguard Worker 		}
415*d83cc019SAndroid Build Coastguard Worker 
416*d83cc019SAndroid Build Coastguard Worker 		/* Optimization: If match == !reverse and poison == poisoned, this
417*d83cc019SAndroid Build Coastguard Worker 		   pattern can't change the result, so don't do any work. */
418*d83cc019SAndroid Build Coastguard Worker 		if (match == !reverse && poison == poisoned)
419*d83cc019SAndroid Build Coastguard Worker 			continue;
420*d83cc019SAndroid Build Coastguard Worker 		if (match_pattern(text, p, split - 1) == true) {
421*d83cc019SAndroid Build Coastguard Worker 			poisoned = poison;
422*d83cc019SAndroid Build Coastguard Worker 			match = !reverse;
423*d83cc019SAndroid Build Coastguard Worker 		}
424*d83cc019SAndroid Build Coastguard Worker 	}
425*d83cc019SAndroid Build Coastguard Worker 	if (poisoned)
426*d83cc019SAndroid Build Coastguard Worker 		return UWILDMAT_POISON;
427*d83cc019SAndroid Build Coastguard Worker 	return match ? UWILDMAT_MATCH : UWILDMAT_FAIL;
428*d83cc019SAndroid Build Coastguard Worker }
429*d83cc019SAndroid Build Coastguard Worker 
430*d83cc019SAndroid Build Coastguard Worker 
431*d83cc019SAndroid Build Coastguard Worker /*
432*d83cc019SAndroid Build Coastguard Worker  **  User-level routine used for wildmats where @ should be treated as a
433*d83cc019SAndroid Build Coastguard Worker  **  regular character.
434*d83cc019SAndroid Build Coastguard Worker  */
435*d83cc019SAndroid Build Coastguard Worker bool
uwildmat(const char * text,const char * pat)436*d83cc019SAndroid Build Coastguard Worker uwildmat(const char *text, const char *pat)
437*d83cc019SAndroid Build Coastguard Worker {
438*d83cc019SAndroid Build Coastguard Worker 	const unsigned char *utext = (const unsigned char *) text;
439*d83cc019SAndroid Build Coastguard Worker 	const unsigned char *upat = (const unsigned char *) pat;
440*d83cc019SAndroid Build Coastguard Worker 
441*d83cc019SAndroid Build Coastguard Worker 	if (upat[0] == '*' && upat[1] == '\0')
442*d83cc019SAndroid Build Coastguard Worker 		return true;
443*d83cc019SAndroid Build Coastguard Worker 	else
444*d83cc019SAndroid Build Coastguard Worker 		return (match_expression(utext, upat, false) == UWILDMAT_MATCH);
445*d83cc019SAndroid Build Coastguard Worker }
446*d83cc019SAndroid Build Coastguard Worker 
447*d83cc019SAndroid Build Coastguard Worker 
448*d83cc019SAndroid Build Coastguard Worker /*
449*d83cc019SAndroid Build Coastguard Worker  **  User-level routine used for wildmats that support poison matches.
450*d83cc019SAndroid Build Coastguard Worker  */
451*d83cc019SAndroid Build Coastguard Worker enum uwildmat
uwildmat_poison(const char * text,const char * pat)452*d83cc019SAndroid Build Coastguard Worker uwildmat_poison(const char *text, const char *pat)
453*d83cc019SAndroid Build Coastguard Worker {
454*d83cc019SAndroid Build Coastguard Worker 	const unsigned char *utext = (const unsigned char *) text;
455*d83cc019SAndroid Build Coastguard Worker 	const unsigned char *upat = (const unsigned char *) pat;
456*d83cc019SAndroid Build Coastguard Worker 
457*d83cc019SAndroid Build Coastguard Worker 	if (upat[0] == '*' && upat[1] == '\0')
458*d83cc019SAndroid Build Coastguard Worker 		return UWILDMAT_MATCH;
459*d83cc019SAndroid Build Coastguard Worker 	else
460*d83cc019SAndroid Build Coastguard Worker 		return match_expression(utext, upat, true);
461*d83cc019SAndroid Build Coastguard Worker }
462*d83cc019SAndroid Build Coastguard Worker 
463*d83cc019SAndroid Build Coastguard Worker 
/*
 **  User-level routine for simple expressions (neither , nor ! are special).
 **  The whole of pat is handed to match_pattern as one single pattern.
 **
 **  Returns true if text matches pat and false otherwise.  An empty pat
 **  matches only an empty text, which is the same answer match_expression
 **  gives for an empty expression.
 */
bool
uwildmat_simple(const char *text, const char *pat)
{
	const unsigned char *utext = (const unsigned char *) text;
	const unsigned char *upat = (const unsigned char *) pat;
	size_t length;

	/* A pattern of exactly "*" is answered without running the matcher. */
	if (upat[0] == '*' && upat[1] == '\0')
		return true;

	/* Handle the empty pattern separately, since otherwise the end
	   pointer computed below would be upat - 1, an invalid pointer that
	   lies before the start of the string. */
	length = strlen(pat);
	if (length == 0)
		return *utext == '\0';

	/* The end argument points at the last character of the pattern, not
	   at the terminating nul.  NOTE(review): the explicit comparison
	   with true is kept as in the original; presumably match_pattern
	   can return something other than true or false -- confirm. */
	return (match_pattern(utext, upat, upat + length - 1) == true);
}
481