xref: /aosp_15_r20/external/pcre/src/pcre2_string_utils.c (revision 22dc650d8ae982c6770746019a6f94af92b0f024)
1*22dc650dSSadaf Ebrahimi /*************************************************
2*22dc650dSSadaf Ebrahimi *      Perl-Compatible Regular Expressions       *
3*22dc650dSSadaf Ebrahimi *************************************************/
4*22dc650dSSadaf Ebrahimi 
5*22dc650dSSadaf Ebrahimi /* PCRE is a library of functions to support regular expressions whose syntax
6*22dc650dSSadaf Ebrahimi and semantics are as close as possible to those of the Perl 5 language.
7*22dc650dSSadaf Ebrahimi 
8*22dc650dSSadaf Ebrahimi                        Written by Philip Hazel
9*22dc650dSSadaf Ebrahimi      Original API code Copyright (c) 1997-2012 University of Cambridge
10*22dc650dSSadaf Ebrahimi           New API code Copyright (c) 2018-2021 University of Cambridge
11*22dc650dSSadaf Ebrahimi 
12*22dc650dSSadaf Ebrahimi -----------------------------------------------------------------------------
13*22dc650dSSadaf Ebrahimi Redistribution and use in source and binary forms, with or without
14*22dc650dSSadaf Ebrahimi modification, are permitted provided that the following conditions are met:
15*22dc650dSSadaf Ebrahimi 
16*22dc650dSSadaf Ebrahimi     * Redistributions of source code must retain the above copyright notice,
17*22dc650dSSadaf Ebrahimi       this list of conditions and the following disclaimer.
18*22dc650dSSadaf Ebrahimi 
19*22dc650dSSadaf Ebrahimi     * Redistributions in binary form must reproduce the above copyright
20*22dc650dSSadaf Ebrahimi       notice, this list of conditions and the following disclaimer in the
21*22dc650dSSadaf Ebrahimi       documentation and/or other materials provided with the distribution.
22*22dc650dSSadaf Ebrahimi 
23*22dc650dSSadaf Ebrahimi     * Neither the name of the University of Cambridge nor the names of its
24*22dc650dSSadaf Ebrahimi       contributors may be used to endorse or promote products derived from
25*22dc650dSSadaf Ebrahimi       this software without specific prior written permission.
26*22dc650dSSadaf Ebrahimi 
27*22dc650dSSadaf Ebrahimi THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28*22dc650dSSadaf Ebrahimi AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29*22dc650dSSadaf Ebrahimi IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30*22dc650dSSadaf Ebrahimi ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31*22dc650dSSadaf Ebrahimi LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32*22dc650dSSadaf Ebrahimi CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33*22dc650dSSadaf Ebrahimi SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34*22dc650dSSadaf Ebrahimi INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35*22dc650dSSadaf Ebrahimi CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36*22dc650dSSadaf Ebrahimi ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37*22dc650dSSadaf Ebrahimi POSSIBILITY OF SUCH DAMAGE.
38*22dc650dSSadaf Ebrahimi -----------------------------------------------------------------------------
39*22dc650dSSadaf Ebrahimi */
40*22dc650dSSadaf Ebrahimi 
41*22dc650dSSadaf Ebrahimi /* This module contains internal functions for comparing and finding the length
42*22dc650dSSadaf Ebrahimi of strings. These are used instead of strcmp() etc because the standard
43*22dc650dSSadaf Ebrahimi functions work only on 8-bit data. */
44*22dc650dSSadaf Ebrahimi 
45*22dc650dSSadaf Ebrahimi 
46*22dc650dSSadaf Ebrahimi #ifdef HAVE_CONFIG_H
47*22dc650dSSadaf Ebrahimi #include "config.h"
48*22dc650dSSadaf Ebrahimi #endif
49*22dc650dSSadaf Ebrahimi 
50*22dc650dSSadaf Ebrahimi #include "pcre2_internal.h"
51*22dc650dSSadaf Ebrahimi 
52*22dc650dSSadaf Ebrahimi 
53*22dc650dSSadaf Ebrahimi /*************************************************
54*22dc650dSSadaf Ebrahimi *    Emulated memmove() for systems without it   *
55*22dc650dSSadaf Ebrahimi *************************************************/
56*22dc650dSSadaf Ebrahimi 
/* This function can make use of bcopy() if it is available. Otherwise do it by
steam, as there are some non-Unix environments that lack both memmove() and
bcopy(). */
60*22dc650dSSadaf Ebrahimi 
61*22dc650dSSadaf Ebrahimi #if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
62*22dc650dSSadaf Ebrahimi void *
PRIV(memmove)63*22dc650dSSadaf Ebrahimi PRIV(memmove)(void *d, const void *s, size_t n)
64*22dc650dSSadaf Ebrahimi {
65*22dc650dSSadaf Ebrahimi #ifdef HAVE_BCOPY
66*22dc650dSSadaf Ebrahimi bcopy(s, d, n);
67*22dc650dSSadaf Ebrahimi return d;
68*22dc650dSSadaf Ebrahimi #else
69*22dc650dSSadaf Ebrahimi size_t i;
70*22dc650dSSadaf Ebrahimi unsigned char *dest = (unsigned char *)d;
71*22dc650dSSadaf Ebrahimi const unsigned char *src = (const unsigned char *)s;
72*22dc650dSSadaf Ebrahimi if (dest > src)
73*22dc650dSSadaf Ebrahimi   {
74*22dc650dSSadaf Ebrahimi   dest += n;
75*22dc650dSSadaf Ebrahimi   src += n;
76*22dc650dSSadaf Ebrahimi   for (i = 0; i < n; ++i) *(--dest) = *(--src);
77*22dc650dSSadaf Ebrahimi   return (void *)dest;
78*22dc650dSSadaf Ebrahimi   }
79*22dc650dSSadaf Ebrahimi else
80*22dc650dSSadaf Ebrahimi   {
81*22dc650dSSadaf Ebrahimi   for (i = 0; i < n; ++i) *dest++ = *src++;
82*22dc650dSSadaf Ebrahimi   return (void *)(dest - n);
83*22dc650dSSadaf Ebrahimi   }
84*22dc650dSSadaf Ebrahimi #endif   /* not HAVE_BCOPY */
85*22dc650dSSadaf Ebrahimi }
86*22dc650dSSadaf Ebrahimi #endif   /* not VPCOMPAT && not HAVE_MEMMOVE */
87*22dc650dSSadaf Ebrahimi 
88*22dc650dSSadaf Ebrahimi 
89*22dc650dSSadaf Ebrahimi /*************************************************
90*22dc650dSSadaf Ebrahimi *    Compare two zero-terminated PCRE2 strings   *
91*22dc650dSSadaf Ebrahimi *************************************************/
92*22dc650dSSadaf Ebrahimi 
93*22dc650dSSadaf Ebrahimi /*
94*22dc650dSSadaf Ebrahimi Arguments:
95*22dc650dSSadaf Ebrahimi   str1        first string
96*22dc650dSSadaf Ebrahimi   str2        second string
97*22dc650dSSadaf Ebrahimi 
98*22dc650dSSadaf Ebrahimi Returns:      0, 1, or -1
99*22dc650dSSadaf Ebrahimi */
100*22dc650dSSadaf Ebrahimi 
101*22dc650dSSadaf Ebrahimi int
PRIV(strcmp)102*22dc650dSSadaf Ebrahimi PRIV(strcmp)(PCRE2_SPTR str1, PCRE2_SPTR str2)
103*22dc650dSSadaf Ebrahimi {
104*22dc650dSSadaf Ebrahimi PCRE2_UCHAR c1, c2;
105*22dc650dSSadaf Ebrahimi while (*str1 != '\0' || *str2 != '\0')
106*22dc650dSSadaf Ebrahimi   {
107*22dc650dSSadaf Ebrahimi   c1 = *str1++;
108*22dc650dSSadaf Ebrahimi   c2 = *str2++;
109*22dc650dSSadaf Ebrahimi   if (c1 != c2) return ((c1 > c2) << 1) - 1;
110*22dc650dSSadaf Ebrahimi   }
111*22dc650dSSadaf Ebrahimi return 0;
112*22dc650dSSadaf Ebrahimi }
113*22dc650dSSadaf Ebrahimi 
114*22dc650dSSadaf Ebrahimi 
115*22dc650dSSadaf Ebrahimi /*************************************************
116*22dc650dSSadaf Ebrahimi *  Compare zero-terminated PCRE2 & 8-bit strings *
117*22dc650dSSadaf Ebrahimi *************************************************/
118*22dc650dSSadaf Ebrahimi 
119*22dc650dSSadaf Ebrahimi /* As the 8-bit string is almost always a literal, its type is specified as
120*22dc650dSSadaf Ebrahimi const char *.
121*22dc650dSSadaf Ebrahimi 
122*22dc650dSSadaf Ebrahimi Arguments:
123*22dc650dSSadaf Ebrahimi   str1        first string
124*22dc650dSSadaf Ebrahimi   str2        second string
125*22dc650dSSadaf Ebrahimi 
126*22dc650dSSadaf Ebrahimi Returns:      0, 1, or -1
127*22dc650dSSadaf Ebrahimi */
128*22dc650dSSadaf Ebrahimi 
129*22dc650dSSadaf Ebrahimi int
PRIV(strcmp_c8)130*22dc650dSSadaf Ebrahimi PRIV(strcmp_c8)(PCRE2_SPTR str1, const char *str2)
131*22dc650dSSadaf Ebrahimi {
132*22dc650dSSadaf Ebrahimi PCRE2_UCHAR c1, c2;
133*22dc650dSSadaf Ebrahimi while (*str1 != '\0' || *str2 != '\0')
134*22dc650dSSadaf Ebrahimi   {
135*22dc650dSSadaf Ebrahimi   c1 = *str1++;
136*22dc650dSSadaf Ebrahimi   c2 = *str2++;
137*22dc650dSSadaf Ebrahimi   if (c1 != c2) return ((c1 > c2) << 1) - 1;
138*22dc650dSSadaf Ebrahimi   }
139*22dc650dSSadaf Ebrahimi return 0;
140*22dc650dSSadaf Ebrahimi }
141*22dc650dSSadaf Ebrahimi 
142*22dc650dSSadaf Ebrahimi 
143*22dc650dSSadaf Ebrahimi /*************************************************
144*22dc650dSSadaf Ebrahimi *    Compare two PCRE2 strings, given a length   *
145*22dc650dSSadaf Ebrahimi *************************************************/
146*22dc650dSSadaf Ebrahimi 
147*22dc650dSSadaf Ebrahimi /*
148*22dc650dSSadaf Ebrahimi Arguments:
149*22dc650dSSadaf Ebrahimi   str1        first string
150*22dc650dSSadaf Ebrahimi   str2        second string
151*22dc650dSSadaf Ebrahimi   len         the length
152*22dc650dSSadaf Ebrahimi 
153*22dc650dSSadaf Ebrahimi Returns:      0, 1, or -1
154*22dc650dSSadaf Ebrahimi */
155*22dc650dSSadaf Ebrahimi 
156*22dc650dSSadaf Ebrahimi int
PRIV(strncmp)157*22dc650dSSadaf Ebrahimi PRIV(strncmp)(PCRE2_SPTR str1, PCRE2_SPTR str2, size_t len)
158*22dc650dSSadaf Ebrahimi {
159*22dc650dSSadaf Ebrahimi PCRE2_UCHAR c1, c2;
160*22dc650dSSadaf Ebrahimi for (; len > 0; len--)
161*22dc650dSSadaf Ebrahimi   {
162*22dc650dSSadaf Ebrahimi   c1 = *str1++;
163*22dc650dSSadaf Ebrahimi   c2 = *str2++;
164*22dc650dSSadaf Ebrahimi   if (c1 != c2) return ((c1 > c2) << 1) - 1;
165*22dc650dSSadaf Ebrahimi   }
166*22dc650dSSadaf Ebrahimi return 0;
167*22dc650dSSadaf Ebrahimi }
168*22dc650dSSadaf Ebrahimi 
169*22dc650dSSadaf Ebrahimi 
170*22dc650dSSadaf Ebrahimi /*************************************************
171*22dc650dSSadaf Ebrahimi * Compare PCRE2 string to 8-bit string by length *
172*22dc650dSSadaf Ebrahimi *************************************************/
173*22dc650dSSadaf Ebrahimi 
174*22dc650dSSadaf Ebrahimi /* As the 8-bit string is almost always a literal, its type is specified as
175*22dc650dSSadaf Ebrahimi const char *.
176*22dc650dSSadaf Ebrahimi 
177*22dc650dSSadaf Ebrahimi Arguments:
178*22dc650dSSadaf Ebrahimi   str1        first string
179*22dc650dSSadaf Ebrahimi   str2        second string
180*22dc650dSSadaf Ebrahimi   len         the length
181*22dc650dSSadaf Ebrahimi 
182*22dc650dSSadaf Ebrahimi Returns:      0, 1, or -1
183*22dc650dSSadaf Ebrahimi */
184*22dc650dSSadaf Ebrahimi 
185*22dc650dSSadaf Ebrahimi int
PRIV(strncmp_c8)186*22dc650dSSadaf Ebrahimi PRIV(strncmp_c8)(PCRE2_SPTR str1, const char *str2, size_t len)
187*22dc650dSSadaf Ebrahimi {
188*22dc650dSSadaf Ebrahimi PCRE2_UCHAR c1, c2;
189*22dc650dSSadaf Ebrahimi for (; len > 0; len--)
190*22dc650dSSadaf Ebrahimi   {
191*22dc650dSSadaf Ebrahimi   c1 = *str1++;
192*22dc650dSSadaf Ebrahimi   c2 = *str2++;
193*22dc650dSSadaf Ebrahimi   if (c1 != c2) return ((c1 > c2) << 1) - 1;
194*22dc650dSSadaf Ebrahimi   }
195*22dc650dSSadaf Ebrahimi return 0;
196*22dc650dSSadaf Ebrahimi }
197*22dc650dSSadaf Ebrahimi 
198*22dc650dSSadaf Ebrahimi 
199*22dc650dSSadaf Ebrahimi /*************************************************
200*22dc650dSSadaf Ebrahimi *        Find the length of a PCRE2 string       *
201*22dc650dSSadaf Ebrahimi *************************************************/
202*22dc650dSSadaf Ebrahimi 
203*22dc650dSSadaf Ebrahimi /*
204*22dc650dSSadaf Ebrahimi Argument:    the string
205*22dc650dSSadaf Ebrahimi Returns:     the length
206*22dc650dSSadaf Ebrahimi */
207*22dc650dSSadaf Ebrahimi 
208*22dc650dSSadaf Ebrahimi PCRE2_SIZE
PRIV(strlen)209*22dc650dSSadaf Ebrahimi PRIV(strlen)(PCRE2_SPTR str)
210*22dc650dSSadaf Ebrahimi {
211*22dc650dSSadaf Ebrahimi PCRE2_SIZE c = 0;
212*22dc650dSSadaf Ebrahimi while (*str++ != 0) c++;
213*22dc650dSSadaf Ebrahimi return c;
214*22dc650dSSadaf Ebrahimi }
215*22dc650dSSadaf Ebrahimi 
216*22dc650dSSadaf Ebrahimi 
217*22dc650dSSadaf Ebrahimi /*************************************************
218*22dc650dSSadaf Ebrahimi * Copy 8-bit 0-terminated string to PCRE2 string *
219*22dc650dSSadaf Ebrahimi *************************************************/
220*22dc650dSSadaf Ebrahimi 
221*22dc650dSSadaf Ebrahimi /* Arguments:
222*22dc650dSSadaf Ebrahimi   str1     buffer to receive the string
223*22dc650dSSadaf Ebrahimi   str2     8-bit string to be copied
224*22dc650dSSadaf Ebrahimi 
225*22dc650dSSadaf Ebrahimi Returns:   the number of code units used (excluding trailing zero)
226*22dc650dSSadaf Ebrahimi */
227*22dc650dSSadaf Ebrahimi 
228*22dc650dSSadaf Ebrahimi PCRE2_SIZE
PRIV(strcpy_c8)229*22dc650dSSadaf Ebrahimi PRIV(strcpy_c8)(PCRE2_UCHAR *str1, const char *str2)
230*22dc650dSSadaf Ebrahimi {
231*22dc650dSSadaf Ebrahimi PCRE2_UCHAR *t = str1;
232*22dc650dSSadaf Ebrahimi while (*str2 != 0) *t++ = *str2++;
233*22dc650dSSadaf Ebrahimi *t = 0;
234*22dc650dSSadaf Ebrahimi return t - str1;
235*22dc650dSSadaf Ebrahimi }
236*22dc650dSSadaf Ebrahimi 
237*22dc650dSSadaf Ebrahimi /* End of pcre2_string_utils.c */
238