1*22dc650dSSadaf Ebrahimi /************************************************* 2*22dc650dSSadaf Ebrahimi * Perl-Compatible Regular Expressions * 3*22dc650dSSadaf Ebrahimi *************************************************/ 4*22dc650dSSadaf Ebrahimi 5*22dc650dSSadaf Ebrahimi /* PCRE2 is a library of functions to support regular expressions whose syntax 6*22dc650dSSadaf Ebrahimi and semantics are as close as possible to those of the Perl 5 language. 7*22dc650dSSadaf Ebrahimi 8*22dc650dSSadaf Ebrahimi Written by Philip Hazel 9*22dc650dSSadaf Ebrahimi Original API code Copyright (c) 1997-2012 University of Cambridge 10*22dc650dSSadaf Ebrahimi New API code Copyright (c) 2016-2023 University of Cambridge 11*22dc650dSSadaf Ebrahimi 12*22dc650dSSadaf Ebrahimi ----------------------------------------------------------------------------- 13*22dc650dSSadaf Ebrahimi Redistribution and use in source and binary forms, with or without 14*22dc650dSSadaf Ebrahimi modification, are permitted provided that the following conditions are met: 15*22dc650dSSadaf Ebrahimi 16*22dc650dSSadaf Ebrahimi * Redistributions of source code must retain the above copyright notice, 17*22dc650dSSadaf Ebrahimi this list of conditions and the following disclaimer. 18*22dc650dSSadaf Ebrahimi 19*22dc650dSSadaf Ebrahimi * Redistributions in binary form must reproduce the above copyright 20*22dc650dSSadaf Ebrahimi notice, this list of conditions and the following disclaimer in the 21*22dc650dSSadaf Ebrahimi documentation and/or other materials provided with the distribution. 22*22dc650dSSadaf Ebrahimi 23*22dc650dSSadaf Ebrahimi * Neither the name of the University of Cambridge nor the names of its 24*22dc650dSSadaf Ebrahimi contributors may be used to endorse or promote products derived from 25*22dc650dSSadaf Ebrahimi this software without specific prior written permission. 
26*22dc650dSSadaf Ebrahimi 27*22dc650dSSadaf Ebrahimi THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 28*22dc650dSSadaf Ebrahimi AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29*22dc650dSSadaf Ebrahimi IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30*22dc650dSSadaf Ebrahimi ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 31*22dc650dSSadaf Ebrahimi LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32*22dc650dSSadaf Ebrahimi CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33*22dc650dSSadaf Ebrahimi SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34*22dc650dSSadaf Ebrahimi INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35*22dc650dSSadaf Ebrahimi CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36*22dc650dSSadaf Ebrahimi ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37*22dc650dSSadaf Ebrahimi POSSIBILITY OF SUCH DAMAGE. 38*22dc650dSSadaf Ebrahimi ----------------------------------------------------------------------------- 39*22dc650dSSadaf Ebrahimi */ 40*22dc650dSSadaf Ebrahimi 41*22dc650dSSadaf Ebrahimi #ifndef PCRE2_INTERNAL_H_IDEMPOTENT_GUARD 42*22dc650dSSadaf Ebrahimi #define PCRE2_INTERNAL_H_IDEMPOTENT_GUARD 43*22dc650dSSadaf Ebrahimi 44*22dc650dSSadaf Ebrahimi /* We do not support both EBCDIC and Unicode at the same time. The "configure" 45*22dc650dSSadaf Ebrahimi script prevents both being selected, but not everybody uses "configure". EBCDIC 46*22dc650dSSadaf Ebrahimi is only supported for the 8-bit library, but the check for this has to be later 47*22dc650dSSadaf Ebrahimi in this file, because the first part is not width-dependent, and is included by 48*22dc650dSSadaf Ebrahimi pcre2test.c with CODE_UNIT_WIDTH == 0. 
*/ 49*22dc650dSSadaf Ebrahimi 50*22dc650dSSadaf Ebrahimi #if defined EBCDIC && defined SUPPORT_UNICODE 51*22dc650dSSadaf Ebrahimi #error The use of both EBCDIC and SUPPORT_UNICODE is not supported. 52*22dc650dSSadaf Ebrahimi #endif 53*22dc650dSSadaf Ebrahimi 54*22dc650dSSadaf Ebrahimi /* When compiling one of the libraries, the value of PCRE2_CODE_UNIT_WIDTH must 55*22dc650dSSadaf Ebrahimi be 8, 16, or 32. AutoTools and CMake ensure that this is always the case, but 56*22dc650dSSadaf Ebrahimi other other building methods may not, so here is a check. It is cut out when 57*22dc650dSSadaf Ebrahimi building pcre2test, bcause that sets the value to zero. No other source should 58*22dc650dSSadaf Ebrahimi be including this file. There is no explicit way of forcing a compile to be 59*22dc650dSSadaf Ebrahimi abandoned, but trying to include a non-existent file seems cleanest. Otherwise 60*22dc650dSSadaf Ebrahimi there will be many irrelevant consequential errors. */ 61*22dc650dSSadaf Ebrahimi 62*22dc650dSSadaf Ebrahimi #if (!defined PCRE2_BUILDING_PCRE2TEST && !defined PCRE2_DFTABLES) && \ 63*22dc650dSSadaf Ebrahimi (!defined PCRE2_CODE_UNIT_WIDTH || \ 64*22dc650dSSadaf Ebrahimi (PCRE2_CODE_UNIT_WIDTH != 8 && \ 65*22dc650dSSadaf Ebrahimi PCRE2_CODE_UNIT_WIDTH != 16 && \ 66*22dc650dSSadaf Ebrahimi PCRE2_CODE_UNIT_WIDTH != 32)) 67*22dc650dSSadaf Ebrahimi #error PCRE2_CODE_UNIT_WIDTH must be defined as 8, 16, or 32. 
68*22dc650dSSadaf Ebrahimi #include <AbandonCompile> 69*22dc650dSSadaf Ebrahimi #endif 70*22dc650dSSadaf Ebrahimi 71*22dc650dSSadaf Ebrahimi 72*22dc650dSSadaf Ebrahimi /* Standard C headers */ 73*22dc650dSSadaf Ebrahimi 74*22dc650dSSadaf Ebrahimi #include <ctype.h> 75*22dc650dSSadaf Ebrahimi #include <limits.h> 76*22dc650dSSadaf Ebrahimi #include <stddef.h> 77*22dc650dSSadaf Ebrahimi #include <stdio.h> 78*22dc650dSSadaf Ebrahimi #include <stdlib.h> 79*22dc650dSSadaf Ebrahimi #include <string.h> 80*22dc650dSSadaf Ebrahimi 81*22dc650dSSadaf Ebrahimi /* Macros to make boolean values more obvious. The #ifndef is to pacify 82*22dc650dSSadaf Ebrahimi compiler warnings in environments where these macros are defined elsewhere. 83*22dc650dSSadaf Ebrahimi Unfortunately, there is no way to do the same for the typedef. */ 84*22dc650dSSadaf Ebrahimi 85*22dc650dSSadaf Ebrahimi typedef int BOOL; 86*22dc650dSSadaf Ebrahimi #ifndef FALSE 87*22dc650dSSadaf Ebrahimi #define FALSE 0 88*22dc650dSSadaf Ebrahimi #define TRUE 1 89*22dc650dSSadaf Ebrahimi #endif 90*22dc650dSSadaf Ebrahimi 91*22dc650dSSadaf Ebrahimi /* Valgrind (memcheck) support */ 92*22dc650dSSadaf Ebrahimi 93*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_VALGRIND 94*22dc650dSSadaf Ebrahimi #include <valgrind/memcheck.h> 95*22dc650dSSadaf Ebrahimi #endif 96*22dc650dSSadaf Ebrahimi 97*22dc650dSSadaf Ebrahimi /* -ftrivial-auto-var-init support supports initializing all local variables 98*22dc650dSSadaf Ebrahimi to avoid some classes of bug, but this can cause an unacceptable slowdown 99*22dc650dSSadaf Ebrahimi for large on-stack arrays in hot functions. This macro lets us annotate 100*22dc650dSSadaf Ebrahimi such arrays. 
*/ 101*22dc650dSSadaf Ebrahimi 102*22dc650dSSadaf Ebrahimi #ifdef HAVE_ATTRIBUTE_UNINITIALIZED 103*22dc650dSSadaf Ebrahimi #define PCRE2_KEEP_UNINITIALIZED __attribute__((uninitialized)) 104*22dc650dSSadaf Ebrahimi #else 105*22dc650dSSadaf Ebrahimi #define PCRE2_KEEP_UNINITIALIZED 106*22dc650dSSadaf Ebrahimi #endif 107*22dc650dSSadaf Ebrahimi 108*22dc650dSSadaf Ebrahimi /* Older versions of MSVC lack snprintf(). This define allows for 109*22dc650dSSadaf Ebrahimi warning/error-free compilation and testing with MSVC compilers back to at least 110*22dc650dSSadaf Ebrahimi MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */ 111*22dc650dSSadaf Ebrahimi 112*22dc650dSSadaf Ebrahimi #if defined(_MSC_VER) && (_MSC_VER < 1900) 113*22dc650dSSadaf Ebrahimi #define snprintf _snprintf 114*22dc650dSSadaf Ebrahimi #endif 115*22dc650dSSadaf Ebrahimi 116*22dc650dSSadaf Ebrahimi /* When compiling a DLL for Windows, the exported symbols have to be declared 117*22dc650dSSadaf Ebrahimi using some MS magic. I found some useful information on this web page: 118*22dc650dSSadaf Ebrahimi http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. According to the 119*22dc650dSSadaf Ebrahimi information there, using __declspec(dllexport) without "extern" we have a 120*22dc650dSSadaf Ebrahimi definition; with "extern" we have a declaration. The settings here override the 121*22dc650dSSadaf Ebrahimi setting in pcre2.h (which is included below); it defines only PCRE2_EXP_DECL, 122*22dc650dSSadaf Ebrahimi which is all that is needed for applications (they just import the symbols). 
We 123*22dc650dSSadaf Ebrahimi use: 124*22dc650dSSadaf Ebrahimi 125*22dc650dSSadaf Ebrahimi PCRE2_EXP_DECL for declarations 126*22dc650dSSadaf Ebrahimi PCRE2_EXP_DEFN for definitions 127*22dc650dSSadaf Ebrahimi 128*22dc650dSSadaf Ebrahimi The reason for wrapping this in #ifndef PCRE2_EXP_DECL is so that pcre2test, 129*22dc650dSSadaf Ebrahimi which is an application, but needs to import this file in order to "peek" at 130*22dc650dSSadaf Ebrahimi internals, can #include pcre2.h first to get an application's-eye view. 131*22dc650dSSadaf Ebrahimi 132*22dc650dSSadaf Ebrahimi In principle, people compiling for non-Windows, non-Unix-like (i.e. uncommon, 133*22dc650dSSadaf Ebrahimi special-purpose environments) might want to stick other stuff in front of 134*22dc650dSSadaf Ebrahimi exported symbols. That's why, in the non-Windows case, we set PCRE2_EXP_DEFN 135*22dc650dSSadaf Ebrahimi only if it is not already set. */ 136*22dc650dSSadaf Ebrahimi 137*22dc650dSSadaf Ebrahimi #ifndef PCRE2_EXP_DECL 138*22dc650dSSadaf Ebrahimi # ifdef _WIN32 139*22dc650dSSadaf Ebrahimi # ifndef PCRE2_STATIC 140*22dc650dSSadaf Ebrahimi # define PCRE2_EXP_DECL extern __declspec(dllexport) 141*22dc650dSSadaf Ebrahimi # define PCRE2_EXP_DEFN __declspec(dllexport) 142*22dc650dSSadaf Ebrahimi # else 143*22dc650dSSadaf Ebrahimi # define PCRE2_EXP_DECL extern PCRE2_EXPORT 144*22dc650dSSadaf Ebrahimi # define PCRE2_EXP_DEFN 145*22dc650dSSadaf Ebrahimi # endif 146*22dc650dSSadaf Ebrahimi # else 147*22dc650dSSadaf Ebrahimi # ifdef __cplusplus 148*22dc650dSSadaf Ebrahimi # define PCRE2_EXP_DECL extern "C" PCRE2_EXPORT 149*22dc650dSSadaf Ebrahimi # else 150*22dc650dSSadaf Ebrahimi # define PCRE2_EXP_DECL extern PCRE2_EXPORT 151*22dc650dSSadaf Ebrahimi # endif 152*22dc650dSSadaf Ebrahimi # ifndef PCRE2_EXP_DEFN 153*22dc650dSSadaf Ebrahimi # define PCRE2_EXP_DEFN PCRE2_EXP_DECL 154*22dc650dSSadaf Ebrahimi # endif 155*22dc650dSSadaf Ebrahimi # endif 156*22dc650dSSadaf Ebrahimi #endif 157*22dc650dSSadaf 
Ebrahimi 158*22dc650dSSadaf Ebrahimi /* Include the public PCRE2 header and the definitions of UCP character 159*22dc650dSSadaf Ebrahimi property values. This must follow the setting of PCRE2_EXP_DECL above. */ 160*22dc650dSSadaf Ebrahimi 161*22dc650dSSadaf Ebrahimi #include "pcre2.h" 162*22dc650dSSadaf Ebrahimi #include "pcre2_ucp.h" 163*22dc650dSSadaf Ebrahimi 164*22dc650dSSadaf Ebrahimi /* When PCRE2 is compiled as a C++ library, the subject pointer can be replaced 165*22dc650dSSadaf Ebrahimi with a custom type. This makes it possible, for example, to allow pcre2_match() 166*22dc650dSSadaf Ebrahimi to process subject strings that are discontinuous by using a smart pointer 167*22dc650dSSadaf Ebrahimi class. It must always be possible to inspect all of the subject string in 168*22dc650dSSadaf Ebrahimi pcre2_match() because of the way it backtracks. */ 169*22dc650dSSadaf Ebrahimi 170*22dc650dSSadaf Ebrahimi /* WARNING: This is as yet untested for PCRE2. */ 171*22dc650dSSadaf Ebrahimi 172*22dc650dSSadaf Ebrahimi #ifdef CUSTOM_SUBJECT_PTR 173*22dc650dSSadaf Ebrahimi #undef PCRE2_SPTR 174*22dc650dSSadaf Ebrahimi #define PCRE2_SPTR CUSTOM_SUBJECT_PTR 175*22dc650dSSadaf Ebrahimi #endif 176*22dc650dSSadaf Ebrahimi 177*22dc650dSSadaf Ebrahimi /* When checking for integer overflow, we need to handle large integers. 178*22dc650dSSadaf Ebrahimi If a 64-bit integer type is available, we can use that. 179*22dc650dSSadaf Ebrahimi Otherwise we have to cast to double, which of course requires floating point 180*22dc650dSSadaf Ebrahimi arithmetic. Handle this by defining a macro for the appropriate type. 
*/ 181*22dc650dSSadaf Ebrahimi 182*22dc650dSSadaf Ebrahimi #if defined INT64_MAX || defined int64_t 183*22dc650dSSadaf Ebrahimi #define INT64_OR_DOUBLE int64_t 184*22dc650dSSadaf Ebrahimi #else 185*22dc650dSSadaf Ebrahimi #define INT64_OR_DOUBLE double 186*22dc650dSSadaf Ebrahimi #endif 187*22dc650dSSadaf Ebrahimi 188*22dc650dSSadaf Ebrahimi /* External (in the C sense) functions and tables that are private to the 189*22dc650dSSadaf Ebrahimi libraries are always referenced using the PRIV macro. This makes it possible 190*22dc650dSSadaf Ebrahimi for pcre2test.c to include some of the source files from the libraries using a 191*22dc650dSSadaf Ebrahimi different PRIV definition to avoid name clashes. It also makes it clear in the 192*22dc650dSSadaf Ebrahimi code that a non-static object is being referenced. */ 193*22dc650dSSadaf Ebrahimi 194*22dc650dSSadaf Ebrahimi #ifndef PRIV 195*22dc650dSSadaf Ebrahimi #define PRIV(name) _pcre2_##name 196*22dc650dSSadaf Ebrahimi #endif 197*22dc650dSSadaf Ebrahimi 198*22dc650dSSadaf Ebrahimi /* When compiling for use with the Virtual Pascal compiler, these functions 199*22dc650dSSadaf Ebrahimi need to have their names changed. PCRE2 must be compiled with the -DVPCOMPAT 200*22dc650dSSadaf Ebrahimi option on the command line. */ 201*22dc650dSSadaf Ebrahimi 202*22dc650dSSadaf Ebrahimi #ifdef VPCOMPAT 203*22dc650dSSadaf Ebrahimi #define strlen(s) _strlen(s) 204*22dc650dSSadaf Ebrahimi #define strncmp(s1,s2,m) _strncmp(s1,s2,m) 205*22dc650dSSadaf Ebrahimi #define memcmp(s,c,n) _memcmp(s,c,n) 206*22dc650dSSadaf Ebrahimi #define memcpy(d,s,n) _memcpy(d,s,n) 207*22dc650dSSadaf Ebrahimi #define memmove(d,s,n) _memmove(d,s,n) 208*22dc650dSSadaf Ebrahimi #define memset(s,c,n) _memset(s,c,n) 209*22dc650dSSadaf Ebrahimi #else /* VPCOMPAT */ 210*22dc650dSSadaf Ebrahimi 211*22dc650dSSadaf Ebrahimi /* Otherwise, to cope with SunOS4 and other systems that lack memmove(), define 212*22dc650dSSadaf Ebrahimi a macro that calls an emulating function. 
*/ 213*22dc650dSSadaf Ebrahimi 214*22dc650dSSadaf Ebrahimi #ifndef HAVE_MEMMOVE 215*22dc650dSSadaf Ebrahimi #undef memmove /* Some systems may have a macro */ 216*22dc650dSSadaf Ebrahimi #define memmove(a, b, c) PRIV(memmove)(a, b, c) 217*22dc650dSSadaf Ebrahimi #endif /* not HAVE_MEMMOVE */ 218*22dc650dSSadaf Ebrahimi #endif /* not VPCOMPAT */ 219*22dc650dSSadaf Ebrahimi 220*22dc650dSSadaf Ebrahimi /* This is an unsigned int value that no UTF character can ever have, as 221*22dc650dSSadaf Ebrahimi Unicode doesn't go beyond 0x0010ffff. */ 222*22dc650dSSadaf Ebrahimi 223*22dc650dSSadaf Ebrahimi #define NOTACHAR 0xffffffff 224*22dc650dSSadaf Ebrahimi 225*22dc650dSSadaf Ebrahimi /* This is the largest valid UTF/Unicode code point. */ 226*22dc650dSSadaf Ebrahimi 227*22dc650dSSadaf Ebrahimi #define MAX_UTF_CODE_POINT 0x10ffff 228*22dc650dSSadaf Ebrahimi 229*22dc650dSSadaf Ebrahimi /* Compile-time positive error numbers (all except UTF errors, which are 230*22dc650dSSadaf Ebrahimi negative) start at this value. It should probably never be changed, in case 231*22dc650dSSadaf Ebrahimi some application is checking for specific numbers. There is a copy of this 232*22dc650dSSadaf Ebrahimi #define in pcre2posix.c (which now no longer includes this file). Ideally, a 233*22dc650dSSadaf Ebrahimi way of having a single definition should be found, but as the number is 234*22dc650dSSadaf Ebrahimi unlikely to change, this is not a pressing issue. The original reason for 235*22dc650dSSadaf Ebrahimi having a base other than 0 was to keep the absolute values of compile-time and 236*22dc650dSSadaf Ebrahimi run-time error numbers numerically different, but in the event the code does 237*22dc650dSSadaf Ebrahimi not rely on this. 
*/ 238*22dc650dSSadaf Ebrahimi 239*22dc650dSSadaf Ebrahimi #define COMPILE_ERROR_BASE 100 240*22dc650dSSadaf Ebrahimi 241*22dc650dSSadaf Ebrahimi /* The initial frames vector for remembering pcre2_match() backtracking points 242*22dc650dSSadaf Ebrahimi is allocated on the heap, of this size (bytes) or ten times the frame size if 243*22dc650dSSadaf Ebrahimi larger, unless the heap limit is smaller. Typical frame sizes are a few hundred 244*22dc650dSSadaf Ebrahimi bytes (it depends on the number of capturing parentheses) so 20KiB handles 245*22dc650dSSadaf Ebrahimi quite a few frames. A larger vector on the heap is obtained for matches that 246*22dc650dSSadaf Ebrahimi need more frames, subject to the heap limit. */ 247*22dc650dSSadaf Ebrahimi 248*22dc650dSSadaf Ebrahimi #define START_FRAMES_SIZE 20480 249*22dc650dSSadaf Ebrahimi 250*22dc650dSSadaf Ebrahimi /* For DFA matching, an initial internal workspace vector is allocated on the 251*22dc650dSSadaf Ebrahimi stack. The heap is used only if this turns out to be too small. */ 252*22dc650dSSadaf Ebrahimi 253*22dc650dSSadaf Ebrahimi #define DFA_START_RWS_SIZE 30720 254*22dc650dSSadaf Ebrahimi 255*22dc650dSSadaf Ebrahimi /* Define the default BSR convention. */ 256*22dc650dSSadaf Ebrahimi 257*22dc650dSSadaf Ebrahimi #ifdef BSR_ANYCRLF 258*22dc650dSSadaf Ebrahimi #define BSR_DEFAULT PCRE2_BSR_ANYCRLF 259*22dc650dSSadaf Ebrahimi #else 260*22dc650dSSadaf Ebrahimi #define BSR_DEFAULT PCRE2_BSR_UNICODE 261*22dc650dSSadaf Ebrahimi #endif 262*22dc650dSSadaf Ebrahimi 263*22dc650dSSadaf Ebrahimi 264*22dc650dSSadaf Ebrahimi /* ---------------- Basic UTF-8 macros ---------------- */ 265*22dc650dSSadaf Ebrahimi 266*22dc650dSSadaf Ebrahimi /* These UTF-8 macros are always defined because they are used in pcre2test for 267*22dc650dSSadaf Ebrahimi handling wide characters in 16-bit and 32-bit modes, even if an 8-bit library 268*22dc650dSSadaf Ebrahimi is not supported. 
*/ 269*22dc650dSSadaf Ebrahimi 270*22dc650dSSadaf Ebrahimi /* Tests whether a UTF-8 code point needs extra bytes to decode. */ 271*22dc650dSSadaf Ebrahimi 272*22dc650dSSadaf Ebrahimi #define HASUTF8EXTRALEN(c) ((c) >= 0xc0) 273*22dc650dSSadaf Ebrahimi 274*22dc650dSSadaf Ebrahimi /* The following macros were originally written in the form of loops that used 275*22dc650dSSadaf Ebrahimi data from the tables whose names start with PRIV(utf8_table). They were 276*22dc650dSSadaf Ebrahimi rewritten by a user so as not to use loops, because in some environments this 277*22dc650dSSadaf Ebrahimi gives a significant performance advantage, and it seems never to do any harm. 278*22dc650dSSadaf Ebrahimi */ 279*22dc650dSSadaf Ebrahimi 280*22dc650dSSadaf Ebrahimi /* Base macro to pick up the remaining bytes of a UTF-8 character, not 281*22dc650dSSadaf Ebrahimi advancing the pointer. */ 282*22dc650dSSadaf Ebrahimi 283*22dc650dSSadaf Ebrahimi #define GETUTF8(c, eptr) \ 284*22dc650dSSadaf Ebrahimi { \ 285*22dc650dSSadaf Ebrahimi if ((c & 0x20u) == 0) \ 286*22dc650dSSadaf Ebrahimi c = ((c & 0x1fu) << 6) | (eptr[1] & 0x3fu); \ 287*22dc650dSSadaf Ebrahimi else if ((c & 0x10u) == 0) \ 288*22dc650dSSadaf Ebrahimi c = ((c & 0x0fu) << 12) | ((eptr[1] & 0x3fu) << 6) | (eptr[2] & 0x3fu); \ 289*22dc650dSSadaf Ebrahimi else if ((c & 0x08u) == 0) \ 290*22dc650dSSadaf Ebrahimi c = ((c & 0x07u) << 18) | ((eptr[1] & 0x3fu) << 12) | \ 291*22dc650dSSadaf Ebrahimi ((eptr[2] & 0x3fu) << 6) | (eptr[3] & 0x3fu); \ 292*22dc650dSSadaf Ebrahimi else if ((c & 0x04u) == 0) \ 293*22dc650dSSadaf Ebrahimi c = ((c & 0x03u) << 24) | ((eptr[1] & 0x3fu) << 18) | \ 294*22dc650dSSadaf Ebrahimi ((eptr[2] & 0x3fu) << 12) | ((eptr[3] & 0x3fu) << 6) | \ 295*22dc650dSSadaf Ebrahimi (eptr[4] & 0x3fu); \ 296*22dc650dSSadaf Ebrahimi else \ 297*22dc650dSSadaf Ebrahimi c = ((c & 0x01u) << 30) | ((eptr[1] & 0x3fu) << 24) | \ 298*22dc650dSSadaf Ebrahimi ((eptr[2] & 0x3fu) << 18) | ((eptr[3] & 0x3fu) << 12) | \ 299*22dc650dSSadaf 
Ebrahimi ((eptr[4] & 0x3fu) << 6) | (eptr[5] & 0x3fu); \ 300*22dc650dSSadaf Ebrahimi } 301*22dc650dSSadaf Ebrahimi 302*22dc650dSSadaf Ebrahimi /* Base macro to pick up the remaining bytes of a UTF-8 character, advancing 303*22dc650dSSadaf Ebrahimi the pointer. */ 304*22dc650dSSadaf Ebrahimi 305*22dc650dSSadaf Ebrahimi #define GETUTF8INC(c, eptr) \ 306*22dc650dSSadaf Ebrahimi { \ 307*22dc650dSSadaf Ebrahimi if ((c & 0x20u) == 0) \ 308*22dc650dSSadaf Ebrahimi c = ((c & 0x1fu) << 6) | (*eptr++ & 0x3fu); \ 309*22dc650dSSadaf Ebrahimi else if ((c & 0x10u) == 0) \ 310*22dc650dSSadaf Ebrahimi { \ 311*22dc650dSSadaf Ebrahimi c = ((c & 0x0fu) << 12) | ((*eptr & 0x3fu) << 6) | (eptr[1] & 0x3fu); \ 312*22dc650dSSadaf Ebrahimi eptr += 2; \ 313*22dc650dSSadaf Ebrahimi } \ 314*22dc650dSSadaf Ebrahimi else if ((c & 0x08u) == 0) \ 315*22dc650dSSadaf Ebrahimi { \ 316*22dc650dSSadaf Ebrahimi c = ((c & 0x07u) << 18) | ((*eptr & 0x3fu) << 12) | \ 317*22dc650dSSadaf Ebrahimi ((eptr[1] & 0x3fu) << 6) | (eptr[2] & 0x3fu); \ 318*22dc650dSSadaf Ebrahimi eptr += 3; \ 319*22dc650dSSadaf Ebrahimi } \ 320*22dc650dSSadaf Ebrahimi else if ((c & 0x04u) == 0) \ 321*22dc650dSSadaf Ebrahimi { \ 322*22dc650dSSadaf Ebrahimi c = ((c & 0x03u) << 24) | ((*eptr & 0x3fu) << 18) | \ 323*22dc650dSSadaf Ebrahimi ((eptr[1] & 0x3fu) << 12) | ((eptr[2] & 0x3fu) << 6) | \ 324*22dc650dSSadaf Ebrahimi (eptr[3] & 0x3fu); \ 325*22dc650dSSadaf Ebrahimi eptr += 4; \ 326*22dc650dSSadaf Ebrahimi } \ 327*22dc650dSSadaf Ebrahimi else \ 328*22dc650dSSadaf Ebrahimi { \ 329*22dc650dSSadaf Ebrahimi c = ((c & 0x01u) << 30) | ((*eptr & 0x3fu) << 24) | \ 330*22dc650dSSadaf Ebrahimi ((eptr[1] & 0x3fu) << 18) | ((eptr[2] & 0x3fu) << 12) | \ 331*22dc650dSSadaf Ebrahimi ((eptr[3] & 0x3fu) << 6) | (eptr[4] & 0x3fu); \ 332*22dc650dSSadaf Ebrahimi eptr += 5; \ 333*22dc650dSSadaf Ebrahimi } \ 334*22dc650dSSadaf Ebrahimi } 335*22dc650dSSadaf Ebrahimi 336*22dc650dSSadaf Ebrahimi /* Base macro to pick up the remaining bytes of a UTF-8 
character, not 337*22dc650dSSadaf Ebrahimi advancing the pointer, incrementing the length. */ 338*22dc650dSSadaf Ebrahimi 339*22dc650dSSadaf Ebrahimi #define GETUTF8LEN(c, eptr, len) \ 340*22dc650dSSadaf Ebrahimi { \ 341*22dc650dSSadaf Ebrahimi if ((c & 0x20u) == 0) \ 342*22dc650dSSadaf Ebrahimi { \ 343*22dc650dSSadaf Ebrahimi c = ((c & 0x1fu) << 6) | (eptr[1] & 0x3fu); \ 344*22dc650dSSadaf Ebrahimi len++; \ 345*22dc650dSSadaf Ebrahimi } \ 346*22dc650dSSadaf Ebrahimi else if ((c & 0x10u) == 0) \ 347*22dc650dSSadaf Ebrahimi { \ 348*22dc650dSSadaf Ebrahimi c = ((c & 0x0fu) << 12) | ((eptr[1] & 0x3fu) << 6) | (eptr[2] & 0x3fu); \ 349*22dc650dSSadaf Ebrahimi len += 2; \ 350*22dc650dSSadaf Ebrahimi } \ 351*22dc650dSSadaf Ebrahimi else if ((c & 0x08u) == 0) \ 352*22dc650dSSadaf Ebrahimi {\ 353*22dc650dSSadaf Ebrahimi c = ((c & 0x07u) << 18) | ((eptr[1] & 0x3fu) << 12) | \ 354*22dc650dSSadaf Ebrahimi ((eptr[2] & 0x3fu) << 6) | (eptr[3] & 0x3fu); \ 355*22dc650dSSadaf Ebrahimi len += 3; \ 356*22dc650dSSadaf Ebrahimi } \ 357*22dc650dSSadaf Ebrahimi else if ((c & 0x04u) == 0) \ 358*22dc650dSSadaf Ebrahimi { \ 359*22dc650dSSadaf Ebrahimi c = ((c & 0x03u) << 24) | ((eptr[1] & 0x3fu) << 18) | \ 360*22dc650dSSadaf Ebrahimi ((eptr[2] & 0x3fu) << 12) | ((eptr[3] & 0x3fu) << 6) | \ 361*22dc650dSSadaf Ebrahimi (eptr[4] & 0x3fu); \ 362*22dc650dSSadaf Ebrahimi len += 4; \ 363*22dc650dSSadaf Ebrahimi } \ 364*22dc650dSSadaf Ebrahimi else \ 365*22dc650dSSadaf Ebrahimi {\ 366*22dc650dSSadaf Ebrahimi c = ((c & 0x01u) << 30) | ((eptr[1] & 0x3fu) << 24) | \ 367*22dc650dSSadaf Ebrahimi ((eptr[2] & 0x3fu) << 18) | ((eptr[3] & 0x3fu) << 12) | \ 368*22dc650dSSadaf Ebrahimi ((eptr[4] & 0x3fu) << 6) | (eptr[5] & 0x3fu); \ 369*22dc650dSSadaf Ebrahimi len += 5; \ 370*22dc650dSSadaf Ebrahimi } \ 371*22dc650dSSadaf Ebrahimi } 372*22dc650dSSadaf Ebrahimi 373*22dc650dSSadaf Ebrahimi /* --------------- Whitespace macros ---------------- */ 374*22dc650dSSadaf Ebrahimi 375*22dc650dSSadaf Ebrahimi /* Tests 
for Unicode horizontal and vertical whitespace characters must check a 376*22dc650dSSadaf Ebrahimi number of different values. Using a switch statement for this generates the 377*22dc650dSSadaf Ebrahimi fastest code (no loop, no memory access), and there are several places in the 378*22dc650dSSadaf Ebrahimi interpreter code where this happens. In order to ensure that all the case lists 379*22dc650dSSadaf Ebrahimi remain in step, we use macros so that there is only one place where the lists 380*22dc650dSSadaf Ebrahimi are defined. 381*22dc650dSSadaf Ebrahimi 382*22dc650dSSadaf Ebrahimi These values are also required as lists in pcre2_compile.c when processing \h, 383*22dc650dSSadaf Ebrahimi \H, \v and \V in a character class. The lists are defined in pcre2_tables.c, 384*22dc650dSSadaf Ebrahimi but macros that define the values are here so that all the definitions are 385*22dc650dSSadaf Ebrahimi together. The lists must be in ascending character order, terminated by 386*22dc650dSSadaf Ebrahimi NOTACHAR (which is 0xffffffff). 387*22dc650dSSadaf Ebrahimi 388*22dc650dSSadaf Ebrahimi Any changes should ensure that the various macros are kept in step with each 389*22dc650dSSadaf Ebrahimi other. NOTE: The values also appear in pcre2_jit_compile.c. */ 390*22dc650dSSadaf Ebrahimi 391*22dc650dSSadaf Ebrahimi /* -------------- ASCII/Unicode environments -------------- */ 392*22dc650dSSadaf Ebrahimi 393*22dc650dSSadaf Ebrahimi #ifndef EBCDIC 394*22dc650dSSadaf Ebrahimi 395*22dc650dSSadaf Ebrahimi /* Character U+180E (Mongolian Vowel Separator) is not included in the list of 396*22dc650dSSadaf Ebrahimi spaces in the Unicode file PropList.txt, and Perl does not recognize it as a 397*22dc650dSSadaf Ebrahimi space. However, in many other sources it is listed as a space and has been in 398*22dc650dSSadaf Ebrahimi PCRE (both APIs) for a long time. 
*/ 399*22dc650dSSadaf Ebrahimi 400*22dc650dSSadaf Ebrahimi #define HSPACE_LIST \ 401*22dc650dSSadaf Ebrahimi CHAR_HT, CHAR_SPACE, CHAR_NBSP, \ 402*22dc650dSSadaf Ebrahimi 0x1680, 0x180e, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, \ 403*22dc650dSSadaf Ebrahimi 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x202f, 0x205f, 0x3000, \ 404*22dc650dSSadaf Ebrahimi NOTACHAR 405*22dc650dSSadaf Ebrahimi 406*22dc650dSSadaf Ebrahimi #define HSPACE_MULTIBYTE_CASES \ 407*22dc650dSSadaf Ebrahimi case 0x1680: /* OGHAM SPACE MARK */ \ 408*22dc650dSSadaf Ebrahimi case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */ \ 409*22dc650dSSadaf Ebrahimi case 0x2000: /* EN QUAD */ \ 410*22dc650dSSadaf Ebrahimi case 0x2001: /* EM QUAD */ \ 411*22dc650dSSadaf Ebrahimi case 0x2002: /* EN SPACE */ \ 412*22dc650dSSadaf Ebrahimi case 0x2003: /* EM SPACE */ \ 413*22dc650dSSadaf Ebrahimi case 0x2004: /* THREE-PER-EM SPACE */ \ 414*22dc650dSSadaf Ebrahimi case 0x2005: /* FOUR-PER-EM SPACE */ \ 415*22dc650dSSadaf Ebrahimi case 0x2006: /* SIX-PER-EM SPACE */ \ 416*22dc650dSSadaf Ebrahimi case 0x2007: /* FIGURE SPACE */ \ 417*22dc650dSSadaf Ebrahimi case 0x2008: /* PUNCTUATION SPACE */ \ 418*22dc650dSSadaf Ebrahimi case 0x2009: /* THIN SPACE */ \ 419*22dc650dSSadaf Ebrahimi case 0x200A: /* HAIR SPACE */ \ 420*22dc650dSSadaf Ebrahimi case 0x202f: /* NARROW NO-BREAK SPACE */ \ 421*22dc650dSSadaf Ebrahimi case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ \ 422*22dc650dSSadaf Ebrahimi case 0x3000 /* IDEOGRAPHIC SPACE */ 423*22dc650dSSadaf Ebrahimi 424*22dc650dSSadaf Ebrahimi #define HSPACE_BYTE_CASES \ 425*22dc650dSSadaf Ebrahimi case CHAR_HT: \ 426*22dc650dSSadaf Ebrahimi case CHAR_SPACE: \ 427*22dc650dSSadaf Ebrahimi case CHAR_NBSP 428*22dc650dSSadaf Ebrahimi 429*22dc650dSSadaf Ebrahimi #define HSPACE_CASES \ 430*22dc650dSSadaf Ebrahimi HSPACE_BYTE_CASES: \ 431*22dc650dSSadaf Ebrahimi HSPACE_MULTIBYTE_CASES 432*22dc650dSSadaf Ebrahimi 433*22dc650dSSadaf Ebrahimi #define VSPACE_LIST \ 434*22dc650dSSadaf Ebrahimi CHAR_LF, 
CHAR_VT, CHAR_FF, CHAR_CR, CHAR_NEL, 0x2028, 0x2029, NOTACHAR 435*22dc650dSSadaf Ebrahimi 436*22dc650dSSadaf Ebrahimi #define VSPACE_MULTIBYTE_CASES \ 437*22dc650dSSadaf Ebrahimi case 0x2028: /* LINE SEPARATOR */ \ 438*22dc650dSSadaf Ebrahimi case 0x2029 /* PARAGRAPH SEPARATOR */ 439*22dc650dSSadaf Ebrahimi 440*22dc650dSSadaf Ebrahimi #define VSPACE_BYTE_CASES \ 441*22dc650dSSadaf Ebrahimi case CHAR_LF: \ 442*22dc650dSSadaf Ebrahimi case CHAR_VT: \ 443*22dc650dSSadaf Ebrahimi case CHAR_FF: \ 444*22dc650dSSadaf Ebrahimi case CHAR_CR: \ 445*22dc650dSSadaf Ebrahimi case CHAR_NEL 446*22dc650dSSadaf Ebrahimi 447*22dc650dSSadaf Ebrahimi #define VSPACE_CASES \ 448*22dc650dSSadaf Ebrahimi VSPACE_BYTE_CASES: \ 449*22dc650dSSadaf Ebrahimi VSPACE_MULTIBYTE_CASES 450*22dc650dSSadaf Ebrahimi 451*22dc650dSSadaf Ebrahimi /* -------------- EBCDIC environments -------------- */ 452*22dc650dSSadaf Ebrahimi 453*22dc650dSSadaf Ebrahimi #else 454*22dc650dSSadaf Ebrahimi #define HSPACE_LIST CHAR_HT, CHAR_SPACE, CHAR_NBSP, NOTACHAR 455*22dc650dSSadaf Ebrahimi 456*22dc650dSSadaf Ebrahimi #define HSPACE_BYTE_CASES \ 457*22dc650dSSadaf Ebrahimi case CHAR_HT: \ 458*22dc650dSSadaf Ebrahimi case CHAR_SPACE: \ 459*22dc650dSSadaf Ebrahimi case CHAR_NBSP 460*22dc650dSSadaf Ebrahimi 461*22dc650dSSadaf Ebrahimi #define HSPACE_CASES HSPACE_BYTE_CASES 462*22dc650dSSadaf Ebrahimi 463*22dc650dSSadaf Ebrahimi #ifdef EBCDIC_NL25 464*22dc650dSSadaf Ebrahimi #define VSPACE_LIST \ 465*22dc650dSSadaf Ebrahimi CHAR_VT, CHAR_FF, CHAR_CR, CHAR_NEL, CHAR_LF, NOTACHAR 466*22dc650dSSadaf Ebrahimi #else 467*22dc650dSSadaf Ebrahimi #define VSPACE_LIST \ 468*22dc650dSSadaf Ebrahimi CHAR_VT, CHAR_FF, CHAR_CR, CHAR_LF, CHAR_NEL, NOTACHAR 469*22dc650dSSadaf Ebrahimi #endif 470*22dc650dSSadaf Ebrahimi 471*22dc650dSSadaf Ebrahimi #define VSPACE_BYTE_CASES \ 472*22dc650dSSadaf Ebrahimi case CHAR_LF: \ 473*22dc650dSSadaf Ebrahimi case CHAR_VT: \ 474*22dc650dSSadaf Ebrahimi case CHAR_FF: \ 475*22dc650dSSadaf Ebrahimi case 
CHAR_CR: \ 476*22dc650dSSadaf Ebrahimi case CHAR_NEL 477*22dc650dSSadaf Ebrahimi 478*22dc650dSSadaf Ebrahimi #define VSPACE_CASES VSPACE_BYTE_CASES 479*22dc650dSSadaf Ebrahimi #endif /* EBCDIC */ 480*22dc650dSSadaf Ebrahimi 481*22dc650dSSadaf Ebrahimi /* -------------- End of whitespace macros -------------- */ 482*22dc650dSSadaf Ebrahimi 483*22dc650dSSadaf Ebrahimi 484*22dc650dSSadaf Ebrahimi /* PCRE2 is able to support several different kinds of newline (CR, LF, CRLF, 485*22dc650dSSadaf Ebrahimi "any" and "anycrlf" at present). The following macros are used to package up 486*22dc650dSSadaf Ebrahimi testing for newlines. NLBLOCK, PSSTART, and PSEND are defined in the various 487*22dc650dSSadaf Ebrahimi modules to indicate in which datablock the parameters exist, and what the 488*22dc650dSSadaf Ebrahimi start/end of string field names are. */ 489*22dc650dSSadaf Ebrahimi 490*22dc650dSSadaf Ebrahimi #define NLTYPE_FIXED 0 /* Newline is a fixed length string */ 491*22dc650dSSadaf Ebrahimi #define NLTYPE_ANY 1 /* Newline is any Unicode line ending */ 492*22dc650dSSadaf Ebrahimi #define NLTYPE_ANYCRLF 2 /* Newline is CR, LF, or CRLF */ 493*22dc650dSSadaf Ebrahimi 494*22dc650dSSadaf Ebrahimi /* This macro checks for a newline at the given position */ 495*22dc650dSSadaf Ebrahimi 496*22dc650dSSadaf Ebrahimi #define IS_NEWLINE(p) \ 497*22dc650dSSadaf Ebrahimi ((NLBLOCK->nltype != NLTYPE_FIXED)? 
\ 498*22dc650dSSadaf Ebrahimi ((p) < NLBLOCK->PSEND && \ 499*22dc650dSSadaf Ebrahimi PRIV(is_newline)((p), NLBLOCK->nltype, NLBLOCK->PSEND, \ 500*22dc650dSSadaf Ebrahimi &(NLBLOCK->nllen), utf)) \ 501*22dc650dSSadaf Ebrahimi : \ 502*22dc650dSSadaf Ebrahimi ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \ 503*22dc650dSSadaf Ebrahimi UCHAR21TEST(p) == NLBLOCK->nl[0] && \ 504*22dc650dSSadaf Ebrahimi (NLBLOCK->nllen == 1 || UCHAR21TEST(p+1) == NLBLOCK->nl[1]) \ 505*22dc650dSSadaf Ebrahimi ) \ 506*22dc650dSSadaf Ebrahimi ) 507*22dc650dSSadaf Ebrahimi 508*22dc650dSSadaf Ebrahimi /* This macro checks for a newline immediately preceding the given position */ 509*22dc650dSSadaf Ebrahimi 510*22dc650dSSadaf Ebrahimi #define WAS_NEWLINE(p) \ 511*22dc650dSSadaf Ebrahimi ((NLBLOCK->nltype != NLTYPE_FIXED)? \ 512*22dc650dSSadaf Ebrahimi ((p) > NLBLOCK->PSSTART && \ 513*22dc650dSSadaf Ebrahimi PRIV(was_newline)((p), NLBLOCK->nltype, NLBLOCK->PSSTART, \ 514*22dc650dSSadaf Ebrahimi &(NLBLOCK->nllen), utf)) \ 515*22dc650dSSadaf Ebrahimi : \ 516*22dc650dSSadaf Ebrahimi ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \ 517*22dc650dSSadaf Ebrahimi UCHAR21TEST(p - NLBLOCK->nllen) == NLBLOCK->nl[0] && \ 518*22dc650dSSadaf Ebrahimi (NLBLOCK->nllen == 1 || UCHAR21TEST(p - NLBLOCK->nllen + 1) == NLBLOCK->nl[1]) \ 519*22dc650dSSadaf Ebrahimi ) \ 520*22dc650dSSadaf Ebrahimi ) 521*22dc650dSSadaf Ebrahimi 522*22dc650dSSadaf Ebrahimi /* Private flags containing information about the compiled pattern. The first 523*22dc650dSSadaf Ebrahimi three must not be changed, because whichever is set is actually the number of 524*22dc650dSSadaf Ebrahimi bytes in a code unit in that mode. 
*/ 525*22dc650dSSadaf Ebrahimi 526*22dc650dSSadaf Ebrahimi #define PCRE2_MODE8 0x00000001 /* compiled in 8 bit mode */ 527*22dc650dSSadaf Ebrahimi #define PCRE2_MODE16 0x00000002 /* compiled in 16 bit mode */ 528*22dc650dSSadaf Ebrahimi #define PCRE2_MODE32 0x00000004 /* compiled in 32 bit mode */ 529*22dc650dSSadaf Ebrahimi #define PCRE2_FIRSTSET 0x00000010 /* first code unit is set (0x00000008 is not assigned in this list) */ 530*22dc650dSSadaf Ebrahimi #define PCRE2_FIRSTCASELESS 0x00000020 /* caseless first code unit */ 531*22dc650dSSadaf Ebrahimi #define PCRE2_FIRSTMAPSET 0x00000040 /* bitmap of first code units is set */ 532*22dc650dSSadaf Ebrahimi #define PCRE2_LASTSET 0x00000080 /* last code unit is set */ 533*22dc650dSSadaf Ebrahimi #define PCRE2_LASTCASELESS 0x00000100 /* caseless last code unit */ 534*22dc650dSSadaf Ebrahimi #define PCRE2_STARTLINE 0x00000200 /* start after \n for multiline */ 535*22dc650dSSadaf Ebrahimi #define PCRE2_JCHANGED 0x00000400 /* j option used in pattern */ 536*22dc650dSSadaf Ebrahimi #define PCRE2_HASCRORLF 0x00000800 /* explicit \r or \n in pattern */ 537*22dc650dSSadaf Ebrahimi #define PCRE2_HASTHEN 0x00001000 /* pattern contains (*THEN) */ 538*22dc650dSSadaf Ebrahimi #define PCRE2_MATCH_EMPTY 0x00002000 /* pattern can match empty string */ 539*22dc650dSSadaf Ebrahimi #define PCRE2_BSR_SET 0x00004000 /* BSR was set in the pattern */ 540*22dc650dSSadaf Ebrahimi #define PCRE2_NL_SET 0x00008000 /* newline was set in the pattern */ 541*22dc650dSSadaf Ebrahimi #define PCRE2_NOTEMPTY_SET 0x00010000 /* (*NOTEMPTY) used ) keep */ 542*22dc650dSSadaf Ebrahimi #define PCRE2_NE_ATST_SET 0x00020000 /* (*NOTEMPTY_ATSTART) used) together */ 543*22dc650dSSadaf Ebrahimi #define PCRE2_DEREF_TABLES 0x00040000 /* release character tables */ 544*22dc650dSSadaf Ebrahimi #define PCRE2_NOJIT 0x00080000 /* (*NO_JIT) used */ 545*22dc650dSSadaf Ebrahimi #define PCRE2_HASBKPORX 0x00100000 /* contains \P, \p, or \X */ 546*22dc650dSSadaf Ebrahimi #define PCRE2_DUPCAPUSED 0x00200000 /* contains
(?| */ 547*22dc650dSSadaf Ebrahimi #define PCRE2_HASBKC 0x00400000 /* contains \C */ 548*22dc650dSSadaf Ebrahimi #define PCRE2_HASACCEPT 0x00800000 /* contains (*ACCEPT) */ 549*22dc650dSSadaf Ebrahimi 550*22dc650dSSadaf Ebrahimi #define PCRE2_MODE_MASK (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32) /* all three code unit width bits */ 551*22dc650dSSadaf Ebrahimi 552*22dc650dSSadaf Ebrahimi /* Values for the matchedby field in a match data block. */ 553*22dc650dSSadaf Ebrahimi 554*22dc650dSSadaf Ebrahimi enum { PCRE2_MATCHEDBY_INTERPRETER, /* pcre2_match() */ 555*22dc650dSSadaf Ebrahimi PCRE2_MATCHEDBY_DFA_INTERPRETER, /* pcre2_dfa_match() */ 556*22dc650dSSadaf Ebrahimi PCRE2_MATCHEDBY_JIT }; /* pcre2_jit_match() */ 557*22dc650dSSadaf Ebrahimi 558*22dc650dSSadaf Ebrahimi /* Values for the flags field in a match data block. */ 559*22dc650dSSadaf Ebrahimi 560*22dc650dSSadaf Ebrahimi #define PCRE2_MD_COPIED_SUBJECT 0x01u 561*22dc650dSSadaf Ebrahimi 562*22dc650dSSadaf Ebrahimi /* Magic number to provide a small check against being handed junk. */ 563*22dc650dSSadaf Ebrahimi 564*22dc650dSSadaf Ebrahimi #define MAGIC_NUMBER 0x50435245UL /* the ASCII codes for 'PCRE' */ 565*22dc650dSSadaf Ebrahimi 566*22dc650dSSadaf Ebrahimi /* The maximum remaining length of subject we are prepared to search for a 567*22dc650dSSadaf Ebrahimi req_unit match from an anchored pattern. In 8-bit mode, memchr() is used and is 568*22dc650dSSadaf Ebrahimi much faster than the search loop that has to be used in 16-bit and 32-bit 569*22dc650dSSadaf Ebrahimi modes. */ 570*22dc650dSSadaf Ebrahimi 571*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8 572*22dc650dSSadaf Ebrahimi #define REQ_CU_MAX 5000 573*22dc650dSSadaf Ebrahimi #else 574*22dc650dSSadaf Ebrahimi #define REQ_CU_MAX 2000 575*22dc650dSSadaf Ebrahimi #endif 576*22dc650dSSadaf Ebrahimi 577*22dc650dSSadaf Ebrahimi /* Offsets for the bitmap tables in the cbits set of tables. Each table 578*22dc650dSSadaf Ebrahimi contains a set of bits for a class map.
Some classes are built by combining 579*22dc650dSSadaf Ebrahimi these tables. */ 580*22dc650dSSadaf Ebrahimi 581*22dc650dSSadaf Ebrahimi #define cbit_space 0 /* [:space:] or \s */ 582*22dc650dSSadaf Ebrahimi #define cbit_xdigit 32 /* [:xdigit:] */ 583*22dc650dSSadaf Ebrahimi #define cbit_digit 64 /* [:digit:] or \d */ 584*22dc650dSSadaf Ebrahimi #define cbit_upper 96 /* [:upper:] */ 585*22dc650dSSadaf Ebrahimi #define cbit_lower 128 /* [:lower:] */ 586*22dc650dSSadaf Ebrahimi #define cbit_word 160 /* [:word:] or \w */ 587*22dc650dSSadaf Ebrahimi #define cbit_graph 192 /* [:graph:] */ 588*22dc650dSSadaf Ebrahimi #define cbit_print 224 /* [:print:] */ 589*22dc650dSSadaf Ebrahimi #define cbit_punct 256 /* [:punct:] */ 590*22dc650dSSadaf Ebrahimi #define cbit_cntrl 288 /* [:cntrl:] */ 591*22dc650dSSadaf Ebrahimi #define cbit_length 320 /* Length of the cbits table: the ten offsets above are 32 apart */ 592*22dc650dSSadaf Ebrahimi 593*22dc650dSSadaf Ebrahimi /* Bit definitions for entries in the ctypes table. Each value below is a different single bit. Do not change these values 594*22dc650dSSadaf Ebrahimi without checking pcre2_jit_compile.c, which has an assertion to ensure that 595*22dc650dSSadaf Ebrahimi ctype_word has the value 16. */ 596*22dc650dSSadaf Ebrahimi 597*22dc650dSSadaf Ebrahimi #define ctype_space 0x01 598*22dc650dSSadaf Ebrahimi #define ctype_letter 0x02 599*22dc650dSSadaf Ebrahimi #define ctype_lcletter 0x04 600*22dc650dSSadaf Ebrahimi #define ctype_digit 0x08 601*22dc650dSSadaf Ebrahimi #define ctype_word 0x10 /* alphanumeric or '_' */ 602*22dc650dSSadaf Ebrahimi 603*22dc650dSSadaf Ebrahimi /* Offsets of the various tables from the base tables pointer, and 604*22dc650dSSadaf Ebrahimi total length of the tables.
*/ 605*22dc650dSSadaf Ebrahimi 606*22dc650dSSadaf Ebrahimi #define lcc_offset 0 /* Lower case */ 607*22dc650dSSadaf Ebrahimi #define fcc_offset 256 /* Flip case */ 608*22dc650dSSadaf Ebrahimi #define cbits_offset 512 /* Character classes */ 609*22dc650dSSadaf Ebrahimi #define ctypes_offset (cbits_offset + cbit_length) /* Character types */ 610*22dc650dSSadaf Ebrahimi #define TABLES_LENGTH (ctypes_offset + 256) /* total length: ctypes_offset plus a further 256 */ 611*22dc650dSSadaf Ebrahimi 612*22dc650dSSadaf Ebrahimi 613*22dc650dSSadaf Ebrahimi /* -------------------- Character and string names ------------------------ */ 614*22dc650dSSadaf Ebrahimi 615*22dc650dSSadaf Ebrahimi /* If PCRE2 is to support UTF-8 on EBCDIC platforms, we cannot use normal 616*22dc650dSSadaf Ebrahimi character constants like '*' because the compiler would emit their EBCDIC code, 617*22dc650dSSadaf Ebrahimi which is different from their ASCII/UTF-8 code. Instead we define macros for 618*22dc650dSSadaf Ebrahimi the characters so that they always use the ASCII/UTF-8 code when UTF-8 support 619*22dc650dSSadaf Ebrahimi is enabled. When UTF-8 support is not enabled, the definitions use character 620*22dc650dSSadaf Ebrahimi literals. Both character and string versions of each character are needed, and 621*22dc650dSSadaf Ebrahimi there are some longer strings as well. 622*22dc650dSSadaf Ebrahimi 623*22dc650dSSadaf Ebrahimi This means that, on EBCDIC platforms, the PCRE2 library can handle either 624*22dc650dSSadaf Ebrahimi EBCDIC, or UTF-8, but not both. To support both in the same compiled library 625*22dc650dSSadaf Ebrahimi would need different lookups depending on whether PCRE2_UTF was set or not. 626*22dc650dSSadaf Ebrahimi This would make it impossible to use characters in switch/case statements, 627*22dc650dSSadaf Ebrahimi which would reduce performance. For a theoretical use (which nobody has asked 628*22dc650dSSadaf Ebrahimi for) in a minority area (EBCDIC platforms), this is not sensible.
Any 629*22dc650dSSadaf Ebrahimi application that did need both could compile two versions of the library, using 630*22dc650dSSadaf Ebrahimi macros to give the functions distinct names. */ 631*22dc650dSSadaf Ebrahimi 632*22dc650dSSadaf Ebrahimi #ifndef SUPPORT_UNICODE 633*22dc650dSSadaf Ebrahimi 634*22dc650dSSadaf Ebrahimi /* UTF-8 support is not enabled; use the platform-dependent character literals 635*22dc650dSSadaf Ebrahimi so that PCRE2 works in both ASCII and EBCDIC environments, but only in non-UTF 636*22dc650dSSadaf Ebrahimi mode. Newline characters are problematic in EBCDIC. Though it has CR and LF 637*22dc650dSSadaf Ebrahimi characters, a common practice has been to use its NL (0x15) character as the 638*22dc650dSSadaf Ebrahimi line terminator in C-like processing environments. However, sometimes the LF 639*22dc650dSSadaf Ebrahimi (0x25) character is used instead, according to this Unicode document: 640*22dc650dSSadaf Ebrahimi 641*22dc650dSSadaf Ebrahimi http://unicode.org/standard/reports/tr13/tr13-5.html 642*22dc650dSSadaf Ebrahimi 643*22dc650dSSadaf Ebrahimi PCRE2 defaults EBCDIC NL to 0x15, but has a build-time option to select 0x25 644*22dc650dSSadaf Ebrahimi instead. Whichever is *not* chosen is defined as NEL. 645*22dc650dSSadaf Ebrahimi 646*22dc650dSSadaf Ebrahimi In both ASCII and EBCDIC environments, CHAR_NL and CHAR_LF are synonyms for the 647*22dc650dSSadaf Ebrahimi same code point. 
*/ 648*22dc650dSSadaf Ebrahimi 649*22dc650dSSadaf Ebrahimi #ifdef EBCDIC 650*22dc650dSSadaf Ebrahimi 651*22dc650dSSadaf Ebrahimi #ifndef EBCDIC_NL25 652*22dc650dSSadaf Ebrahimi #define CHAR_NL '\x15' 653*22dc650dSSadaf Ebrahimi #define CHAR_NEL '\x25' 654*22dc650dSSadaf Ebrahimi #define STR_NL "\x15" 655*22dc650dSSadaf Ebrahimi #define STR_NEL "\x25" 656*22dc650dSSadaf Ebrahimi #else 657*22dc650dSSadaf Ebrahimi #define CHAR_NL '\x25' 658*22dc650dSSadaf Ebrahimi #define CHAR_NEL '\x15' 659*22dc650dSSadaf Ebrahimi #define STR_NL "\x25" 660*22dc650dSSadaf Ebrahimi #define STR_NEL "\x15" 661*22dc650dSSadaf Ebrahimi #endif 662*22dc650dSSadaf Ebrahimi 663*22dc650dSSadaf Ebrahimi #define CHAR_LF CHAR_NL 664*22dc650dSSadaf Ebrahimi #define STR_LF STR_NL 665*22dc650dSSadaf Ebrahimi 666*22dc650dSSadaf Ebrahimi #define CHAR_ESC '\047' 667*22dc650dSSadaf Ebrahimi #define CHAR_DEL '\007' 668*22dc650dSSadaf Ebrahimi #define CHAR_NBSP ((unsigned char)'\x41') 669*22dc650dSSadaf Ebrahimi #define STR_ESC "\047" 670*22dc650dSSadaf Ebrahimi #define STR_DEL "\007" 671*22dc650dSSadaf Ebrahimi 672*22dc650dSSadaf Ebrahimi #else /* Not EBCDIC */ 673*22dc650dSSadaf Ebrahimi 674*22dc650dSSadaf Ebrahimi /* In ASCII/Unicode, linefeed is '\n' and we equate this to NL for 675*22dc650dSSadaf Ebrahimi compatibility. NEL is the Unicode newline character; make sure it is 676*22dc650dSSadaf Ebrahimi a positive value. 
*/ 677*22dc650dSSadaf Ebrahimi 678*22dc650dSSadaf Ebrahimi #define CHAR_LF '\n' 679*22dc650dSSadaf Ebrahimi #define CHAR_NL CHAR_LF 680*22dc650dSSadaf Ebrahimi #define CHAR_NEL ((unsigned char)'\x85') 681*22dc650dSSadaf Ebrahimi #define CHAR_ESC '\033' 682*22dc650dSSadaf Ebrahimi #define CHAR_DEL '\177' 683*22dc650dSSadaf Ebrahimi #define CHAR_NBSP ((unsigned char)'\xa0') 684*22dc650dSSadaf Ebrahimi 685*22dc650dSSadaf Ebrahimi #define STR_LF "\n" 686*22dc650dSSadaf Ebrahimi #define STR_NL STR_LF 687*22dc650dSSadaf Ebrahimi #define STR_NEL "\x85" 688*22dc650dSSadaf Ebrahimi #define STR_ESC "\033" 689*22dc650dSSadaf Ebrahimi #define STR_DEL "\177" 690*22dc650dSSadaf Ebrahimi 691*22dc650dSSadaf Ebrahimi #endif /* EBCDIC */ 692*22dc650dSSadaf Ebrahimi 693*22dc650dSSadaf Ebrahimi /* The remaining definitions work in both environments. */ 694*22dc650dSSadaf Ebrahimi 695*22dc650dSSadaf Ebrahimi #define CHAR_NUL '\0' 696*22dc650dSSadaf Ebrahimi #define CHAR_HT '\t' 697*22dc650dSSadaf Ebrahimi #define CHAR_VT '\v' 698*22dc650dSSadaf Ebrahimi #define CHAR_FF '\f' 699*22dc650dSSadaf Ebrahimi #define CHAR_CR '\r' 700*22dc650dSSadaf Ebrahimi #define CHAR_BS '\b' 701*22dc650dSSadaf Ebrahimi #define CHAR_BEL '\a' 702*22dc650dSSadaf Ebrahimi 703*22dc650dSSadaf Ebrahimi #define CHAR_SPACE ' ' 704*22dc650dSSadaf Ebrahimi #define CHAR_EXCLAMATION_MARK '!' 
705*22dc650dSSadaf Ebrahimi #define CHAR_QUOTATION_MARK '"' 706*22dc650dSSadaf Ebrahimi #define CHAR_NUMBER_SIGN '#' 707*22dc650dSSadaf Ebrahimi #define CHAR_DOLLAR_SIGN '$' 708*22dc650dSSadaf Ebrahimi #define CHAR_PERCENT_SIGN '%' 709*22dc650dSSadaf Ebrahimi #define CHAR_AMPERSAND '&' 710*22dc650dSSadaf Ebrahimi #define CHAR_APOSTROPHE '\'' 711*22dc650dSSadaf Ebrahimi #define CHAR_LEFT_PARENTHESIS '(' 712*22dc650dSSadaf Ebrahimi #define CHAR_RIGHT_PARENTHESIS ')' 713*22dc650dSSadaf Ebrahimi #define CHAR_ASTERISK '*' 714*22dc650dSSadaf Ebrahimi #define CHAR_PLUS '+' 715*22dc650dSSadaf Ebrahimi #define CHAR_COMMA ',' 716*22dc650dSSadaf Ebrahimi #define CHAR_MINUS '-' 717*22dc650dSSadaf Ebrahimi #define CHAR_DOT '.' 718*22dc650dSSadaf Ebrahimi #define CHAR_SLASH '/' 719*22dc650dSSadaf Ebrahimi #define CHAR_0 '0' 720*22dc650dSSadaf Ebrahimi #define CHAR_1 '1' 721*22dc650dSSadaf Ebrahimi #define CHAR_2 '2' 722*22dc650dSSadaf Ebrahimi #define CHAR_3 '3' 723*22dc650dSSadaf Ebrahimi #define CHAR_4 '4' 724*22dc650dSSadaf Ebrahimi #define CHAR_5 '5' 725*22dc650dSSadaf Ebrahimi #define CHAR_6 '6' 726*22dc650dSSadaf Ebrahimi #define CHAR_7 '7' 727*22dc650dSSadaf Ebrahimi #define CHAR_8 '8' 728*22dc650dSSadaf Ebrahimi #define CHAR_9 '9' 729*22dc650dSSadaf Ebrahimi #define CHAR_COLON ':' 730*22dc650dSSadaf Ebrahimi #define CHAR_SEMICOLON ';' 731*22dc650dSSadaf Ebrahimi #define CHAR_LESS_THAN_SIGN '<' 732*22dc650dSSadaf Ebrahimi #define CHAR_EQUALS_SIGN '=' 733*22dc650dSSadaf Ebrahimi #define CHAR_GREATER_THAN_SIGN '>' 734*22dc650dSSadaf Ebrahimi #define CHAR_QUESTION_MARK '?' 
735*22dc650dSSadaf Ebrahimi #define CHAR_COMMERCIAL_AT '@' 736*22dc650dSSadaf Ebrahimi #define CHAR_A 'A' 737*22dc650dSSadaf Ebrahimi #define CHAR_B 'B' 738*22dc650dSSadaf Ebrahimi #define CHAR_C 'C' 739*22dc650dSSadaf Ebrahimi #define CHAR_D 'D' 740*22dc650dSSadaf Ebrahimi #define CHAR_E 'E' 741*22dc650dSSadaf Ebrahimi #define CHAR_F 'F' 742*22dc650dSSadaf Ebrahimi #define CHAR_G 'G' 743*22dc650dSSadaf Ebrahimi #define CHAR_H 'H' 744*22dc650dSSadaf Ebrahimi #define CHAR_I 'I' 745*22dc650dSSadaf Ebrahimi #define CHAR_J 'J' 746*22dc650dSSadaf Ebrahimi #define CHAR_K 'K' 747*22dc650dSSadaf Ebrahimi #define CHAR_L 'L' 748*22dc650dSSadaf Ebrahimi #define CHAR_M 'M' 749*22dc650dSSadaf Ebrahimi #define CHAR_N 'N' 750*22dc650dSSadaf Ebrahimi #define CHAR_O 'O' 751*22dc650dSSadaf Ebrahimi #define CHAR_P 'P' 752*22dc650dSSadaf Ebrahimi #define CHAR_Q 'Q' 753*22dc650dSSadaf Ebrahimi #define CHAR_R 'R' 754*22dc650dSSadaf Ebrahimi #define CHAR_S 'S' 755*22dc650dSSadaf Ebrahimi #define CHAR_T 'T' 756*22dc650dSSadaf Ebrahimi #define CHAR_U 'U' 757*22dc650dSSadaf Ebrahimi #define CHAR_V 'V' 758*22dc650dSSadaf Ebrahimi #define CHAR_W 'W' 759*22dc650dSSadaf Ebrahimi #define CHAR_X 'X' 760*22dc650dSSadaf Ebrahimi #define CHAR_Y 'Y' 761*22dc650dSSadaf Ebrahimi #define CHAR_Z 'Z' 762*22dc650dSSadaf Ebrahimi #define CHAR_LEFT_SQUARE_BRACKET '[' 763*22dc650dSSadaf Ebrahimi #define CHAR_BACKSLASH '\\' 764*22dc650dSSadaf Ebrahimi #define CHAR_RIGHT_SQUARE_BRACKET ']' 765*22dc650dSSadaf Ebrahimi #define CHAR_CIRCUMFLEX_ACCENT '^' 766*22dc650dSSadaf Ebrahimi #define CHAR_UNDERSCORE '_' 767*22dc650dSSadaf Ebrahimi #define CHAR_GRAVE_ACCENT '`' 768*22dc650dSSadaf Ebrahimi #define CHAR_a 'a' 769*22dc650dSSadaf Ebrahimi #define CHAR_b 'b' 770*22dc650dSSadaf Ebrahimi #define CHAR_c 'c' 771*22dc650dSSadaf Ebrahimi #define CHAR_d 'd' 772*22dc650dSSadaf Ebrahimi #define CHAR_e 'e' 773*22dc650dSSadaf Ebrahimi #define CHAR_f 'f' 774*22dc650dSSadaf Ebrahimi #define CHAR_g 'g' 775*22dc650dSSadaf 
Ebrahimi #define CHAR_h 'h' 776*22dc650dSSadaf Ebrahimi #define CHAR_i 'i' 777*22dc650dSSadaf Ebrahimi #define CHAR_j 'j' 778*22dc650dSSadaf Ebrahimi #define CHAR_k 'k' 779*22dc650dSSadaf Ebrahimi #define CHAR_l 'l' 780*22dc650dSSadaf Ebrahimi #define CHAR_m 'm' 781*22dc650dSSadaf Ebrahimi #define CHAR_n 'n' 782*22dc650dSSadaf Ebrahimi #define CHAR_o 'o' 783*22dc650dSSadaf Ebrahimi #define CHAR_p 'p' 784*22dc650dSSadaf Ebrahimi #define CHAR_q 'q' 785*22dc650dSSadaf Ebrahimi #define CHAR_r 'r' 786*22dc650dSSadaf Ebrahimi #define CHAR_s 's' 787*22dc650dSSadaf Ebrahimi #define CHAR_t 't' 788*22dc650dSSadaf Ebrahimi #define CHAR_u 'u' 789*22dc650dSSadaf Ebrahimi #define CHAR_v 'v' 790*22dc650dSSadaf Ebrahimi #define CHAR_w 'w' 791*22dc650dSSadaf Ebrahimi #define CHAR_x 'x' 792*22dc650dSSadaf Ebrahimi #define CHAR_y 'y' 793*22dc650dSSadaf Ebrahimi #define CHAR_z 'z' 794*22dc650dSSadaf Ebrahimi #define CHAR_LEFT_CURLY_BRACKET '{' 795*22dc650dSSadaf Ebrahimi #define CHAR_VERTICAL_LINE '|' 796*22dc650dSSadaf Ebrahimi #define CHAR_RIGHT_CURLY_BRACKET '}' 797*22dc650dSSadaf Ebrahimi #define CHAR_TILDE '~' 798*22dc650dSSadaf Ebrahimi 799*22dc650dSSadaf Ebrahimi #define STR_HT "\t" 800*22dc650dSSadaf Ebrahimi #define STR_VT "\v" 801*22dc650dSSadaf Ebrahimi #define STR_FF "\f" 802*22dc650dSSadaf Ebrahimi #define STR_CR "\r" 803*22dc650dSSadaf Ebrahimi #define STR_BS "\b" 804*22dc650dSSadaf Ebrahimi #define STR_BEL "\a" 805*22dc650dSSadaf Ebrahimi 806*22dc650dSSadaf Ebrahimi #define STR_SPACE " " 807*22dc650dSSadaf Ebrahimi #define STR_EXCLAMATION_MARK "!" 
808*22dc650dSSadaf Ebrahimi #define STR_QUOTATION_MARK "\"" 809*22dc650dSSadaf Ebrahimi #define STR_NUMBER_SIGN "#" 810*22dc650dSSadaf Ebrahimi #define STR_DOLLAR_SIGN "$" 811*22dc650dSSadaf Ebrahimi #define STR_PERCENT_SIGN "%" 812*22dc650dSSadaf Ebrahimi #define STR_AMPERSAND "&" 813*22dc650dSSadaf Ebrahimi #define STR_APOSTROPHE "'" 814*22dc650dSSadaf Ebrahimi #define STR_LEFT_PARENTHESIS "(" 815*22dc650dSSadaf Ebrahimi #define STR_RIGHT_PARENTHESIS ")" 816*22dc650dSSadaf Ebrahimi #define STR_ASTERISK "*" 817*22dc650dSSadaf Ebrahimi #define STR_PLUS "+" 818*22dc650dSSadaf Ebrahimi #define STR_COMMA "," 819*22dc650dSSadaf Ebrahimi #define STR_MINUS "-" 820*22dc650dSSadaf Ebrahimi #define STR_DOT "." 821*22dc650dSSadaf Ebrahimi #define STR_SLASH "/" 822*22dc650dSSadaf Ebrahimi #define STR_0 "0" 823*22dc650dSSadaf Ebrahimi #define STR_1 "1" 824*22dc650dSSadaf Ebrahimi #define STR_2 "2" 825*22dc650dSSadaf Ebrahimi #define STR_3 "3" 826*22dc650dSSadaf Ebrahimi #define STR_4 "4" 827*22dc650dSSadaf Ebrahimi #define STR_5 "5" 828*22dc650dSSadaf Ebrahimi #define STR_6 "6" 829*22dc650dSSadaf Ebrahimi #define STR_7 "7" 830*22dc650dSSadaf Ebrahimi #define STR_8 "8" 831*22dc650dSSadaf Ebrahimi #define STR_9 "9" 832*22dc650dSSadaf Ebrahimi #define STR_COLON ":" 833*22dc650dSSadaf Ebrahimi #define STR_SEMICOLON ";" 834*22dc650dSSadaf Ebrahimi #define STR_LESS_THAN_SIGN "<" 835*22dc650dSSadaf Ebrahimi #define STR_EQUALS_SIGN "=" 836*22dc650dSSadaf Ebrahimi #define STR_GREATER_THAN_SIGN ">" 837*22dc650dSSadaf Ebrahimi #define STR_QUESTION_MARK "?" 
838*22dc650dSSadaf Ebrahimi #define STR_COMMERCIAL_AT "@" 839*22dc650dSSadaf Ebrahimi #define STR_A "A" 840*22dc650dSSadaf Ebrahimi #define STR_B "B" 841*22dc650dSSadaf Ebrahimi #define STR_C "C" 842*22dc650dSSadaf Ebrahimi #define STR_D "D" 843*22dc650dSSadaf Ebrahimi #define STR_E "E" 844*22dc650dSSadaf Ebrahimi #define STR_F "F" 845*22dc650dSSadaf Ebrahimi #define STR_G "G" 846*22dc650dSSadaf Ebrahimi #define STR_H "H" 847*22dc650dSSadaf Ebrahimi #define STR_I "I" 848*22dc650dSSadaf Ebrahimi #define STR_J "J" 849*22dc650dSSadaf Ebrahimi #define STR_K "K" 850*22dc650dSSadaf Ebrahimi #define STR_L "L" 851*22dc650dSSadaf Ebrahimi #define STR_M "M" 852*22dc650dSSadaf Ebrahimi #define STR_N "N" 853*22dc650dSSadaf Ebrahimi #define STR_O "O" 854*22dc650dSSadaf Ebrahimi #define STR_P "P" 855*22dc650dSSadaf Ebrahimi #define STR_Q "Q" 856*22dc650dSSadaf Ebrahimi #define STR_R "R" 857*22dc650dSSadaf Ebrahimi #define STR_S "S" 858*22dc650dSSadaf Ebrahimi #define STR_T "T" 859*22dc650dSSadaf Ebrahimi #define STR_U "U" 860*22dc650dSSadaf Ebrahimi #define STR_V "V" 861*22dc650dSSadaf Ebrahimi #define STR_W "W" 862*22dc650dSSadaf Ebrahimi #define STR_X "X" 863*22dc650dSSadaf Ebrahimi #define STR_Y "Y" 864*22dc650dSSadaf Ebrahimi #define STR_Z "Z" 865*22dc650dSSadaf Ebrahimi #define STR_LEFT_SQUARE_BRACKET "[" 866*22dc650dSSadaf Ebrahimi #define STR_BACKSLASH "\\" 867*22dc650dSSadaf Ebrahimi #define STR_RIGHT_SQUARE_BRACKET "]" 868*22dc650dSSadaf Ebrahimi #define STR_CIRCUMFLEX_ACCENT "^" 869*22dc650dSSadaf Ebrahimi #define STR_UNDERSCORE "_" 870*22dc650dSSadaf Ebrahimi #define STR_GRAVE_ACCENT "`" 871*22dc650dSSadaf Ebrahimi #define STR_a "a" 872*22dc650dSSadaf Ebrahimi #define STR_b "b" 873*22dc650dSSadaf Ebrahimi #define STR_c "c" 874*22dc650dSSadaf Ebrahimi #define STR_d "d" 875*22dc650dSSadaf Ebrahimi #define STR_e "e" 876*22dc650dSSadaf Ebrahimi #define STR_f "f" 877*22dc650dSSadaf Ebrahimi #define STR_g "g" 878*22dc650dSSadaf Ebrahimi #define STR_h "h" 879*22dc650dSSadaf 
Ebrahimi #define STR_i "i" 880*22dc650dSSadaf Ebrahimi #define STR_j "j" 881*22dc650dSSadaf Ebrahimi #define STR_k "k" 882*22dc650dSSadaf Ebrahimi #define STR_l "l" 883*22dc650dSSadaf Ebrahimi #define STR_m "m" 884*22dc650dSSadaf Ebrahimi #define STR_n "n" 885*22dc650dSSadaf Ebrahimi #define STR_o "o" 886*22dc650dSSadaf Ebrahimi #define STR_p "p" 887*22dc650dSSadaf Ebrahimi #define STR_q "q" 888*22dc650dSSadaf Ebrahimi #define STR_r "r" 889*22dc650dSSadaf Ebrahimi #define STR_s "s" 890*22dc650dSSadaf Ebrahimi #define STR_t "t" 891*22dc650dSSadaf Ebrahimi #define STR_u "u" 892*22dc650dSSadaf Ebrahimi #define STR_v "v" 893*22dc650dSSadaf Ebrahimi #define STR_w "w" 894*22dc650dSSadaf Ebrahimi #define STR_x "x" 895*22dc650dSSadaf Ebrahimi #define STR_y "y" 896*22dc650dSSadaf Ebrahimi #define STR_z "z" 897*22dc650dSSadaf Ebrahimi #define STR_LEFT_CURLY_BRACKET "{" 898*22dc650dSSadaf Ebrahimi #define STR_VERTICAL_LINE "|" 899*22dc650dSSadaf Ebrahimi #define STR_RIGHT_CURLY_BRACKET "}" 900*22dc650dSSadaf Ebrahimi #define STR_TILDE "~" 901*22dc650dSSadaf Ebrahimi 902*22dc650dSSadaf Ebrahimi #define STRING_ACCEPT0 "ACCEPT\0" 903*22dc650dSSadaf Ebrahimi #define STRING_COMMIT0 "COMMIT\0" 904*22dc650dSSadaf Ebrahimi #define STRING_F0 "F\0" 905*22dc650dSSadaf Ebrahimi #define STRING_FAIL0 "FAIL\0" 906*22dc650dSSadaf Ebrahimi #define STRING_MARK0 "MARK\0" 907*22dc650dSSadaf Ebrahimi #define STRING_PRUNE0 "PRUNE\0" 908*22dc650dSSadaf Ebrahimi #define STRING_SKIP0 "SKIP\0" 909*22dc650dSSadaf Ebrahimi #define STRING_THEN "THEN" 910*22dc650dSSadaf Ebrahimi 911*22dc650dSSadaf Ebrahimi #define STRING_atomic0 "atomic\0" 912*22dc650dSSadaf Ebrahimi #define STRING_pla0 "pla\0" 913*22dc650dSSadaf Ebrahimi #define STRING_plb0 "plb\0" 914*22dc650dSSadaf Ebrahimi #define STRING_napla0 "napla\0" 915*22dc650dSSadaf Ebrahimi #define STRING_naplb0 "naplb\0" 916*22dc650dSSadaf Ebrahimi #define STRING_nla0 "nla\0" 917*22dc650dSSadaf Ebrahimi #define STRING_nlb0 "nlb\0" 918*22dc650dSSadaf Ebrahimi 
#define STRING_sr0 "sr\0" 919*22dc650dSSadaf Ebrahimi #define STRING_asr0 "asr\0" 920*22dc650dSSadaf Ebrahimi #define STRING_positive_lookahead0 "positive_lookahead\0" 921*22dc650dSSadaf Ebrahimi #define STRING_positive_lookbehind0 "positive_lookbehind\0" 922*22dc650dSSadaf Ebrahimi #define STRING_non_atomic_positive_lookahead0 "non_atomic_positive_lookahead\0" 923*22dc650dSSadaf Ebrahimi #define STRING_non_atomic_positive_lookbehind0 "non_atomic_positive_lookbehind\0" 924*22dc650dSSadaf Ebrahimi #define STRING_negative_lookahead0 "negative_lookahead\0" 925*22dc650dSSadaf Ebrahimi #define STRING_negative_lookbehind0 "negative_lookbehind\0" 926*22dc650dSSadaf Ebrahimi #define STRING_script_run0 "script_run\0" 927*22dc650dSSadaf Ebrahimi #define STRING_atomic_script_run "atomic_script_run" 928*22dc650dSSadaf Ebrahimi 929*22dc650dSSadaf Ebrahimi #define STRING_alpha0 "alpha\0" 930*22dc650dSSadaf Ebrahimi #define STRING_lower0 "lower\0" 931*22dc650dSSadaf Ebrahimi #define STRING_upper0 "upper\0" 932*22dc650dSSadaf Ebrahimi #define STRING_alnum0 "alnum\0" 933*22dc650dSSadaf Ebrahimi #define STRING_ascii0 "ascii\0" 934*22dc650dSSadaf Ebrahimi #define STRING_blank0 "blank\0" 935*22dc650dSSadaf Ebrahimi #define STRING_cntrl0 "cntrl\0" 936*22dc650dSSadaf Ebrahimi #define STRING_digit0 "digit\0" 937*22dc650dSSadaf Ebrahimi #define STRING_graph0 "graph\0" 938*22dc650dSSadaf Ebrahimi #define STRING_print0 "print\0" 939*22dc650dSSadaf Ebrahimi #define STRING_punct0 "punct\0" 940*22dc650dSSadaf Ebrahimi #define STRING_space0 "space\0" 941*22dc650dSSadaf Ebrahimi #define STRING_word0 "word\0" 942*22dc650dSSadaf Ebrahimi #define STRING_xdigit "xdigit" 943*22dc650dSSadaf Ebrahimi 944*22dc650dSSadaf Ebrahimi #define STRING_DEFINE "DEFINE" 945*22dc650dSSadaf Ebrahimi #define STRING_VERSION "VERSION" 946*22dc650dSSadaf Ebrahimi #define STRING_WEIRD_STARTWORD "[:<:]]" 947*22dc650dSSadaf Ebrahimi #define STRING_WEIRD_ENDWORD "[:>:]]" 948*22dc650dSSadaf Ebrahimi 949*22dc650dSSadaf 
Ebrahimi #define STRING_CR_RIGHTPAR "CR)" 950*22dc650dSSadaf Ebrahimi #define STRING_LF_RIGHTPAR "LF)" 951*22dc650dSSadaf Ebrahimi #define STRING_CRLF_RIGHTPAR "CRLF)" 952*22dc650dSSadaf Ebrahimi #define STRING_ANY_RIGHTPAR "ANY)" 953*22dc650dSSadaf Ebrahimi #define STRING_ANYCRLF_RIGHTPAR "ANYCRLF)" 954*22dc650dSSadaf Ebrahimi #define STRING_NUL_RIGHTPAR "NUL)" 955*22dc650dSSadaf Ebrahimi #define STRING_BSR_ANYCRLF_RIGHTPAR "BSR_ANYCRLF)" 956*22dc650dSSadaf Ebrahimi #define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)" 957*22dc650dSSadaf Ebrahimi #define STRING_UTF8_RIGHTPAR "UTF8)" 958*22dc650dSSadaf Ebrahimi #define STRING_UTF16_RIGHTPAR "UTF16)" 959*22dc650dSSadaf Ebrahimi #define STRING_UTF32_RIGHTPAR "UTF32)" 960*22dc650dSSadaf Ebrahimi #define STRING_UTF_RIGHTPAR "UTF)" 961*22dc650dSSadaf Ebrahimi #define STRING_UCP_RIGHTPAR "UCP)" 962*22dc650dSSadaf Ebrahimi #define STRING_NO_AUTO_POSSESS_RIGHTPAR "NO_AUTO_POSSESS)" 963*22dc650dSSadaf Ebrahimi #define STRING_NO_DOTSTAR_ANCHOR_RIGHTPAR "NO_DOTSTAR_ANCHOR)" 964*22dc650dSSadaf Ebrahimi #define STRING_NO_JIT_RIGHTPAR "NO_JIT)" 965*22dc650dSSadaf Ebrahimi #define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)" 966*22dc650dSSadaf Ebrahimi #define STRING_NOTEMPTY_RIGHTPAR "NOTEMPTY)" 967*22dc650dSSadaf Ebrahimi #define STRING_NOTEMPTY_ATSTART_RIGHTPAR "NOTEMPTY_ATSTART)" 968*22dc650dSSadaf Ebrahimi #define STRING_LIMIT_HEAP_EQ "LIMIT_HEAP=" 969*22dc650dSSadaf Ebrahimi #define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH=" 970*22dc650dSSadaf Ebrahimi #define STRING_LIMIT_DEPTH_EQ "LIMIT_DEPTH=" 971*22dc650dSSadaf Ebrahimi #define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION=" 972*22dc650dSSadaf Ebrahimi #define STRING_MARK "MARK" 973*22dc650dSSadaf Ebrahimi 974*22dc650dSSadaf Ebrahimi #define STRING_bc "bc" 975*22dc650dSSadaf Ebrahimi #define STRING_bidiclass "bidiclass" 976*22dc650dSSadaf Ebrahimi #define STRING_sc "sc" 977*22dc650dSSadaf Ebrahimi #define STRING_script "script" 978*22dc650dSSadaf Ebrahimi #define 
STRING_scriptextensions "scriptextensions" 979*22dc650dSSadaf Ebrahimi #define STRING_scx "scx" 980*22dc650dSSadaf Ebrahimi 981*22dc650dSSadaf Ebrahimi #else /* SUPPORT_UNICODE */ 982*22dc650dSSadaf Ebrahimi 983*22dc650dSSadaf Ebrahimi /* UTF-8 support is enabled; always use UTF-8 (=ASCII) character codes. This 984*22dc650dSSadaf Ebrahimi works in both modes on non-EBCDIC platforms, and on EBCDIC platforms in UTF-8 mode 985*22dc650dSSadaf Ebrahimi only. */ 986*22dc650dSSadaf Ebrahimi 987*22dc650dSSadaf Ebrahimi #define CHAR_HT '\011' 988*22dc650dSSadaf Ebrahimi #define CHAR_VT '\013' 989*22dc650dSSadaf Ebrahimi #define CHAR_FF '\014' 990*22dc650dSSadaf Ebrahimi #define CHAR_CR '\015' 991*22dc650dSSadaf Ebrahimi #define CHAR_LF '\012' 992*22dc650dSSadaf Ebrahimi #define CHAR_NL CHAR_LF 993*22dc650dSSadaf Ebrahimi #define CHAR_NEL ((unsigned char)'\x85') 994*22dc650dSSadaf Ebrahimi #define CHAR_BS '\010' 995*22dc650dSSadaf Ebrahimi #define CHAR_BEL '\007' 996*22dc650dSSadaf Ebrahimi #define CHAR_ESC '\033' 997*22dc650dSSadaf Ebrahimi #define CHAR_DEL '\177' 998*22dc650dSSadaf Ebrahimi 999*22dc650dSSadaf Ebrahimi #define CHAR_NUL '\0' 1000*22dc650dSSadaf Ebrahimi #define CHAR_SPACE '\040' 1001*22dc650dSSadaf Ebrahimi #define CHAR_EXCLAMATION_MARK '\041' 1002*22dc650dSSadaf Ebrahimi #define CHAR_QUOTATION_MARK '\042' 1003*22dc650dSSadaf Ebrahimi #define CHAR_NUMBER_SIGN '\043' 1004*22dc650dSSadaf Ebrahimi #define CHAR_DOLLAR_SIGN '\044' 1005*22dc650dSSadaf Ebrahimi #define CHAR_PERCENT_SIGN '\045' 1006*22dc650dSSadaf Ebrahimi #define CHAR_AMPERSAND '\046' 1007*22dc650dSSadaf Ebrahimi #define CHAR_APOSTROPHE '\047' 1008*22dc650dSSadaf Ebrahimi #define CHAR_LEFT_PARENTHESIS '\050' 1009*22dc650dSSadaf Ebrahimi #define CHAR_RIGHT_PARENTHESIS '\051' 1010*22dc650dSSadaf Ebrahimi #define CHAR_ASTERISK '\052' 1011*22dc650dSSadaf Ebrahimi #define CHAR_PLUS '\053' 1012*22dc650dSSadaf Ebrahimi #define CHAR_COMMA '\054' 1013*22dc650dSSadaf Ebrahimi #define CHAR_MINUS '\055'
1014*22dc650dSSadaf Ebrahimi #define CHAR_DOT '\056' 1015*22dc650dSSadaf Ebrahimi #define CHAR_SLASH '\057' 1016*22dc650dSSadaf Ebrahimi #define CHAR_0 '\060' 1017*22dc650dSSadaf Ebrahimi #define CHAR_1 '\061' 1018*22dc650dSSadaf Ebrahimi #define CHAR_2 '\062' 1019*22dc650dSSadaf Ebrahimi #define CHAR_3 '\063' 1020*22dc650dSSadaf Ebrahimi #define CHAR_4 '\064' 1021*22dc650dSSadaf Ebrahimi #define CHAR_5 '\065' 1022*22dc650dSSadaf Ebrahimi #define CHAR_6 '\066' 1023*22dc650dSSadaf Ebrahimi #define CHAR_7 '\067' 1024*22dc650dSSadaf Ebrahimi #define CHAR_8 '\070' 1025*22dc650dSSadaf Ebrahimi #define CHAR_9 '\071' 1026*22dc650dSSadaf Ebrahimi #define CHAR_COLON '\072' 1027*22dc650dSSadaf Ebrahimi #define CHAR_SEMICOLON '\073' 1028*22dc650dSSadaf Ebrahimi #define CHAR_LESS_THAN_SIGN '\074' 1029*22dc650dSSadaf Ebrahimi #define CHAR_EQUALS_SIGN '\075' 1030*22dc650dSSadaf Ebrahimi #define CHAR_GREATER_THAN_SIGN '\076' 1031*22dc650dSSadaf Ebrahimi #define CHAR_QUESTION_MARK '\077' 1032*22dc650dSSadaf Ebrahimi #define CHAR_COMMERCIAL_AT '\100' 1033*22dc650dSSadaf Ebrahimi #define CHAR_A '\101' 1034*22dc650dSSadaf Ebrahimi #define CHAR_B '\102' 1035*22dc650dSSadaf Ebrahimi #define CHAR_C '\103' 1036*22dc650dSSadaf Ebrahimi #define CHAR_D '\104' 1037*22dc650dSSadaf Ebrahimi #define CHAR_E '\105' 1038*22dc650dSSadaf Ebrahimi #define CHAR_F '\106' 1039*22dc650dSSadaf Ebrahimi #define CHAR_G '\107' 1040*22dc650dSSadaf Ebrahimi #define CHAR_H '\110' 1041*22dc650dSSadaf Ebrahimi #define CHAR_I '\111' 1042*22dc650dSSadaf Ebrahimi #define CHAR_J '\112' 1043*22dc650dSSadaf Ebrahimi #define CHAR_K '\113' 1044*22dc650dSSadaf Ebrahimi #define CHAR_L '\114' 1045*22dc650dSSadaf Ebrahimi #define CHAR_M '\115' 1046*22dc650dSSadaf Ebrahimi #define CHAR_N '\116' 1047*22dc650dSSadaf Ebrahimi #define CHAR_O '\117' 1048*22dc650dSSadaf Ebrahimi #define CHAR_P '\120' 1049*22dc650dSSadaf Ebrahimi #define CHAR_Q '\121' 1050*22dc650dSSadaf Ebrahimi #define CHAR_R '\122' 1051*22dc650dSSadaf Ebrahimi 
/* Character codes for 'S' through 'x' in the UTF-8 (= ASCII) table. Each value
is spelled as an octal escape, so it is a fixed number that does not depend on
the compiler's execution character set. One directive per line: a #define is
only recognised when '#' is the first token on its line. */

#define CHAR_S                      '\123'
#define CHAR_T                      '\124'
#define CHAR_U                      '\125'
#define CHAR_V                      '\126'
#define CHAR_W                      '\127'
#define CHAR_X                      '\130'
#define CHAR_Y                      '\131'
#define CHAR_Z                      '\132'
#define CHAR_LEFT_SQUARE_BRACKET    '\133'
#define CHAR_BACKSLASH              '\134'
#define CHAR_RIGHT_SQUARE_BRACKET   '\135'
#define CHAR_CIRCUMFLEX_ACCENT      '\136'
#define CHAR_UNDERSCORE             '\137'
#define CHAR_GRAVE_ACCENT           '\140'
#define CHAR_a                      '\141'
#define CHAR_b                      '\142'
#define CHAR_c                      '\143'
#define CHAR_d                      '\144'
#define CHAR_e                      '\145'
#define CHAR_f                      '\146'
#define CHAR_g                      '\147'
#define CHAR_h                      '\150'
#define CHAR_i                      '\151'
#define CHAR_j                      '\152'
#define CHAR_k                      '\153'
#define CHAR_l                      '\154'
#define CHAR_m                      '\155'
#define CHAR_n                      '\156'
#define CHAR_o                      '\157'
#define CHAR_p                      '\160'
#define CHAR_q                      '\161'
#define CHAR_r                      '\162'
#define CHAR_s                      '\163'
#define CHAR_t                      '\164'
#define CHAR_u                      '\165'
#define CHAR_v                      '\166'
#define CHAR_w                      '\167'
#define CHAR_x                      '\170'
/* Character codes for 'y' through '~' plus NBSP in the UTF-8 (= ASCII) table,
spelled as numeric escapes so the values do not depend on the compiler's
execution character set. One directive per line: a #define is only recognised
when '#' is the first token on its line. */

#define CHAR_y                      '\171'
#define CHAR_z                      '\172'
#define CHAR_LEFT_CURLY_BRACKET     '\173'
#define CHAR_VERTICAL_LINE          '\174'
#define CHAR_RIGHT_CURLY_BRACKET    '\175'
#define CHAR_TILDE                  '\176'

/* The cast keeps the value at 0xa0 (160) even where plain char is signed and
'\xa0' on its own would be negative. */

#define CHAR_NBSP                   ((unsigned char)'\xa0')

/* One-character string literals holding the same codes as the CHAR_xxx
macros. Being string literals, adjacent STR_xxx macros concatenate into longer
strings, which is how the STRING_xxx macros in this table are built. */

#define STR_HT                      "\011"
#define STR_VT                      "\013"
#define STR_FF                      "\014"
#define STR_CR                      "\015"
#define STR_NL                      "\012"
#define STR_BS                      "\010"
#define STR_BEL                     "\007"
#define STR_ESC                     "\033"
#define STR_DEL                     "\177"

#define STR_SPACE                   "\040"
#define STR_EXCLAMATION_MARK        "\041"
#define STR_QUOTATION_MARK          "\042"
#define STR_NUMBER_SIGN             "\043"
#define STR_DOLLAR_SIGN             "\044"
#define STR_PERCENT_SIGN            "\045"
#define STR_AMPERSAND               "\046"
#define STR_APOSTROPHE              "\047"
#define STR_LEFT_PARENTHESIS        "\050"
#define STR_RIGHT_PARENTHESIS       "\051"
#define STR_ASTERISK                "\052"
#define STR_PLUS                    "\053"
#define STR_COMMA                   "\054"
#define STR_MINUS                   "\055"
#define STR_DOT                     "\056"
#define STR_SLASH                   "\057"
#define STR_0                       "\060"
#define STR_1                       "\061"
/* One-character string literals for '2' through 'W' in the UTF-8 (= ASCII)
table, spelled as octal escapes so the bytes do not depend on the compiler's
execution character set. Adjacent STR_xxx macros concatenate into longer string
literals. One directive per line: a #define is only recognised when '#' is the
first token on its line. */

#define STR_2                       "\062"
#define STR_3                       "\063"
#define STR_4                       "\064"
#define STR_5                       "\065"
#define STR_6                       "\066"
#define STR_7                       "\067"
#define STR_8                       "\070"
#define STR_9                       "\071"
#define STR_COLON                   "\072"
#define STR_SEMICOLON               "\073"
#define STR_LESS_THAN_SIGN          "\074"
#define STR_EQUALS_SIGN             "\075"
#define STR_GREATER_THAN_SIGN       "\076"
#define STR_QUESTION_MARK           "\077"
#define STR_COMMERCIAL_AT           "\100"
#define STR_A                       "\101"
#define STR_B                       "\102"
#define STR_C                       "\103"
#define STR_D                       "\104"
#define STR_E                       "\105"
#define STR_F                       "\106"
#define STR_G                       "\107"
#define STR_H                       "\110"
#define STR_I                       "\111"
#define STR_J                       "\112"
#define STR_K                       "\113"
#define STR_L                       "\114"
#define STR_M                       "\115"
#define STR_N                       "\116"
#define STR_O                       "\117"
#define STR_P                       "\120"
#define STR_Q                       "\121"
#define STR_R                       "\122"
#define STR_S                       "\123"
#define STR_T                       "\124"
#define STR_U                       "\125"
#define STR_V                       "\126"
#define STR_W                       "\127"
Ebrahimi #define STR_X "\130" 1164*22dc650dSSadaf Ebrahimi #define STR_Y "\131" 1165*22dc650dSSadaf Ebrahimi #define STR_Z "\132" 1166*22dc650dSSadaf Ebrahimi #define STR_LEFT_SQUARE_BRACKET "\133" 1167*22dc650dSSadaf Ebrahimi #define STR_BACKSLASH "\134" 1168*22dc650dSSadaf Ebrahimi #define STR_RIGHT_SQUARE_BRACKET "\135" 1169*22dc650dSSadaf Ebrahimi #define STR_CIRCUMFLEX_ACCENT "\136" 1170*22dc650dSSadaf Ebrahimi #define STR_UNDERSCORE "\137" 1171*22dc650dSSadaf Ebrahimi #define STR_GRAVE_ACCENT "\140" 1172*22dc650dSSadaf Ebrahimi #define STR_a "\141" 1173*22dc650dSSadaf Ebrahimi #define STR_b "\142" 1174*22dc650dSSadaf Ebrahimi #define STR_c "\143" 1175*22dc650dSSadaf Ebrahimi #define STR_d "\144" 1176*22dc650dSSadaf Ebrahimi #define STR_e "\145" 1177*22dc650dSSadaf Ebrahimi #define STR_f "\146" 1178*22dc650dSSadaf Ebrahimi #define STR_g "\147" 1179*22dc650dSSadaf Ebrahimi #define STR_h "\150" 1180*22dc650dSSadaf Ebrahimi #define STR_i "\151" 1181*22dc650dSSadaf Ebrahimi #define STR_j "\152" 1182*22dc650dSSadaf Ebrahimi #define STR_k "\153" 1183*22dc650dSSadaf Ebrahimi #define STR_l "\154" 1184*22dc650dSSadaf Ebrahimi #define STR_m "\155" 1185*22dc650dSSadaf Ebrahimi #define STR_n "\156" 1186*22dc650dSSadaf Ebrahimi #define STR_o "\157" 1187*22dc650dSSadaf Ebrahimi #define STR_p "\160" 1188*22dc650dSSadaf Ebrahimi #define STR_q "\161" 1189*22dc650dSSadaf Ebrahimi #define STR_r "\162" 1190*22dc650dSSadaf Ebrahimi #define STR_s "\163" 1191*22dc650dSSadaf Ebrahimi #define STR_t "\164" 1192*22dc650dSSadaf Ebrahimi #define STR_u "\165" 1193*22dc650dSSadaf Ebrahimi #define STR_v "\166" 1194*22dc650dSSadaf Ebrahimi #define STR_w "\167" 1195*22dc650dSSadaf Ebrahimi #define STR_x "\170" 1196*22dc650dSSadaf Ebrahimi #define STR_y "\171" 1197*22dc650dSSadaf Ebrahimi #define STR_z "\172" 1198*22dc650dSSadaf Ebrahimi #define STR_LEFT_CURLY_BRACKET "\173" 1199*22dc650dSSadaf Ebrahimi #define STR_VERTICAL_LINE "\174" 1200*22dc650dSSadaf Ebrahimi #define 
STR_RIGHT_CURLY_BRACKET "\175" 1201*22dc650dSSadaf Ebrahimi #define STR_TILDE "\176" 1202*22dc650dSSadaf Ebrahimi 1203*22dc650dSSadaf Ebrahimi #define STRING_ACCEPT0 STR_A STR_C STR_C STR_E STR_P STR_T "\0" 1204*22dc650dSSadaf Ebrahimi #define STRING_COMMIT0 STR_C STR_O STR_M STR_M STR_I STR_T "\0" 1205*22dc650dSSadaf Ebrahimi #define STRING_F0 STR_F "\0" 1206*22dc650dSSadaf Ebrahimi #define STRING_FAIL0 STR_F STR_A STR_I STR_L "\0" 1207*22dc650dSSadaf Ebrahimi #define STRING_MARK0 STR_M STR_A STR_R STR_K "\0" 1208*22dc650dSSadaf Ebrahimi #define STRING_PRUNE0 STR_P STR_R STR_U STR_N STR_E "\0" 1209*22dc650dSSadaf Ebrahimi #define STRING_SKIP0 STR_S STR_K STR_I STR_P "\0" 1210*22dc650dSSadaf Ebrahimi #define STRING_THEN STR_T STR_H STR_E STR_N 1211*22dc650dSSadaf Ebrahimi 1212*22dc650dSSadaf Ebrahimi #define STRING_atomic0 STR_a STR_t STR_o STR_m STR_i STR_c "\0" 1213*22dc650dSSadaf Ebrahimi #define STRING_pla0 STR_p STR_l STR_a "\0" 1214*22dc650dSSadaf Ebrahimi #define STRING_plb0 STR_p STR_l STR_b "\0" 1215*22dc650dSSadaf Ebrahimi #define STRING_napla0 STR_n STR_a STR_p STR_l STR_a "\0" 1216*22dc650dSSadaf Ebrahimi #define STRING_naplb0 STR_n STR_a STR_p STR_l STR_b "\0" 1217*22dc650dSSadaf Ebrahimi #define STRING_nla0 STR_n STR_l STR_a "\0" 1218*22dc650dSSadaf Ebrahimi #define STRING_nlb0 STR_n STR_l STR_b "\0" 1219*22dc650dSSadaf Ebrahimi #define STRING_sr0 STR_s STR_r "\0" 1220*22dc650dSSadaf Ebrahimi #define STRING_asr0 STR_a STR_s STR_r "\0" 1221*22dc650dSSadaf Ebrahimi #define STRING_positive_lookahead0 STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_a STR_h STR_e STR_a STR_d "\0" 1222*22dc650dSSadaf Ebrahimi #define STRING_positive_lookbehind0 STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_b STR_e STR_h STR_i STR_n STR_d "\0" 1223*22dc650dSSadaf Ebrahimi #define STRING_non_atomic_positive_lookahead0 STR_n STR_o STR_n STR_UNDERSCORE STR_a STR_t STR_o STR_m STR_i STR_c 
STR_UNDERSCORE STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_a STR_h STR_e STR_a STR_d "\0" 1224*22dc650dSSadaf Ebrahimi #define STRING_non_atomic_positive_lookbehind0 STR_n STR_o STR_n STR_UNDERSCORE STR_a STR_t STR_o STR_m STR_i STR_c STR_UNDERSCORE STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_b STR_e STR_h STR_i STR_n STR_d "\0" 1225*22dc650dSSadaf Ebrahimi #define STRING_negative_lookahead0 STR_n STR_e STR_g STR_a STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_a STR_h STR_e STR_a STR_d "\0" 1226*22dc650dSSadaf Ebrahimi #define STRING_negative_lookbehind0 STR_n STR_e STR_g STR_a STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_b STR_e STR_h STR_i STR_n STR_d "\0" 1227*22dc650dSSadaf Ebrahimi #define STRING_script_run0 STR_s STR_c STR_r STR_i STR_p STR_t STR_UNDERSCORE STR_r STR_u STR_n "\0" 1228*22dc650dSSadaf Ebrahimi #define STRING_atomic_script_run STR_a STR_t STR_o STR_m STR_i STR_c STR_UNDERSCORE STR_s STR_c STR_r STR_i STR_p STR_t STR_UNDERSCORE STR_r STR_u STR_n 1229*22dc650dSSadaf Ebrahimi 1230*22dc650dSSadaf Ebrahimi #define STRING_alpha0 STR_a STR_l STR_p STR_h STR_a "\0" 1231*22dc650dSSadaf Ebrahimi #define STRING_lower0 STR_l STR_o STR_w STR_e STR_r "\0" 1232*22dc650dSSadaf Ebrahimi #define STRING_upper0 STR_u STR_p STR_p STR_e STR_r "\0" 1233*22dc650dSSadaf Ebrahimi #define STRING_alnum0 STR_a STR_l STR_n STR_u STR_m "\0" 1234*22dc650dSSadaf Ebrahimi #define STRING_ascii0 STR_a STR_s STR_c STR_i STR_i "\0" 1235*22dc650dSSadaf Ebrahimi #define STRING_blank0 STR_b STR_l STR_a STR_n STR_k "\0" 1236*22dc650dSSadaf Ebrahimi #define STRING_cntrl0 STR_c STR_n STR_t STR_r STR_l "\0" 1237*22dc650dSSadaf Ebrahimi #define STRING_digit0 STR_d STR_i STR_g STR_i STR_t "\0" 1238*22dc650dSSadaf Ebrahimi #define STRING_graph0 STR_g STR_r STR_a STR_p STR_h "\0" 1239*22dc650dSSadaf Ebrahimi #define STRING_print0 STR_p STR_r STR_i STR_n STR_t 
"\0" 1240*22dc650dSSadaf Ebrahimi #define STRING_punct0 STR_p STR_u STR_n STR_c STR_t "\0" 1241*22dc650dSSadaf Ebrahimi #define STRING_space0 STR_s STR_p STR_a STR_c STR_e "\0" 1242*22dc650dSSadaf Ebrahimi #define STRING_word0 STR_w STR_o STR_r STR_d "\0" 1243*22dc650dSSadaf Ebrahimi #define STRING_xdigit STR_x STR_d STR_i STR_g STR_i STR_t 1244*22dc650dSSadaf Ebrahimi 1245*22dc650dSSadaf Ebrahimi #define STRING_DEFINE STR_D STR_E STR_F STR_I STR_N STR_E 1246*22dc650dSSadaf Ebrahimi #define STRING_VERSION STR_V STR_E STR_R STR_S STR_I STR_O STR_N 1247*22dc650dSSadaf Ebrahimi #define STRING_WEIRD_STARTWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_LESS_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET 1248*22dc650dSSadaf Ebrahimi #define STRING_WEIRD_ENDWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_GREATER_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET 1249*22dc650dSSadaf Ebrahimi 1250*22dc650dSSadaf Ebrahimi #define STRING_CR_RIGHTPAR STR_C STR_R STR_RIGHT_PARENTHESIS 1251*22dc650dSSadaf Ebrahimi #define STRING_LF_RIGHTPAR STR_L STR_F STR_RIGHT_PARENTHESIS 1252*22dc650dSSadaf Ebrahimi #define STRING_CRLF_RIGHTPAR STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS 1253*22dc650dSSadaf Ebrahimi #define STRING_ANY_RIGHTPAR STR_A STR_N STR_Y STR_RIGHT_PARENTHESIS 1254*22dc650dSSadaf Ebrahimi #define STRING_ANYCRLF_RIGHTPAR STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS 1255*22dc650dSSadaf Ebrahimi #define STRING_NUL_RIGHTPAR STR_N STR_U STR_L STR_RIGHT_PARENTHESIS 1256*22dc650dSSadaf Ebrahimi #define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS 1257*22dc650dSSadaf Ebrahimi #define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS 1258*22dc650dSSadaf Ebrahimi #define STRING_UTF8_RIGHTPAR STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS 1259*22dc650dSSadaf Ebrahimi #define 
STRING_UTF16_RIGHTPAR STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS 1260*22dc650dSSadaf Ebrahimi #define STRING_UTF32_RIGHTPAR STR_U STR_T STR_F STR_3 STR_2 STR_RIGHT_PARENTHESIS 1261*22dc650dSSadaf Ebrahimi #define STRING_UTF_RIGHTPAR STR_U STR_T STR_F STR_RIGHT_PARENTHESIS 1262*22dc650dSSadaf Ebrahimi #define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS 1263*22dc650dSSadaf Ebrahimi #define STRING_NO_AUTO_POSSESS_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_A STR_U STR_T STR_O STR_UNDERSCORE STR_P STR_O STR_S STR_S STR_E STR_S STR_S STR_RIGHT_PARENTHESIS 1264*22dc650dSSadaf Ebrahimi #define STRING_NO_DOTSTAR_ANCHOR_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_D STR_O STR_T STR_S STR_T STR_A STR_R STR_UNDERSCORE STR_A STR_N STR_C STR_H STR_O STR_R STR_RIGHT_PARENTHESIS 1265*22dc650dSSadaf Ebrahimi #define STRING_NO_JIT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_J STR_I STR_T STR_RIGHT_PARENTHESIS 1266*22dc650dSSadaf Ebrahimi #define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS 1267*22dc650dSSadaf Ebrahimi #define STRING_NOTEMPTY_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_RIGHT_PARENTHESIS 1268*22dc650dSSadaf Ebrahimi #define STRING_NOTEMPTY_ATSTART_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_UNDERSCORE STR_A STR_T STR_S STR_T STR_A STR_R STR_T STR_RIGHT_PARENTHESIS 1269*22dc650dSSadaf Ebrahimi #define STRING_LIMIT_HEAP_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_H STR_E STR_A STR_P STR_EQUALS_SIGN 1270*22dc650dSSadaf Ebrahimi #define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN 1271*22dc650dSSadaf Ebrahimi #define STRING_LIMIT_DEPTH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_D STR_E STR_P STR_T STR_H STR_EQUALS_SIGN 1272*22dc650dSSadaf Ebrahimi #define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U 
STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN 1273*22dc650dSSadaf Ebrahimi #define STRING_MARK STR_M STR_A STR_R STR_K 1274*22dc650dSSadaf Ebrahimi 1275*22dc650dSSadaf Ebrahimi #define STRING_bc STR_b STR_c 1276*22dc650dSSadaf Ebrahimi #define STRING_bidiclass STR_b STR_i STR_d STR_i STR_c STR_l STR_a STR_s STR_s 1277*22dc650dSSadaf Ebrahimi #define STRING_sc STR_s STR_c 1278*22dc650dSSadaf Ebrahimi #define STRING_script STR_s STR_c STR_r STR_i STR_p STR_t 1279*22dc650dSSadaf Ebrahimi #define STRING_scriptextensions STR_s STR_c STR_r STR_i STR_p STR_t STR_e STR_x STR_t STR_e STR_n STR_s STR_i STR_o STR_n STR_s 1280*22dc650dSSadaf Ebrahimi #define STRING_scx STR_s STR_c STR_x 1281*22dc650dSSadaf Ebrahimi 1282*22dc650dSSadaf Ebrahimi 1283*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE */ 1284*22dc650dSSadaf Ebrahimi 1285*22dc650dSSadaf Ebrahimi /* -------------------- End of character and string names -------------------*/ 1286*22dc650dSSadaf Ebrahimi 1287*22dc650dSSadaf Ebrahimi /* -------------------- Definitions for compiled patterns -------------------*/ 1288*22dc650dSSadaf Ebrahimi 1289*22dc650dSSadaf Ebrahimi /* Codes for different types of Unicode property. If these definitions are 1290*22dc650dSSadaf Ebrahimi changed, the autopossessifying table in pcre2_auto_possess.c must be updated to 1291*22dc650dSSadaf Ebrahimi match. */ 1292*22dc650dSSadaf Ebrahimi 1293*22dc650dSSadaf Ebrahimi #define PT_ANY 0 /* Any property - matches all chars */ 1294*22dc650dSSadaf Ebrahimi #define PT_LAMP 1 /* L& - the union of Lu, Ll, Lt */ 1295*22dc650dSSadaf Ebrahimi #define PT_GC 2 /* Specified general characteristic (e.g. L) */ 1296*22dc650dSSadaf Ebrahimi #define PT_PC 3 /* Specified particular characteristic (e.g. Lu) */ 1297*22dc650dSSadaf Ebrahimi #define PT_SC 4 /* Script only (e.g. 
Han) */ 1298*22dc650dSSadaf Ebrahimi #define PT_SCX 5 /* Script extensions (includes SC) */ 1299*22dc650dSSadaf Ebrahimi #define PT_ALNUM 6 /* Alphanumeric - the union of L and N */ 1300*22dc650dSSadaf Ebrahimi #define PT_SPACE 7 /* Perl space - general category Z plus 9,10,12,13 */ 1301*22dc650dSSadaf Ebrahimi #define PT_PXSPACE 8 /* POSIX space - Z plus 9,10,11,12,13 */ 1302*22dc650dSSadaf Ebrahimi #define PT_WORD 9 /* Word - L, N, Mn, or Pc */ 1303*22dc650dSSadaf Ebrahimi #define PT_CLIST 10 /* Pseudo-property: match character list */ 1304*22dc650dSSadaf Ebrahimi #define PT_UCNC 11 /* Universal Character nameable character */ 1305*22dc650dSSadaf Ebrahimi #define PT_BIDICL 12 /* Specified bidi class */ 1306*22dc650dSSadaf Ebrahimi #define PT_BOOL 13 /* Boolean property */ 1307*22dc650dSSadaf Ebrahimi #define PT_TABSIZE 14 /* Size of square table for autopossessify tests */ 1308*22dc650dSSadaf Ebrahimi 1309*22dc650dSSadaf Ebrahimi /* The following special properties are used only in XCLASS items, when POSIX 1310*22dc650dSSadaf Ebrahimi classes are specified and PCRE2_UCP is set - in other words, for Unicode 1311*22dc650dSSadaf Ebrahimi handling of these classes. They are not available via the \p or \P escapes like 1312*22dc650dSSadaf Ebrahimi those in the above list, and so they do not take part in the autopossessifying 1313*22dc650dSSadaf Ebrahimi table. 
*/ 1314*22dc650dSSadaf Ebrahimi 1315*22dc650dSSadaf Ebrahimi #define PT_PXGRAPH 14 /* [:graph:] - characters that mark the paper */ 1316*22dc650dSSadaf Ebrahimi #define PT_PXPRINT 15 /* [:print:] - [:graph:] plus non-control spaces */ 1317*22dc650dSSadaf Ebrahimi #define PT_PXPUNCT 16 /* [:punct:] - punctuation characters */ 1318*22dc650dSSadaf Ebrahimi #define PT_PXXDIGIT 17 /* [:xdigit:] - hex digits */ 1319*22dc650dSSadaf Ebrahimi 1320*22dc650dSSadaf Ebrahimi /* This value is used when parsing \p and \P escapes to indicate that neither 1321*22dc650dSSadaf Ebrahimi \p{script:...} nor \p{scx:...} has been encountered. */ 1322*22dc650dSSadaf Ebrahimi 1323*22dc650dSSadaf Ebrahimi #define PT_NOTSCRIPT 255 1324*22dc650dSSadaf Ebrahimi 1325*22dc650dSSadaf Ebrahimi /* Flag bits and data types for the extended class (OP_XCLASS) for classes that 1326*22dc650dSSadaf Ebrahimi contain characters with values greater than 255. */ 1327*22dc650dSSadaf Ebrahimi 1328*22dc650dSSadaf Ebrahimi #define XCL_NOT 0x01 /* Flag: this is a negative class */ 1329*22dc650dSSadaf Ebrahimi #define XCL_MAP 0x02 /* Flag: a 32-byte map is present */ 1330*22dc650dSSadaf Ebrahimi #define XCL_HASPROP 0x04 /* Flag: property checks are present. */ 1331*22dc650dSSadaf Ebrahimi 1332*22dc650dSSadaf Ebrahimi #define XCL_END 0 /* Marks end of individual items */ 1333*22dc650dSSadaf Ebrahimi #define XCL_SINGLE 1 /* Single item (one multibyte char) follows */ 1334*22dc650dSSadaf Ebrahimi #define XCL_RANGE 2 /* A range (two multibyte chars) follows */ 1335*22dc650dSSadaf Ebrahimi #define XCL_PROP 3 /* Unicode property (2-byte property code follows) */ 1336*22dc650dSSadaf Ebrahimi #define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */ 1337*22dc650dSSadaf Ebrahimi 1338*22dc650dSSadaf Ebrahimi /* These are escaped items that aren't just an encoding of a particular data 1339*22dc650dSSadaf Ebrahimi value such as \n. 
They must have non-zero values, as check_escape() returns 0
for a data character. In the escapes[] table in pcre2_compile.c their values
are negated in order to distinguish them from data values.

They must appear here in the same order as in the opcode definitions below, up
to ESC_z. There's a dummy for OP_ALLANY because it corresponds to "." in DOTALL
mode rather than an escape sequence. It is also used for [^] in JavaScript
compatibility mode, and for \C in non-utf mode. In non-DOTALL mode, "." behaves
like \N.

ESC_ub is a special return from check_escape() when, in BSUX mode, \u{ is not
followed by hex digits and }, in which case it should mean a literal "u"
followed by a literal "{". This hack is necessary for cases like \u{ 12}
because without it, this is interpreted as u{12} now that spaces are allowed in
quantifiers.

Negative numbers are used to encode a backreference (\1, \2, \3, etc.) in
check_escape(). There are tests in the code for an escape greater than ESC_b
and less than ESC_Z to detect the types that may be repeated. These are the
types that consume characters. If any new escapes are put in between that don't
consume a character, that code will have to change.
*/ 1360*22dc650dSSadaf Ebrahimi 1361*22dc650dSSadaf Ebrahimi enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, 1362*22dc650dSSadaf Ebrahimi ESC_W, ESC_w, ESC_N, ESC_dum, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H, 1363*22dc650dSSadaf Ebrahimi ESC_h, ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z, 1364*22dc650dSSadaf Ebrahimi ESC_E, ESC_Q, ESC_g, ESC_k, ESC_ub }; 1365*22dc650dSSadaf Ebrahimi 1366*22dc650dSSadaf Ebrahimi 1367*22dc650dSSadaf Ebrahimi /********************** Opcode definitions ******************/ 1368*22dc650dSSadaf Ebrahimi 1369*22dc650dSSadaf Ebrahimi /****** NOTE NOTE NOTE ****** 1370*22dc650dSSadaf Ebrahimi 1371*22dc650dSSadaf Ebrahimi Starting from 1 (i.e. after OP_END), the values up to OP_EOD must correspond in 1372*22dc650dSSadaf Ebrahimi order to the list of escapes immediately above. Furthermore, values up to 1373*22dc650dSSadaf Ebrahimi OP_DOLLM must not be changed without adjusting the table called autoposstab in 1374*22dc650dSSadaf Ebrahimi pcre2_auto_possess.c. 1375*22dc650dSSadaf Ebrahimi 1376*22dc650dSSadaf Ebrahimi Whenever this list is updated, the two macro definitions that follow must be 1377*22dc650dSSadaf Ebrahimi updated to match. The possessification table called "opcode_possessify" in 1378*22dc650dSSadaf Ebrahimi pcre2_compile.c must also be updated, and also the tables called "coptable" 1379*22dc650dSSadaf Ebrahimi and "poptable" in pcre2_dfa_match.c. 1380*22dc650dSSadaf Ebrahimi 1381*22dc650dSSadaf Ebrahimi ****** NOTE NOTE NOTE ******/ 1382*22dc650dSSadaf Ebrahimi 1383*22dc650dSSadaf Ebrahimi 1384*22dc650dSSadaf Ebrahimi /* The values between FIRST_AUTOTAB_OP and LAST_AUTOTAB_RIGHT_OP, inclusive, 1385*22dc650dSSadaf Ebrahimi are used in a table for deciding whether a repeated character type can be 1386*22dc650dSSadaf Ebrahimi auto-possessified. 
*/ 1387*22dc650dSSadaf Ebrahimi 1388*22dc650dSSadaf Ebrahimi #define FIRST_AUTOTAB_OP OP_NOT_DIGIT 1389*22dc650dSSadaf Ebrahimi #define LAST_AUTOTAB_LEFT_OP OP_EXTUNI 1390*22dc650dSSadaf Ebrahimi #define LAST_AUTOTAB_RIGHT_OP OP_DOLLM 1391*22dc650dSSadaf Ebrahimi 1392*22dc650dSSadaf Ebrahimi enum { 1393*22dc650dSSadaf Ebrahimi OP_END, /* 0 End of pattern */ 1394*22dc650dSSadaf Ebrahimi 1395*22dc650dSSadaf Ebrahimi /* Values corresponding to backslashed metacharacters */ 1396*22dc650dSSadaf Ebrahimi 1397*22dc650dSSadaf Ebrahimi OP_SOD, /* 1 Start of data: \A */ 1398*22dc650dSSadaf Ebrahimi OP_SOM, /* 2 Start of match (subject + offset): \G */ 1399*22dc650dSSadaf Ebrahimi OP_SET_SOM, /* 3 Set start of match (\K) */ 1400*22dc650dSSadaf Ebrahimi OP_NOT_WORD_BOUNDARY, /* 4 \B -- see also OP_NOT_UCP_WORD_BOUNDARY */ 1401*22dc650dSSadaf Ebrahimi OP_WORD_BOUNDARY, /* 5 \b -- see also OP_UCP_WORD_BOUNDARY */ 1402*22dc650dSSadaf Ebrahimi OP_NOT_DIGIT, /* 6 \D */ 1403*22dc650dSSadaf Ebrahimi OP_DIGIT, /* 7 \d */ 1404*22dc650dSSadaf Ebrahimi OP_NOT_WHITESPACE, /* 8 \S */ 1405*22dc650dSSadaf Ebrahimi OP_WHITESPACE, /* 9 \s */ 1406*22dc650dSSadaf Ebrahimi OP_NOT_WORDCHAR, /* 10 \W */ 1407*22dc650dSSadaf Ebrahimi OP_WORDCHAR, /* 11 \w */ 1408*22dc650dSSadaf Ebrahimi 1409*22dc650dSSadaf Ebrahimi OP_ANY, /* 12 Match any character except newline (\N) */ 1410*22dc650dSSadaf Ebrahimi OP_ALLANY, /* 13 Match any character */ 1411*22dc650dSSadaf Ebrahimi OP_ANYBYTE, /* 14 Match any byte (\C); different to OP_ANY for UTF-8 */ 1412*22dc650dSSadaf Ebrahimi OP_NOTPROP, /* 15 \P (not Unicode property) */ 1413*22dc650dSSadaf Ebrahimi OP_PROP, /* 16 \p (Unicode property) */ 1414*22dc650dSSadaf Ebrahimi OP_ANYNL, /* 17 \R (any newline sequence) */ 1415*22dc650dSSadaf Ebrahimi OP_NOT_HSPACE, /* 18 \H (not horizontal whitespace) */ 1416*22dc650dSSadaf Ebrahimi OP_HSPACE, /* 19 \h (horizontal whitespace) */ 1417*22dc650dSSadaf Ebrahimi OP_NOT_VSPACE, /* 20 \V (not vertical whitespace) */ 
1418*22dc650dSSadaf Ebrahimi OP_VSPACE, /* 21 \v (vertical whitespace) */ 1419*22dc650dSSadaf Ebrahimi OP_EXTUNI, /* 22 \X (extended Unicode sequence */ 1420*22dc650dSSadaf Ebrahimi OP_EODN, /* 23 End of data or \n at end of data (\Z) */ 1421*22dc650dSSadaf Ebrahimi OP_EOD, /* 24 End of data (\z) */ 1422*22dc650dSSadaf Ebrahimi 1423*22dc650dSSadaf Ebrahimi /* Line end assertions */ 1424*22dc650dSSadaf Ebrahimi 1425*22dc650dSSadaf Ebrahimi OP_DOLL, /* 25 End of line - not multiline */ 1426*22dc650dSSadaf Ebrahimi OP_DOLLM, /* 26 End of line - multiline */ 1427*22dc650dSSadaf Ebrahimi OP_CIRC, /* 27 Start of line - not multiline */ 1428*22dc650dSSadaf Ebrahimi OP_CIRCM, /* 28 Start of line - multiline */ 1429*22dc650dSSadaf Ebrahimi 1430*22dc650dSSadaf Ebrahimi /* Single characters; caseful must precede the caseless ones, and these 1431*22dc650dSSadaf Ebrahimi must remain in this order, and adjacent. */ 1432*22dc650dSSadaf Ebrahimi 1433*22dc650dSSadaf Ebrahimi OP_CHAR, /* 29 Match one character, casefully */ 1434*22dc650dSSadaf Ebrahimi OP_CHARI, /* 30 Match one character, caselessly */ 1435*22dc650dSSadaf Ebrahimi OP_NOT, /* 31 Match one character, not the given one, casefully */ 1436*22dc650dSSadaf Ebrahimi OP_NOTI, /* 32 Match one character, not the given one, caselessly */ 1437*22dc650dSSadaf Ebrahimi 1438*22dc650dSSadaf Ebrahimi /* The following sets of 13 opcodes must always be kept in step because 1439*22dc650dSSadaf Ebrahimi the offset from the first one is used to generate the others. */ 1440*22dc650dSSadaf Ebrahimi 1441*22dc650dSSadaf Ebrahimi /* Repeated characters; caseful must precede the caseless ones */ 1442*22dc650dSSadaf Ebrahimi 1443*22dc650dSSadaf Ebrahimi OP_STAR, /* 33 The maximizing and minimizing versions of */ 1444*22dc650dSSadaf Ebrahimi OP_MINSTAR, /* 34 these six opcodes must come in pairs, with */ 1445*22dc650dSSadaf Ebrahimi OP_PLUS, /* 35 the minimizing one second. 
*/ 1446*22dc650dSSadaf Ebrahimi OP_MINPLUS, /* 36 */ 1447*22dc650dSSadaf Ebrahimi OP_QUERY, /* 37 */ 1448*22dc650dSSadaf Ebrahimi OP_MINQUERY, /* 38 */ 1449*22dc650dSSadaf Ebrahimi 1450*22dc650dSSadaf Ebrahimi OP_UPTO, /* 39 From 0 to n matches of one character, caseful*/ 1451*22dc650dSSadaf Ebrahimi OP_MINUPTO, /* 40 */ 1452*22dc650dSSadaf Ebrahimi OP_EXACT, /* 41 Exactly n matches */ 1453*22dc650dSSadaf Ebrahimi 1454*22dc650dSSadaf Ebrahimi OP_POSSTAR, /* 42 Possessified star, caseful */ 1455*22dc650dSSadaf Ebrahimi OP_POSPLUS, /* 43 Possessified plus, caseful */ 1456*22dc650dSSadaf Ebrahimi OP_POSQUERY, /* 44 Posesssified query, caseful */ 1457*22dc650dSSadaf Ebrahimi OP_POSUPTO, /* 45 Possessified upto, caseful */ 1458*22dc650dSSadaf Ebrahimi 1459*22dc650dSSadaf Ebrahimi /* Repeated characters; caseless must follow the caseful ones */ 1460*22dc650dSSadaf Ebrahimi 1461*22dc650dSSadaf Ebrahimi OP_STARI, /* 46 */ 1462*22dc650dSSadaf Ebrahimi OP_MINSTARI, /* 47 */ 1463*22dc650dSSadaf Ebrahimi OP_PLUSI, /* 48 */ 1464*22dc650dSSadaf Ebrahimi OP_MINPLUSI, /* 49 */ 1465*22dc650dSSadaf Ebrahimi OP_QUERYI, /* 50 */ 1466*22dc650dSSadaf Ebrahimi OP_MINQUERYI, /* 51 */ 1467*22dc650dSSadaf Ebrahimi 1468*22dc650dSSadaf Ebrahimi OP_UPTOI, /* 52 From 0 to n matches of one character, caseless */ 1469*22dc650dSSadaf Ebrahimi OP_MINUPTOI, /* 53 */ 1470*22dc650dSSadaf Ebrahimi OP_EXACTI, /* 54 */ 1471*22dc650dSSadaf Ebrahimi 1472*22dc650dSSadaf Ebrahimi OP_POSSTARI, /* 55 Possessified star, caseless */ 1473*22dc650dSSadaf Ebrahimi OP_POSPLUSI, /* 56 Possessified plus, caseless */ 1474*22dc650dSSadaf Ebrahimi OP_POSQUERYI, /* 57 Posesssified query, caseless */ 1475*22dc650dSSadaf Ebrahimi OP_POSUPTOI, /* 58 Possessified upto, caseless */ 1476*22dc650dSSadaf Ebrahimi 1477*22dc650dSSadaf Ebrahimi /* The negated ones must follow the non-negated ones, and match them */ 1478*22dc650dSSadaf Ebrahimi /* Negated repeated character, caseful; must precede the caseless ones */ 
1479*22dc650dSSadaf Ebrahimi 1480*22dc650dSSadaf Ebrahimi OP_NOTSTAR, /* 59 The maximizing and minimizing versions of */ 1481*22dc650dSSadaf Ebrahimi OP_NOTMINSTAR, /* 60 these six opcodes must come in pairs, with */ 1482*22dc650dSSadaf Ebrahimi OP_NOTPLUS, /* 61 the minimizing one second. They must be in */ 1483*22dc650dSSadaf Ebrahimi OP_NOTMINPLUS, /* 62 exactly the same order as those above. */ 1484*22dc650dSSadaf Ebrahimi OP_NOTQUERY, /* 63 */ 1485*22dc650dSSadaf Ebrahimi OP_NOTMINQUERY, /* 64 */ 1486*22dc650dSSadaf Ebrahimi 1487*22dc650dSSadaf Ebrahimi OP_NOTUPTO, /* 65 From 0 to n matches, caseful */ 1488*22dc650dSSadaf Ebrahimi OP_NOTMINUPTO, /* 66 */ 1489*22dc650dSSadaf Ebrahimi OP_NOTEXACT, /* 67 Exactly n matches */ 1490*22dc650dSSadaf Ebrahimi 1491*22dc650dSSadaf Ebrahimi OP_NOTPOSSTAR, /* 68 Possessified versions, caseful */ 1492*22dc650dSSadaf Ebrahimi OP_NOTPOSPLUS, /* 69 */ 1493*22dc650dSSadaf Ebrahimi OP_NOTPOSQUERY, /* 70 */ 1494*22dc650dSSadaf Ebrahimi OP_NOTPOSUPTO, /* 71 */ 1495*22dc650dSSadaf Ebrahimi 1496*22dc650dSSadaf Ebrahimi /* Negated repeated character, caseless; must follow the caseful ones */ 1497*22dc650dSSadaf Ebrahimi 1498*22dc650dSSadaf Ebrahimi OP_NOTSTARI, /* 72 */ 1499*22dc650dSSadaf Ebrahimi OP_NOTMINSTARI, /* 73 */ 1500*22dc650dSSadaf Ebrahimi OP_NOTPLUSI, /* 74 */ 1501*22dc650dSSadaf Ebrahimi OP_NOTMINPLUSI, /* 75 */ 1502*22dc650dSSadaf Ebrahimi OP_NOTQUERYI, /* 76 */ 1503*22dc650dSSadaf Ebrahimi OP_NOTMINQUERYI, /* 77 */ 1504*22dc650dSSadaf Ebrahimi 1505*22dc650dSSadaf Ebrahimi OP_NOTUPTOI, /* 78 From 0 to n matches, caseless */ 1506*22dc650dSSadaf Ebrahimi OP_NOTMINUPTOI, /* 79 */ 1507*22dc650dSSadaf Ebrahimi OP_NOTEXACTI, /* 80 Exactly n matches */ 1508*22dc650dSSadaf Ebrahimi 1509*22dc650dSSadaf Ebrahimi OP_NOTPOSSTARI, /* 81 Possessified versions, caseless */ 1510*22dc650dSSadaf Ebrahimi OP_NOTPOSPLUSI, /* 82 */ 1511*22dc650dSSadaf Ebrahimi OP_NOTPOSQUERYI, /* 83 */ 1512*22dc650dSSadaf Ebrahimi OP_NOTPOSUPTOI, /* 84 */ 
1513*22dc650dSSadaf Ebrahimi 1514*22dc650dSSadaf Ebrahimi /* Character types */ 1515*22dc650dSSadaf Ebrahimi 1516*22dc650dSSadaf Ebrahimi OP_TYPESTAR, /* 85 The maximizing and minimizing versions of */ 1517*22dc650dSSadaf Ebrahimi OP_TYPEMINSTAR, /* 86 these six opcodes must come in pairs, with */ 1518*22dc650dSSadaf Ebrahimi OP_TYPEPLUS, /* 87 the minimizing one second. These codes must */ 1519*22dc650dSSadaf Ebrahimi OP_TYPEMINPLUS, /* 88 be in exactly the same order as those above. */ 1520*22dc650dSSadaf Ebrahimi OP_TYPEQUERY, /* 89 */ 1521*22dc650dSSadaf Ebrahimi OP_TYPEMINQUERY, /* 90 */ 1522*22dc650dSSadaf Ebrahimi 1523*22dc650dSSadaf Ebrahimi OP_TYPEUPTO, /* 91 From 0 to n matches */ 1524*22dc650dSSadaf Ebrahimi OP_TYPEMINUPTO, /* 92 */ 1525*22dc650dSSadaf Ebrahimi OP_TYPEEXACT, /* 93 Exactly n matches */ 1526*22dc650dSSadaf Ebrahimi 1527*22dc650dSSadaf Ebrahimi OP_TYPEPOSSTAR, /* 94 Possessified versions */ 1528*22dc650dSSadaf Ebrahimi OP_TYPEPOSPLUS, /* 95 */ 1529*22dc650dSSadaf Ebrahimi OP_TYPEPOSQUERY, /* 96 */ 1530*22dc650dSSadaf Ebrahimi OP_TYPEPOSUPTO, /* 97 */ 1531*22dc650dSSadaf Ebrahimi 1532*22dc650dSSadaf Ebrahimi /* These are used for character classes and back references; only the 1533*22dc650dSSadaf Ebrahimi first six are the same as the sets above. */ 1534*22dc650dSSadaf Ebrahimi 1535*22dc650dSSadaf Ebrahimi OP_CRSTAR, /* 98 The maximizing and minimizing versions of */ 1536*22dc650dSSadaf Ebrahimi OP_CRMINSTAR, /* 99 all these opcodes must come in pairs, with */ 1537*22dc650dSSadaf Ebrahimi OP_CRPLUS, /* 100 the minimizing one second. These codes must */ 1538*22dc650dSSadaf Ebrahimi OP_CRMINPLUS, /* 101 be in exactly the same order as those above. */ 1539*22dc650dSSadaf Ebrahimi OP_CRQUERY, /* 102 */ 1540*22dc650dSSadaf Ebrahimi OP_CRMINQUERY, /* 103 */ 1541*22dc650dSSadaf Ebrahimi 1542*22dc650dSSadaf Ebrahimi OP_CRRANGE, /* 104 These are different to the three sets above. 
*/ 1543*22dc650dSSadaf Ebrahimi OP_CRMINRANGE, /* 105 */ 1544*22dc650dSSadaf Ebrahimi 1545*22dc650dSSadaf Ebrahimi OP_CRPOSSTAR, /* 106 Possessified versions */ 1546*22dc650dSSadaf Ebrahimi OP_CRPOSPLUS, /* 107 */ 1547*22dc650dSSadaf Ebrahimi OP_CRPOSQUERY, /* 108 */ 1548*22dc650dSSadaf Ebrahimi OP_CRPOSRANGE, /* 109 */ 1549*22dc650dSSadaf Ebrahimi 1550*22dc650dSSadaf Ebrahimi /* End of quantifier opcodes */ 1551*22dc650dSSadaf Ebrahimi 1552*22dc650dSSadaf Ebrahimi OP_CLASS, /* 110 Match a character class, chars < 256 only */ 1553*22dc650dSSadaf Ebrahimi OP_NCLASS, /* 111 Same, but the bitmap was created from a negative 1554*22dc650dSSadaf Ebrahimi class - the difference is relevant only when a 1555*22dc650dSSadaf Ebrahimi character > 255 is encountered. */ 1556*22dc650dSSadaf Ebrahimi OP_XCLASS, /* 112 Extended class for handling > 255 chars within the 1557*22dc650dSSadaf Ebrahimi class. This does both positive and negative. */ 1558*22dc650dSSadaf Ebrahimi OP_REF, /* 113 Match a back reference, casefully */ 1559*22dc650dSSadaf Ebrahimi OP_REFI, /* 114 Match a back reference, caselessly */ 1560*22dc650dSSadaf Ebrahimi OP_DNREF, /* 115 Match a duplicate name backref, casefully */ 1561*22dc650dSSadaf Ebrahimi OP_DNREFI, /* 116 Match a duplicate name backref, caselessly */ 1562*22dc650dSSadaf Ebrahimi OP_RECURSE, /* 117 Match a numbered subpattern (possibly recursive) */ 1563*22dc650dSSadaf Ebrahimi OP_CALLOUT, /* 118 Call out to external function if provided */ 1564*22dc650dSSadaf Ebrahimi OP_CALLOUT_STR, /* 119 Call out with string argument */ 1565*22dc650dSSadaf Ebrahimi 1566*22dc650dSSadaf Ebrahimi OP_ALT, /* 120 Start of alternation */ 1567*22dc650dSSadaf Ebrahimi OP_KET, /* 121 End of group that doesn't have an unbounded repeat */ 1568*22dc650dSSadaf Ebrahimi OP_KETRMAX, /* 122 These two must remain together and in this */ 1569*22dc650dSSadaf Ebrahimi OP_KETRMIN, /* 123 order. They are for groups the repeat for ever. 
*/ 1570*22dc650dSSadaf Ebrahimi OP_KETRPOS, /* 124 Possessive unlimited repeat. */ 1571*22dc650dSSadaf Ebrahimi 1572*22dc650dSSadaf Ebrahimi /* The assertions must come before BRA, CBRA, ONCE, and COND. */ 1573*22dc650dSSadaf Ebrahimi 1574*22dc650dSSadaf Ebrahimi OP_REVERSE, /* 125 Move pointer back - used in lookbehind assertions */ 1575*22dc650dSSadaf Ebrahimi OP_VREVERSE, /* 126 Move pointer back - variable */ 1576*22dc650dSSadaf Ebrahimi OP_ASSERT, /* 127 Positive lookahead */ 1577*22dc650dSSadaf Ebrahimi OP_ASSERT_NOT, /* 128 Negative lookahead */ 1578*22dc650dSSadaf Ebrahimi OP_ASSERTBACK, /* 129 Positive lookbehind */ 1579*22dc650dSSadaf Ebrahimi OP_ASSERTBACK_NOT, /* 130 Negative lookbehind */ 1580*22dc650dSSadaf Ebrahimi OP_ASSERT_NA, /* 131 Positive non-atomic lookahead */ 1581*22dc650dSSadaf Ebrahimi OP_ASSERTBACK_NA, /* 132 Positive non-atomic lookbehind */ 1582*22dc650dSSadaf Ebrahimi 1583*22dc650dSSadaf Ebrahimi /* ONCE, SCRIPT_RUN, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come 1584*22dc650dSSadaf Ebrahimi immediately after the assertions, with ONCE first, as there's a test for >= 1585*22dc650dSSadaf Ebrahimi ONCE for a subpattern that isn't an assertion. The POS versions must 1586*22dc650dSSadaf Ebrahimi immediately follow the non-POS versions in each case. 
*/ 1587*22dc650dSSadaf Ebrahimi 1588*22dc650dSSadaf Ebrahimi OP_ONCE, /* 133 Atomic group, contains captures */ 1589*22dc650dSSadaf Ebrahimi OP_SCRIPT_RUN, /* 134 Non-capture, but check characters' scripts */ 1590*22dc650dSSadaf Ebrahimi OP_BRA, /* 135 Start of non-capturing bracket */ 1591*22dc650dSSadaf Ebrahimi OP_BRAPOS, /* 136 Ditto, with unlimited, possessive repeat */ 1592*22dc650dSSadaf Ebrahimi OP_CBRA, /* 137 Start of capturing bracket */ 1593*22dc650dSSadaf Ebrahimi OP_CBRAPOS, /* 138 Ditto, with unlimited, possessive repeat */ 1594*22dc650dSSadaf Ebrahimi OP_COND, /* 139 Conditional group */ 1595*22dc650dSSadaf Ebrahimi 1596*22dc650dSSadaf Ebrahimi /* These five must follow the previous five, in the same order. There's a 1597*22dc650dSSadaf Ebrahimi check for >= SBRA to distinguish the two sets. */ 1598*22dc650dSSadaf Ebrahimi 1599*22dc650dSSadaf Ebrahimi OP_SBRA, /* 140 Start of non-capturing bracket, check empty */ 1600*22dc650dSSadaf Ebrahimi OP_SBRAPOS, /* 141 Ditto, with unlimited, possessive repeat */ 1601*22dc650dSSadaf Ebrahimi OP_SCBRA, /* 142 Start of capturing bracket, check empty */ 1602*22dc650dSSadaf Ebrahimi OP_SCBRAPOS, /* 143 Ditto, with unlimited, possessive repeat */ 1603*22dc650dSSadaf Ebrahimi OP_SCOND, /* 144 Conditional group, check empty */ 1604*22dc650dSSadaf Ebrahimi 1605*22dc650dSSadaf Ebrahimi /* The next two pairs must (respectively) be kept together. 
*/ 1606*22dc650dSSadaf Ebrahimi 1607*22dc650dSSadaf Ebrahimi OP_CREF, /* 145 Used to hold a capture number as condition */ 1608*22dc650dSSadaf Ebrahimi OP_DNCREF, /* 146 Used to point to duplicate names as a condition */ 1609*22dc650dSSadaf Ebrahimi OP_RREF, /* 147 Used to hold a recursion number as condition */ 1610*22dc650dSSadaf Ebrahimi OP_DNRREF, /* 148 Used to point to duplicate names as a condition */ 1611*22dc650dSSadaf Ebrahimi OP_FALSE, /* 149 Always false (used by DEFINE and VERSION) */ 1612*22dc650dSSadaf Ebrahimi OP_TRUE, /* 150 Always true (used by VERSION) */ 1613*22dc650dSSadaf Ebrahimi 1614*22dc650dSSadaf Ebrahimi OP_BRAZERO, /* 151 These two must remain together and in this */ 1615*22dc650dSSadaf Ebrahimi OP_BRAMINZERO, /* 152 order. */ 1616*22dc650dSSadaf Ebrahimi OP_BRAPOSZERO, /* 153 */ 1617*22dc650dSSadaf Ebrahimi 1618*22dc650dSSadaf Ebrahimi /* These are backtracking control verbs */ 1619*22dc650dSSadaf Ebrahimi 1620*22dc650dSSadaf Ebrahimi OP_MARK, /* 154 always has an argument */ 1621*22dc650dSSadaf Ebrahimi OP_PRUNE, /* 155 */ 1622*22dc650dSSadaf Ebrahimi OP_PRUNE_ARG, /* 156 same, but with argument */ 1623*22dc650dSSadaf Ebrahimi OP_SKIP, /* 157 */ 1624*22dc650dSSadaf Ebrahimi OP_SKIP_ARG, /* 158 same, but with argument */ 1625*22dc650dSSadaf Ebrahimi OP_THEN, /* 159 */ 1626*22dc650dSSadaf Ebrahimi OP_THEN_ARG, /* 160 same, but with argument */ 1627*22dc650dSSadaf Ebrahimi OP_COMMIT, /* 161 */ 1628*22dc650dSSadaf Ebrahimi OP_COMMIT_ARG, /* 162 same, but with argument */ 1629*22dc650dSSadaf Ebrahimi 1630*22dc650dSSadaf Ebrahimi /* These are forced failure and success verbs. FAIL and ACCEPT do accept an 1631*22dc650dSSadaf Ebrahimi argument, but these cases can be compiled as, for example, (*MARK:X)(*FAIL) 1632*22dc650dSSadaf Ebrahimi without the need for a special opcode. 
*/ 1633*22dc650dSSadaf Ebrahimi 1634*22dc650dSSadaf Ebrahimi OP_FAIL, /* 163 */ 1635*22dc650dSSadaf Ebrahimi OP_ACCEPT, /* 164 */ 1636*22dc650dSSadaf Ebrahimi OP_ASSERT_ACCEPT, /* 165 Used inside assertions */ 1637*22dc650dSSadaf Ebrahimi OP_CLOSE, /* 166 Used before OP_ACCEPT to close open captures */ 1638*22dc650dSSadaf Ebrahimi 1639*22dc650dSSadaf Ebrahimi /* This is used to skip a subpattern with a {0} quantifier */ 1640*22dc650dSSadaf Ebrahimi 1641*22dc650dSSadaf Ebrahimi OP_SKIPZERO, /* 167 */ 1642*22dc650dSSadaf Ebrahimi 1643*22dc650dSSadaf Ebrahimi /* This is used to identify a DEFINE group during compilation so that it can 1644*22dc650dSSadaf Ebrahimi be checked for having only one branch. It is changed to OP_FALSE before 1645*22dc650dSSadaf Ebrahimi compilation finishes. */ 1646*22dc650dSSadaf Ebrahimi 1647*22dc650dSSadaf Ebrahimi OP_DEFINE, /* 168 */ 1648*22dc650dSSadaf Ebrahimi 1649*22dc650dSSadaf Ebrahimi /* These opcodes replace their normal counterparts in UCP mode when 1650*22dc650dSSadaf Ebrahimi PCRE2_EXTRA_ASCII_BSW is not set. */ 1651*22dc650dSSadaf Ebrahimi 1652*22dc650dSSadaf Ebrahimi OP_NOT_UCP_WORD_BOUNDARY, /* 169 */ 1653*22dc650dSSadaf Ebrahimi OP_UCP_WORD_BOUNDARY, /* 170 */ 1654*22dc650dSSadaf Ebrahimi 1655*22dc650dSSadaf Ebrahimi /* This is not an opcode, but is used to check that tables indexed by opcode 1656*22dc650dSSadaf Ebrahimi are the correct length, in order to catch updating errors - there have been 1657*22dc650dSSadaf Ebrahimi some in the past. */ 1658*22dc650dSSadaf Ebrahimi 1659*22dc650dSSadaf Ebrahimi OP_TABLE_LENGTH 1660*22dc650dSSadaf Ebrahimi 1661*22dc650dSSadaf Ebrahimi }; 1662*22dc650dSSadaf Ebrahimi 1663*22dc650dSSadaf Ebrahimi /* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro 1664*22dc650dSSadaf Ebrahimi definitions that follow must also be updated to match. 
There are also tables 1665*22dc650dSSadaf Ebrahimi called "opcode_possessify" in pcre2_compile.c and "coptable" and "poptable" in 1666*22dc650dSSadaf Ebrahimi pcre2_dfa_match.c that must be updated. */ 1667*22dc650dSSadaf Ebrahimi 1668*22dc650dSSadaf Ebrahimi 1669*22dc650dSSadaf Ebrahimi /* This macro defines textual names for all the opcodes. These are used only 1670*22dc650dSSadaf Ebrahimi for debugging, and some of them are only partial names. The macro is referenced 1671*22dc650dSSadaf Ebrahimi only in pcre2_printint.c, which fills out the full names in many cases (and in 1672*22dc650dSSadaf Ebrahimi some cases doesn't actually use these names at all). The strings are listed in opcode order, so this list has to be kept in step with the opcode list. */ 1673*22dc650dSSadaf Ebrahimi 1674*22dc650dSSadaf Ebrahimi #define OP_NAME_LIST \ 1675*22dc650dSSadaf Ebrahimi "End", "\\A", "\\G", "\\K", "\\B", "\\b", "\\D", "\\d", \ 1676*22dc650dSSadaf Ebrahimi "\\S", "\\s", "\\W", "\\w", "Any", "AllAny", "Anybyte", \ 1677*22dc650dSSadaf Ebrahimi "notprop", "prop", "\\R", "\\H", "\\h", "\\V", "\\v", \ 1678*22dc650dSSadaf Ebrahimi "extuni", "\\Z", "\\z", \ 1679*22dc650dSSadaf Ebrahimi "$", "$", "^", "^", "char", "chari", "not", "noti", \ 1680*22dc650dSSadaf Ebrahimi "*", "*?", "+", "+?", "?", "??", \ 1681*22dc650dSSadaf Ebrahimi "{", "{", "{", \ 1682*22dc650dSSadaf Ebrahimi "*+","++", "?+", "{", \ 1683*22dc650dSSadaf Ebrahimi "*", "*?", "+", "+?", "?", "??", \ 1684*22dc650dSSadaf Ebrahimi "{", "{", "{", \ 1685*22dc650dSSadaf Ebrahimi "*+","++", "?+", "{", \ 1686*22dc650dSSadaf Ebrahimi "*", "*?", "+", "+?", "?", "??", \ 1687*22dc650dSSadaf Ebrahimi "{", "{", "{", \ 1688*22dc650dSSadaf Ebrahimi "*+","++", "?+", "{", \ 1689*22dc650dSSadaf Ebrahimi "*", "*?", "+", "+?", "?", "??", \ 1690*22dc650dSSadaf Ebrahimi "{", "{", "{", \ 1691*22dc650dSSadaf Ebrahimi "*+","++", "?+", "{", \ 1692*22dc650dSSadaf Ebrahimi "*", "*?", "+", "+?", "?", "??", "{", "{", "{", \ 1693*22dc650dSSadaf Ebrahimi "*+","++", "?+", "{", \ 1694*22dc650dSSadaf Ebrahimi "*", "*?", "+", "+?", "?", "??", "{",
"{", \ 1695*22dc650dSSadaf Ebrahimi "*+","++", "?+", "{", \ 1696*22dc650dSSadaf Ebrahimi "class", "nclass", "xclass", "Ref", "Refi", "DnRef", "DnRefi", \ 1697*22dc650dSSadaf Ebrahimi "Recurse", "Callout", "CalloutStr", \ 1698*22dc650dSSadaf Ebrahimi "Alt", "Ket", "KetRmax", "KetRmin", "KetRpos", \ 1699*22dc650dSSadaf Ebrahimi "Reverse", "VReverse", "Assert", "Assert not", \ 1700*22dc650dSSadaf Ebrahimi "Assert back", "Assert back not", \ 1701*22dc650dSSadaf Ebrahimi "Non-atomic assert", "Non-atomic assert back", \ 1702*22dc650dSSadaf Ebrahimi "Once", \ 1703*22dc650dSSadaf Ebrahimi "Script run", \ 1704*22dc650dSSadaf Ebrahimi "Bra", "BraPos", "CBra", "CBraPos", \ 1705*22dc650dSSadaf Ebrahimi "Cond", \ 1706*22dc650dSSadaf Ebrahimi "SBra", "SBraPos", "SCBra", "SCBraPos", \ 1707*22dc650dSSadaf Ebrahimi "SCond", \ 1708*22dc650dSSadaf Ebrahimi "Cond ref", "Cond dnref", "Cond rec", "Cond dnrec", \ 1709*22dc650dSSadaf Ebrahimi "Cond false", "Cond true", \ 1710*22dc650dSSadaf Ebrahimi "Brazero", "Braminzero", "Braposzero", \ 1711*22dc650dSSadaf Ebrahimi "*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP", \ 1712*22dc650dSSadaf Ebrahimi "*THEN", "*THEN", "*COMMIT", "*COMMIT", "*FAIL", \ 1713*22dc650dSSadaf Ebrahimi "*ACCEPT", "*ASSERT_ACCEPT", \ 1714*22dc650dSSadaf Ebrahimi "Close", "Skip zero", "Define", "\\B (ucp)", "\\b (ucp)" 1715*22dc650dSSadaf Ebrahimi 1716*22dc650dSSadaf Ebrahimi 1717*22dc650dSSadaf Ebrahimi /* This macro defines the length of fixed length operations in the compiled 1718*22dc650dSSadaf Ebrahimi regex. The lengths are used when searching for specific things, and also in the 1719*22dc650dSSadaf Ebrahimi debugging printing of a compiled regex. We use a macro so that it can be 1720*22dc650dSSadaf Ebrahimi defined close to the definitions of the opcodes themselves. 1721*22dc650dSSadaf Ebrahimi 1722*22dc650dSSadaf Ebrahimi As things have been extended, some of these are no longer fixed lengths, but are 1723*22dc650dSSadaf Ebrahimi minima instead.
For example, the length of a single-character repeat may vary 1724*22dc650dSSadaf Ebrahimi in UTF-8 mode. The code that uses this table must know about such things. */ 1725*22dc650dSSadaf Ebrahimi 1726*22dc650dSSadaf Ebrahimi #define OP_LENGTHS \ 1727*22dc650dSSadaf Ebrahimi 1, /* End */ \ 1728*22dc650dSSadaf Ebrahimi 1, 1, 1, 1, 1, /* \A, \G, \K, \B, \b */ \ 1729*22dc650dSSadaf Ebrahimi 1, 1, 1, 1, 1, 1, /* \D, \d, \S, \s, \W, \w */ \ 1730*22dc650dSSadaf Ebrahimi 1, 1, 1, /* Any, AllAny, Anybyte */ \ 1731*22dc650dSSadaf Ebrahimi 3, 3, /* \P, \p */ \ 1732*22dc650dSSadaf Ebrahimi 1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */ \ 1733*22dc650dSSadaf Ebrahimi 1, /* \X */ \ 1734*22dc650dSSadaf Ebrahimi 1, 1, 1, 1, 1, 1, /* \Z, \z, $, $M, ^, ^M */ \ 1735*22dc650dSSadaf Ebrahimi 2, /* Char - the minimum length */ \ 1736*22dc650dSSadaf Ebrahimi 2, /* Chari - the minimum length */ \ 1737*22dc650dSSadaf Ebrahimi 2, /* not */ \ 1738*22dc650dSSadaf Ebrahimi 2, /* noti */ \ 1739*22dc650dSSadaf Ebrahimi /* Positive single-char repeats ** These are */ \ 1740*22dc650dSSadaf Ebrahimi 2, 2, 2, 2, 2, 2, /* *, *?, +, +?, ?, ?? ** minima in */ \ 1741*22dc650dSSadaf Ebrahimi 2+IMM2_SIZE, 2+IMM2_SIZE, /* upto, minupto ** UTF-8 mode */ \ 1742*22dc650dSSadaf Ebrahimi 2+IMM2_SIZE, /* exact */ \ 1743*22dc650dSSadaf Ebrahimi 2, 2, 2, 2+IMM2_SIZE, /* *+, ++, ?+, upto+ */ \ 1744*22dc650dSSadaf Ebrahimi 2, 2, 2, 2, 2, 2, /* *I, *?I, +I, +?I, ?I, ??I */ \ 1745*22dc650dSSadaf Ebrahimi 2+IMM2_SIZE, 2+IMM2_SIZE, /* upto I, minupto I */ \ 1746*22dc650dSSadaf Ebrahimi 2+IMM2_SIZE, /* exact I */ \ 1747*22dc650dSSadaf Ebrahimi 2, 2, 2, 2+IMM2_SIZE, /* *+I, ++I, ?+I, upto+I */ \ 1748*22dc650dSSadaf Ebrahimi /* Negative single-char repeats - only for chars < 256 */ \ 1749*22dc650dSSadaf Ebrahimi 2, 2, 2, 2, 2, 2, /* NOT *, *?, +, +?, ?, ??
*/ \ 1750*22dc650dSSadaf Ebrahimi 2+IMM2_SIZE, 2+IMM2_SIZE, /* NOT upto, minupto */ \ 1751*22dc650dSSadaf Ebrahimi 2+IMM2_SIZE, /* NOT exact */ \ 1752*22dc650dSSadaf Ebrahimi 2, 2, 2, 2+IMM2_SIZE, /* Possessive NOT *, +, ?, upto */ \ 1753*22dc650dSSadaf Ebrahimi 2, 2, 2, 2, 2, 2, /* NOT *I, *?I, +I, +?I, ?I, ??I */ \ 1754*22dc650dSSadaf Ebrahimi 2+IMM2_SIZE, 2+IMM2_SIZE, /* NOT upto I, minupto I */ \ 1755*22dc650dSSadaf Ebrahimi 2+IMM2_SIZE, /* NOT exact I */ \ 1756*22dc650dSSadaf Ebrahimi 2, 2, 2, 2+IMM2_SIZE, /* Possessive NOT *I, +I, ?I, upto I */ \ 1757*22dc650dSSadaf Ebrahimi /* Positive type repeats */ \ 1758*22dc650dSSadaf Ebrahimi 2, 2, 2, 2, 2, 2, /* Type *, *?, +, +?, ?, ?? */ \ 1759*22dc650dSSadaf Ebrahimi 2+IMM2_SIZE, 2+IMM2_SIZE, /* Type upto, minupto */ \ 1760*22dc650dSSadaf Ebrahimi 2+IMM2_SIZE, /* Type exact */ \ 1761*22dc650dSSadaf Ebrahimi 2, 2, 2, 2+IMM2_SIZE, /* Possessive *+, ++, ?+, upto+ */ \ 1762*22dc650dSSadaf Ebrahimi /* Character class & ref repeats */ \ 1763*22dc650dSSadaf Ebrahimi 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ??
*/ \ 1764*22dc650dSSadaf Ebrahimi 1+2*IMM2_SIZE, 1+2*IMM2_SIZE, /* CRRANGE, CRMINRANGE */ \ 1765*22dc650dSSadaf Ebrahimi 1, 1, 1, 1+2*IMM2_SIZE, /* Possessive *+, ++, ?+, CRPOSRANGE */ \ 1766*22dc650dSSadaf Ebrahimi 1+(32/sizeof(PCRE2_UCHAR)), /* CLASS - the opcode plus a 32-byte bitmap, counted in code units */ \ 1767*22dc650dSSadaf Ebrahimi 1+(32/sizeof(PCRE2_UCHAR)), /* NCLASS - same layout as CLASS */ \ 1768*22dc650dSSadaf Ebrahimi 0, /* XCLASS - variable length */ \ 1769*22dc650dSSadaf Ebrahimi 1+IMM2_SIZE, /* REF */ \ 1770*22dc650dSSadaf Ebrahimi 1+IMM2_SIZE, /* REFI */ \ 1771*22dc650dSSadaf Ebrahimi 1+2*IMM2_SIZE, /* DNREF */ \ 1772*22dc650dSSadaf Ebrahimi 1+2*IMM2_SIZE, /* DNREFI */ \ 1773*22dc650dSSadaf Ebrahimi 1+LINK_SIZE, /* RECURSE */ \ 1774*22dc650dSSadaf Ebrahimi 1+2*LINK_SIZE+1, /* CALLOUT */ \ 1775*22dc650dSSadaf Ebrahimi 0, /* CALLOUT_STR - variable length */ \ 1776*22dc650dSSadaf Ebrahimi 1+LINK_SIZE, /* Alt */ \ 1777*22dc650dSSadaf Ebrahimi 1+LINK_SIZE, /* Ket */ \ 1778*22dc650dSSadaf Ebrahimi 1+LINK_SIZE, /* KetRmax */ \ 1779*22dc650dSSadaf Ebrahimi 1+LINK_SIZE, /* KetRmin */ \ 1780*22dc650dSSadaf Ebrahimi 1+LINK_SIZE, /* KetRpos */ \ 1781*22dc650dSSadaf Ebrahimi 1+IMM2_SIZE, /* Reverse */ \ 1782*22dc650dSSadaf Ebrahimi 1+2*IMM2_SIZE, /* VReverse */ \ 1783*22dc650dSSadaf Ebrahimi 1+LINK_SIZE, /* Assert */ \ 1784*22dc650dSSadaf Ebrahimi 1+LINK_SIZE, /* Assert not */ \ 1785*22dc650dSSadaf Ebrahimi 1+LINK_SIZE, /* Assert behind */ \ 1786*22dc650dSSadaf Ebrahimi 1+LINK_SIZE, /* Assert behind not */ \ 1787*22dc650dSSadaf Ebrahimi 1+LINK_SIZE, /* NA Assert */ \ 1788*22dc650dSSadaf Ebrahimi 1+LINK_SIZE, /* NA Assert behind */ \ 1789*22dc650dSSadaf Ebrahimi 1+LINK_SIZE, /* ONCE */ \ 1790*22dc650dSSadaf Ebrahimi 1+LINK_SIZE, /* SCRIPT_RUN */ \ 1791*22dc650dSSadaf Ebrahimi 1+LINK_SIZE, /* BRA */ \ 1792*22dc650dSSadaf Ebrahimi 1+LINK_SIZE, /* BRAPOS */ \ 1793*22dc650dSSadaf Ebrahimi 1+LINK_SIZE+IMM2_SIZE, /* CBRA */ \ 1794*22dc650dSSadaf Ebrahimi 1+LINK_SIZE+IMM2_SIZE, /* CBRAPOS */ \ 1795*22dc650dSSadaf Ebrahimi 1+LINK_SIZE, /* COND
*/ \ 1796*22dc650dSSadaf Ebrahimi 1+LINK_SIZE, /* SBRA */ \ 1797*22dc650dSSadaf Ebrahimi 1+LINK_SIZE, /* SBRAPOS */ \ 1798*22dc650dSSadaf Ebrahimi 1+LINK_SIZE+IMM2_SIZE, /* SCBRA */ \ 1799*22dc650dSSadaf Ebrahimi 1+LINK_SIZE+IMM2_SIZE, /* SCBRAPOS */ \ 1800*22dc650dSSadaf Ebrahimi 1+LINK_SIZE, /* SCOND */ \ 1801*22dc650dSSadaf Ebrahimi 1+IMM2_SIZE, 1+2*IMM2_SIZE, /* CREF, DNCREF */ \ 1802*22dc650dSSadaf Ebrahimi 1+IMM2_SIZE, 1+2*IMM2_SIZE, /* RREF, DNRREF */ \ 1803*22dc650dSSadaf Ebrahimi 1, 1, /* FALSE, TRUE */ \ 1804*22dc650dSSadaf Ebrahimi 1, 1, 1, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ \ 1805*22dc650dSSadaf Ebrahimi 3, 1, 3, /* MARK, PRUNE, PRUNE_ARG */ \ 1806*22dc650dSSadaf Ebrahimi 1, 3, /* SKIP, SKIP_ARG */ \ 1807*22dc650dSSadaf Ebrahimi 1, 3, /* THEN, THEN_ARG */ \ 1808*22dc650dSSadaf Ebrahimi 1, 3, /* COMMIT, COMMIT_ARG */ \ 1809*22dc650dSSadaf Ebrahimi 1, 1, 1, /* FAIL, ACCEPT, ASSERT_ACCEPT */ \ 1810*22dc650dSSadaf Ebrahimi 1+IMM2_SIZE, 1, /* CLOSE, SKIPZERO */ \ 1811*22dc650dSSadaf Ebrahimi 1, /* DEFINE */ \ 1812*22dc650dSSadaf Ebrahimi 1, 1 /* \B and \b in UCP mode */ 1813*22dc650dSSadaf Ebrahimi 1814*22dc650dSSadaf Ebrahimi /* A magic value for OP_RREF to indicate the "any recursion" condition. */ 1815*22dc650dSSadaf Ebrahimi 1816*22dc650dSSadaf Ebrahimi #define RREF_ANY 0xffff 1817*22dc650dSSadaf Ebrahimi 1818*22dc650dSSadaf Ebrahimi 1819*22dc650dSSadaf Ebrahimi /* ---------- Private structures that are mode-independent. ---------- */ 1820*22dc650dSSadaf Ebrahimi 1821*22dc650dSSadaf Ebrahimi /* Structure to hold data for custom memory management.
*/ 1822*22dc650dSSadaf Ebrahimi 1823*22dc650dSSadaf Ebrahimi typedef struct pcre2_memctl { 1824*22dc650dSSadaf Ebrahimi void * (*malloc)(size_t, void *); /* Allocation callback: a size and a pointer, presumably memory_data - confirm */ 1825*22dc650dSSadaf Ebrahimi void (*free)(void *, void *); /* Release callback: two pointers, presumably the block and memory_data - confirm */ 1826*22dc650dSSadaf Ebrahimi void *memory_data; /* Opaque pointer kept alongside the two callbacks */ 1827*22dc650dSSadaf Ebrahimi } pcre2_memctl; 1828*22dc650dSSadaf Ebrahimi 1829*22dc650dSSadaf Ebrahimi /* Structure for building a chain of open capturing subpatterns during 1830*22dc650dSSadaf Ebrahimi compiling, so that instructions to close them can be compiled when (*ACCEPT) is 1831*22dc650dSSadaf Ebrahimi encountered. */ 1832*22dc650dSSadaf Ebrahimi 1833*22dc650dSSadaf Ebrahimi typedef struct open_capitem { 1834*22dc650dSSadaf Ebrahimi struct open_capitem *next; /* Chain link */ 1835*22dc650dSSadaf Ebrahimi uint16_t number; /* Capture number */ 1836*22dc650dSSadaf Ebrahimi uint16_t assert_depth; /* Assertion depth when opened */ 1837*22dc650dSSadaf Ebrahimi } open_capitem; 1838*22dc650dSSadaf Ebrahimi 1839*22dc650dSSadaf Ebrahimi /* Layout of the UCP type table that translates property names into types and 1840*22dc650dSSadaf Ebrahimi codes. Each entry used to point directly to a name, but to reduce the number of 1841*22dc650dSSadaf Ebrahimi relocations in shared libraries, it now has an offset into a single string 1842*22dc650dSSadaf Ebrahimi instead. */ 1843*22dc650dSSadaf Ebrahimi 1844*22dc650dSSadaf Ebrahimi typedef struct { 1845*22dc650dSSadaf Ebrahimi uint16_t name_offset; /* Offset of the property name within the single name string */ 1846*22dc650dSSadaf Ebrahimi uint16_t type; /* Property type that the name translates to */ 1847*22dc650dSSadaf Ebrahimi uint16_t value; /* Property code that the name translates to */ 1848*22dc650dSSadaf Ebrahimi } ucp_type_table; 1849*22dc650dSSadaf Ebrahimi 1850*22dc650dSSadaf Ebrahimi /* Unicode character database (UCD) record format */ 1851*22dc650dSSadaf Ebrahimi 1852*22dc650dSSadaf Ebrahimi typedef struct { 1853*22dc650dSSadaf Ebrahimi uint8_t script; /* ucp_Arabic, etc. */ 1854*22dc650dSSadaf Ebrahimi uint8_t chartype; /* ucp_Cc, etc.
(general categories) */ 1855*22dc650dSSadaf Ebrahimi uint8_t gbprop; /* ucp_gbControl, etc. (grapheme break property) */ 1856*22dc650dSSadaf Ebrahimi uint8_t caseset; /* offset to multichar other cases or zero */ 1857*22dc650dSSadaf Ebrahimi int32_t other_case; /* offset to other case, or zero if none */ 1858*22dc650dSSadaf Ebrahimi uint16_t scriptx_bidiclass; /* script extension (11 bit) and bidi class (5 bit) values */ 1859*22dc650dSSadaf Ebrahimi uint16_t bprops; /* binary properties offset */ 1860*22dc650dSSadaf Ebrahimi } ucd_record; 1861*22dc650dSSadaf Ebrahimi 1862*22dc650dSSadaf Ebrahimi /* UCD access macros. REAL_GET_UCD is a two-stage table lookup: ucd_stage1 is indexed by the code point divided by UCD_BLOCK_SIZE; that entry times UCD_BLOCK_SIZE, plus the code point modulo UCD_BLOCK_SIZE, indexes ucd_stage2; the result is the index of the record in ucd_records. */ 1863*22dc650dSSadaf Ebrahimi 1864*22dc650dSSadaf Ebrahimi #define UCD_BLOCK_SIZE 128 1865*22dc650dSSadaf Ebrahimi #define REAL_GET_UCD(ch) (PRIV(ucd_records) + \ 1866*22dc650dSSadaf Ebrahimi PRIV(ucd_stage2)[PRIV(ucd_stage1)[(int)(ch) / UCD_BLOCK_SIZE] * \ 1867*22dc650dSSadaf Ebrahimi UCD_BLOCK_SIZE + (int)(ch) % UCD_BLOCK_SIZE]) 1868*22dc650dSSadaf Ebrahimi /* In the 32-bit library a value above MAX_UTF_CODE_POINT selects the dummy record instead of indexing the tables. The argument is parenthesized so that a compound expression is compared as a whole. */ 1869*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 32 1870*22dc650dSSadaf Ebrahimi #define GET_UCD(ch) (((ch) > MAX_UTF_CODE_POINT)?
\ 1871*22dc650dSSadaf Ebrahimi PRIV(dummy_ucd_record) : REAL_GET_UCD(ch)) 1872*22dc650dSSadaf Ebrahimi #else 1873*22dc650dSSadaf Ebrahimi #define GET_UCD(ch) REAL_GET_UCD(ch) 1874*22dc650dSSadaf Ebrahimi #endif 1875*22dc650dSSadaf Ebrahimi /* Layout of the packed fields. UCD_SCRIPTX_MASK keeps the low 10 bits of scriptx_bidiclass and the bidi class is taken from bit 11 upwards, so bit 10 is read by neither accessor. NOTE(review): the field comment in ucd_record says 11 bits for the script extension - confirm which is intended. UCD_BPROPS_MASK keeps the low 12 bits of bprops. */ 1876*22dc650dSSadaf Ebrahimi #define UCD_SCRIPTX_MASK 0x3ff 1877*22dc650dSSadaf Ebrahimi #define UCD_BIDICLASS_SHIFT 11 1878*22dc650dSSadaf Ebrahimi #define UCD_BPROPS_MASK 0xfff 1879*22dc650dSSadaf Ebrahimi 1880*22dc650dSSadaf Ebrahimi #define UCD_SCRIPTX_PROP(prop) ((prop)->scriptx_bidiclass & UCD_SCRIPTX_MASK) 1881*22dc650dSSadaf Ebrahimi #define UCD_BIDICLASS_PROP(prop) ((prop)->scriptx_bidiclass >> UCD_BIDICLASS_SHIFT) 1882*22dc650dSSadaf Ebrahimi #define UCD_BPROPS_PROP(prop) ((prop)->bprops & UCD_BPROPS_MASK) 1883*22dc650dSSadaf Ebrahimi /* Per-character accessors. Each one goes through GET_UCD, which expands ch more than once, so ch must have no side effects. UCD_OTHERCASE adds the record's signed other_case offset to the code point; ch is parenthesized before the cast so that the cast applies to the whole argument. */ 1884*22dc650dSSadaf Ebrahimi #define UCD_CHARTYPE(ch) GET_UCD(ch)->chartype 1885*22dc650dSSadaf Ebrahimi #define UCD_SCRIPT(ch) GET_UCD(ch)->script 1886*22dc650dSSadaf Ebrahimi #define UCD_CATEGORY(ch) PRIV(ucp_gentype)[UCD_CHARTYPE(ch)] 1887*22dc650dSSadaf Ebrahimi #define UCD_GRAPHBREAK(ch) GET_UCD(ch)->gbprop 1888*22dc650dSSadaf Ebrahimi #define UCD_CASESET(ch) GET_UCD(ch)->caseset 1889*22dc650dSSadaf Ebrahimi #define UCD_OTHERCASE(ch) ((uint32_t)((int)(ch) + (int)(GET_UCD(ch)->other_case))) 1890*22dc650dSSadaf Ebrahimi #define UCD_SCRIPTX(ch) UCD_SCRIPTX_PROP(GET_UCD(ch)) 1891*22dc650dSSadaf Ebrahimi #define UCD_BPROPS(ch) UCD_BPROPS_PROP(GET_UCD(ch)) 1892*22dc650dSSadaf Ebrahimi #define UCD_BIDICLASS(ch) UCD_BIDICLASS_PROP(GET_UCD(ch)) 1893*22dc650dSSadaf Ebrahimi 1894*22dc650dSSadaf Ebrahimi /* The "scriptx" and bprops fields contain offsets into vectors of 32-bit words 1895*22dc650dSSadaf Ebrahimi that form a bitmap representing a list of scripts or boolean properties. These 1896*22dc650dSSadaf Ebrahimi macros test or set a bit in the map by number.
*/ 1897*22dc650dSSadaf Ebrahimi /* Bit n lives in word n/32 at bit position n%32. MAPBIT yields the masked word (zero or a single-bit value, not normalized to 0 or 1); MAPSET ORs the bit into the word, so its map argument must be modifiable. */ 1898*22dc650dSSadaf Ebrahimi #define MAPBIT(map,n) ((map)[(n)/32]&(1u<<((n)%32))) 1899*22dc650dSSadaf Ebrahimi #define MAPSET(map,n) ((map)[(n)/32]|=(1u<<((n)%32))) 1900*22dc650dSSadaf Ebrahimi 1901*22dc650dSSadaf Ebrahimi /* Header for serialized pcre2 codes. */ 1902*22dc650dSSadaf Ebrahimi 1903*22dc650dSSadaf Ebrahimi typedef struct pcre2_serialized_data { 1904*22dc650dSSadaf Ebrahimi uint32_t magic; 1905*22dc650dSSadaf Ebrahimi uint32_t version; 1906*22dc650dSSadaf Ebrahimi uint32_t config; 1907*22dc650dSSadaf Ebrahimi int32_t number_of_codes; 1908*22dc650dSSadaf Ebrahimi } pcre2_serialized_data; 1909*22dc650dSSadaf Ebrahimi 1910*22dc650dSSadaf Ebrahimi 1911*22dc650dSSadaf Ebrahimi 1912*22dc650dSSadaf Ebrahimi /* ----------------- Items that need PCRE2_CODE_UNIT_WIDTH ----------------- */ 1913*22dc650dSSadaf Ebrahimi 1914*22dc650dSSadaf Ebrahimi /* When this file is included by pcre2test, PCRE2_CODE_UNIT_WIDTH is defined as 1915*22dc650dSSadaf Ebrahimi 0, so the following items are omitted. */ 1916*22dc650dSSadaf Ebrahimi 1917*22dc650dSSadaf Ebrahimi #if defined PCRE2_CODE_UNIT_WIDTH && PCRE2_CODE_UNIT_WIDTH != 0 1918*22dc650dSSadaf Ebrahimi 1919*22dc650dSSadaf Ebrahimi /* EBCDIC is supported only for the 8-bit library. */ 1920*22dc650dSSadaf Ebrahimi 1921*22dc650dSSadaf Ebrahimi #if defined EBCDIC && PCRE2_CODE_UNIT_WIDTH != 8 1922*22dc650dSSadaf Ebrahimi #error EBCDIC is not supported for the 16-bit or 32-bit libraries 1923*22dc650dSSadaf Ebrahimi #endif 1924*22dc650dSSadaf Ebrahimi 1925*22dc650dSSadaf Ebrahimi /* This is the largest non-UTF code point: 0xff, 0xffff or 0xffffffff for a code unit width of 8, 16 or 32 respectively. */ 1926*22dc650dSSadaf Ebrahimi 1927*22dc650dSSadaf Ebrahimi #define MAX_NON_UTF_CHAR (0xffffffffU >> (32 - PCRE2_CODE_UNIT_WIDTH)) 1928*22dc650dSSadaf Ebrahimi 1929*22dc650dSSadaf Ebrahimi /* Internal shared data tables and variables. These are used by more than one 1930*22dc650dSSadaf Ebrahimi of the exported public functions.
They have to be "external" in the C sense, 1931*22dc650dSSadaf Ebrahimi but are not part of the PCRE2 public API. Although the data for some of them is 1932*22dc650dSSadaf Ebrahimi identical in all libraries, they must have different names so that multiple 1933*22dc650dSSadaf Ebrahimi libraries can be simultaneously linked to a single application. However, UTF-8 1934*22dc650dSSadaf Ebrahimi tables are needed only when compiling the 8-bit library. */ 1935*22dc650dSSadaf Ebrahimi 1936*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8 1937*22dc650dSSadaf Ebrahimi extern const int PRIV(utf8_table1)[]; 1938*22dc650dSSadaf Ebrahimi extern const int PRIV(utf8_table1_size); 1939*22dc650dSSadaf Ebrahimi extern const int PRIV(utf8_table2)[]; 1940*22dc650dSSadaf Ebrahimi extern const int PRIV(utf8_table3)[]; 1941*22dc650dSSadaf Ebrahimi extern const uint8_t PRIV(utf8_table4)[]; 1942*22dc650dSSadaf Ebrahimi #endif 1943*22dc650dSSadaf Ebrahimi /* Each shared symbol is given a width-specific name by passing it through PCRE2_SUFFIX. */ 1944*22dc650dSSadaf Ebrahimi #define _pcre2_OP_lengths PCRE2_SUFFIX(_pcre2_OP_lengths_) 1945*22dc650dSSadaf Ebrahimi #define _pcre2_callout_end_delims PCRE2_SUFFIX(_pcre2_callout_end_delims_) 1946*22dc650dSSadaf Ebrahimi #define _pcre2_callout_start_delims PCRE2_SUFFIX(_pcre2_callout_start_delims_) 1947*22dc650dSSadaf Ebrahimi #define _pcre2_default_compile_context PCRE2_SUFFIX(_pcre2_default_compile_context_) 1948*22dc650dSSadaf Ebrahimi #define _pcre2_default_convert_context PCRE2_SUFFIX(_pcre2_default_convert_context_) 1949*22dc650dSSadaf Ebrahimi #define _pcre2_default_match_context PCRE2_SUFFIX(_pcre2_default_match_context_) 1950*22dc650dSSadaf Ebrahimi #define _pcre2_default_tables PCRE2_SUFFIX(_pcre2_default_tables_) 1951*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 32 1952*22dc650dSSadaf Ebrahimi #define _pcre2_dummy_ucd_record PCRE2_SUFFIX(_pcre2_dummy_ucd_record_) 1953*22dc650dSSadaf Ebrahimi #endif 1954*22dc650dSSadaf Ebrahimi #define _pcre2_hspace_list PCRE2_SUFFIX(_pcre2_hspace_list_) 1955*22dc650dSSadaf Ebrahimi
#define _pcre2_vspace_list PCRE2_SUFFIX(_pcre2_vspace_list_) 1956*22dc650dSSadaf Ebrahimi #define _pcre2_ucd_boolprop_sets PCRE2_SUFFIX(_pcre2_ucd_boolprop_sets_) 1957*22dc650dSSadaf Ebrahimi #define _pcre2_ucd_caseless_sets PCRE2_SUFFIX(_pcre2_ucd_caseless_sets_) 1958*22dc650dSSadaf Ebrahimi #define _pcre2_ucd_digit_sets PCRE2_SUFFIX(_pcre2_ucd_digit_sets_) 1959*22dc650dSSadaf Ebrahimi #define _pcre2_ucd_script_sets PCRE2_SUFFIX(_pcre2_ucd_script_sets_) 1960*22dc650dSSadaf Ebrahimi #define _pcre2_ucd_records PCRE2_SUFFIX(_pcre2_ucd_records_) 1961*22dc650dSSadaf Ebrahimi #define _pcre2_ucd_stage1 PCRE2_SUFFIX(_pcre2_ucd_stage1_) 1962*22dc650dSSadaf Ebrahimi #define _pcre2_ucd_stage2 PCRE2_SUFFIX(_pcre2_ucd_stage2_) 1963*22dc650dSSadaf Ebrahimi #define _pcre2_ucp_gbtable PCRE2_SUFFIX(_pcre2_ucp_gbtable_) 1964*22dc650dSSadaf Ebrahimi #define _pcre2_ucp_gentype PCRE2_SUFFIX(_pcre2_ucp_gentype_) 1965*22dc650dSSadaf Ebrahimi #define _pcre2_ucp_typerange PCRE2_SUFFIX(_pcre2_ucp_typerange_) 1966*22dc650dSSadaf Ebrahimi #define _pcre2_unicode_version PCRE2_SUFFIX(_pcre2_unicode_version_) 1967*22dc650dSSadaf Ebrahimi #define _pcre2_utt PCRE2_SUFFIX(_pcre2_utt_) 1968*22dc650dSSadaf Ebrahimi #define _pcre2_utt_names PCRE2_SUFFIX(_pcre2_utt_names_) 1969*22dc650dSSadaf Ebrahimi #define _pcre2_utt_size PCRE2_SUFFIX(_pcre2_utt_size_) 1970*22dc650dSSadaf Ebrahimi /* Declarations of the shared tables and variables. dummy_ucd_record is declared only for the 32-bit library and ucp_typerange only when SUPPORT_JIT is defined. NOTE(review): PRIV(x) presumably yields the _pcre2_x names defined above - its definition is not in this part of the file, confirm. */ 1971*22dc650dSSadaf Ebrahimi extern const uint8_t PRIV(OP_lengths)[]; 1972*22dc650dSSadaf Ebrahimi extern const uint32_t PRIV(callout_end_delims)[]; 1973*22dc650dSSadaf Ebrahimi extern const uint32_t PRIV(callout_start_delims)[]; 1974*22dc650dSSadaf Ebrahimi extern const pcre2_compile_context PRIV(default_compile_context); 1975*22dc650dSSadaf Ebrahimi extern const pcre2_convert_context PRIV(default_convert_context); 1976*22dc650dSSadaf Ebrahimi extern const pcre2_match_context PRIV(default_match_context); 1977*22dc650dSSadaf Ebrahimi extern const uint8_t PRIV(default_tables)[]; 1978*22dc650dSSadaf Ebrahimi extern const
uint32_t PRIV(hspace_list)[]; 1979*22dc650dSSadaf Ebrahimi extern const uint32_t PRIV(vspace_list)[]; 1980*22dc650dSSadaf Ebrahimi extern const uint32_t PRIV(ucd_boolprop_sets)[]; 1981*22dc650dSSadaf Ebrahimi extern const uint32_t PRIV(ucd_caseless_sets)[]; 1982*22dc650dSSadaf Ebrahimi extern const uint32_t PRIV(ucd_digit_sets)[]; 1983*22dc650dSSadaf Ebrahimi extern const uint32_t PRIV(ucd_script_sets)[]; 1984*22dc650dSSadaf Ebrahimi extern const ucd_record PRIV(ucd_records)[]; 1985*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 32 1986*22dc650dSSadaf Ebrahimi extern const ucd_record PRIV(dummy_ucd_record)[]; 1987*22dc650dSSadaf Ebrahimi #endif 1988*22dc650dSSadaf Ebrahimi extern const uint16_t PRIV(ucd_stage1)[]; 1989*22dc650dSSadaf Ebrahimi extern const uint16_t PRIV(ucd_stage2)[]; 1990*22dc650dSSadaf Ebrahimi extern const uint32_t PRIV(ucp_gbtable)[]; 1991*22dc650dSSadaf Ebrahimi extern const uint32_t PRIV(ucp_gentype)[]; 1992*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_JIT 1993*22dc650dSSadaf Ebrahimi extern const int PRIV(ucp_typerange)[]; 1994*22dc650dSSadaf Ebrahimi #endif 1995*22dc650dSSadaf Ebrahimi extern const char *PRIV(unicode_version); 1996*22dc650dSSadaf Ebrahimi extern const ucp_type_table PRIV(utt)[]; 1997*22dc650dSSadaf Ebrahimi extern const char PRIV(utt_names)[]; 1998*22dc650dSSadaf Ebrahimi extern const size_t PRIV(utt_size); 1999*22dc650dSSadaf Ebrahimi 2000*22dc650dSSadaf Ebrahimi /* Mode-dependent macros and hidden and private structures are defined in a 2001*22dc650dSSadaf Ebrahimi separate file so that pcre2test can include them at all supported widths. When 2002*22dc650dSSadaf Ebrahimi compiling the library, PCRE2_CODE_UNIT_WIDTH will be defined, and we can 2003*22dc650dSSadaf Ebrahimi include them at the appropriate width, after setting up suffix macros for the 2004*22dc650dSSadaf Ebrahimi private structures.
*/ 2005*22dc650dSSadaf Ebrahimi 2006*22dc650dSSadaf Ebrahimi #define branch_chain PCRE2_SUFFIX(branch_chain_) 2007*22dc650dSSadaf Ebrahimi #define compile_block PCRE2_SUFFIX(compile_block_) 2008*22dc650dSSadaf Ebrahimi #define dfa_match_block PCRE2_SUFFIX(dfa_match_block_) 2009*22dc650dSSadaf Ebrahimi #define match_block PCRE2_SUFFIX(match_block_) 2010*22dc650dSSadaf Ebrahimi #define named_group PCRE2_SUFFIX(named_group_) 2011*22dc650dSSadaf Ebrahimi 2012*22dc650dSSadaf Ebrahimi #include "pcre2_intmodedep.h" 2013*22dc650dSSadaf Ebrahimi 2014*22dc650dSSadaf Ebrahimi /* Private "external" functions. These are internal functions that are called 2015*22dc650dSSadaf Ebrahimi from modules other than the one in which they are defined. They have to be 2016*22dc650dSSadaf Ebrahimi "external" in the C sense, but are not part of the PCRE2 public API. They are 2017*22dc650dSSadaf Ebrahimi not referenced from pcre2test, and must not be defined when no code unit width 2018*22dc650dSSadaf Ebrahimi is available. 
*/ 2019*22dc650dSSadaf Ebrahimi 2020*22dc650dSSadaf Ebrahimi #define _pcre2_auto_possessify PCRE2_SUFFIX(_pcre2_auto_possessify_) 2021*22dc650dSSadaf Ebrahimi #define _pcre2_check_escape PCRE2_SUFFIX(_pcre2_check_escape_) 2022*22dc650dSSadaf Ebrahimi #define _pcre2_extuni PCRE2_SUFFIX(_pcre2_extuni_) 2023*22dc650dSSadaf Ebrahimi #define _pcre2_find_bracket PCRE2_SUFFIX(_pcre2_find_bracket_) 2024*22dc650dSSadaf Ebrahimi #define _pcre2_is_newline PCRE2_SUFFIX(_pcre2_is_newline_) 2025*22dc650dSSadaf Ebrahimi #define _pcre2_jit_free_rodata PCRE2_SUFFIX(_pcre2_jit_free_rodata_) 2026*22dc650dSSadaf Ebrahimi #define _pcre2_jit_free PCRE2_SUFFIX(_pcre2_jit_free_) 2027*22dc650dSSadaf Ebrahimi #define _pcre2_jit_get_size PCRE2_SUFFIX(_pcre2_jit_get_size_) 2028*22dc650dSSadaf Ebrahimi #define _pcre2_jit_get_target PCRE2_SUFFIX(_pcre2_jit_get_target_) 2029*22dc650dSSadaf Ebrahimi #define _pcre2_memctl_malloc PCRE2_SUFFIX(_pcre2_memctl_malloc_) 2030*22dc650dSSadaf Ebrahimi #define _pcre2_ord2utf PCRE2_SUFFIX(_pcre2_ord2utf_) 2031*22dc650dSSadaf Ebrahimi #define _pcre2_script_run PCRE2_SUFFIX(_pcre2_script_run_) 2032*22dc650dSSadaf Ebrahimi #define _pcre2_strcmp PCRE2_SUFFIX(_pcre2_strcmp_) 2033*22dc650dSSadaf Ebrahimi #define _pcre2_strcmp_c8 PCRE2_SUFFIX(_pcre2_strcmp_c8_) 2034*22dc650dSSadaf Ebrahimi #define _pcre2_strcpy_c8 PCRE2_SUFFIX(_pcre2_strcpy_c8_) 2035*22dc650dSSadaf Ebrahimi #define _pcre2_strlen PCRE2_SUFFIX(_pcre2_strlen_) 2036*22dc650dSSadaf Ebrahimi #define _pcre2_strncmp PCRE2_SUFFIX(_pcre2_strncmp_) 2037*22dc650dSSadaf Ebrahimi #define _pcre2_strncmp_c8 PCRE2_SUFFIX(_pcre2_strncmp_c8_) 2038*22dc650dSSadaf Ebrahimi #define _pcre2_study PCRE2_SUFFIX(_pcre2_study_) 2039*22dc650dSSadaf Ebrahimi #define _pcre2_valid_utf PCRE2_SUFFIX(_pcre2_valid_utf_) 2040*22dc650dSSadaf Ebrahimi #define _pcre2_was_newline PCRE2_SUFFIX(_pcre2_was_newline_) 2041*22dc650dSSadaf Ebrahimi #define _pcre2_xclass PCRE2_SUFFIX(_pcre2_xclass_) 2042*22dc650dSSadaf Ebrahimi 
2043*22dc650dSSadaf Ebrahimi extern int _pcre2_auto_possessify(PCRE2_UCHAR *, 2044*22dc650dSSadaf Ebrahimi const compile_block *); 2045*22dc650dSSadaf Ebrahimi extern int _pcre2_check_escape(PCRE2_SPTR *, PCRE2_SPTR, uint32_t *, 2046*22dc650dSSadaf Ebrahimi int *, uint32_t, uint32_t, BOOL, compile_block *); 2047*22dc650dSSadaf Ebrahimi extern PCRE2_SPTR _pcre2_extuni(uint32_t, PCRE2_SPTR, PCRE2_SPTR, PCRE2_SPTR, 2048*22dc650dSSadaf Ebrahimi BOOL, int *); 2049*22dc650dSSadaf Ebrahimi extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int); 2050*22dc650dSSadaf Ebrahimi extern BOOL _pcre2_is_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, 2051*22dc650dSSadaf Ebrahimi uint32_t *, BOOL); 2052*22dc650dSSadaf Ebrahimi extern void _pcre2_jit_free_rodata(void *, void *); 2053*22dc650dSSadaf Ebrahimi extern void _pcre2_jit_free(void *, pcre2_memctl *); 2054*22dc650dSSadaf Ebrahimi extern size_t _pcre2_jit_get_size(void *); 2055*22dc650dSSadaf Ebrahimi const char * _pcre2_jit_get_target(void); 2056*22dc650dSSadaf Ebrahimi extern void * _pcre2_memctl_malloc(size_t, pcre2_memctl *); 2057*22dc650dSSadaf Ebrahimi extern unsigned int _pcre2_ord2utf(uint32_t, PCRE2_UCHAR *); 2058*22dc650dSSadaf Ebrahimi extern BOOL _pcre2_script_run(PCRE2_SPTR, PCRE2_SPTR, BOOL); 2059*22dc650dSSadaf Ebrahimi extern int _pcre2_strcmp(PCRE2_SPTR, PCRE2_SPTR); 2060*22dc650dSSadaf Ebrahimi extern int _pcre2_strcmp_c8(PCRE2_SPTR, const char *); 2061*22dc650dSSadaf Ebrahimi extern PCRE2_SIZE _pcre2_strcpy_c8(PCRE2_UCHAR *, const char *); 2062*22dc650dSSadaf Ebrahimi extern PCRE2_SIZE _pcre2_strlen(PCRE2_SPTR); 2063*22dc650dSSadaf Ebrahimi extern int _pcre2_strncmp(PCRE2_SPTR, PCRE2_SPTR, size_t); 2064*22dc650dSSadaf Ebrahimi extern int _pcre2_strncmp_c8(PCRE2_SPTR, const char *, size_t); 2065*22dc650dSSadaf Ebrahimi extern int _pcre2_study(pcre2_real_code *); 2066*22dc650dSSadaf Ebrahimi extern int _pcre2_valid_utf(PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE *); 2067*22dc650dSSadaf Ebrahimi extern BOOL 
_pcre2_was_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, 2068*22dc650dSSadaf Ebrahimi uint32_t *, BOOL); 2069*22dc650dSSadaf Ebrahimi extern BOOL _pcre2_xclass(uint32_t, PCRE2_SPTR, BOOL); 2070*22dc650dSSadaf Ebrahimi 2071*22dc650dSSadaf Ebrahimi /* This function is needed only when memmove() is not available. */ 2072*22dc650dSSadaf Ebrahimi 2073*22dc650dSSadaf Ebrahimi #if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE) 2074*22dc650dSSadaf Ebrahimi #define _pcre2_memmove PCRE2_SUFFIX(_pcre2_memmove) 2075*22dc650dSSadaf Ebrahimi extern void * _pcre2_memmove(void *, const void *, size_t); 2076*22dc650dSSadaf Ebrahimi #endif 2077*22dc650dSSadaf Ebrahimi 2078*22dc650dSSadaf Ebrahimi #endif /* PCRE2_CODE_UNIT_WIDTH */ 2079*22dc650dSSadaf Ebrahimi 2080*22dc650dSSadaf Ebrahimi extern BOOL PRIV(ckd_smul)(PCRE2_SIZE *, int, int); 2081*22dc650dSSadaf Ebrahimi 2082*22dc650dSSadaf Ebrahimi #endif /* PCRE2_INTERNAL_H_IDEMPOTENT_GUARD */ 2083*22dc650dSSadaf Ebrahimi 2084*22dc650dSSadaf Ebrahimi /* End of pcre2_internal.h */ 2085