/*************************************************
*       Perl-Compatible Regular Expressions      *
*************************************************/

/* PCRE2 is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language. This is
the public header file to be #included by applications that call PCRE2 via the
POSIX wrapper interface.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
          New API code Copyright (c) 2016-2023 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/

#ifndef PCRE2POSIX_H_IDEMPOTENT_GUARD
#define PCRE2POSIX_H_IDEMPOTENT_GUARD

/* Have to include stdlib.h in order to ensure that size_t is defined. */

#include <stdlib.h>

/* Allow for C++ users */

#ifdef __cplusplus
extern "C" {
#endif

/* Options, mostly defined by POSIX, but with some extras. Each option is a
distinct single bit, so options can be combined with bitwise OR. */

#define REG_ICASE     0x0001  /* Maps to PCRE2_CASELESS */
#define REG_NEWLINE   0x0002  /* Maps to PCRE2_MULTILINE */
#define REG_NOTBOL    0x0004  /* Maps to PCRE2_NOTBOL */
#define REG_NOTEOL    0x0008  /* Maps to PCRE2_NOTEOL */
#define REG_DOTALL    0x0010  /* NOT defined by POSIX; maps to PCRE2_DOTALL */
#define REG_NOSUB     0x0020  /* Do not report what was matched */
#define REG_UTF       0x0040  /* NOT defined by POSIX; maps to PCRE2_UTF */
#define REG_STARTEND  0x0080  /* BSD feature: pass subject string by so,eo */
#define REG_NOTEMPTY  0x0100  /* NOT defined by POSIX; maps to PCRE2_NOTEMPTY */
#define REG_UNGREEDY  0x0200  /* NOT defined by POSIX; maps to PCRE2_UNGREEDY */
#define REG_UCP       0x0400  /* NOT defined by POSIX; maps to PCRE2_UCP */
#define REG_PEND      0x0800  /* GNU feature: pass end pattern by re_endp */
#define REG_NOSPEC    0x1000  /* Maps to PCRE2_LITERAL */

/* This is not used by PCRE2, but by defining it we make it easier
to slot PCRE2 into existing programs that make POSIX calls. */

#define REG_EXTENDED  0

/* Error values. Not all these are relevant or used by the wrapper. The values
are consecutive, starting at 1 (REG_ASSERT) and ending at 17 (REG_NOMATCH). */

enum {
  REG_ASSERT = 1,  /* internal error ? */
  REG_BADBR,       /* invalid repeat counts in {} */
  REG_BADPAT,      /* pattern error */
  REG_BADRPT,      /* ? * + invalid */
  REG_EBRACE,      /* unbalanced {} */
  REG_EBRACK,      /* unbalanced [] */
  REG_ECOLLATE,    /* collation error - not relevant */
  REG_ECTYPE,      /* bad class */
  REG_EESCAPE,     /* bad escape sequence */
  REG_EMPTY,       /* empty expression */
  REG_EPAREN,      /* unbalanced () */
  REG_ERANGE,      /* bad range inside [] */
  REG_ESIZE,       /* expression too big */
  REG_ESPACE,      /* failed to get memory */
  REG_ESUBREG,     /* bad back reference */
  REG_INVARG,      /* bad argument */
  REG_NOMATCH      /* match failed */
};


/* The structure representing a compiled regular expression. It is also used
for passing the pattern end pointer when REG_PEND is set. */

typedef struct {
  void *re_pcre2_code;   /* opaque pointer; managed by the wrapper */
  void *re_match_data;   /* opaque pointer; managed by the wrapper */
  const char *re_endp;   /* pattern end pointer, used with REG_PEND */
  size_t re_nsub;        /* NOTE(review): presumably the capture group count,
                            as in POSIX - confirm against pcre2posix.c */
  size_t re_erroffset;   /* NOTE(review): presumably the pattern offset of a
                            compile error - confirm against pcre2posix.c */
  int re_cflags;         /* NOTE(review): presumably the flags given at
                            compile time - confirm against pcre2posix.c */
} regex_t;

/* The structure in which a captured offset is returned. Note that regoff_t is
a plain int in this wrapper. */

typedef int regoff_t;

typedef struct {
  regoff_t rm_so;
  regoff_t rm_eo;
} regmatch_t;

/* When compiling with the MSVC compiler, it is sometimes necessary to include
a "calling convention" before exported function names. (This is secondhand
information; I know nothing about MSVC myself). For example, something like

  void __cdecl function(....)

might be needed. In order to make this easy, all the exported functions have
PCRE2_CALL_CONVENTION just before their names. It is rarely needed; if not
set, we ensure here that it has no effect. */

#ifndef PCRE2_CALL_CONVENTION
#define PCRE2_CALL_CONVENTION
#endif

#ifndef PCRE2_EXPORT
#define PCRE2_EXPORT
#endif

/* When an application links to a PCRE2 DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE2, the appropriate
export settings are needed, and are set in pcre2posix.c before including this
file. */

/* By default, we use the standard "extern" declarations. */

#ifndef PCRE2POSIX_EXP_DECL
#  if defined(_WIN32) && defined(PCRE2POSIX_SHARED) && !defined(PCRE2_STATIC)
#    define PCRE2POSIX_EXP_DECL		extern __declspec(dllimport)
#    define PCRE2POSIX_EXP_DEFN         __declspec(dllimport)
#  else
#    define PCRE2POSIX_EXP_DECL		extern PCRE2_EXPORT
#    define PCRE2POSIX_EXP_DEFN
#  endif
#endif

/* The functions. The actual code is in functions with pcre2_xxx names for
uniqueness. POSIX names are provided as macros for API compatibility with POSIX
regex functions. It's done this way to ensure that they are always linked from
the PCRE2 library and not by accident from elsewhere (regex_t differs in size
elsewhere). */

PCRE2POSIX_EXP_DECL int PCRE2_CALL_CONVENTION pcre2_regcomp(regex_t *, const char *, int);
PCRE2POSIX_EXP_DECL int PCRE2_CALL_CONVENTION pcre2_regexec(const regex_t *, const char *, size_t,
                     regmatch_t *, int);
PCRE2POSIX_EXP_DECL size_t PCRE2_CALL_CONVENTION pcre2_regerror(int, const regex_t *, char *, size_t);
PCRE2POSIX_EXP_DECL void PCRE2_CALL_CONVENTION pcre2_regfree(regex_t *);

#define regcomp  pcre2_regcomp
#define regexec  pcre2_regexec
#define regerror pcre2_regerror
#define regfree  pcre2_regfree

/* Debian had a patch that used different names. These are now here to save
them having to maintain their own patch, but are not documented by PCRE2. */

#define PCRE2regcomp  pcre2_regcomp
#define PCRE2regexec  pcre2_regexec
#define PCRE2regerror pcre2_regerror
#define PCRE2regfree  pcre2_regfree

#ifdef __cplusplus
}   /* extern "C" */
#endif

#endif /* PCRE2POSIX_H_IDEMPOTENT_GUARD */

/* End of pcre2posix.h */