xref: /aosp_15_r20/external/pcre/src/pcre2_dfa_match.c (revision 22dc650d8ae982c6770746019a6f94af92b0f024)
1*22dc650dSSadaf Ebrahimi /*************************************************
2*22dc650dSSadaf Ebrahimi *      Perl-Compatible Regular Expressions       *
3*22dc650dSSadaf Ebrahimi *************************************************/
4*22dc650dSSadaf Ebrahimi 
5*22dc650dSSadaf Ebrahimi /* PCRE is a library of functions to support regular expressions whose syntax
6*22dc650dSSadaf Ebrahimi and semantics are as close as possible to those of the Perl 5 language.
7*22dc650dSSadaf Ebrahimi 
8*22dc650dSSadaf Ebrahimi                        Written by Philip Hazel
9*22dc650dSSadaf Ebrahimi      Original API code Copyright (c) 1997-2012 University of Cambridge
10*22dc650dSSadaf Ebrahimi           New API code Copyright (c) 2016-2023 University of Cambridge
11*22dc650dSSadaf Ebrahimi 
12*22dc650dSSadaf Ebrahimi -----------------------------------------------------------------------------
13*22dc650dSSadaf Ebrahimi Redistribution and use in source and binary forms, with or without
14*22dc650dSSadaf Ebrahimi modification, are permitted provided that the following conditions are met:
15*22dc650dSSadaf Ebrahimi 
16*22dc650dSSadaf Ebrahimi     * Redistributions of source code must retain the above copyright notice,
17*22dc650dSSadaf Ebrahimi       this list of conditions and the following disclaimer.
18*22dc650dSSadaf Ebrahimi 
19*22dc650dSSadaf Ebrahimi     * Redistributions in binary form must reproduce the above copyright
20*22dc650dSSadaf Ebrahimi       notice, this list of conditions and the following disclaimer in the
21*22dc650dSSadaf Ebrahimi       documentation and/or other materials provided with the distribution.
22*22dc650dSSadaf Ebrahimi 
23*22dc650dSSadaf Ebrahimi     * Neither the name of the University of Cambridge nor the names of its
24*22dc650dSSadaf Ebrahimi       contributors may be used to endorse or promote products derived from
25*22dc650dSSadaf Ebrahimi       this software without specific prior written permission.
26*22dc650dSSadaf Ebrahimi 
27*22dc650dSSadaf Ebrahimi THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28*22dc650dSSadaf Ebrahimi AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29*22dc650dSSadaf Ebrahimi IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30*22dc650dSSadaf Ebrahimi ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31*22dc650dSSadaf Ebrahimi LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32*22dc650dSSadaf Ebrahimi CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33*22dc650dSSadaf Ebrahimi SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34*22dc650dSSadaf Ebrahimi INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35*22dc650dSSadaf Ebrahimi CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36*22dc650dSSadaf Ebrahimi ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37*22dc650dSSadaf Ebrahimi POSSIBILITY OF SUCH DAMAGE.
38*22dc650dSSadaf Ebrahimi -----------------------------------------------------------------------------
39*22dc650dSSadaf Ebrahimi */
40*22dc650dSSadaf Ebrahimi 
41*22dc650dSSadaf Ebrahimi 
/* This module contains the external function pcre2_dfa_match(), which is an
alternative matching function that uses a sort of DFA algorithm (not a true
FSM). This is NOT Perl-compatible, but it has advantages in certain
applications. */
46*22dc650dSSadaf Ebrahimi 
47*22dc650dSSadaf Ebrahimi 
/* NOTE ABOUT PERFORMANCE: A user of this function sent some code that improved
the performance of his patterns greatly. I could not use it as it stood, as it
was not thread safe, and made assumptions about pattern sizes. Also, it caused
test 7 to loop, and test 9 to crash with a segfault.

The issue is the check for duplicate states, which is done by a simple linear
search up the state list. (Grep for "duplicate" below to find the code.) For
many patterns, there will never be many states active at one time, so a simple
linear search is fine. In patterns that have many active states, it might be a
bottleneck. The suggested code used an indexing scheme to remember which states
had previously been used for each character, and avoided the linear search when
it knew there was no chance of a duplicate. This was implemented when adding
states to the state lists.

I wrote some thread-safe, not-limited code to try something similar at the time
of checking for duplicates (instead of when adding states), using index vectors
on the stack. It did give a 13% improvement with one specially constructed
pattern for certain subject strings, but on other strings and on many of the
simpler patterns in the test suite it did worse. The major problem, I think,
was the extra time to initialize the index. This had to be done for each call
of internal_dfa_match(). (The supplied patch used a static vector, initialized
only once - I suspect this was the cause of the problems with the tests.)

Overall, I concluded that the gains in some cases did not outweigh the losses
in others, so I abandoned this code. */
73*22dc650dSSadaf Ebrahimi 
74*22dc650dSSadaf Ebrahimi 
75*22dc650dSSadaf Ebrahimi #ifdef HAVE_CONFIG_H
76*22dc650dSSadaf Ebrahimi #include "config.h"
77*22dc650dSSadaf Ebrahimi #endif
78*22dc650dSSadaf Ebrahimi 
/* Name aliases for fields of the match block. They are defined ahead of the
#include of pcre2_internal.h; presumably macros in that header expand them, so
the order matters - confirm against the header before moving them. */

#define NLBLOCK mb             /* Block containing newline information */
#define PSSTART start_subject  /* Field containing processed string start */
#define PSEND   end_subject    /* Field containing processed string end */
82*22dc650dSSadaf Ebrahimi 
83*22dc650dSSadaf Ebrahimi #include "pcre2_internal.h"
84*22dc650dSSadaf Ebrahimi 
/* Mask made by OR-ing together twelve PCRE2_xxx option bits. Going by the
name, this is the set of options a caller may pass to pcre2_dfa_match(); the
code that applies the mask is outside this section. */

#define PUBLIC_DFA_MATCH_OPTIONS \
  (PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
   PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \
   PCRE2_PARTIAL_SOFT|PCRE2_DFA_SHORTEST|PCRE2_DFA_RESTART| \
   PCRE2_COPY_MATCHED_SUBJECT)
90*22dc650dSSadaf Ebrahimi 
91*22dc650dSSadaf Ebrahimi 
92*22dc650dSSadaf Ebrahimi /*************************************************
93*22dc650dSSadaf Ebrahimi *      Code parameters and static tables         *
94*22dc650dSSadaf Ebrahimi *************************************************/
95*22dc650dSSadaf Ebrahimi 
/* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
into others, under special conditions. A gap of 20 between the blocks should be
enough. The resulting opcodes don't have to be less than 256 because they are
never stored, so we push them well clear of the normal opcodes. */

#define OP_PROP_EXTRA       300
#define OP_EXTUNI_EXTRA     320
#define OP_ANYNL_EXTRA      340
#define OP_HSPACE_EXTRA     360
#define OP_VSPACE_EXTRA     380
106*22dc650dSSadaf Ebrahimi 
107*22dc650dSSadaf Ebrahimi 
108*22dc650dSSadaf Ebrahimi /* This table identifies those opcodes that are followed immediately by a
109*22dc650dSSadaf Ebrahimi character that is to be tested in some way. This makes it possible to
110*22dc650dSSadaf Ebrahimi centralize the loading of these characters. In the case of Type * etc, the
111*22dc650dSSadaf Ebrahimi "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
112*22dc650dSSadaf Ebrahimi small value. Non-zero values in the table are the offsets from the opcode where
113*22dc650dSSadaf Ebrahimi the character is to be found. ***NOTE*** If the start of this table is
114*22dc650dSSadaf Ebrahimi modified, the three tables that follow must also be modified. */
115*22dc650dSSadaf Ebrahimi 
116*22dc650dSSadaf Ebrahimi static const uint8_t coptable[] = {
117*22dc650dSSadaf Ebrahimi   0,                             /* End                                    */
118*22dc650dSSadaf Ebrahimi   0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
119*22dc650dSSadaf Ebrahimi   0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
120*22dc650dSSadaf Ebrahimi   0, 0, 0,                       /* Any, AllAny, Anybyte                   */
121*22dc650dSSadaf Ebrahimi   0, 0,                          /* \P, \p                                 */
122*22dc650dSSadaf Ebrahimi   0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
123*22dc650dSSadaf Ebrahimi   0,                             /* \X                                     */
124*22dc650dSSadaf Ebrahimi   0, 0, 0, 0, 0, 0,              /* \Z, \z, $, $M, ^, ^M                   */
125*22dc650dSSadaf Ebrahimi   1,                             /* Char                                   */
126*22dc650dSSadaf Ebrahimi   1,                             /* Chari                                  */
127*22dc650dSSadaf Ebrahimi   1,                             /* not                                    */
128*22dc650dSSadaf Ebrahimi   1,                             /* noti                                   */
129*22dc650dSSadaf Ebrahimi   /* Positive single-char repeats                                          */
130*22dc650dSSadaf Ebrahimi   1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
131*22dc650dSSadaf Ebrahimi   1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto, minupto                          */
132*22dc650dSSadaf Ebrahimi   1+IMM2_SIZE,                   /* exact                                  */
133*22dc650dSSadaf Ebrahimi   1, 1, 1, 1+IMM2_SIZE,          /* *+, ++, ?+, upto+                      */
134*22dc650dSSadaf Ebrahimi   1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */
135*22dc650dSSadaf Ebrahimi   1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto I, minupto I                      */
136*22dc650dSSadaf Ebrahimi   1+IMM2_SIZE,                   /* exact I                                */
137*22dc650dSSadaf Ebrahimi   1, 1, 1, 1+IMM2_SIZE,          /* *+I, ++I, ?+I, upto+I                  */
138*22dc650dSSadaf Ebrahimi   /* Negative single-char repeats - only for chars < 256                   */
139*22dc650dSSadaf Ebrahimi   1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
140*22dc650dSSadaf Ebrahimi   1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto, minupto                      */
141*22dc650dSSadaf Ebrahimi   1+IMM2_SIZE,                   /* NOT exact                              */
142*22dc650dSSadaf Ebrahimi   1, 1, 1, 1+IMM2_SIZE,          /* NOT *+, ++, ?+, upto+                  */
143*22dc650dSSadaf Ebrahimi   1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */
144*22dc650dSSadaf Ebrahimi   1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto I, minupto I                  */
145*22dc650dSSadaf Ebrahimi   1+IMM2_SIZE,                   /* NOT exact I                            */
146*22dc650dSSadaf Ebrahimi   1, 1, 1, 1+IMM2_SIZE,          /* NOT *+I, ++I, ?+I, upto+I              */
147*22dc650dSSadaf Ebrahimi   /* Positive type repeats                                                 */
148*22dc650dSSadaf Ebrahimi   1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
149*22dc650dSSadaf Ebrahimi   1+IMM2_SIZE, 1+IMM2_SIZE,      /* Type upto, minupto                     */
150*22dc650dSSadaf Ebrahimi   1+IMM2_SIZE,                   /* Type exact                             */
151*22dc650dSSadaf Ebrahimi   1, 1, 1, 1+IMM2_SIZE,          /* Type *+, ++, ?+, upto+                 */
152*22dc650dSSadaf Ebrahimi   /* Character class & ref repeats                                         */
153*22dc650dSSadaf Ebrahimi   0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */
154*22dc650dSSadaf Ebrahimi   0, 0,                          /* CRRANGE, CRMINRANGE                    */
155*22dc650dSSadaf Ebrahimi   0, 0, 0, 0,                    /* Possessive *+, ++, ?+, CRPOSRANGE      */
156*22dc650dSSadaf Ebrahimi   0,                             /* CLASS                                  */
157*22dc650dSSadaf Ebrahimi   0,                             /* NCLASS                                 */
158*22dc650dSSadaf Ebrahimi   0,                             /* XCLASS - variable length               */
159*22dc650dSSadaf Ebrahimi   0,                             /* REF                                    */
160*22dc650dSSadaf Ebrahimi   0,                             /* REFI                                   */
161*22dc650dSSadaf Ebrahimi   0,                             /* DNREF                                  */
162*22dc650dSSadaf Ebrahimi   0,                             /* DNREFI                                 */
163*22dc650dSSadaf Ebrahimi   0,                             /* RECURSE                                */
164*22dc650dSSadaf Ebrahimi   0,                             /* CALLOUT                                */
165*22dc650dSSadaf Ebrahimi   0,                             /* CALLOUT_STR                            */
166*22dc650dSSadaf Ebrahimi   0,                             /* Alt                                    */
167*22dc650dSSadaf Ebrahimi   0,                             /* Ket                                    */
168*22dc650dSSadaf Ebrahimi   0,                             /* KetRmax                                */
169*22dc650dSSadaf Ebrahimi   0,                             /* KetRmin                                */
170*22dc650dSSadaf Ebrahimi   0,                             /* KetRpos                                */
171*22dc650dSSadaf Ebrahimi   0, 0,                          /* Reverse, Vreverse                      */
172*22dc650dSSadaf Ebrahimi   0,                             /* Assert                                 */
173*22dc650dSSadaf Ebrahimi   0,                             /* Assert not                             */
174*22dc650dSSadaf Ebrahimi   0,                             /* Assert behind                          */
175*22dc650dSSadaf Ebrahimi   0,                             /* Assert behind not                      */
176*22dc650dSSadaf Ebrahimi   0,                             /* NA assert                              */
177*22dc650dSSadaf Ebrahimi   0,                             /* NA assert behind                       */
178*22dc650dSSadaf Ebrahimi   0,                             /* ONCE                                   */
179*22dc650dSSadaf Ebrahimi   0,                             /* SCRIPT_RUN                             */
180*22dc650dSSadaf Ebrahimi   0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
181*22dc650dSSadaf Ebrahimi   0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
182*22dc650dSSadaf Ebrahimi   0, 0,                          /* CREF, DNCREF                           */
183*22dc650dSSadaf Ebrahimi   0, 0,                          /* RREF, DNRREF                           */
184*22dc650dSSadaf Ebrahimi   0, 0,                          /* FALSE, TRUE                            */
185*22dc650dSSadaf Ebrahimi   0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
186*22dc650dSSadaf Ebrahimi   0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
187*22dc650dSSadaf Ebrahimi   0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
188*22dc650dSSadaf Ebrahimi   0, 0,                          /* COMMIT, COMMIT_ARG                     */
189*22dc650dSSadaf Ebrahimi   0, 0, 0,                       /* FAIL, ACCEPT, ASSERT_ACCEPT            */
190*22dc650dSSadaf Ebrahimi   0, 0, 0,                       /* CLOSE, SKIPZERO, DEFINE                */
191*22dc650dSSadaf Ebrahimi   0, 0                           /* \B and \b in UCP mode                  */
192*22dc650dSSadaf Ebrahimi };
193*22dc650dSSadaf Ebrahimi 
/* This table identifies those opcodes that inspect a character. It is used to
remember the fact that a character could have been inspected when the end of
the subject is reached. ***NOTE*** If the start of this table is modified, the
two tables that follow must also be modified.

The table has one entry per opcode, in the same order as coptable: 1 means the
opcode inspects a character, 0 means it does not. */

static const uint8_t poptable[] = {
  0,                             /* End                                    */
  0, 0, 0, 1, 1,                 /* \A, \G, \K, \B, \b                     */
  1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */
  1, 1, 1,                       /* Any, AllAny, Anybyte                   */
  1, 1,                          /* \P, \p                                 */
  1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */
  1,                             /* \X                                     */
  0, 0, 0, 0, 0, 0,              /* \Z, \z, $, $M, ^, ^M                   */
  1,                             /* Char                                   */
  1,                             /* Chari                                  */
  1,                             /* not                                    */
  1,                             /* noti                                   */
  /* Positive single-char repeats                                          */
  1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
  1, 1, 1,                       /* upto, minupto, exact                   */
  1, 1, 1, 1,                    /* *+, ++, ?+, upto+                      */
  1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */
  1, 1, 1,                       /* upto I, minupto I, exact I             */
  1, 1, 1, 1,                    /* *+I, ++I, ?+I, upto+I                  */
  /* Negative single-char repeats - only for chars < 256                   */
  1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
  1, 1, 1,                       /* NOT upto, minupto, exact               */
  1, 1, 1, 1,                    /* NOT *+, ++, ?+, upto+                  */
  1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */
  1, 1, 1,                       /* NOT upto I, minupto I, exact I         */
  1, 1, 1, 1,                    /* NOT *+I, ++I, ?+I, upto+I              */
  /* Positive type repeats                                                 */
  1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
  1, 1, 1,                       /* Type upto, minupto, exact              */
  1, 1, 1, 1,                    /* Type *+, ++, ?+, upto+                 */
  /* Character class & ref repeats                                         */
  1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
  1, 1,                          /* CRRANGE, CRMINRANGE                    */
  1, 1, 1, 1,                    /* Possessive *+, ++, ?+, CRPOSRANGE      */
  1,                             /* CLASS                                  */
  1,                             /* NCLASS                                 */
  1,                             /* XCLASS - variable length               */
  0,                             /* REF                                    */
  0,                             /* REFI                                   */
  0,                             /* DNREF                                  */
  0,                             /* DNREFI                                 */
  0,                             /* RECURSE                                */
  0,                             /* CALLOUT                                */
  0,                             /* CALLOUT_STR                            */
  0,                             /* Alt                                    */
  0,                             /* Ket                                    */
  0,                             /* KetRmax                                */
  0,                             /* KetRmin                                */
  0,                             /* KetRpos                                */
  0, 0,                          /* Reverse, Vreverse                      */
  0,                             /* Assert                                 */
  0,                             /* Assert not                             */
  0,                             /* Assert behind                          */
  0,                             /* Assert behind not                      */
  0,                             /* NA assert                              */
  0,                             /* NA assert behind                       */
  0,                             /* ONCE                                   */
  0,                             /* SCRIPT_RUN                             */
  0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
  0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
  0, 0,                          /* CREF, DNCREF                           */
  0, 0,                          /* RREF, DNRREF                           */
  0, 0,                          /* FALSE, TRUE                            */
  0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
  0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
  0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
  0, 0,                          /* COMMIT, COMMIT_ARG                     */
  0, 0, 0,                       /* FAIL, ACCEPT, ASSERT_ACCEPT            */
  0, 0, 0,                       /* CLOSE, SKIPZERO, DEFINE                */
  1, 1                           /* \B and \b in UCP mode                  */
};
271*22dc650dSSadaf Ebrahimi 
272*22dc650dSSadaf Ebrahimi /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
273*22dc650dSSadaf Ebrahimi and \w */
274*22dc650dSSadaf Ebrahimi 
275*22dc650dSSadaf Ebrahimi static const uint8_t toptable1[] = {
276*22dc650dSSadaf Ebrahimi   0, 0, 0, 0, 0, 0,
277*22dc650dSSadaf Ebrahimi   ctype_digit, ctype_digit,
278*22dc650dSSadaf Ebrahimi   ctype_space, ctype_space,
279*22dc650dSSadaf Ebrahimi   ctype_word,  ctype_word,
280*22dc650dSSadaf Ebrahimi   0, 0                            /* OP_ANY, OP_ALLANY */
281*22dc650dSSadaf Ebrahimi };
282*22dc650dSSadaf Ebrahimi 
283*22dc650dSSadaf Ebrahimi static const uint8_t toptable2[] = {
284*22dc650dSSadaf Ebrahimi   0, 0, 0, 0, 0, 0,
285*22dc650dSSadaf Ebrahimi   ctype_digit, 0,
286*22dc650dSSadaf Ebrahimi   ctype_space, 0,
287*22dc650dSSadaf Ebrahimi   ctype_word,  0,
288*22dc650dSSadaf Ebrahimi   1, 1                            /* OP_ANY, OP_ALLANY */
289*22dc650dSSadaf Ebrahimi };
290*22dc650dSSadaf Ebrahimi 
291*22dc650dSSadaf Ebrahimi 
/* Structure for holding data about a particular state, which is in effect the
current data for an active path through the match tree. It must consist
entirely of ints because the working vector we are passed, and which we put
these structures in, is a vector of ints. */

typedef struct stateblock {
  int offset;                     /* Offset to opcode (-ve has meaning) */
  int count;                      /* Count for repeats */
  int data;                       /* Some use extra data */
} stateblock;

/* Size of one stateblock, in ints rather than bytes. */

#define INTS_PER_STATEBLOCK  (int)(sizeof(stateblock)/sizeof(int))
304*22dc650dSSadaf Ebrahimi 
305*22dc650dSSadaf Ebrahimi 
/* Before version 10.32 the recursive calls of internal_dfa_match() were passed
local working space and output vectors that were created on the stack. This has
caused issues for some patterns, especially in small-stack environments such as
Windows. A new scheme is now in use which sets up a vector on the stack, but if
this is too small, heap memory is used, up to the heap_limit. The main
parameters are all numbers of ints because the workspace is a vector of ints.

The size of the starting stack vector, DFA_START_RWS_SIZE, is in bytes, and is
defined in pcre2_internal.h so as to be available to pcre2test when it is
finding the minimum heap requirement for a match. */

/* Number of ints occupied by one PCRE2_SIZE ovector element. */

#define OVEC_UNIT  (sizeof(PCRE2_SIZE)/sizeof(int))

#define RWS_BASE_SIZE   (DFA_START_RWS_SIZE/sizeof(int))  /* Stack vector */
#define RWS_RSIZE       1000                    /* Work size for recursion */
#define RWS_OVEC_RSIZE  (1000*OVEC_UNIT)        /* Ovector for recursion */
#define RWS_OVEC_OSIZE  (2*OVEC_UNIT)           /* Ovector in other cases */
323*22dc650dSSadaf Ebrahimi 
/* This structure is at the start of each workspace block. The blocks are
chained through the next pointer; size and free are both counted in ints. */

typedef struct RWS_anchor {
  struct RWS_anchor *next;
  uint32_t size;  /* Number of ints */
  uint32_t free;  /* Number of ints */
} RWS_anchor;

/* Size of the anchor itself, in ints rather than bytes. */

#define RWS_ANCHOR_SIZE (sizeof(RWS_anchor)/sizeof(int))
333*22dc650dSSadaf Ebrahimi 
334*22dc650dSSadaf Ebrahimi 
335*22dc650dSSadaf Ebrahimi 
336*22dc650dSSadaf Ebrahimi /*************************************************
337*22dc650dSSadaf Ebrahimi *               Process a callout                *
338*22dc650dSSadaf Ebrahimi *************************************************/
339*22dc650dSSadaf Ebrahimi 
340*22dc650dSSadaf Ebrahimi /* This function is called to perform a callout.
341*22dc650dSSadaf Ebrahimi 
342*22dc650dSSadaf Ebrahimi Arguments:
343*22dc650dSSadaf Ebrahimi   code              current code pointer
344*22dc650dSSadaf Ebrahimi   offsets           points to current capture offsets
345*22dc650dSSadaf Ebrahimi   current_subject   start of current subject match
346*22dc650dSSadaf Ebrahimi   ptr               current position in subject
347*22dc650dSSadaf Ebrahimi   mb                the match block
348*22dc650dSSadaf Ebrahimi   extracode         extra code offset when called from condition
349*22dc650dSSadaf Ebrahimi   lengthptr         where to return the callout length
350*22dc650dSSadaf Ebrahimi 
351*22dc650dSSadaf Ebrahimi Returns:            the return from the callout
352*22dc650dSSadaf Ebrahimi */
353*22dc650dSSadaf Ebrahimi 
354*22dc650dSSadaf Ebrahimi static int
do_callout_dfa(PCRE2_SPTR code,PCRE2_SIZE * offsets,PCRE2_SPTR current_subject,PCRE2_SPTR ptr,dfa_match_block * mb,PCRE2_SIZE extracode,PCRE2_SIZE * lengthptr)355*22dc650dSSadaf Ebrahimi do_callout_dfa(PCRE2_SPTR code, PCRE2_SIZE *offsets, PCRE2_SPTR current_subject,
356*22dc650dSSadaf Ebrahimi   PCRE2_SPTR ptr, dfa_match_block *mb, PCRE2_SIZE extracode,
357*22dc650dSSadaf Ebrahimi   PCRE2_SIZE *lengthptr)
358*22dc650dSSadaf Ebrahimi {
359*22dc650dSSadaf Ebrahimi pcre2_callout_block *cb = mb->cb;
360*22dc650dSSadaf Ebrahimi 
361*22dc650dSSadaf Ebrahimi *lengthptr = (code[extracode] == OP_CALLOUT)?
362*22dc650dSSadaf Ebrahimi   (PCRE2_SIZE)PRIV(OP_lengths)[OP_CALLOUT] :
363*22dc650dSSadaf Ebrahimi   (PCRE2_SIZE)GET(code, 1 + 2*LINK_SIZE + extracode);
364*22dc650dSSadaf Ebrahimi 
365*22dc650dSSadaf Ebrahimi if (mb->callout == NULL) return 0;    /* No callout provided */
366*22dc650dSSadaf Ebrahimi 
367*22dc650dSSadaf Ebrahimi /* Fixed fields in the callout block are set once and for all at the start of
368*22dc650dSSadaf Ebrahimi matching. */
369*22dc650dSSadaf Ebrahimi 
370*22dc650dSSadaf Ebrahimi cb->offset_vector    = offsets;
371*22dc650dSSadaf Ebrahimi cb->start_match      = (PCRE2_SIZE)(current_subject - mb->start_subject);
372*22dc650dSSadaf Ebrahimi cb->current_position = (PCRE2_SIZE)(ptr - mb->start_subject);
373*22dc650dSSadaf Ebrahimi cb->pattern_position = GET(code, 1 + extracode);
374*22dc650dSSadaf Ebrahimi cb->next_item_length = GET(code, 1 + LINK_SIZE + extracode);
375*22dc650dSSadaf Ebrahimi 
376*22dc650dSSadaf Ebrahimi if (code[extracode] == OP_CALLOUT)
377*22dc650dSSadaf Ebrahimi   {
378*22dc650dSSadaf Ebrahimi   cb->callout_number = code[1 + 2*LINK_SIZE + extracode];
379*22dc650dSSadaf Ebrahimi   cb->callout_string_offset = 0;
380*22dc650dSSadaf Ebrahimi   cb->callout_string = NULL;
381*22dc650dSSadaf Ebrahimi   cb->callout_string_length = 0;
382*22dc650dSSadaf Ebrahimi   }
383*22dc650dSSadaf Ebrahimi else
384*22dc650dSSadaf Ebrahimi   {
385*22dc650dSSadaf Ebrahimi   cb->callout_number = 0;
386*22dc650dSSadaf Ebrahimi   cb->callout_string_offset = GET(code, 1 + 3*LINK_SIZE + extracode);
387*22dc650dSSadaf Ebrahimi   cb->callout_string = code + (1 + 4*LINK_SIZE + extracode) + 1;
388*22dc650dSSadaf Ebrahimi   cb->callout_string_length = *lengthptr - (1 + 4*LINK_SIZE) - 2;
389*22dc650dSSadaf Ebrahimi   }
390*22dc650dSSadaf Ebrahimi 
391*22dc650dSSadaf Ebrahimi return (mb->callout)(cb, mb->callout_data);
392*22dc650dSSadaf Ebrahimi }
393*22dc650dSSadaf Ebrahimi 
394*22dc650dSSadaf Ebrahimi 
395*22dc650dSSadaf Ebrahimi 
396*22dc650dSSadaf Ebrahimi /*************************************************
397*22dc650dSSadaf Ebrahimi *         Expand local workspace memory          *
398*22dc650dSSadaf Ebrahimi *************************************************/
399*22dc650dSSadaf Ebrahimi 
400*22dc650dSSadaf Ebrahimi /* This function is called when internal_dfa_match() is about to be called
401*22dc650dSSadaf Ebrahimi recursively and there is insufficient working space left in the current
402*22dc650dSSadaf Ebrahimi workspace block. If there's an existing next block, use it; otherwise get a new
403*22dc650dSSadaf Ebrahimi block unless the heap limit is reached.
404*22dc650dSSadaf Ebrahimi 
405*22dc650dSSadaf Ebrahimi Arguments:
406*22dc650dSSadaf Ebrahimi   rwsptr     pointer to block pointer (updated)
407*22dc650dSSadaf Ebrahimi   ovecsize   space needed for an ovector
408*22dc650dSSadaf Ebrahimi   mb         the match block
409*22dc650dSSadaf Ebrahimi 
410*22dc650dSSadaf Ebrahimi Returns:     0 rwsptr has been updated
411*22dc650dSSadaf Ebrahimi             !0 an error code
412*22dc650dSSadaf Ebrahimi */
413*22dc650dSSadaf Ebrahimi 
414*22dc650dSSadaf Ebrahimi static int
more_workspace(RWS_anchor ** rwsptr,unsigned int ovecsize,dfa_match_block * mb)415*22dc650dSSadaf Ebrahimi more_workspace(RWS_anchor **rwsptr, unsigned int ovecsize, dfa_match_block *mb)
416*22dc650dSSadaf Ebrahimi {
417*22dc650dSSadaf Ebrahimi RWS_anchor *rws = *rwsptr;
418*22dc650dSSadaf Ebrahimi RWS_anchor *new;
419*22dc650dSSadaf Ebrahimi 
420*22dc650dSSadaf Ebrahimi if (rws->next != NULL)
421*22dc650dSSadaf Ebrahimi   {
422*22dc650dSSadaf Ebrahimi   new = rws->next;
423*22dc650dSSadaf Ebrahimi   }
424*22dc650dSSadaf Ebrahimi 
425*22dc650dSSadaf Ebrahimi /* Sizes in the RWS_anchor blocks are in units of sizeof(int), but
426*22dc650dSSadaf Ebrahimi mb->heap_limit and mb->heap_used are in kibibytes. Play carefully, to avoid
427*22dc650dSSadaf Ebrahimi overflow. */
428*22dc650dSSadaf Ebrahimi 
429*22dc650dSSadaf Ebrahimi else
430*22dc650dSSadaf Ebrahimi   {
431*22dc650dSSadaf Ebrahimi   uint32_t newsize = (rws->size >= UINT32_MAX/(sizeof(int)*2))? UINT32_MAX/sizeof(int) : rws->size * 2;
432*22dc650dSSadaf Ebrahimi   uint32_t newsizeK = newsize/(1024/sizeof(int));
433*22dc650dSSadaf Ebrahimi 
434*22dc650dSSadaf Ebrahimi   if (newsizeK + mb->heap_used > mb->heap_limit)
435*22dc650dSSadaf Ebrahimi     newsizeK = (uint32_t)(mb->heap_limit - mb->heap_used);
436*22dc650dSSadaf Ebrahimi   newsize = newsizeK*(1024/sizeof(int));
437*22dc650dSSadaf Ebrahimi 
438*22dc650dSSadaf Ebrahimi   if (newsize < RWS_RSIZE + ovecsize + RWS_ANCHOR_SIZE)
439*22dc650dSSadaf Ebrahimi     return PCRE2_ERROR_HEAPLIMIT;
440*22dc650dSSadaf Ebrahimi   new = mb->memctl.malloc(newsize*sizeof(int), mb->memctl.memory_data);
441*22dc650dSSadaf Ebrahimi   if (new == NULL) return PCRE2_ERROR_NOMEMORY;
442*22dc650dSSadaf Ebrahimi   mb->heap_used += newsizeK;
443*22dc650dSSadaf Ebrahimi   new->next = NULL;
444*22dc650dSSadaf Ebrahimi   new->size = newsize;
445*22dc650dSSadaf Ebrahimi   rws->next = new;
446*22dc650dSSadaf Ebrahimi   }
447*22dc650dSSadaf Ebrahimi 
448*22dc650dSSadaf Ebrahimi new->free = new->size - RWS_ANCHOR_SIZE;
449*22dc650dSSadaf Ebrahimi *rwsptr = new;
450*22dc650dSSadaf Ebrahimi return 0;
451*22dc650dSSadaf Ebrahimi }
452*22dc650dSSadaf Ebrahimi 
453*22dc650dSSadaf Ebrahimi 
454*22dc650dSSadaf Ebrahimi 
455*22dc650dSSadaf Ebrahimi /*************************************************
456*22dc650dSSadaf Ebrahimi *     Match a Regular Expression - DFA engine    *
457*22dc650dSSadaf Ebrahimi *************************************************/
458*22dc650dSSadaf Ebrahimi 
459*22dc650dSSadaf Ebrahimi /* This internal function applies a compiled pattern to a subject string,
460*22dc650dSSadaf Ebrahimi starting at a given point, using a DFA engine. This function is called from the
461*22dc650dSSadaf Ebrahimi external one, possibly multiple times if the pattern is not anchored. The
462*22dc650dSSadaf Ebrahimi function calls itself recursively for some kinds of subpattern.
463*22dc650dSSadaf Ebrahimi 
464*22dc650dSSadaf Ebrahimi Arguments:
465*22dc650dSSadaf Ebrahimi   mb                the match_data block with fixed information
466*22dc650dSSadaf Ebrahimi   this_start_code   the opening bracket of this subexpression's code
467*22dc650dSSadaf Ebrahimi   current_subject   where we currently are in the subject string
468*22dc650dSSadaf Ebrahimi   start_offset      start offset in the subject string
469*22dc650dSSadaf Ebrahimi   offsets           vector to contain the matching string offsets
470*22dc650dSSadaf Ebrahimi   offsetcount       size of same
471*22dc650dSSadaf Ebrahimi   workspace         vector of workspace
472*22dc650dSSadaf Ebrahimi   wscount           size of same
473*22dc650dSSadaf Ebrahimi   rlevel            function call recursion level
474*22dc650dSSadaf Ebrahimi 
475*22dc650dSSadaf Ebrahimi Returns:            > 0 => number of match offset pairs placed in offsets
476*22dc650dSSadaf Ebrahimi                     = 0 => offsets overflowed; longest matches are present
477*22dc650dSSadaf Ebrahimi                      -1 => failed to match
478*22dc650dSSadaf Ebrahimi                    < -1 => some kind of unexpected problem
479*22dc650dSSadaf Ebrahimi 
480*22dc650dSSadaf Ebrahimi The following macros are used for adding states to the two state vectors (one
481*22dc650dSSadaf Ebrahimi for the current character, one for the following character). */
482*22dc650dSSadaf Ebrahimi 
/* Each macro appends one entry to a state list. The list's counter is always
incremented; if it had already reached wscount, the enclosing function returns
PCRE2_ERROR_DFA_WSSIZE and nothing is written. The _DATA variants also set the
entry's data field. Each expansion is a single statement that must be followed
by a semicolon. */

#define ADD_ACTIVE(x,y) \
  do \
    { \
    if (active_count++ >= wscount) return PCRE2_ERROR_DFA_WSSIZE; \
    next_active_state->offset = (x); \
    next_active_state->count  = (y); \
    next_active_state++; \
    } \
  while (0)

#define ADD_ACTIVE_DATA(x,y,z) \
  do \
    { \
    if (active_count++ >= wscount) return PCRE2_ERROR_DFA_WSSIZE; \
    next_active_state->offset = (x); \
    next_active_state->count  = (y); \
    next_active_state->data   = (z); \
    next_active_state++; \
    } \
  while (0)

#define ADD_NEW(x,y) \
  do \
    { \
    if (new_count++ >= wscount) return PCRE2_ERROR_DFA_WSSIZE; \
    next_new_state->offset = (x); \
    next_new_state->count  = (y); \
    next_new_state++; \
    } \
  while (0)

#define ADD_NEW_DATA(x,y,z) \
  do \
    { \
    if (new_count++ >= wscount) return PCRE2_ERROR_DFA_WSSIZE; \
    next_new_state->offset = (x); \
    next_new_state->count  = (y); \
    next_new_state->data   = (z); \
    next_new_state++; \
    } \
  while (0)
520*22dc650dSSadaf Ebrahimi 
521*22dc650dSSadaf Ebrahimi /* And now, here is the code */
522*22dc650dSSadaf Ebrahimi 
523*22dc650dSSadaf Ebrahimi static int
internal_dfa_match(dfa_match_block * mb,PCRE2_SPTR this_start_code,PCRE2_SPTR current_subject,PCRE2_SIZE start_offset,PCRE2_SIZE * offsets,uint32_t offsetcount,int * workspace,int wscount,uint32_t rlevel,int * RWS)524*22dc650dSSadaf Ebrahimi internal_dfa_match(
525*22dc650dSSadaf Ebrahimi   dfa_match_block *mb,
526*22dc650dSSadaf Ebrahimi   PCRE2_SPTR this_start_code,
527*22dc650dSSadaf Ebrahimi   PCRE2_SPTR current_subject,
528*22dc650dSSadaf Ebrahimi   PCRE2_SIZE start_offset,
529*22dc650dSSadaf Ebrahimi   PCRE2_SIZE *offsets,
530*22dc650dSSadaf Ebrahimi   uint32_t offsetcount,
531*22dc650dSSadaf Ebrahimi   int *workspace,
532*22dc650dSSadaf Ebrahimi   int wscount,
533*22dc650dSSadaf Ebrahimi   uint32_t rlevel,
534*22dc650dSSadaf Ebrahimi   int *RWS)
535*22dc650dSSadaf Ebrahimi {
536*22dc650dSSadaf Ebrahimi stateblock *active_states, *new_states, *temp_states;
537*22dc650dSSadaf Ebrahimi stateblock *next_active_state, *next_new_state;
538*22dc650dSSadaf Ebrahimi const uint8_t *ctypes, *lcc, *fcc;
539*22dc650dSSadaf Ebrahimi PCRE2_SPTR ptr;
540*22dc650dSSadaf Ebrahimi PCRE2_SPTR end_code;
541*22dc650dSSadaf Ebrahimi dfa_recursion_info new_recursive;
542*22dc650dSSadaf Ebrahimi int active_count, new_count, match_count;
543*22dc650dSSadaf Ebrahimi 
544*22dc650dSSadaf Ebrahimi /* Some fields in the mb block are frequently referenced, so we load them into
545*22dc650dSSadaf Ebrahimi independent variables in the hope that this will perform better. */
546*22dc650dSSadaf Ebrahimi 
547*22dc650dSSadaf Ebrahimi PCRE2_SPTR start_subject = mb->start_subject;
548*22dc650dSSadaf Ebrahimi PCRE2_SPTR end_subject = mb->end_subject;
549*22dc650dSSadaf Ebrahimi PCRE2_SPTR start_code = mb->start_code;
550*22dc650dSSadaf Ebrahimi 
551*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
552*22dc650dSSadaf Ebrahimi BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
553*22dc650dSSadaf Ebrahimi BOOL utf_or_ucp = utf || (mb->poptions & PCRE2_UCP) != 0;
554*22dc650dSSadaf Ebrahimi #else
555*22dc650dSSadaf Ebrahimi BOOL utf = FALSE;
556*22dc650dSSadaf Ebrahimi #endif
557*22dc650dSSadaf Ebrahimi 
558*22dc650dSSadaf Ebrahimi BOOL reset_could_continue = FALSE;
559*22dc650dSSadaf Ebrahimi 
560*22dc650dSSadaf Ebrahimi if (mb->match_call_count++ >= mb->match_limit) return PCRE2_ERROR_MATCHLIMIT;
561*22dc650dSSadaf Ebrahimi if (rlevel++ > mb->match_limit_depth) return PCRE2_ERROR_DEPTHLIMIT;
562*22dc650dSSadaf Ebrahimi offsetcount &= (uint32_t)(-2);  /* Round down */
563*22dc650dSSadaf Ebrahimi 
564*22dc650dSSadaf Ebrahimi wscount -= 2;
565*22dc650dSSadaf Ebrahimi wscount = (wscount - (wscount % (INTS_PER_STATEBLOCK * 2))) /
566*22dc650dSSadaf Ebrahimi           (2 * INTS_PER_STATEBLOCK);
567*22dc650dSSadaf Ebrahimi 
568*22dc650dSSadaf Ebrahimi ctypes = mb->tables + ctypes_offset;
569*22dc650dSSadaf Ebrahimi lcc = mb->tables + lcc_offset;
570*22dc650dSSadaf Ebrahimi fcc = mb->tables + fcc_offset;
571*22dc650dSSadaf Ebrahimi 
572*22dc650dSSadaf Ebrahimi match_count = PCRE2_ERROR_NOMATCH;   /* A negative number */
573*22dc650dSSadaf Ebrahimi 
574*22dc650dSSadaf Ebrahimi active_states = (stateblock *)(workspace + 2);
575*22dc650dSSadaf Ebrahimi next_new_state = new_states = active_states + wscount;
576*22dc650dSSadaf Ebrahimi new_count = 0;
577*22dc650dSSadaf Ebrahimi 
578*22dc650dSSadaf Ebrahimi /* The first thing in any (sub) pattern is a bracket of some sort. Push all
579*22dc650dSSadaf Ebrahimi the alternative states onto the list, and find out where the end is. This
580*22dc650dSSadaf Ebrahimi makes is possible to use this function recursively, when we want to stop at a
581*22dc650dSSadaf Ebrahimi matching internal ket rather than at the end.
582*22dc650dSSadaf Ebrahimi 
583*22dc650dSSadaf Ebrahimi If we are dealing with a backward assertion we have to find out the maximum
584*22dc650dSSadaf Ebrahimi amount to move back, and set up each alternative appropriately. */
585*22dc650dSSadaf Ebrahimi 
586*22dc650dSSadaf Ebrahimi if (*this_start_code == OP_ASSERTBACK || *this_start_code == OP_ASSERTBACK_NOT)
587*22dc650dSSadaf Ebrahimi   {
588*22dc650dSSadaf Ebrahimi   size_t max_back = 0;
589*22dc650dSSadaf Ebrahimi   size_t gone_back;
590*22dc650dSSadaf Ebrahimi 
591*22dc650dSSadaf Ebrahimi   end_code = this_start_code;
592*22dc650dSSadaf Ebrahimi   do
593*22dc650dSSadaf Ebrahimi     {
594*22dc650dSSadaf Ebrahimi     size_t back = (size_t)GET2(end_code, 2+LINK_SIZE);
595*22dc650dSSadaf Ebrahimi     if (back > max_back) max_back = back;
596*22dc650dSSadaf Ebrahimi     end_code += GET(end_code, 1);
597*22dc650dSSadaf Ebrahimi     }
598*22dc650dSSadaf Ebrahimi   while (*end_code == OP_ALT);
599*22dc650dSSadaf Ebrahimi 
600*22dc650dSSadaf Ebrahimi   /* If we can't go back the amount required for the longest lookbehind
601*22dc650dSSadaf Ebrahimi   pattern, go back as far as we can; some alternatives may still be viable. */
602*22dc650dSSadaf Ebrahimi 
603*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
604*22dc650dSSadaf Ebrahimi   /* In character mode we have to step back character by character */
605*22dc650dSSadaf Ebrahimi 
606*22dc650dSSadaf Ebrahimi   if (utf)
607*22dc650dSSadaf Ebrahimi     {
608*22dc650dSSadaf Ebrahimi     for (gone_back = 0; gone_back < max_back; gone_back++)
609*22dc650dSSadaf Ebrahimi       {
610*22dc650dSSadaf Ebrahimi       if (current_subject <= start_subject) break;
611*22dc650dSSadaf Ebrahimi       current_subject--;
612*22dc650dSSadaf Ebrahimi       ACROSSCHAR(current_subject > start_subject, current_subject,
613*22dc650dSSadaf Ebrahimi         current_subject--);
614*22dc650dSSadaf Ebrahimi       }
615*22dc650dSSadaf Ebrahimi     }
616*22dc650dSSadaf Ebrahimi   else
617*22dc650dSSadaf Ebrahimi #endif
618*22dc650dSSadaf Ebrahimi 
619*22dc650dSSadaf Ebrahimi   /* In byte-mode we can do this quickly. */
620*22dc650dSSadaf Ebrahimi 
621*22dc650dSSadaf Ebrahimi     {
622*22dc650dSSadaf Ebrahimi     size_t current_offset = (size_t)(current_subject - start_subject);
623*22dc650dSSadaf Ebrahimi     gone_back = (current_offset < max_back)? current_offset : max_back;
624*22dc650dSSadaf Ebrahimi     current_subject -= gone_back;
625*22dc650dSSadaf Ebrahimi     }
626*22dc650dSSadaf Ebrahimi 
627*22dc650dSSadaf Ebrahimi   /* Save the earliest consulted character */
628*22dc650dSSadaf Ebrahimi 
629*22dc650dSSadaf Ebrahimi   if (current_subject < mb->start_used_ptr)
630*22dc650dSSadaf Ebrahimi     mb->start_used_ptr = current_subject;
631*22dc650dSSadaf Ebrahimi 
632*22dc650dSSadaf Ebrahimi   /* Now we can process the individual branches. There will be an OP_REVERSE at
633*22dc650dSSadaf Ebrahimi   the start of each branch, except when the length of the branch is zero. */
634*22dc650dSSadaf Ebrahimi 
635*22dc650dSSadaf Ebrahimi   end_code = this_start_code;
636*22dc650dSSadaf Ebrahimi   do
637*22dc650dSSadaf Ebrahimi     {
638*22dc650dSSadaf Ebrahimi     uint32_t revlen = (end_code[1+LINK_SIZE] == OP_REVERSE)? 1 + IMM2_SIZE : 0;
639*22dc650dSSadaf Ebrahimi     size_t back = (revlen == 0)? 0 : (size_t)GET2(end_code, 2+LINK_SIZE);
640*22dc650dSSadaf Ebrahimi     if (back <= gone_back)
641*22dc650dSSadaf Ebrahimi       {
642*22dc650dSSadaf Ebrahimi       int bstate = (int)(end_code - start_code + 1 + LINK_SIZE + revlen);
643*22dc650dSSadaf Ebrahimi       ADD_NEW_DATA(-bstate, 0, (int)(gone_back - back));
644*22dc650dSSadaf Ebrahimi       }
645*22dc650dSSadaf Ebrahimi     end_code += GET(end_code, 1);
646*22dc650dSSadaf Ebrahimi     }
647*22dc650dSSadaf Ebrahimi   while (*end_code == OP_ALT);
648*22dc650dSSadaf Ebrahimi  }
649*22dc650dSSadaf Ebrahimi 
650*22dc650dSSadaf Ebrahimi /* This is the code for a "normal" subpattern (not a backward assertion). The
651*22dc650dSSadaf Ebrahimi start of a whole pattern is always one of these. If we are at the top level,
652*22dc650dSSadaf Ebrahimi we may be asked to restart matching from the same point that we reached for a
653*22dc650dSSadaf Ebrahimi previous partial match. We still have to scan through the top-level branches to
654*22dc650dSSadaf Ebrahimi find the end state. */
655*22dc650dSSadaf Ebrahimi 
656*22dc650dSSadaf Ebrahimi else
657*22dc650dSSadaf Ebrahimi   {
658*22dc650dSSadaf Ebrahimi   end_code = this_start_code;
659*22dc650dSSadaf Ebrahimi 
660*22dc650dSSadaf Ebrahimi   /* Restarting */
661*22dc650dSSadaf Ebrahimi 
662*22dc650dSSadaf Ebrahimi   if (rlevel == 1 && (mb->moptions & PCRE2_DFA_RESTART) != 0)
663*22dc650dSSadaf Ebrahimi     {
664*22dc650dSSadaf Ebrahimi     do { end_code += GET(end_code, 1); } while (*end_code == OP_ALT);
665*22dc650dSSadaf Ebrahimi     new_count = workspace[1];
666*22dc650dSSadaf Ebrahimi     if (!workspace[0])
667*22dc650dSSadaf Ebrahimi       memcpy(new_states, active_states, (size_t)new_count * sizeof(stateblock));
668*22dc650dSSadaf Ebrahimi     }
669*22dc650dSSadaf Ebrahimi 
670*22dc650dSSadaf Ebrahimi   /* Not restarting */
671*22dc650dSSadaf Ebrahimi 
672*22dc650dSSadaf Ebrahimi   else
673*22dc650dSSadaf Ebrahimi     {
674*22dc650dSSadaf Ebrahimi     int length = 1 + LINK_SIZE +
675*22dc650dSSadaf Ebrahimi       ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
676*22dc650dSSadaf Ebrahimi         *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
677*22dc650dSSadaf Ebrahimi         ? IMM2_SIZE:0);
678*22dc650dSSadaf Ebrahimi     do
679*22dc650dSSadaf Ebrahimi       {
680*22dc650dSSadaf Ebrahimi       ADD_NEW((int)(end_code - start_code + length), 0);
681*22dc650dSSadaf Ebrahimi       end_code += GET(end_code, 1);
682*22dc650dSSadaf Ebrahimi       length = 1 + LINK_SIZE;
683*22dc650dSSadaf Ebrahimi       }
684*22dc650dSSadaf Ebrahimi     while (*end_code == OP_ALT);
685*22dc650dSSadaf Ebrahimi     }
686*22dc650dSSadaf Ebrahimi   }
687*22dc650dSSadaf Ebrahimi 
688*22dc650dSSadaf Ebrahimi workspace[0] = 0;    /* Bit indicating which vector is current */
689*22dc650dSSadaf Ebrahimi 
690*22dc650dSSadaf Ebrahimi /* Loop for scanning the subject */
691*22dc650dSSadaf Ebrahimi 
692*22dc650dSSadaf Ebrahimi ptr = current_subject;
693*22dc650dSSadaf Ebrahimi for (;;)
694*22dc650dSSadaf Ebrahimi   {
695*22dc650dSSadaf Ebrahimi   int i, j;
696*22dc650dSSadaf Ebrahimi   int clen, dlen;
697*22dc650dSSadaf Ebrahimi   uint32_t c, d;
698*22dc650dSSadaf Ebrahimi   int forced_fail = 0;
699*22dc650dSSadaf Ebrahimi   BOOL partial_newline = FALSE;
700*22dc650dSSadaf Ebrahimi   BOOL could_continue = reset_could_continue;
701*22dc650dSSadaf Ebrahimi   reset_could_continue = FALSE;
702*22dc650dSSadaf Ebrahimi 
703*22dc650dSSadaf Ebrahimi   if (ptr > mb->last_used_ptr) mb->last_used_ptr = ptr;
704*22dc650dSSadaf Ebrahimi 
705*22dc650dSSadaf Ebrahimi   /* Make the new state list into the active state list and empty the
706*22dc650dSSadaf Ebrahimi   new state list. */
707*22dc650dSSadaf Ebrahimi 
708*22dc650dSSadaf Ebrahimi   temp_states = active_states;
709*22dc650dSSadaf Ebrahimi   active_states = new_states;
710*22dc650dSSadaf Ebrahimi   new_states = temp_states;
711*22dc650dSSadaf Ebrahimi   active_count = new_count;
712*22dc650dSSadaf Ebrahimi   new_count = 0;
713*22dc650dSSadaf Ebrahimi 
714*22dc650dSSadaf Ebrahimi   workspace[0] ^= 1;              /* Remember for the restarting feature */
715*22dc650dSSadaf Ebrahimi   workspace[1] = active_count;
716*22dc650dSSadaf Ebrahimi 
717*22dc650dSSadaf Ebrahimi   /* Set the pointers for adding new states */
718*22dc650dSSadaf Ebrahimi 
719*22dc650dSSadaf Ebrahimi   next_active_state = active_states + active_count;
720*22dc650dSSadaf Ebrahimi   next_new_state = new_states;
721*22dc650dSSadaf Ebrahimi 
722*22dc650dSSadaf Ebrahimi   /* Load the current character from the subject outside the loop, as many
723*22dc650dSSadaf Ebrahimi   different states may want to look at it, and we assume that at least one
724*22dc650dSSadaf Ebrahimi   will. */
725*22dc650dSSadaf Ebrahimi 
726*22dc650dSSadaf Ebrahimi   if (ptr < end_subject)
727*22dc650dSSadaf Ebrahimi     {
728*22dc650dSSadaf Ebrahimi     clen = 1;        /* Number of data items in the character */
729*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
730*22dc650dSSadaf Ebrahimi     GETCHARLENTEST(c, ptr, clen);
731*22dc650dSSadaf Ebrahimi #else
732*22dc650dSSadaf Ebrahimi     c = *ptr;
733*22dc650dSSadaf Ebrahimi #endif  /* SUPPORT_UNICODE */
734*22dc650dSSadaf Ebrahimi     }
735*22dc650dSSadaf Ebrahimi   else
736*22dc650dSSadaf Ebrahimi     {
737*22dc650dSSadaf Ebrahimi     clen = 0;        /* This indicates the end of the subject */
738*22dc650dSSadaf Ebrahimi     c = NOTACHAR;    /* This value should never actually be used */
739*22dc650dSSadaf Ebrahimi     }
740*22dc650dSSadaf Ebrahimi 
741*22dc650dSSadaf Ebrahimi   /* Scan up the active states and act on each one. The result of an action
742*22dc650dSSadaf Ebrahimi   may be to add more states to the currently active list (e.g. on hitting a
743*22dc650dSSadaf Ebrahimi   parenthesis) or it may be to put states on the new list, for considering
744*22dc650dSSadaf Ebrahimi   when we move the character pointer on. */
745*22dc650dSSadaf Ebrahimi 
746*22dc650dSSadaf Ebrahimi   for (i = 0; i < active_count; i++)
747*22dc650dSSadaf Ebrahimi     {
748*22dc650dSSadaf Ebrahimi     stateblock *current_state = active_states + i;
749*22dc650dSSadaf Ebrahimi     BOOL caseless = FALSE;
750*22dc650dSSadaf Ebrahimi     PCRE2_SPTR code;
751*22dc650dSSadaf Ebrahimi     uint32_t codevalue;
752*22dc650dSSadaf Ebrahimi     int state_offset = current_state->offset;
753*22dc650dSSadaf Ebrahimi     int rrc;
754*22dc650dSSadaf Ebrahimi     int count;
755*22dc650dSSadaf Ebrahimi 
756*22dc650dSSadaf Ebrahimi     /* A negative offset is a special case meaning "hold off going to this
757*22dc650dSSadaf Ebrahimi     (negated) state until the number of characters in the data field have
758*22dc650dSSadaf Ebrahimi     been skipped". If the could_continue flag was passed over from a previous
759*22dc650dSSadaf Ebrahimi     state, arrange for it to passed on. */
760*22dc650dSSadaf Ebrahimi 
761*22dc650dSSadaf Ebrahimi     if (state_offset < 0)
762*22dc650dSSadaf Ebrahimi       {
763*22dc650dSSadaf Ebrahimi       if (current_state->data > 0)
764*22dc650dSSadaf Ebrahimi         {
765*22dc650dSSadaf Ebrahimi         ADD_NEW_DATA(state_offset, current_state->count,
766*22dc650dSSadaf Ebrahimi           current_state->data - 1);
767*22dc650dSSadaf Ebrahimi         if (could_continue) reset_could_continue = TRUE;
768*22dc650dSSadaf Ebrahimi         continue;
769*22dc650dSSadaf Ebrahimi         }
770*22dc650dSSadaf Ebrahimi       else
771*22dc650dSSadaf Ebrahimi         {
772*22dc650dSSadaf Ebrahimi         current_state->offset = state_offset = -state_offset;
773*22dc650dSSadaf Ebrahimi         }
774*22dc650dSSadaf Ebrahimi       }
775*22dc650dSSadaf Ebrahimi 
776*22dc650dSSadaf Ebrahimi     /* Check for a duplicate state with the same count, and skip if found.
777*22dc650dSSadaf Ebrahimi     See the note at the head of this module about the possibility of improving
778*22dc650dSSadaf Ebrahimi     performance here. */
779*22dc650dSSadaf Ebrahimi 
780*22dc650dSSadaf Ebrahimi     for (j = 0; j < i; j++)
781*22dc650dSSadaf Ebrahimi       {
782*22dc650dSSadaf Ebrahimi       if (active_states[j].offset == state_offset &&
783*22dc650dSSadaf Ebrahimi           active_states[j].count == current_state->count)
784*22dc650dSSadaf Ebrahimi         goto NEXT_ACTIVE_STATE;
785*22dc650dSSadaf Ebrahimi       }
786*22dc650dSSadaf Ebrahimi 
787*22dc650dSSadaf Ebrahimi     /* The state offset is the offset to the opcode */
788*22dc650dSSadaf Ebrahimi 
789*22dc650dSSadaf Ebrahimi     code = start_code + state_offset;
790*22dc650dSSadaf Ebrahimi     codevalue = *code;
791*22dc650dSSadaf Ebrahimi 
792*22dc650dSSadaf Ebrahimi     /* If this opcode inspects a character, but we are at the end of the
793*22dc650dSSadaf Ebrahimi     subject, remember the fact for use when testing for a partial match. */
794*22dc650dSSadaf Ebrahimi 
795*22dc650dSSadaf Ebrahimi     if (clen == 0 && poptable[codevalue] != 0)
796*22dc650dSSadaf Ebrahimi       could_continue = TRUE;
797*22dc650dSSadaf Ebrahimi 
798*22dc650dSSadaf Ebrahimi     /* If this opcode is followed by an inline character, load it. It is
799*22dc650dSSadaf Ebrahimi     tempting to test for the presence of a subject character here, but that
800*22dc650dSSadaf Ebrahimi     is wrong, because sometimes zero repetitions of the subject are
801*22dc650dSSadaf Ebrahimi     permitted.
802*22dc650dSSadaf Ebrahimi 
803*22dc650dSSadaf Ebrahimi     We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
804*22dc650dSSadaf Ebrahimi     argument that is not a data character - but is always one byte long because
805*22dc650dSSadaf Ebrahimi     the values are small. We have to take special action to deal with  \P, \p,
806*22dc650dSSadaf Ebrahimi     \H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
807*22dc650dSSadaf Ebrahimi     these ones to new opcodes. */
808*22dc650dSSadaf Ebrahimi 
809*22dc650dSSadaf Ebrahimi     if (coptable[codevalue] > 0)
810*22dc650dSSadaf Ebrahimi       {
811*22dc650dSSadaf Ebrahimi       dlen = 1;
812*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
813*22dc650dSSadaf Ebrahimi       if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
814*22dc650dSSadaf Ebrahimi #endif  /* SUPPORT_UNICODE */
815*22dc650dSSadaf Ebrahimi       d = code[coptable[codevalue]];
816*22dc650dSSadaf Ebrahimi       if (codevalue >= OP_TYPESTAR)
817*22dc650dSSadaf Ebrahimi         {
818*22dc650dSSadaf Ebrahimi         switch(d)
819*22dc650dSSadaf Ebrahimi           {
820*22dc650dSSadaf Ebrahimi           case OP_ANYBYTE: return PCRE2_ERROR_DFA_UITEM;
821*22dc650dSSadaf Ebrahimi           case OP_NOTPROP:
822*22dc650dSSadaf Ebrahimi           case OP_PROP: codevalue += OP_PROP_EXTRA; break;
823*22dc650dSSadaf Ebrahimi           case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
824*22dc650dSSadaf Ebrahimi           case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
825*22dc650dSSadaf Ebrahimi           case OP_NOT_HSPACE:
826*22dc650dSSadaf Ebrahimi           case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
827*22dc650dSSadaf Ebrahimi           case OP_NOT_VSPACE:
828*22dc650dSSadaf Ebrahimi           case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
829*22dc650dSSadaf Ebrahimi           default: break;
830*22dc650dSSadaf Ebrahimi           }
831*22dc650dSSadaf Ebrahimi         }
832*22dc650dSSadaf Ebrahimi       }
833*22dc650dSSadaf Ebrahimi     else
834*22dc650dSSadaf Ebrahimi       {
835*22dc650dSSadaf Ebrahimi       dlen = 0;         /* Not strictly necessary, but compilers moan */
836*22dc650dSSadaf Ebrahimi       d = NOTACHAR;     /* if these variables are not set. */
837*22dc650dSSadaf Ebrahimi       }
838*22dc650dSSadaf Ebrahimi 
839*22dc650dSSadaf Ebrahimi 
840*22dc650dSSadaf Ebrahimi     /* Now process the individual opcodes */
841*22dc650dSSadaf Ebrahimi 
842*22dc650dSSadaf Ebrahimi     switch (codevalue)
843*22dc650dSSadaf Ebrahimi       {
844*22dc650dSSadaf Ebrahimi /* ========================================================================== */
845*22dc650dSSadaf Ebrahimi       /* These cases are never obeyed. This is a fudge that causes a compile-
846*22dc650dSSadaf Ebrahimi       time error if the vectors coptable or poptable, which are indexed by
847*22dc650dSSadaf Ebrahimi       opcode, are not the correct length. It seems to be the only way to do
848*22dc650dSSadaf Ebrahimi       such a check at compile time, as the sizeof() operator does not work
849*22dc650dSSadaf Ebrahimi       in the C preprocessor. */
850*22dc650dSSadaf Ebrahimi 
851*22dc650dSSadaf Ebrahimi       case OP_TABLE_LENGTH:
852*22dc650dSSadaf Ebrahimi       case OP_TABLE_LENGTH +
853*22dc650dSSadaf Ebrahimi         ((sizeof(coptable) == OP_TABLE_LENGTH) &&
854*22dc650dSSadaf Ebrahimi          (sizeof(poptable) == OP_TABLE_LENGTH)):
855*22dc650dSSadaf Ebrahimi       return 0;
856*22dc650dSSadaf Ebrahimi 
857*22dc650dSSadaf Ebrahimi /* ========================================================================== */
858*22dc650dSSadaf Ebrahimi       /* Reached a closing bracket. If not at the end of the pattern, carry
859*22dc650dSSadaf Ebrahimi       on with the next opcode. For repeating opcodes, also add the repeat
860*22dc650dSSadaf Ebrahimi       state. Note that KETRPOS will always be encountered at the end of the
861*22dc650dSSadaf Ebrahimi       subpattern, because the possessive subpattern repeats are always handled
862*22dc650dSSadaf Ebrahimi       using recursive calls. Thus, it never adds any new states.
863*22dc650dSSadaf Ebrahimi 
864*22dc650dSSadaf Ebrahimi       At the end of the (sub)pattern, unless we have an empty string and
865*22dc650dSSadaf Ebrahimi       PCRE2_NOTEMPTY is set, or PCRE2_NOTEMPTY_ATSTART is set and we are at the
866*22dc650dSSadaf Ebrahimi       start of the subject, save the match data, shifting up all previous
867*22dc650dSSadaf Ebrahimi       matches so we always have the longest first. */
868*22dc650dSSadaf Ebrahimi 
869*22dc650dSSadaf Ebrahimi       case OP_KET:
870*22dc650dSSadaf Ebrahimi       case OP_KETRMIN:
871*22dc650dSSadaf Ebrahimi       case OP_KETRMAX:
872*22dc650dSSadaf Ebrahimi       case OP_KETRPOS:
873*22dc650dSSadaf Ebrahimi       if (code != end_code)
874*22dc650dSSadaf Ebrahimi         {
875*22dc650dSSadaf Ebrahimi         ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);
876*22dc650dSSadaf Ebrahimi         if (codevalue != OP_KET)
877*22dc650dSSadaf Ebrahimi           {
878*22dc650dSSadaf Ebrahimi           ADD_ACTIVE(state_offset - (int)GET(code, 1), 0);
879*22dc650dSSadaf Ebrahimi           }
880*22dc650dSSadaf Ebrahimi         }
881*22dc650dSSadaf Ebrahimi       else
882*22dc650dSSadaf Ebrahimi         {
883*22dc650dSSadaf Ebrahimi         if (ptr > current_subject ||
884*22dc650dSSadaf Ebrahimi             ((mb->moptions & PCRE2_NOTEMPTY) == 0 &&
885*22dc650dSSadaf Ebrahimi               ((mb->moptions & PCRE2_NOTEMPTY_ATSTART) == 0 ||
886*22dc650dSSadaf Ebrahimi                 current_subject > start_subject + mb->start_offset)))
887*22dc650dSSadaf Ebrahimi           {
888*22dc650dSSadaf Ebrahimi           if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
889*22dc650dSSadaf Ebrahimi             else if (match_count > 0 && ++match_count * 2 > (int)offsetcount)
890*22dc650dSSadaf Ebrahimi               match_count = 0;
891*22dc650dSSadaf Ebrahimi           count = ((match_count == 0)? (int)offsetcount : match_count * 2) - 2;
892*22dc650dSSadaf Ebrahimi           if (count > 0) (void)memmove(offsets + 2, offsets,
893*22dc650dSSadaf Ebrahimi             (size_t)count * sizeof(PCRE2_SIZE));
894*22dc650dSSadaf Ebrahimi           if (offsetcount >= 2)
895*22dc650dSSadaf Ebrahimi             {
896*22dc650dSSadaf Ebrahimi             offsets[0] = (PCRE2_SIZE)(current_subject - start_subject);
897*22dc650dSSadaf Ebrahimi             offsets[1] = (PCRE2_SIZE)(ptr - start_subject);
898*22dc650dSSadaf Ebrahimi             }
899*22dc650dSSadaf Ebrahimi           if ((mb->moptions & PCRE2_DFA_SHORTEST) != 0) return match_count;
900*22dc650dSSadaf Ebrahimi           }
901*22dc650dSSadaf Ebrahimi         }
902*22dc650dSSadaf Ebrahimi       break;
903*22dc650dSSadaf Ebrahimi 
904*22dc650dSSadaf Ebrahimi /* ========================================================================== */
905*22dc650dSSadaf Ebrahimi       /* These opcodes add to the current list of states without looking
906*22dc650dSSadaf Ebrahimi       at the current character. */
907*22dc650dSSadaf Ebrahimi 
908*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
909*22dc650dSSadaf Ebrahimi       case OP_ALT:
910*22dc650dSSadaf Ebrahimi       do { code += GET(code, 1); } while (*code == OP_ALT);
911*22dc650dSSadaf Ebrahimi       ADD_ACTIVE((int)(code - start_code), 0);
912*22dc650dSSadaf Ebrahimi       break;
913*22dc650dSSadaf Ebrahimi 
914*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
915*22dc650dSSadaf Ebrahimi       case OP_BRA:
916*22dc650dSSadaf Ebrahimi       case OP_SBRA:
917*22dc650dSSadaf Ebrahimi       do
918*22dc650dSSadaf Ebrahimi         {
919*22dc650dSSadaf Ebrahimi         ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
920*22dc650dSSadaf Ebrahimi         code += GET(code, 1);
921*22dc650dSSadaf Ebrahimi         }
922*22dc650dSSadaf Ebrahimi       while (*code == OP_ALT);
923*22dc650dSSadaf Ebrahimi       break;
924*22dc650dSSadaf Ebrahimi 
925*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
926*22dc650dSSadaf Ebrahimi       case OP_CBRA:
927*22dc650dSSadaf Ebrahimi       case OP_SCBRA:
928*22dc650dSSadaf Ebrahimi       ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE + IMM2_SIZE),  0);
929*22dc650dSSadaf Ebrahimi       code += GET(code, 1);
930*22dc650dSSadaf Ebrahimi       while (*code == OP_ALT)
931*22dc650dSSadaf Ebrahimi         {
932*22dc650dSSadaf Ebrahimi         ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE),  0);
933*22dc650dSSadaf Ebrahimi         code += GET(code, 1);
934*22dc650dSSadaf Ebrahimi         }
935*22dc650dSSadaf Ebrahimi       break;
936*22dc650dSSadaf Ebrahimi 
937*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
938*22dc650dSSadaf Ebrahimi       case OP_BRAZERO:
939*22dc650dSSadaf Ebrahimi       case OP_BRAMINZERO:
940*22dc650dSSadaf Ebrahimi       ADD_ACTIVE(state_offset + 1, 0);
941*22dc650dSSadaf Ebrahimi       code += 1 + GET(code, 2);
942*22dc650dSSadaf Ebrahimi       while (*code == OP_ALT) code += GET(code, 1);
943*22dc650dSSadaf Ebrahimi       ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
944*22dc650dSSadaf Ebrahimi       break;
945*22dc650dSSadaf Ebrahimi 
946*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
947*22dc650dSSadaf Ebrahimi       case OP_SKIPZERO:
948*22dc650dSSadaf Ebrahimi       code += 1 + GET(code, 2);
949*22dc650dSSadaf Ebrahimi       while (*code == OP_ALT) code += GET(code, 1);
950*22dc650dSSadaf Ebrahimi       ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
951*22dc650dSSadaf Ebrahimi       break;
952*22dc650dSSadaf Ebrahimi 
953*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
954*22dc650dSSadaf Ebrahimi       case OP_CIRC:
955*22dc650dSSadaf Ebrahimi       if (ptr == start_subject && (mb->moptions & PCRE2_NOTBOL) == 0)
956*22dc650dSSadaf Ebrahimi         { ADD_ACTIVE(state_offset + 1, 0); }
957*22dc650dSSadaf Ebrahimi       break;
958*22dc650dSSadaf Ebrahimi 
959*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
960*22dc650dSSadaf Ebrahimi       case OP_CIRCM:
961*22dc650dSSadaf Ebrahimi       if ((ptr == start_subject && (mb->moptions & PCRE2_NOTBOL) == 0) ||
962*22dc650dSSadaf Ebrahimi           ((ptr != end_subject || (mb->poptions & PCRE2_ALT_CIRCUMFLEX) != 0 )
963*22dc650dSSadaf Ebrahimi             && WAS_NEWLINE(ptr)))
964*22dc650dSSadaf Ebrahimi         { ADD_ACTIVE(state_offset + 1, 0); }
965*22dc650dSSadaf Ebrahimi       break;
966*22dc650dSSadaf Ebrahimi 
967*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
968*22dc650dSSadaf Ebrahimi       case OP_EOD:
969*22dc650dSSadaf Ebrahimi       if (ptr >= end_subject)
970*22dc650dSSadaf Ebrahimi         {
971*22dc650dSSadaf Ebrahimi         if ((mb->moptions & PCRE2_PARTIAL_HARD) != 0)
972*22dc650dSSadaf Ebrahimi           return PCRE2_ERROR_PARTIAL;
973*22dc650dSSadaf Ebrahimi         else { ADD_ACTIVE(state_offset + 1, 0); }
974*22dc650dSSadaf Ebrahimi         }
975*22dc650dSSadaf Ebrahimi       break;
976*22dc650dSSadaf Ebrahimi 
977*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
978*22dc650dSSadaf Ebrahimi       case OP_SOD:
979*22dc650dSSadaf Ebrahimi       if (ptr == start_subject) { ADD_ACTIVE(state_offset + 1, 0); }
980*22dc650dSSadaf Ebrahimi       break;
981*22dc650dSSadaf Ebrahimi 
982*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
983*22dc650dSSadaf Ebrahimi       case OP_SOM:
984*22dc650dSSadaf Ebrahimi       if (ptr == start_subject + start_offset) { ADD_ACTIVE(state_offset + 1, 0); }
985*22dc650dSSadaf Ebrahimi       break;
986*22dc650dSSadaf Ebrahimi 
987*22dc650dSSadaf Ebrahimi 
988*22dc650dSSadaf Ebrahimi /* ========================================================================== */
989*22dc650dSSadaf Ebrahimi       /* These opcodes inspect the next subject character, and sometimes
990*22dc650dSSadaf Ebrahimi       the previous one as well, but do not have an argument. The variable
991*22dc650dSSadaf Ebrahimi       clen contains the length of the current character and is zero if we are
992*22dc650dSSadaf Ebrahimi       at the end of the subject. */
993*22dc650dSSadaf Ebrahimi 
994*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
995*22dc650dSSadaf Ebrahimi       case OP_ANY:
996*22dc650dSSadaf Ebrahimi       if (clen > 0 && !IS_NEWLINE(ptr))
997*22dc650dSSadaf Ebrahimi         {
998*22dc650dSSadaf Ebrahimi         if (ptr + 1 >= mb->end_subject &&
999*22dc650dSSadaf Ebrahimi             (mb->moptions & (PCRE2_PARTIAL_HARD)) != 0 &&
1000*22dc650dSSadaf Ebrahimi             NLBLOCK->nltype == NLTYPE_FIXED &&
1001*22dc650dSSadaf Ebrahimi             NLBLOCK->nllen == 2 &&
1002*22dc650dSSadaf Ebrahimi             c == NLBLOCK->nl[0])
1003*22dc650dSSadaf Ebrahimi           {
1004*22dc650dSSadaf Ebrahimi           could_continue = partial_newline = TRUE;
1005*22dc650dSSadaf Ebrahimi           }
1006*22dc650dSSadaf Ebrahimi         else
1007*22dc650dSSadaf Ebrahimi           {
1008*22dc650dSSadaf Ebrahimi           ADD_NEW(state_offset + 1, 0);
1009*22dc650dSSadaf Ebrahimi           }
1010*22dc650dSSadaf Ebrahimi         }
1011*22dc650dSSadaf Ebrahimi       break;
1012*22dc650dSSadaf Ebrahimi 
1013*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
1014*22dc650dSSadaf Ebrahimi       case OP_ALLANY:
1015*22dc650dSSadaf Ebrahimi       if (clen > 0)
1016*22dc650dSSadaf Ebrahimi         { ADD_NEW(state_offset + 1, 0); }
1017*22dc650dSSadaf Ebrahimi       break;
1018*22dc650dSSadaf Ebrahimi 
1019*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
1020*22dc650dSSadaf Ebrahimi       case OP_EODN:
1021*22dc650dSSadaf Ebrahimi       if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - mb->nllen))
1022*22dc650dSSadaf Ebrahimi         {
1023*22dc650dSSadaf Ebrahimi         if ((mb->moptions & PCRE2_PARTIAL_HARD) != 0)
1024*22dc650dSSadaf Ebrahimi           return PCRE2_ERROR_PARTIAL;
1025*22dc650dSSadaf Ebrahimi         ADD_ACTIVE(state_offset + 1, 0);
1026*22dc650dSSadaf Ebrahimi         }
1027*22dc650dSSadaf Ebrahimi       break;
1028*22dc650dSSadaf Ebrahimi 
1029*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
1030*22dc650dSSadaf Ebrahimi       case OP_DOLL:
1031*22dc650dSSadaf Ebrahimi       if ((mb->moptions & PCRE2_NOTEOL) == 0)
1032*22dc650dSSadaf Ebrahimi         {
1033*22dc650dSSadaf Ebrahimi         if (clen == 0 && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
1034*22dc650dSSadaf Ebrahimi           could_continue = TRUE;
1035*22dc650dSSadaf Ebrahimi         else if (clen == 0 ||
1036*22dc650dSSadaf Ebrahimi             ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&
1037*22dc650dSSadaf Ebrahimi                (ptr == end_subject - mb->nllen)
1038*22dc650dSSadaf Ebrahimi             ))
1039*22dc650dSSadaf Ebrahimi           { ADD_ACTIVE(state_offset + 1, 0); }
1040*22dc650dSSadaf Ebrahimi         else if (ptr + 1 >= mb->end_subject &&
1041*22dc650dSSadaf Ebrahimi                  (mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0 &&
1042*22dc650dSSadaf Ebrahimi                  NLBLOCK->nltype == NLTYPE_FIXED &&
1043*22dc650dSSadaf Ebrahimi                  NLBLOCK->nllen == 2 &&
1044*22dc650dSSadaf Ebrahimi                  c == NLBLOCK->nl[0])
1045*22dc650dSSadaf Ebrahimi           {
1046*22dc650dSSadaf Ebrahimi           if ((mb->moptions & PCRE2_PARTIAL_HARD) != 0)
1047*22dc650dSSadaf Ebrahimi             {
1048*22dc650dSSadaf Ebrahimi             reset_could_continue = TRUE;
1049*22dc650dSSadaf Ebrahimi             ADD_NEW_DATA(-(state_offset + 1), 0, 1);
1050*22dc650dSSadaf Ebrahimi             }
1051*22dc650dSSadaf Ebrahimi           else could_continue = partial_newline = TRUE;
1052*22dc650dSSadaf Ebrahimi           }
1053*22dc650dSSadaf Ebrahimi         }
1054*22dc650dSSadaf Ebrahimi       break;
1055*22dc650dSSadaf Ebrahimi 
1056*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
1057*22dc650dSSadaf Ebrahimi       case OP_DOLLM:
1058*22dc650dSSadaf Ebrahimi       if ((mb->moptions & PCRE2_NOTEOL) == 0)
1059*22dc650dSSadaf Ebrahimi         {
1060*22dc650dSSadaf Ebrahimi         if (clen == 0 && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
1061*22dc650dSSadaf Ebrahimi           could_continue = TRUE;
1062*22dc650dSSadaf Ebrahimi         else if (clen == 0 ||
1063*22dc650dSSadaf Ebrahimi             ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
1064*22dc650dSSadaf Ebrahimi           { ADD_ACTIVE(state_offset + 1, 0); }
1065*22dc650dSSadaf Ebrahimi         else if (ptr + 1 >= mb->end_subject &&
1066*22dc650dSSadaf Ebrahimi                  (mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0 &&
1067*22dc650dSSadaf Ebrahimi                  NLBLOCK->nltype == NLTYPE_FIXED &&
1068*22dc650dSSadaf Ebrahimi                  NLBLOCK->nllen == 2 &&
1069*22dc650dSSadaf Ebrahimi                  c == NLBLOCK->nl[0])
1070*22dc650dSSadaf Ebrahimi           {
1071*22dc650dSSadaf Ebrahimi           if ((mb->moptions & PCRE2_PARTIAL_HARD) != 0)
1072*22dc650dSSadaf Ebrahimi             {
1073*22dc650dSSadaf Ebrahimi             reset_could_continue = TRUE;
1074*22dc650dSSadaf Ebrahimi             ADD_NEW_DATA(-(state_offset + 1), 0, 1);
1075*22dc650dSSadaf Ebrahimi             }
1076*22dc650dSSadaf Ebrahimi           else could_continue = partial_newline = TRUE;
1077*22dc650dSSadaf Ebrahimi           }
1078*22dc650dSSadaf Ebrahimi         }
1079*22dc650dSSadaf Ebrahimi       else if (IS_NEWLINE(ptr))
1080*22dc650dSSadaf Ebrahimi         { ADD_ACTIVE(state_offset + 1, 0); }
1081*22dc650dSSadaf Ebrahimi       break;
1082*22dc650dSSadaf Ebrahimi 
1083*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
1084*22dc650dSSadaf Ebrahimi 
1085*22dc650dSSadaf Ebrahimi       case OP_DIGIT:
1086*22dc650dSSadaf Ebrahimi       case OP_WHITESPACE:
1087*22dc650dSSadaf Ebrahimi       case OP_WORDCHAR:
1088*22dc650dSSadaf Ebrahimi       if (clen > 0 && c < 256 &&
1089*22dc650dSSadaf Ebrahimi             ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0)
1090*22dc650dSSadaf Ebrahimi         { ADD_NEW(state_offset + 1, 0); }
1091*22dc650dSSadaf Ebrahimi       break;
1092*22dc650dSSadaf Ebrahimi 
1093*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
1094*22dc650dSSadaf Ebrahimi       case OP_NOT_DIGIT:
1095*22dc650dSSadaf Ebrahimi       case OP_NOT_WHITESPACE:
1096*22dc650dSSadaf Ebrahimi       case OP_NOT_WORDCHAR:
1097*22dc650dSSadaf Ebrahimi       if (clen > 0 && (c >= 256 ||
1098*22dc650dSSadaf Ebrahimi             ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0))
1099*22dc650dSSadaf Ebrahimi         { ADD_NEW(state_offset + 1, 0); }
1100*22dc650dSSadaf Ebrahimi       break;
1101*22dc650dSSadaf Ebrahimi 
1102*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
1103*22dc650dSSadaf Ebrahimi       case OP_WORD_BOUNDARY:
1104*22dc650dSSadaf Ebrahimi       case OP_NOT_WORD_BOUNDARY:
1105*22dc650dSSadaf Ebrahimi       case OP_NOT_UCP_WORD_BOUNDARY:
1106*22dc650dSSadaf Ebrahimi       case OP_UCP_WORD_BOUNDARY:
1107*22dc650dSSadaf Ebrahimi         {
1108*22dc650dSSadaf Ebrahimi         int left_word, right_word;
1109*22dc650dSSadaf Ebrahimi 
1110*22dc650dSSadaf Ebrahimi         if (ptr > start_subject)
1111*22dc650dSSadaf Ebrahimi           {
1112*22dc650dSSadaf Ebrahimi           PCRE2_SPTR temp = ptr - 1;
1113*22dc650dSSadaf Ebrahimi           if (temp < mb->start_used_ptr) mb->start_used_ptr = temp;
1114*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1115*22dc650dSSadaf Ebrahimi           if (utf) { BACKCHAR(temp); }
1116*22dc650dSSadaf Ebrahimi #endif
1117*22dc650dSSadaf Ebrahimi           GETCHARTEST(d, temp);
1118*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
1119*22dc650dSSadaf Ebrahimi           if (codevalue == OP_UCP_WORD_BOUNDARY ||
1120*22dc650dSSadaf Ebrahimi               codevalue == OP_NOT_UCP_WORD_BOUNDARY)
1121*22dc650dSSadaf Ebrahimi             {
1122*22dc650dSSadaf Ebrahimi             int chartype = UCD_CHARTYPE(d);
1123*22dc650dSSadaf Ebrahimi             int category = PRIV(ucp_gentype)[chartype];
1124*22dc650dSSadaf Ebrahimi             left_word = (category == ucp_L || category == ucp_N ||
1125*22dc650dSSadaf Ebrahimi               chartype == ucp_Mn || chartype == ucp_Pc);
1126*22dc650dSSadaf Ebrahimi             }
1127*22dc650dSSadaf Ebrahimi           else
1128*22dc650dSSadaf Ebrahimi #endif
1129*22dc650dSSadaf Ebrahimi           left_word = d < 256 && (ctypes[d] & ctype_word) != 0;
1130*22dc650dSSadaf Ebrahimi           }
1131*22dc650dSSadaf Ebrahimi         else left_word = FALSE;
1132*22dc650dSSadaf Ebrahimi 
1133*22dc650dSSadaf Ebrahimi         if (clen > 0)
1134*22dc650dSSadaf Ebrahimi           {
1135*22dc650dSSadaf Ebrahimi           if (ptr >= mb->last_used_ptr)
1136*22dc650dSSadaf Ebrahimi             {
1137*22dc650dSSadaf Ebrahimi             PCRE2_SPTR temp = ptr + 1;
1138*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1139*22dc650dSSadaf Ebrahimi             if (utf) { FORWARDCHARTEST(temp, mb->end_subject); }
1140*22dc650dSSadaf Ebrahimi #endif
1141*22dc650dSSadaf Ebrahimi             mb->last_used_ptr = temp;
1142*22dc650dSSadaf Ebrahimi             }
1143*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
1144*22dc650dSSadaf Ebrahimi           if (codevalue == OP_UCP_WORD_BOUNDARY ||
1145*22dc650dSSadaf Ebrahimi               codevalue == OP_NOT_UCP_WORD_BOUNDARY)
1146*22dc650dSSadaf Ebrahimi             {
1147*22dc650dSSadaf Ebrahimi             int chartype = UCD_CHARTYPE(c);
1148*22dc650dSSadaf Ebrahimi             int category = PRIV(ucp_gentype)[chartype];
1149*22dc650dSSadaf Ebrahimi             right_word = (category == ucp_L || category == ucp_N ||
1150*22dc650dSSadaf Ebrahimi               chartype == ucp_Mn || chartype == ucp_Pc);
1151*22dc650dSSadaf Ebrahimi             }
1152*22dc650dSSadaf Ebrahimi           else
1153*22dc650dSSadaf Ebrahimi #endif
1154*22dc650dSSadaf Ebrahimi           right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
1155*22dc650dSSadaf Ebrahimi           }
1156*22dc650dSSadaf Ebrahimi         else right_word = FALSE;
1157*22dc650dSSadaf Ebrahimi 
1158*22dc650dSSadaf Ebrahimi         if ((left_word == right_word) ==
1159*22dc650dSSadaf Ebrahimi             (codevalue == OP_NOT_WORD_BOUNDARY ||
1160*22dc650dSSadaf Ebrahimi              codevalue == OP_NOT_UCP_WORD_BOUNDARY))
1161*22dc650dSSadaf Ebrahimi           { ADD_ACTIVE(state_offset + 1, 0); }
1162*22dc650dSSadaf Ebrahimi         }
1163*22dc650dSSadaf Ebrahimi       break;
1164*22dc650dSSadaf Ebrahimi 
1165*22dc650dSSadaf Ebrahimi 
1166*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
1167*22dc650dSSadaf Ebrahimi       /* Check the next character by Unicode property. We will get here only
1168*22dc650dSSadaf Ebrahimi       if the support is in the binary; otherwise a compile-time error occurs.
1169*22dc650dSSadaf Ebrahimi       */
1170*22dc650dSSadaf Ebrahimi 
1171*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
1172*22dc650dSSadaf Ebrahimi       case OP_PROP:
1173*22dc650dSSadaf Ebrahimi       case OP_NOTPROP:
1174*22dc650dSSadaf Ebrahimi       if (clen > 0)
1175*22dc650dSSadaf Ebrahimi         {
1176*22dc650dSSadaf Ebrahimi         BOOL OK;
1177*22dc650dSSadaf Ebrahimi         int chartype;
1178*22dc650dSSadaf Ebrahimi         const uint32_t *cp;
1179*22dc650dSSadaf Ebrahimi         const ucd_record * prop = GET_UCD(c);
1180*22dc650dSSadaf Ebrahimi         switch(code[1])
1181*22dc650dSSadaf Ebrahimi           {
1182*22dc650dSSadaf Ebrahimi           case PT_ANY:
1183*22dc650dSSadaf Ebrahimi           OK = TRUE;
1184*22dc650dSSadaf Ebrahimi           break;
1185*22dc650dSSadaf Ebrahimi 
1186*22dc650dSSadaf Ebrahimi           case PT_LAMP:
1187*22dc650dSSadaf Ebrahimi           chartype = prop->chartype;
1188*22dc650dSSadaf Ebrahimi           OK = chartype == ucp_Lu || chartype == ucp_Ll ||
1189*22dc650dSSadaf Ebrahimi                chartype == ucp_Lt;
1190*22dc650dSSadaf Ebrahimi           break;
1191*22dc650dSSadaf Ebrahimi 
1192*22dc650dSSadaf Ebrahimi           case PT_GC:
1193*22dc650dSSadaf Ebrahimi           OK = PRIV(ucp_gentype)[prop->chartype] == code[2];
1194*22dc650dSSadaf Ebrahimi           break;
1195*22dc650dSSadaf Ebrahimi 
1196*22dc650dSSadaf Ebrahimi           case PT_PC:
1197*22dc650dSSadaf Ebrahimi           OK = prop->chartype == code[2];
1198*22dc650dSSadaf Ebrahimi           break;
1199*22dc650dSSadaf Ebrahimi 
1200*22dc650dSSadaf Ebrahimi           case PT_SC:
1201*22dc650dSSadaf Ebrahimi           OK = prop->script == code[2];
1202*22dc650dSSadaf Ebrahimi           break;
1203*22dc650dSSadaf Ebrahimi 
1204*22dc650dSSadaf Ebrahimi           case PT_SCX:
1205*22dc650dSSadaf Ebrahimi           OK = (prop->script == code[2] ||
1206*22dc650dSSadaf Ebrahimi                 MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), code[2]) != 0);
1207*22dc650dSSadaf Ebrahimi           break;
1208*22dc650dSSadaf Ebrahimi 
1209*22dc650dSSadaf Ebrahimi           /* These are specials for combination cases. */
1210*22dc650dSSadaf Ebrahimi 
1211*22dc650dSSadaf Ebrahimi           case PT_ALNUM:
1212*22dc650dSSadaf Ebrahimi           chartype = prop->chartype;
1213*22dc650dSSadaf Ebrahimi           OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
1214*22dc650dSSadaf Ebrahimi                PRIV(ucp_gentype)[chartype] == ucp_N;
1215*22dc650dSSadaf Ebrahimi           break;
1216*22dc650dSSadaf Ebrahimi 
1217*22dc650dSSadaf Ebrahimi           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
1218*22dc650dSSadaf Ebrahimi           which means that Perl space and POSIX space are now identical. PCRE
1219*22dc650dSSadaf Ebrahimi           was changed at release 8.34. */
1220*22dc650dSSadaf Ebrahimi 
1221*22dc650dSSadaf Ebrahimi           case PT_SPACE:    /* Perl space */
1222*22dc650dSSadaf Ebrahimi           case PT_PXSPACE:  /* POSIX space */
1223*22dc650dSSadaf Ebrahimi           switch(c)
1224*22dc650dSSadaf Ebrahimi             {
1225*22dc650dSSadaf Ebrahimi             HSPACE_CASES:
1226*22dc650dSSadaf Ebrahimi             VSPACE_CASES:
1227*22dc650dSSadaf Ebrahimi             OK = TRUE;
1228*22dc650dSSadaf Ebrahimi             break;
1229*22dc650dSSadaf Ebrahimi 
1230*22dc650dSSadaf Ebrahimi             default:
1231*22dc650dSSadaf Ebrahimi             OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
1232*22dc650dSSadaf Ebrahimi             break;
1233*22dc650dSSadaf Ebrahimi             }
1234*22dc650dSSadaf Ebrahimi           break;
1235*22dc650dSSadaf Ebrahimi 
1236*22dc650dSSadaf Ebrahimi           case PT_WORD:
1237*22dc650dSSadaf Ebrahimi           chartype = prop->chartype;
1238*22dc650dSSadaf Ebrahimi           OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
1239*22dc650dSSadaf Ebrahimi                PRIV(ucp_gentype)[chartype] == ucp_N ||
1240*22dc650dSSadaf Ebrahimi                chartype == ucp_Mn || chartype == ucp_Pc;
1241*22dc650dSSadaf Ebrahimi           break;
1242*22dc650dSSadaf Ebrahimi 
1243*22dc650dSSadaf Ebrahimi           case PT_CLIST:
1244*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 32
1245*22dc650dSSadaf Ebrahimi           if (c > MAX_UTF_CODE_POINT)
1246*22dc650dSSadaf Ebrahimi             {
1247*22dc650dSSadaf Ebrahimi             OK = FALSE;
1248*22dc650dSSadaf Ebrahimi             break;
1249*22dc650dSSadaf Ebrahimi             }
1250*22dc650dSSadaf Ebrahimi #endif
1251*22dc650dSSadaf Ebrahimi           cp = PRIV(ucd_caseless_sets) + code[2];
1252*22dc650dSSadaf Ebrahimi           for (;;)
1253*22dc650dSSadaf Ebrahimi             {
1254*22dc650dSSadaf Ebrahimi             if (c < *cp) { OK = FALSE; break; }
1255*22dc650dSSadaf Ebrahimi             if (c == *cp++) { OK = TRUE; break; }
1256*22dc650dSSadaf Ebrahimi             }
1257*22dc650dSSadaf Ebrahimi           break;
1258*22dc650dSSadaf Ebrahimi 
1259*22dc650dSSadaf Ebrahimi           case PT_UCNC:
1260*22dc650dSSadaf Ebrahimi           OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1261*22dc650dSSadaf Ebrahimi                c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1262*22dc650dSSadaf Ebrahimi                c >= 0xe000;
1263*22dc650dSSadaf Ebrahimi           break;
1264*22dc650dSSadaf Ebrahimi 
1265*22dc650dSSadaf Ebrahimi           case PT_BIDICL:
1266*22dc650dSSadaf Ebrahimi           OK = UCD_BIDICLASS(c) == code[2];
1267*22dc650dSSadaf Ebrahimi           break;
1268*22dc650dSSadaf Ebrahimi 
1269*22dc650dSSadaf Ebrahimi           case PT_BOOL:
1270*22dc650dSSadaf Ebrahimi           OK = MAPBIT(PRIV(ucd_boolprop_sets) +
1271*22dc650dSSadaf Ebrahimi             UCD_BPROPS_PROP(prop), code[2]) != 0;
1272*22dc650dSSadaf Ebrahimi           break;
1273*22dc650dSSadaf Ebrahimi 
1274*22dc650dSSadaf Ebrahimi           /* Should never occur, but keep compilers from grumbling. */
1275*22dc650dSSadaf Ebrahimi 
1276*22dc650dSSadaf Ebrahimi           default:
1277*22dc650dSSadaf Ebrahimi           OK = codevalue != OP_PROP;
1278*22dc650dSSadaf Ebrahimi           break;
1279*22dc650dSSadaf Ebrahimi           }
1280*22dc650dSSadaf Ebrahimi 
1281*22dc650dSSadaf Ebrahimi         if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + 3, 0); }
1282*22dc650dSSadaf Ebrahimi         }
1283*22dc650dSSadaf Ebrahimi       break;
1284*22dc650dSSadaf Ebrahimi #endif
1285*22dc650dSSadaf Ebrahimi 
1286*22dc650dSSadaf Ebrahimi 
1287*22dc650dSSadaf Ebrahimi 
1288*22dc650dSSadaf Ebrahimi /* ========================================================================== */
1289*22dc650dSSadaf Ebrahimi       /* These opcodes likewise inspect the subject character, but have an
1290*22dc650dSSadaf Ebrahimi       argument that is not a data character. It is one of these opcodes:
1291*22dc650dSSadaf Ebrahimi       OP_ANY, OP_ALLANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_WHITESPACE,
1292*22dc650dSSadaf Ebrahimi       OP_WORDCHAR, OP_NOT_WORDCHAR. The value is loaded into d. */
1293*22dc650dSSadaf Ebrahimi 
1294*22dc650dSSadaf Ebrahimi       case OP_TYPEPLUS:
1295*22dc650dSSadaf Ebrahimi       case OP_TYPEMINPLUS:
1296*22dc650dSSadaf Ebrahimi       case OP_TYPEPOSPLUS:
1297*22dc650dSSadaf Ebrahimi       count = current_state->count;  /* Already matched */
1298*22dc650dSSadaf Ebrahimi       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1299*22dc650dSSadaf Ebrahimi       if (clen > 0)
1300*22dc650dSSadaf Ebrahimi         {
1301*22dc650dSSadaf Ebrahimi         if (d == OP_ANY && ptr + 1 >= mb->end_subject &&
1302*22dc650dSSadaf Ebrahimi             (mb->moptions & (PCRE2_PARTIAL_HARD)) != 0 &&
1303*22dc650dSSadaf Ebrahimi             NLBLOCK->nltype == NLTYPE_FIXED &&
1304*22dc650dSSadaf Ebrahimi             NLBLOCK->nllen == 2 &&
1305*22dc650dSSadaf Ebrahimi             c == NLBLOCK->nl[0])
1306*22dc650dSSadaf Ebrahimi           {
1307*22dc650dSSadaf Ebrahimi           could_continue = partial_newline = TRUE;
1308*22dc650dSSadaf Ebrahimi           }
1309*22dc650dSSadaf Ebrahimi         else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1310*22dc650dSSadaf Ebrahimi             (c < 256 &&
1311*22dc650dSSadaf Ebrahimi               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1312*22dc650dSSadaf Ebrahimi               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1313*22dc650dSSadaf Ebrahimi           {
1314*22dc650dSSadaf Ebrahimi           if (count > 0 && codevalue == OP_TYPEPOSPLUS)
1315*22dc650dSSadaf Ebrahimi             {
1316*22dc650dSSadaf Ebrahimi             active_count--;            /* Remove non-match possibility */
1317*22dc650dSSadaf Ebrahimi             next_active_state--;
1318*22dc650dSSadaf Ebrahimi             }
1319*22dc650dSSadaf Ebrahimi           count++;
1320*22dc650dSSadaf Ebrahimi           ADD_NEW(state_offset, count);
1321*22dc650dSSadaf Ebrahimi           }
1322*22dc650dSSadaf Ebrahimi         }
1323*22dc650dSSadaf Ebrahimi       break;
1324*22dc650dSSadaf Ebrahimi 
1325*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
1326*22dc650dSSadaf Ebrahimi       case OP_TYPEQUERY:
1327*22dc650dSSadaf Ebrahimi       case OP_TYPEMINQUERY:
1328*22dc650dSSadaf Ebrahimi       case OP_TYPEPOSQUERY:
1329*22dc650dSSadaf Ebrahimi       ADD_ACTIVE(state_offset + 2, 0);
1330*22dc650dSSadaf Ebrahimi       if (clen > 0)
1331*22dc650dSSadaf Ebrahimi         {
1332*22dc650dSSadaf Ebrahimi         if (d == OP_ANY && ptr + 1 >= mb->end_subject &&
1333*22dc650dSSadaf Ebrahimi             (mb->moptions & (PCRE2_PARTIAL_HARD)) != 0 &&
1334*22dc650dSSadaf Ebrahimi             NLBLOCK->nltype == NLTYPE_FIXED &&
1335*22dc650dSSadaf Ebrahimi             NLBLOCK->nllen == 2 &&
1336*22dc650dSSadaf Ebrahimi             c == NLBLOCK->nl[0])
1337*22dc650dSSadaf Ebrahimi           {
1338*22dc650dSSadaf Ebrahimi           could_continue = partial_newline = TRUE;
1339*22dc650dSSadaf Ebrahimi           }
1340*22dc650dSSadaf Ebrahimi         else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1341*22dc650dSSadaf Ebrahimi             (c < 256 &&
1342*22dc650dSSadaf Ebrahimi               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1343*22dc650dSSadaf Ebrahimi               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1344*22dc650dSSadaf Ebrahimi           {
1345*22dc650dSSadaf Ebrahimi           if (codevalue == OP_TYPEPOSQUERY)
1346*22dc650dSSadaf Ebrahimi             {
1347*22dc650dSSadaf Ebrahimi             active_count--;            /* Remove non-match possibility */
1348*22dc650dSSadaf Ebrahimi             next_active_state--;
1349*22dc650dSSadaf Ebrahimi             }
1350*22dc650dSSadaf Ebrahimi           ADD_NEW(state_offset + 2, 0);
1351*22dc650dSSadaf Ebrahimi           }
1352*22dc650dSSadaf Ebrahimi         }
1353*22dc650dSSadaf Ebrahimi       break;
1354*22dc650dSSadaf Ebrahimi 
1355*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
1356*22dc650dSSadaf Ebrahimi       case OP_TYPESTAR:
1357*22dc650dSSadaf Ebrahimi       case OP_TYPEMINSTAR:
1358*22dc650dSSadaf Ebrahimi       case OP_TYPEPOSSTAR:
1359*22dc650dSSadaf Ebrahimi       ADD_ACTIVE(state_offset + 2, 0);
1360*22dc650dSSadaf Ebrahimi       if (clen > 0)
1361*22dc650dSSadaf Ebrahimi         {
1362*22dc650dSSadaf Ebrahimi         if (d == OP_ANY && ptr + 1 >= mb->end_subject &&
1363*22dc650dSSadaf Ebrahimi             (mb->moptions & (PCRE2_PARTIAL_HARD)) != 0 &&
1364*22dc650dSSadaf Ebrahimi             NLBLOCK->nltype == NLTYPE_FIXED &&
1365*22dc650dSSadaf Ebrahimi             NLBLOCK->nllen == 2 &&
1366*22dc650dSSadaf Ebrahimi             c == NLBLOCK->nl[0])
1367*22dc650dSSadaf Ebrahimi           {
1368*22dc650dSSadaf Ebrahimi           could_continue = partial_newline = TRUE;
1369*22dc650dSSadaf Ebrahimi           }
1370*22dc650dSSadaf Ebrahimi         else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1371*22dc650dSSadaf Ebrahimi             (c < 256 &&
1372*22dc650dSSadaf Ebrahimi               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1373*22dc650dSSadaf Ebrahimi               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1374*22dc650dSSadaf Ebrahimi           {
1375*22dc650dSSadaf Ebrahimi           if (codevalue == OP_TYPEPOSSTAR)
1376*22dc650dSSadaf Ebrahimi             {
1377*22dc650dSSadaf Ebrahimi             active_count--;            /* Remove non-match possibility */
1378*22dc650dSSadaf Ebrahimi             next_active_state--;
1379*22dc650dSSadaf Ebrahimi             }
1380*22dc650dSSadaf Ebrahimi           ADD_NEW(state_offset, 0);
1381*22dc650dSSadaf Ebrahimi           }
1382*22dc650dSSadaf Ebrahimi         }
1383*22dc650dSSadaf Ebrahimi       break;
1384*22dc650dSSadaf Ebrahimi 
1385*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
1386*22dc650dSSadaf Ebrahimi       case OP_TYPEEXACT:
1387*22dc650dSSadaf Ebrahimi       count = current_state->count;  /* Number already matched */
1388*22dc650dSSadaf Ebrahimi       if (clen > 0)
1389*22dc650dSSadaf Ebrahimi         {
1390*22dc650dSSadaf Ebrahimi         if (d == OP_ANY && ptr + 1 >= mb->end_subject &&
1391*22dc650dSSadaf Ebrahimi             (mb->moptions & (PCRE2_PARTIAL_HARD)) != 0 &&
1392*22dc650dSSadaf Ebrahimi             NLBLOCK->nltype == NLTYPE_FIXED &&
1393*22dc650dSSadaf Ebrahimi             NLBLOCK->nllen == 2 &&
1394*22dc650dSSadaf Ebrahimi             c == NLBLOCK->nl[0])
1395*22dc650dSSadaf Ebrahimi           {
1396*22dc650dSSadaf Ebrahimi           could_continue = partial_newline = TRUE;
1397*22dc650dSSadaf Ebrahimi           }
1398*22dc650dSSadaf Ebrahimi         else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1399*22dc650dSSadaf Ebrahimi             (c < 256 &&
1400*22dc650dSSadaf Ebrahimi               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1401*22dc650dSSadaf Ebrahimi               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1402*22dc650dSSadaf Ebrahimi           {
1403*22dc650dSSadaf Ebrahimi           if (++count >= (int)GET2(code, 1))
1404*22dc650dSSadaf Ebrahimi             { ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }
1405*22dc650dSSadaf Ebrahimi           else
1406*22dc650dSSadaf Ebrahimi             { ADD_NEW(state_offset, count); }
1407*22dc650dSSadaf Ebrahimi           }
1408*22dc650dSSadaf Ebrahimi         }
1409*22dc650dSSadaf Ebrahimi       break;
1410*22dc650dSSadaf Ebrahimi 
1411*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
1412*22dc650dSSadaf Ebrahimi       case OP_TYPEUPTO:
1413*22dc650dSSadaf Ebrahimi       case OP_TYPEMINUPTO:
1414*22dc650dSSadaf Ebrahimi       case OP_TYPEPOSUPTO:
1415*22dc650dSSadaf Ebrahimi       ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0);
1416*22dc650dSSadaf Ebrahimi       count = current_state->count;  /* Number already matched */
1417*22dc650dSSadaf Ebrahimi       if (clen > 0)
1418*22dc650dSSadaf Ebrahimi         {
1419*22dc650dSSadaf Ebrahimi         if (d == OP_ANY && ptr + 1 >= mb->end_subject &&
1420*22dc650dSSadaf Ebrahimi             (mb->moptions & (PCRE2_PARTIAL_HARD)) != 0 &&
1421*22dc650dSSadaf Ebrahimi             NLBLOCK->nltype == NLTYPE_FIXED &&
1422*22dc650dSSadaf Ebrahimi             NLBLOCK->nllen == 2 &&
1423*22dc650dSSadaf Ebrahimi             c == NLBLOCK->nl[0])
1424*22dc650dSSadaf Ebrahimi           {
1425*22dc650dSSadaf Ebrahimi           could_continue = partial_newline = TRUE;
1426*22dc650dSSadaf Ebrahimi           }
1427*22dc650dSSadaf Ebrahimi         else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1428*22dc650dSSadaf Ebrahimi             (c < 256 &&
1429*22dc650dSSadaf Ebrahimi               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1430*22dc650dSSadaf Ebrahimi               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1431*22dc650dSSadaf Ebrahimi           {
1432*22dc650dSSadaf Ebrahimi           if (codevalue == OP_TYPEPOSUPTO)
1433*22dc650dSSadaf Ebrahimi             {
1434*22dc650dSSadaf Ebrahimi             active_count--;           /* Remove non-match possibility */
1435*22dc650dSSadaf Ebrahimi             next_active_state--;
1436*22dc650dSSadaf Ebrahimi             }
1437*22dc650dSSadaf Ebrahimi           if (++count >= (int)GET2(code, 1))
1438*22dc650dSSadaf Ebrahimi             { ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }
1439*22dc650dSSadaf Ebrahimi           else
1440*22dc650dSSadaf Ebrahimi             { ADD_NEW(state_offset, count); }
1441*22dc650dSSadaf Ebrahimi           }
1442*22dc650dSSadaf Ebrahimi         }
1443*22dc650dSSadaf Ebrahimi       break;
1444*22dc650dSSadaf Ebrahimi 
1445*22dc650dSSadaf Ebrahimi /* ========================================================================== */
1446*22dc650dSSadaf Ebrahimi       /* These are virtual opcodes that are used when something like
1447*22dc650dSSadaf Ebrahimi       OP_TYPEPLUS has OP_PROP, OP_NOTPROP, OP_ANYNL, OP_EXTUNI, OP_VSPACE, or
1448*22dc650dSSadaf Ebrahimi       OP_HSPACE as its argument. This keeps the code above fast for the other
1449*22dc650dSSadaf Ebrahimi       cases. The argument is in the d variable. */
1450*22dc650dSSadaf Ebrahimi 
1451*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
1452*22dc650dSSadaf Ebrahimi       case OP_PROP_EXTRA + OP_TYPEPLUS:
1453*22dc650dSSadaf Ebrahimi       case OP_PROP_EXTRA + OP_TYPEMINPLUS:
1454*22dc650dSSadaf Ebrahimi       case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
1455*22dc650dSSadaf Ebrahimi       count = current_state->count;           /* Already matched */
1456*22dc650dSSadaf Ebrahimi       if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
1457*22dc650dSSadaf Ebrahimi       if (clen > 0)
1458*22dc650dSSadaf Ebrahimi         {
1459*22dc650dSSadaf Ebrahimi         BOOL OK;
1460*22dc650dSSadaf Ebrahimi         int chartype;
1461*22dc650dSSadaf Ebrahimi         const uint32_t *cp;
1462*22dc650dSSadaf Ebrahimi         const ucd_record * prop = GET_UCD(c);
1463*22dc650dSSadaf Ebrahimi         switch(code[2])
1464*22dc650dSSadaf Ebrahimi           {
1465*22dc650dSSadaf Ebrahimi           case PT_ANY:
1466*22dc650dSSadaf Ebrahimi           OK = TRUE;
1467*22dc650dSSadaf Ebrahimi           break;
1468*22dc650dSSadaf Ebrahimi 
1469*22dc650dSSadaf Ebrahimi           case PT_LAMP:
1470*22dc650dSSadaf Ebrahimi           chartype = prop->chartype;
1471*22dc650dSSadaf Ebrahimi           OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
1472*22dc650dSSadaf Ebrahimi           break;
1473*22dc650dSSadaf Ebrahimi 
1474*22dc650dSSadaf Ebrahimi           case PT_GC:
1475*22dc650dSSadaf Ebrahimi           OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1476*22dc650dSSadaf Ebrahimi           break;
1477*22dc650dSSadaf Ebrahimi 
1478*22dc650dSSadaf Ebrahimi           case PT_PC:
1479*22dc650dSSadaf Ebrahimi           OK = prop->chartype == code[3];
1480*22dc650dSSadaf Ebrahimi           break;
1481*22dc650dSSadaf Ebrahimi 
1482*22dc650dSSadaf Ebrahimi           case PT_SC:
1483*22dc650dSSadaf Ebrahimi           OK = prop->script == code[3];
1484*22dc650dSSadaf Ebrahimi           break;
1485*22dc650dSSadaf Ebrahimi 
1486*22dc650dSSadaf Ebrahimi           case PT_SCX:
1487*22dc650dSSadaf Ebrahimi           OK = (prop->script == code[3] ||
1488*22dc650dSSadaf Ebrahimi                 MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), code[3]) != 0);
1489*22dc650dSSadaf Ebrahimi           break;
1490*22dc650dSSadaf Ebrahimi 
1491*22dc650dSSadaf Ebrahimi           /* These are specials for combination cases. */
1492*22dc650dSSadaf Ebrahimi 
1493*22dc650dSSadaf Ebrahimi           case PT_ALNUM:
1494*22dc650dSSadaf Ebrahimi           chartype = prop->chartype;
1495*22dc650dSSadaf Ebrahimi           OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
1496*22dc650dSSadaf Ebrahimi                PRIV(ucp_gentype)[chartype] == ucp_N;
1497*22dc650dSSadaf Ebrahimi           break;
1498*22dc650dSSadaf Ebrahimi 
1499*22dc650dSSadaf Ebrahimi           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
1500*22dc650dSSadaf Ebrahimi           which means that Perl space and POSIX space are now identical. PCRE
1501*22dc650dSSadaf Ebrahimi           was changed at release 8.34. */
1502*22dc650dSSadaf Ebrahimi 
1503*22dc650dSSadaf Ebrahimi           case PT_SPACE:    /* Perl space */
1504*22dc650dSSadaf Ebrahimi           case PT_PXSPACE:  /* POSIX space */
1505*22dc650dSSadaf Ebrahimi           switch(c)
1506*22dc650dSSadaf Ebrahimi             {
1507*22dc650dSSadaf Ebrahimi             HSPACE_CASES:
1508*22dc650dSSadaf Ebrahimi             VSPACE_CASES:
1509*22dc650dSSadaf Ebrahimi             OK = TRUE;
1510*22dc650dSSadaf Ebrahimi             break;
1511*22dc650dSSadaf Ebrahimi 
1512*22dc650dSSadaf Ebrahimi             default:
1513*22dc650dSSadaf Ebrahimi             OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
1514*22dc650dSSadaf Ebrahimi             break;
1515*22dc650dSSadaf Ebrahimi             }
1516*22dc650dSSadaf Ebrahimi           break;
1517*22dc650dSSadaf Ebrahimi 
1518*22dc650dSSadaf Ebrahimi           case PT_WORD:
1519*22dc650dSSadaf Ebrahimi           chartype = prop->chartype;
1520*22dc650dSSadaf Ebrahimi           OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
1521*22dc650dSSadaf Ebrahimi                PRIV(ucp_gentype)[chartype] == ucp_N ||
1522*22dc650dSSadaf Ebrahimi                chartype == ucp_Mn || chartype == ucp_Pc;
1523*22dc650dSSadaf Ebrahimi           break;
1524*22dc650dSSadaf Ebrahimi 
1525*22dc650dSSadaf Ebrahimi           case PT_CLIST:
1526*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 32
1527*22dc650dSSadaf Ebrahimi           if (c > MAX_UTF_CODE_POINT)
1528*22dc650dSSadaf Ebrahimi             {
1529*22dc650dSSadaf Ebrahimi             OK = FALSE;
1530*22dc650dSSadaf Ebrahimi             break;
1531*22dc650dSSadaf Ebrahimi             }
1532*22dc650dSSadaf Ebrahimi #endif
1533*22dc650dSSadaf Ebrahimi           cp = PRIV(ucd_caseless_sets) + code[3];
1534*22dc650dSSadaf Ebrahimi           for (;;)
1535*22dc650dSSadaf Ebrahimi             {
1536*22dc650dSSadaf Ebrahimi             if (c < *cp) { OK = FALSE; break; }
1537*22dc650dSSadaf Ebrahimi             if (c == *cp++) { OK = TRUE; break; }
1538*22dc650dSSadaf Ebrahimi             }
1539*22dc650dSSadaf Ebrahimi           break;
1540*22dc650dSSadaf Ebrahimi 
1541*22dc650dSSadaf Ebrahimi           case PT_UCNC:
1542*22dc650dSSadaf Ebrahimi           OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1543*22dc650dSSadaf Ebrahimi                c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1544*22dc650dSSadaf Ebrahimi                c >= 0xe000;
1545*22dc650dSSadaf Ebrahimi           break;
1546*22dc650dSSadaf Ebrahimi 
1547*22dc650dSSadaf Ebrahimi           case PT_BIDICL:
1548*22dc650dSSadaf Ebrahimi           OK = UCD_BIDICLASS(c) == code[3];
1549*22dc650dSSadaf Ebrahimi           break;
1550*22dc650dSSadaf Ebrahimi 
1551*22dc650dSSadaf Ebrahimi           case PT_BOOL:
1552*22dc650dSSadaf Ebrahimi           OK = MAPBIT(PRIV(ucd_boolprop_sets) +
1553*22dc650dSSadaf Ebrahimi             UCD_BPROPS_PROP(prop), code[3]) != 0;
1554*22dc650dSSadaf Ebrahimi           break;
1555*22dc650dSSadaf Ebrahimi 
1556*22dc650dSSadaf Ebrahimi           /* Should never occur, but keep compilers from grumbling. */
1557*22dc650dSSadaf Ebrahimi 
1558*22dc650dSSadaf Ebrahimi           default:
1559*22dc650dSSadaf Ebrahimi           OK = codevalue != OP_PROP;
1560*22dc650dSSadaf Ebrahimi           break;
1561*22dc650dSSadaf Ebrahimi           }
1562*22dc650dSSadaf Ebrahimi 
1563*22dc650dSSadaf Ebrahimi         if (OK == (d == OP_PROP))
1564*22dc650dSSadaf Ebrahimi           {
1565*22dc650dSSadaf Ebrahimi           if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS)
1566*22dc650dSSadaf Ebrahimi             {
1567*22dc650dSSadaf Ebrahimi             active_count--;           /* Remove non-match possibility */
1568*22dc650dSSadaf Ebrahimi             next_active_state--;
1569*22dc650dSSadaf Ebrahimi             }
1570*22dc650dSSadaf Ebrahimi           count++;
1571*22dc650dSSadaf Ebrahimi           ADD_NEW(state_offset, count);
1572*22dc650dSSadaf Ebrahimi           }
1573*22dc650dSSadaf Ebrahimi         }
1574*22dc650dSSadaf Ebrahimi       break;
1575*22dc650dSSadaf Ebrahimi 
1576*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
1577*22dc650dSSadaf Ebrahimi       case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
1578*22dc650dSSadaf Ebrahimi       case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
1579*22dc650dSSadaf Ebrahimi       case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1580*22dc650dSSadaf Ebrahimi       count = current_state->count;  /* Already matched */
1581*22dc650dSSadaf Ebrahimi       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1582*22dc650dSSadaf Ebrahimi       if (clen > 0)
1583*22dc650dSSadaf Ebrahimi         {
1584*22dc650dSSadaf Ebrahimi         int ncount = 0;
1585*22dc650dSSadaf Ebrahimi         if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
1586*22dc650dSSadaf Ebrahimi           {
1587*22dc650dSSadaf Ebrahimi           active_count--;           /* Remove non-match possibility */
1588*22dc650dSSadaf Ebrahimi           next_active_state--;
1589*22dc650dSSadaf Ebrahimi           }
1590*22dc650dSSadaf Ebrahimi         (void)PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf,
1591*22dc650dSSadaf Ebrahimi           &ncount);
1592*22dc650dSSadaf Ebrahimi         count++;
1593*22dc650dSSadaf Ebrahimi         ADD_NEW_DATA(-state_offset, count, ncount);
1594*22dc650dSSadaf Ebrahimi         }
1595*22dc650dSSadaf Ebrahimi       break;
1596*22dc650dSSadaf Ebrahimi #endif
1597*22dc650dSSadaf Ebrahimi 
1598*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
1599*22dc650dSSadaf Ebrahimi       case OP_ANYNL_EXTRA + OP_TYPEPLUS:
1600*22dc650dSSadaf Ebrahimi       case OP_ANYNL_EXTRA + OP_TYPEMINPLUS:
1601*22dc650dSSadaf Ebrahimi       case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS:
1602*22dc650dSSadaf Ebrahimi       count = current_state->count;  /* Already matched */
1603*22dc650dSSadaf Ebrahimi       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1604*22dc650dSSadaf Ebrahimi       if (clen > 0)
1605*22dc650dSSadaf Ebrahimi         {
1606*22dc650dSSadaf Ebrahimi         int ncount = 0;
1607*22dc650dSSadaf Ebrahimi         switch (c)
1608*22dc650dSSadaf Ebrahimi           {
1609*22dc650dSSadaf Ebrahimi           case CHAR_VT:
1610*22dc650dSSadaf Ebrahimi           case CHAR_FF:
1611*22dc650dSSadaf Ebrahimi           case CHAR_NEL:
1612*22dc650dSSadaf Ebrahimi #ifndef EBCDIC
1613*22dc650dSSadaf Ebrahimi           case 0x2028:
1614*22dc650dSSadaf Ebrahimi           case 0x2029:
1615*22dc650dSSadaf Ebrahimi #endif  /* Not EBCDIC */
1616*22dc650dSSadaf Ebrahimi           if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break;
1617*22dc650dSSadaf Ebrahimi           goto ANYNL01;
1618*22dc650dSSadaf Ebrahimi 
1619*22dc650dSSadaf Ebrahimi           case CHAR_CR:
1620*22dc650dSSadaf Ebrahimi           if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
1621*22dc650dSSadaf Ebrahimi           /* Fall through */
1622*22dc650dSSadaf Ebrahimi 
1623*22dc650dSSadaf Ebrahimi           ANYNL01:
1624*22dc650dSSadaf Ebrahimi           case CHAR_LF:
1625*22dc650dSSadaf Ebrahimi           if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1626*22dc650dSSadaf Ebrahimi             {
1627*22dc650dSSadaf Ebrahimi             active_count--;           /* Remove non-match possibility */
1628*22dc650dSSadaf Ebrahimi             next_active_state--;
1629*22dc650dSSadaf Ebrahimi             }
1630*22dc650dSSadaf Ebrahimi           count++;
1631*22dc650dSSadaf Ebrahimi           ADD_NEW_DATA(-state_offset, count, ncount);
1632*22dc650dSSadaf Ebrahimi           break;
1633*22dc650dSSadaf Ebrahimi 
1634*22dc650dSSadaf Ebrahimi           default:
1635*22dc650dSSadaf Ebrahimi           break;
1636*22dc650dSSadaf Ebrahimi           }
1637*22dc650dSSadaf Ebrahimi         }
1638*22dc650dSSadaf Ebrahimi       break;
1639*22dc650dSSadaf Ebrahimi 
1640*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
1641*22dc650dSSadaf Ebrahimi       case OP_VSPACE_EXTRA + OP_TYPEPLUS:
1642*22dc650dSSadaf Ebrahimi       case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
1643*22dc650dSSadaf Ebrahimi       case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
1644*22dc650dSSadaf Ebrahimi       count = current_state->count;  /* Already matched */
1645*22dc650dSSadaf Ebrahimi       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1646*22dc650dSSadaf Ebrahimi       if (clen > 0)
1647*22dc650dSSadaf Ebrahimi         {
1648*22dc650dSSadaf Ebrahimi         BOOL OK;
1649*22dc650dSSadaf Ebrahimi         switch (c)
1650*22dc650dSSadaf Ebrahimi           {
1651*22dc650dSSadaf Ebrahimi           VSPACE_CASES:
1652*22dc650dSSadaf Ebrahimi           OK = TRUE;
1653*22dc650dSSadaf Ebrahimi           break;
1654*22dc650dSSadaf Ebrahimi 
1655*22dc650dSSadaf Ebrahimi           default:
1656*22dc650dSSadaf Ebrahimi           OK = FALSE;
1657*22dc650dSSadaf Ebrahimi           break;
1658*22dc650dSSadaf Ebrahimi           }
1659*22dc650dSSadaf Ebrahimi 
1660*22dc650dSSadaf Ebrahimi         if (OK == (d == OP_VSPACE))
1661*22dc650dSSadaf Ebrahimi           {
1662*22dc650dSSadaf Ebrahimi           if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
1663*22dc650dSSadaf Ebrahimi             {
1664*22dc650dSSadaf Ebrahimi             active_count--;           /* Remove non-match possibility */
1665*22dc650dSSadaf Ebrahimi             next_active_state--;
1666*22dc650dSSadaf Ebrahimi             }
1667*22dc650dSSadaf Ebrahimi           count++;
1668*22dc650dSSadaf Ebrahimi           ADD_NEW_DATA(-state_offset, count, 0);
1669*22dc650dSSadaf Ebrahimi           }
1670*22dc650dSSadaf Ebrahimi         }
1671*22dc650dSSadaf Ebrahimi       break;
1672*22dc650dSSadaf Ebrahimi 
1673*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
1674*22dc650dSSadaf Ebrahimi       case OP_HSPACE_EXTRA + OP_TYPEPLUS:
1675*22dc650dSSadaf Ebrahimi       case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
1676*22dc650dSSadaf Ebrahimi       case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
1677*22dc650dSSadaf Ebrahimi       count = current_state->count;  /* Already matched */
1678*22dc650dSSadaf Ebrahimi       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1679*22dc650dSSadaf Ebrahimi       if (clen > 0)
1680*22dc650dSSadaf Ebrahimi         {
1681*22dc650dSSadaf Ebrahimi         BOOL OK;
1682*22dc650dSSadaf Ebrahimi         switch (c)
1683*22dc650dSSadaf Ebrahimi           {
1684*22dc650dSSadaf Ebrahimi           HSPACE_CASES:
1685*22dc650dSSadaf Ebrahimi           OK = TRUE;
1686*22dc650dSSadaf Ebrahimi           break;
1687*22dc650dSSadaf Ebrahimi 
1688*22dc650dSSadaf Ebrahimi           default:
1689*22dc650dSSadaf Ebrahimi           OK = FALSE;
1690*22dc650dSSadaf Ebrahimi           break;
1691*22dc650dSSadaf Ebrahimi           }
1692*22dc650dSSadaf Ebrahimi 
1693*22dc650dSSadaf Ebrahimi         if (OK == (d == OP_HSPACE))
1694*22dc650dSSadaf Ebrahimi           {
1695*22dc650dSSadaf Ebrahimi           if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
1696*22dc650dSSadaf Ebrahimi             {
1697*22dc650dSSadaf Ebrahimi             active_count--;           /* Remove non-match possibility */
1698*22dc650dSSadaf Ebrahimi             next_active_state--;
1699*22dc650dSSadaf Ebrahimi             }
1700*22dc650dSSadaf Ebrahimi           count++;
1701*22dc650dSSadaf Ebrahimi           ADD_NEW_DATA(-state_offset, count, 0);
1702*22dc650dSSadaf Ebrahimi           }
1703*22dc650dSSadaf Ebrahimi         }
1704*22dc650dSSadaf Ebrahimi       break;
1705*22dc650dSSadaf Ebrahimi 
1706*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
1707*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
1708*22dc650dSSadaf Ebrahimi       case OP_PROP_EXTRA + OP_TYPEQUERY:
1709*22dc650dSSadaf Ebrahimi       case OP_PROP_EXTRA + OP_TYPEMINQUERY:
1710*22dc650dSSadaf Ebrahimi       case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
1711*22dc650dSSadaf Ebrahimi       count = 4;
1712*22dc650dSSadaf Ebrahimi       goto QS1;
1713*22dc650dSSadaf Ebrahimi 
1714*22dc650dSSadaf Ebrahimi       case OP_PROP_EXTRA + OP_TYPESTAR:
1715*22dc650dSSadaf Ebrahimi       case OP_PROP_EXTRA + OP_TYPEMINSTAR:
1716*22dc650dSSadaf Ebrahimi       case OP_PROP_EXTRA + OP_TYPEPOSSTAR:
1717*22dc650dSSadaf Ebrahimi       count = 0;
1718*22dc650dSSadaf Ebrahimi 
1719*22dc650dSSadaf Ebrahimi       QS1:
1720*22dc650dSSadaf Ebrahimi 
1721*22dc650dSSadaf Ebrahimi       ADD_ACTIVE(state_offset + 4, 0);
1722*22dc650dSSadaf Ebrahimi       if (clen > 0)
1723*22dc650dSSadaf Ebrahimi         {
1724*22dc650dSSadaf Ebrahimi         BOOL OK;
1725*22dc650dSSadaf Ebrahimi         int chartype;
1726*22dc650dSSadaf Ebrahimi         const uint32_t *cp;
1727*22dc650dSSadaf Ebrahimi         const ucd_record * prop = GET_UCD(c);
1728*22dc650dSSadaf Ebrahimi         switch(code[2])
1729*22dc650dSSadaf Ebrahimi           {
1730*22dc650dSSadaf Ebrahimi           case PT_ANY:
1731*22dc650dSSadaf Ebrahimi           OK = TRUE;
1732*22dc650dSSadaf Ebrahimi           break;
1733*22dc650dSSadaf Ebrahimi 
1734*22dc650dSSadaf Ebrahimi           case PT_LAMP:
1735*22dc650dSSadaf Ebrahimi           chartype = prop->chartype;
1736*22dc650dSSadaf Ebrahimi           OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
1737*22dc650dSSadaf Ebrahimi           break;
1738*22dc650dSSadaf Ebrahimi 
1739*22dc650dSSadaf Ebrahimi           case PT_GC:
1740*22dc650dSSadaf Ebrahimi           OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1741*22dc650dSSadaf Ebrahimi           break;
1742*22dc650dSSadaf Ebrahimi 
1743*22dc650dSSadaf Ebrahimi           case PT_PC:
1744*22dc650dSSadaf Ebrahimi           OK = prop->chartype == code[3];
1745*22dc650dSSadaf Ebrahimi           break;
1746*22dc650dSSadaf Ebrahimi 
1747*22dc650dSSadaf Ebrahimi           case PT_SC:
1748*22dc650dSSadaf Ebrahimi           OK = prop->script == code[3];
1749*22dc650dSSadaf Ebrahimi           break;
1750*22dc650dSSadaf Ebrahimi 
1751*22dc650dSSadaf Ebrahimi           case PT_SCX:
1752*22dc650dSSadaf Ebrahimi           OK = (prop->script == code[3] ||
1753*22dc650dSSadaf Ebrahimi                 MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), code[3]) != 0);
1754*22dc650dSSadaf Ebrahimi           break;
1755*22dc650dSSadaf Ebrahimi 
1756*22dc650dSSadaf Ebrahimi           /* These are specials for combination cases. */
1757*22dc650dSSadaf Ebrahimi 
1758*22dc650dSSadaf Ebrahimi           case PT_ALNUM:
1759*22dc650dSSadaf Ebrahimi           chartype = prop->chartype;
1760*22dc650dSSadaf Ebrahimi           OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
1761*22dc650dSSadaf Ebrahimi                PRIV(ucp_gentype)[chartype] == ucp_N;
1762*22dc650dSSadaf Ebrahimi           break;
1763*22dc650dSSadaf Ebrahimi 
1764*22dc650dSSadaf Ebrahimi           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
1765*22dc650dSSadaf Ebrahimi           which means that Perl space and POSIX space are now identical. PCRE
1766*22dc650dSSadaf Ebrahimi           was changed at release 8.34. */
1767*22dc650dSSadaf Ebrahimi 
1768*22dc650dSSadaf Ebrahimi           case PT_SPACE:    /* Perl space */
1769*22dc650dSSadaf Ebrahimi           case PT_PXSPACE:  /* POSIX space */
1770*22dc650dSSadaf Ebrahimi           switch(c)
1771*22dc650dSSadaf Ebrahimi             {
1772*22dc650dSSadaf Ebrahimi             HSPACE_CASES:
1773*22dc650dSSadaf Ebrahimi             VSPACE_CASES:
1774*22dc650dSSadaf Ebrahimi             OK = TRUE;
1775*22dc650dSSadaf Ebrahimi             break;
1776*22dc650dSSadaf Ebrahimi 
1777*22dc650dSSadaf Ebrahimi             default:
1778*22dc650dSSadaf Ebrahimi             OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
1779*22dc650dSSadaf Ebrahimi             break;
1780*22dc650dSSadaf Ebrahimi             }
1781*22dc650dSSadaf Ebrahimi           break;
1782*22dc650dSSadaf Ebrahimi 
1783*22dc650dSSadaf Ebrahimi           case PT_WORD:
1784*22dc650dSSadaf Ebrahimi           chartype = prop->chartype;
1785*22dc650dSSadaf Ebrahimi           OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
1786*22dc650dSSadaf Ebrahimi                PRIV(ucp_gentype)[chartype] == ucp_N ||
1787*22dc650dSSadaf Ebrahimi                chartype == ucp_Mn || chartype == ucp_Pc;
1788*22dc650dSSadaf Ebrahimi           break;
1789*22dc650dSSadaf Ebrahimi 
1790*22dc650dSSadaf Ebrahimi           case PT_CLIST:
1791*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 32
1792*22dc650dSSadaf Ebrahimi           if (c > MAX_UTF_CODE_POINT)
1793*22dc650dSSadaf Ebrahimi             {
1794*22dc650dSSadaf Ebrahimi             OK = FALSE;
1795*22dc650dSSadaf Ebrahimi             break;
1796*22dc650dSSadaf Ebrahimi             }
1797*22dc650dSSadaf Ebrahimi #endif
1798*22dc650dSSadaf Ebrahimi           cp = PRIV(ucd_caseless_sets) + code[3];
1799*22dc650dSSadaf Ebrahimi           for (;;)
1800*22dc650dSSadaf Ebrahimi             {
1801*22dc650dSSadaf Ebrahimi             if (c < *cp) { OK = FALSE; break; }
1802*22dc650dSSadaf Ebrahimi             if (c == *cp++) { OK = TRUE; break; }
1803*22dc650dSSadaf Ebrahimi             }
1804*22dc650dSSadaf Ebrahimi           break;
1805*22dc650dSSadaf Ebrahimi 
1806*22dc650dSSadaf Ebrahimi           case PT_UCNC:
1807*22dc650dSSadaf Ebrahimi           OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1808*22dc650dSSadaf Ebrahimi                c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1809*22dc650dSSadaf Ebrahimi                c >= 0xe000;
1810*22dc650dSSadaf Ebrahimi           break;
1811*22dc650dSSadaf Ebrahimi 
1812*22dc650dSSadaf Ebrahimi           case PT_BIDICL:
1813*22dc650dSSadaf Ebrahimi           OK = UCD_BIDICLASS(c) == code[3];
1814*22dc650dSSadaf Ebrahimi           break;
1815*22dc650dSSadaf Ebrahimi 
1816*22dc650dSSadaf Ebrahimi           case PT_BOOL:
1817*22dc650dSSadaf Ebrahimi           OK = MAPBIT(PRIV(ucd_boolprop_sets) +
1818*22dc650dSSadaf Ebrahimi             UCD_BPROPS_PROP(prop), code[3]) != 0;
1819*22dc650dSSadaf Ebrahimi           break;
1820*22dc650dSSadaf Ebrahimi 
1821*22dc650dSSadaf Ebrahimi           /* Should never occur, but keep compilers from grumbling. */
1822*22dc650dSSadaf Ebrahimi 
1823*22dc650dSSadaf Ebrahimi           default:
1824*22dc650dSSadaf Ebrahimi           OK = codevalue != OP_PROP;
1825*22dc650dSSadaf Ebrahimi           break;
1826*22dc650dSSadaf Ebrahimi           }
1827*22dc650dSSadaf Ebrahimi 
1828*22dc650dSSadaf Ebrahimi         if (OK == (d == OP_PROP))
1829*22dc650dSSadaf Ebrahimi           {
1830*22dc650dSSadaf Ebrahimi           if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR ||
1831*22dc650dSSadaf Ebrahimi               codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY)
1832*22dc650dSSadaf Ebrahimi             {
1833*22dc650dSSadaf Ebrahimi             active_count--;           /* Remove non-match possibility */
1834*22dc650dSSadaf Ebrahimi             next_active_state--;
1835*22dc650dSSadaf Ebrahimi             }
1836*22dc650dSSadaf Ebrahimi           ADD_NEW(state_offset + count, 0);
1837*22dc650dSSadaf Ebrahimi           }
1838*22dc650dSSadaf Ebrahimi         }
1839*22dc650dSSadaf Ebrahimi       break;
1840*22dc650dSSadaf Ebrahimi 
1841*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
1842*22dc650dSSadaf Ebrahimi       case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
1843*22dc650dSSadaf Ebrahimi       case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
1844*22dc650dSSadaf Ebrahimi       case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY:
1845*22dc650dSSadaf Ebrahimi       count = 2;
1846*22dc650dSSadaf Ebrahimi       goto QS2;
1847*22dc650dSSadaf Ebrahimi 
1848*22dc650dSSadaf Ebrahimi       case OP_EXTUNI_EXTRA + OP_TYPESTAR:
1849*22dc650dSSadaf Ebrahimi       case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
1850*22dc650dSSadaf Ebrahimi       case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR:
1851*22dc650dSSadaf Ebrahimi       count = 0;
1852*22dc650dSSadaf Ebrahimi 
1853*22dc650dSSadaf Ebrahimi       QS2:
1854*22dc650dSSadaf Ebrahimi 
1855*22dc650dSSadaf Ebrahimi       ADD_ACTIVE(state_offset + 2, 0);
1856*22dc650dSSadaf Ebrahimi       if (clen > 0)
1857*22dc650dSSadaf Ebrahimi         {
1858*22dc650dSSadaf Ebrahimi         int ncount = 0;
1859*22dc650dSSadaf Ebrahimi         if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
1860*22dc650dSSadaf Ebrahimi             codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
1861*22dc650dSSadaf Ebrahimi           {
1862*22dc650dSSadaf Ebrahimi           active_count--;           /* Remove non-match possibility */
1863*22dc650dSSadaf Ebrahimi           next_active_state--;
1864*22dc650dSSadaf Ebrahimi           }
1865*22dc650dSSadaf Ebrahimi         (void)PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf,
1866*22dc650dSSadaf Ebrahimi           &ncount);
1867*22dc650dSSadaf Ebrahimi         ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1868*22dc650dSSadaf Ebrahimi         }
1869*22dc650dSSadaf Ebrahimi       break;
1870*22dc650dSSadaf Ebrahimi #endif
1871*22dc650dSSadaf Ebrahimi 
1872*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
1873*22dc650dSSadaf Ebrahimi       case OP_ANYNL_EXTRA + OP_TYPEQUERY:
1874*22dc650dSSadaf Ebrahimi       case OP_ANYNL_EXTRA + OP_TYPEMINQUERY:
1875*22dc650dSSadaf Ebrahimi       case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY:
1876*22dc650dSSadaf Ebrahimi       count = 2;
1877*22dc650dSSadaf Ebrahimi       goto QS3;
1878*22dc650dSSadaf Ebrahimi 
1879*22dc650dSSadaf Ebrahimi       case OP_ANYNL_EXTRA + OP_TYPESTAR:
1880*22dc650dSSadaf Ebrahimi       case OP_ANYNL_EXTRA + OP_TYPEMINSTAR:
1881*22dc650dSSadaf Ebrahimi       case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR:
1882*22dc650dSSadaf Ebrahimi       count = 0;
1883*22dc650dSSadaf Ebrahimi 
1884*22dc650dSSadaf Ebrahimi       QS3:
1885*22dc650dSSadaf Ebrahimi       ADD_ACTIVE(state_offset + 2, 0);
1886*22dc650dSSadaf Ebrahimi       if (clen > 0)
1887*22dc650dSSadaf Ebrahimi         {
1888*22dc650dSSadaf Ebrahimi         int ncount = 0;
1889*22dc650dSSadaf Ebrahimi         switch (c)
1890*22dc650dSSadaf Ebrahimi           {
1891*22dc650dSSadaf Ebrahimi           case CHAR_VT:
1892*22dc650dSSadaf Ebrahimi           case CHAR_FF:
1893*22dc650dSSadaf Ebrahimi           case CHAR_NEL:
1894*22dc650dSSadaf Ebrahimi #ifndef EBCDIC
1895*22dc650dSSadaf Ebrahimi           case 0x2028:
1896*22dc650dSSadaf Ebrahimi           case 0x2029:
1897*22dc650dSSadaf Ebrahimi #endif  /* Not EBCDIC */
1898*22dc650dSSadaf Ebrahimi           if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break;
1899*22dc650dSSadaf Ebrahimi           goto ANYNL02;
1900*22dc650dSSadaf Ebrahimi 
1901*22dc650dSSadaf Ebrahimi           case CHAR_CR:
1902*22dc650dSSadaf Ebrahimi           if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
1903*22dc650dSSadaf Ebrahimi           /* Fall through */
1904*22dc650dSSadaf Ebrahimi 
1905*22dc650dSSadaf Ebrahimi           ANYNL02:
1906*22dc650dSSadaf Ebrahimi           case CHAR_LF:
1907*22dc650dSSadaf Ebrahimi           if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
1908*22dc650dSSadaf Ebrahimi               codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1909*22dc650dSSadaf Ebrahimi             {
1910*22dc650dSSadaf Ebrahimi             active_count--;           /* Remove non-match possibility */
1911*22dc650dSSadaf Ebrahimi             next_active_state--;
1912*22dc650dSSadaf Ebrahimi             }
1913*22dc650dSSadaf Ebrahimi           ADD_NEW_DATA(-(state_offset + (int)count), 0, ncount);
1914*22dc650dSSadaf Ebrahimi           break;
1915*22dc650dSSadaf Ebrahimi 
1916*22dc650dSSadaf Ebrahimi           default:
1917*22dc650dSSadaf Ebrahimi           break;
1918*22dc650dSSadaf Ebrahimi           }
1919*22dc650dSSadaf Ebrahimi         }
1920*22dc650dSSadaf Ebrahimi       break;
1921*22dc650dSSadaf Ebrahimi 
1922*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
1923*22dc650dSSadaf Ebrahimi       case OP_VSPACE_EXTRA + OP_TYPEQUERY:
1924*22dc650dSSadaf Ebrahimi       case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
1925*22dc650dSSadaf Ebrahimi       case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
1926*22dc650dSSadaf Ebrahimi       count = 2;
1927*22dc650dSSadaf Ebrahimi       goto QS4;
1928*22dc650dSSadaf Ebrahimi 
1929*22dc650dSSadaf Ebrahimi       case OP_VSPACE_EXTRA + OP_TYPESTAR:
1930*22dc650dSSadaf Ebrahimi       case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
1931*22dc650dSSadaf Ebrahimi       case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
1932*22dc650dSSadaf Ebrahimi       count = 0;
1933*22dc650dSSadaf Ebrahimi 
1934*22dc650dSSadaf Ebrahimi       QS4:
1935*22dc650dSSadaf Ebrahimi       ADD_ACTIVE(state_offset + 2, 0);
1936*22dc650dSSadaf Ebrahimi       if (clen > 0)
1937*22dc650dSSadaf Ebrahimi         {
1938*22dc650dSSadaf Ebrahimi         BOOL OK;
1939*22dc650dSSadaf Ebrahimi         switch (c)
1940*22dc650dSSadaf Ebrahimi           {
1941*22dc650dSSadaf Ebrahimi           VSPACE_CASES:
1942*22dc650dSSadaf Ebrahimi           OK = TRUE;
1943*22dc650dSSadaf Ebrahimi           break;
1944*22dc650dSSadaf Ebrahimi 
1945*22dc650dSSadaf Ebrahimi           default:
1946*22dc650dSSadaf Ebrahimi           OK = FALSE;
1947*22dc650dSSadaf Ebrahimi           break;
1948*22dc650dSSadaf Ebrahimi           }
1949*22dc650dSSadaf Ebrahimi         if (OK == (d == OP_VSPACE))
1950*22dc650dSSadaf Ebrahimi           {
1951*22dc650dSSadaf Ebrahimi           if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
1952*22dc650dSSadaf Ebrahimi               codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
1953*22dc650dSSadaf Ebrahimi             {
1954*22dc650dSSadaf Ebrahimi             active_count--;           /* Remove non-match possibility */
1955*22dc650dSSadaf Ebrahimi             next_active_state--;
1956*22dc650dSSadaf Ebrahimi             }
1957*22dc650dSSadaf Ebrahimi           ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
1958*22dc650dSSadaf Ebrahimi           }
1959*22dc650dSSadaf Ebrahimi         }
1960*22dc650dSSadaf Ebrahimi       break;
1961*22dc650dSSadaf Ebrahimi 
1962*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
1963*22dc650dSSadaf Ebrahimi       case OP_HSPACE_EXTRA + OP_TYPEQUERY:
1964*22dc650dSSadaf Ebrahimi       case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
1965*22dc650dSSadaf Ebrahimi       case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
1966*22dc650dSSadaf Ebrahimi       count = 2;
1967*22dc650dSSadaf Ebrahimi       goto QS5;
1968*22dc650dSSadaf Ebrahimi 
1969*22dc650dSSadaf Ebrahimi       case OP_HSPACE_EXTRA + OP_TYPESTAR:
1970*22dc650dSSadaf Ebrahimi       case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
1971*22dc650dSSadaf Ebrahimi       case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
1972*22dc650dSSadaf Ebrahimi       count = 0;
1973*22dc650dSSadaf Ebrahimi 
1974*22dc650dSSadaf Ebrahimi       QS5:
1975*22dc650dSSadaf Ebrahimi       ADD_ACTIVE(state_offset + 2, 0);
1976*22dc650dSSadaf Ebrahimi       if (clen > 0)
1977*22dc650dSSadaf Ebrahimi         {
1978*22dc650dSSadaf Ebrahimi         BOOL OK;
1979*22dc650dSSadaf Ebrahimi         switch (c)
1980*22dc650dSSadaf Ebrahimi           {
1981*22dc650dSSadaf Ebrahimi           HSPACE_CASES:
1982*22dc650dSSadaf Ebrahimi           OK = TRUE;
1983*22dc650dSSadaf Ebrahimi           break;
1984*22dc650dSSadaf Ebrahimi 
1985*22dc650dSSadaf Ebrahimi           default:
1986*22dc650dSSadaf Ebrahimi           OK = FALSE;
1987*22dc650dSSadaf Ebrahimi           break;
1988*22dc650dSSadaf Ebrahimi           }
1989*22dc650dSSadaf Ebrahimi 
1990*22dc650dSSadaf Ebrahimi         if (OK == (d == OP_HSPACE))
1991*22dc650dSSadaf Ebrahimi           {
1992*22dc650dSSadaf Ebrahimi           if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
1993*22dc650dSSadaf Ebrahimi               codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
1994*22dc650dSSadaf Ebrahimi             {
1995*22dc650dSSadaf Ebrahimi             active_count--;           /* Remove non-match possibility */
1996*22dc650dSSadaf Ebrahimi             next_active_state--;
1997*22dc650dSSadaf Ebrahimi             }
1998*22dc650dSSadaf Ebrahimi           ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
1999*22dc650dSSadaf Ebrahimi           }
2000*22dc650dSSadaf Ebrahimi         }
2001*22dc650dSSadaf Ebrahimi       break;
2002*22dc650dSSadaf Ebrahimi 
2003*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
2004*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
2005*22dc650dSSadaf Ebrahimi       case OP_PROP_EXTRA + OP_TYPEEXACT:
2006*22dc650dSSadaf Ebrahimi       case OP_PROP_EXTRA + OP_TYPEUPTO:
2007*22dc650dSSadaf Ebrahimi       case OP_PROP_EXTRA + OP_TYPEMINUPTO:
2008*22dc650dSSadaf Ebrahimi       case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
2009*22dc650dSSadaf Ebrahimi       if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
2010*22dc650dSSadaf Ebrahimi         { ADD_ACTIVE(state_offset + 1 + IMM2_SIZE + 3, 0); }
2011*22dc650dSSadaf Ebrahimi       count = current_state->count;  /* Number already matched */
2012*22dc650dSSadaf Ebrahimi       if (clen > 0)
2013*22dc650dSSadaf Ebrahimi         {
2014*22dc650dSSadaf Ebrahimi         BOOL OK;
2015*22dc650dSSadaf Ebrahimi         int chartype;
2016*22dc650dSSadaf Ebrahimi         const uint32_t *cp;
2017*22dc650dSSadaf Ebrahimi         const ucd_record * prop = GET_UCD(c);
2018*22dc650dSSadaf Ebrahimi         switch(code[1 + IMM2_SIZE + 1])
2019*22dc650dSSadaf Ebrahimi           {
2020*22dc650dSSadaf Ebrahimi           case PT_ANY:
2021*22dc650dSSadaf Ebrahimi           OK = TRUE;
2022*22dc650dSSadaf Ebrahimi           break;
2023*22dc650dSSadaf Ebrahimi 
2024*22dc650dSSadaf Ebrahimi           case PT_LAMP:
2025*22dc650dSSadaf Ebrahimi           chartype = prop->chartype;
2026*22dc650dSSadaf Ebrahimi           OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
2027*22dc650dSSadaf Ebrahimi           break;
2028*22dc650dSSadaf Ebrahimi 
2029*22dc650dSSadaf Ebrahimi           case PT_GC:
2030*22dc650dSSadaf Ebrahimi           OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2];
2031*22dc650dSSadaf Ebrahimi           break;
2032*22dc650dSSadaf Ebrahimi 
2033*22dc650dSSadaf Ebrahimi           case PT_PC:
2034*22dc650dSSadaf Ebrahimi           OK = prop->chartype == code[1 + IMM2_SIZE + 2];
2035*22dc650dSSadaf Ebrahimi           break;
2036*22dc650dSSadaf Ebrahimi 
2037*22dc650dSSadaf Ebrahimi           case PT_SC:
2038*22dc650dSSadaf Ebrahimi           OK = prop->script == code[1 + IMM2_SIZE + 2];
2039*22dc650dSSadaf Ebrahimi           break;
2040*22dc650dSSadaf Ebrahimi 
2041*22dc650dSSadaf Ebrahimi           case PT_SCX:
2042*22dc650dSSadaf Ebrahimi           OK = (prop->script == code[1 + IMM2_SIZE + 2] ||
2043*22dc650dSSadaf Ebrahimi                 MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop),
2044*22dc650dSSadaf Ebrahimi                   code[1 + IMM2_SIZE + 2]) != 0);
2045*22dc650dSSadaf Ebrahimi           break;
2046*22dc650dSSadaf Ebrahimi 
2047*22dc650dSSadaf Ebrahimi           /* These are specials for combination cases. */
2048*22dc650dSSadaf Ebrahimi 
2049*22dc650dSSadaf Ebrahimi           case PT_ALNUM:
2050*22dc650dSSadaf Ebrahimi           chartype = prop->chartype;
2051*22dc650dSSadaf Ebrahimi           OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
2052*22dc650dSSadaf Ebrahimi                PRIV(ucp_gentype)[chartype] == ucp_N;
2053*22dc650dSSadaf Ebrahimi           break;
2054*22dc650dSSadaf Ebrahimi 
2055*22dc650dSSadaf Ebrahimi           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
2056*22dc650dSSadaf Ebrahimi           which means that Perl space and POSIX space are now identical. PCRE
2057*22dc650dSSadaf Ebrahimi           was changed at release 8.34. */
2058*22dc650dSSadaf Ebrahimi 
2059*22dc650dSSadaf Ebrahimi           case PT_SPACE:    /* Perl space */
2060*22dc650dSSadaf Ebrahimi           case PT_PXSPACE:  /* POSIX space */
2061*22dc650dSSadaf Ebrahimi           switch(c)
2062*22dc650dSSadaf Ebrahimi             {
2063*22dc650dSSadaf Ebrahimi             HSPACE_CASES:
2064*22dc650dSSadaf Ebrahimi             VSPACE_CASES:
2065*22dc650dSSadaf Ebrahimi             OK = TRUE;
2066*22dc650dSSadaf Ebrahimi             break;
2067*22dc650dSSadaf Ebrahimi 
2068*22dc650dSSadaf Ebrahimi             default:
2069*22dc650dSSadaf Ebrahimi             OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
2070*22dc650dSSadaf Ebrahimi             break;
2071*22dc650dSSadaf Ebrahimi             }
2072*22dc650dSSadaf Ebrahimi           break;
2073*22dc650dSSadaf Ebrahimi 
2074*22dc650dSSadaf Ebrahimi           case PT_WORD:
2075*22dc650dSSadaf Ebrahimi           chartype = prop->chartype;
2076*22dc650dSSadaf Ebrahimi           OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
2077*22dc650dSSadaf Ebrahimi                PRIV(ucp_gentype)[chartype] == ucp_N ||
2078*22dc650dSSadaf Ebrahimi                chartype == ucp_Mn || chartype == ucp_Pc;
2079*22dc650dSSadaf Ebrahimi           break;
2080*22dc650dSSadaf Ebrahimi 
2081*22dc650dSSadaf Ebrahimi           case PT_CLIST:
2082*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 32
2083*22dc650dSSadaf Ebrahimi           if (c > MAX_UTF_CODE_POINT)
2084*22dc650dSSadaf Ebrahimi             {
2085*22dc650dSSadaf Ebrahimi             OK = FALSE;
2086*22dc650dSSadaf Ebrahimi             break;
2087*22dc650dSSadaf Ebrahimi             }
2088*22dc650dSSadaf Ebrahimi #endif
2089*22dc650dSSadaf Ebrahimi           cp = PRIV(ucd_caseless_sets) + code[1 + IMM2_SIZE + 2];
2090*22dc650dSSadaf Ebrahimi           for (;;)
2091*22dc650dSSadaf Ebrahimi             {
2092*22dc650dSSadaf Ebrahimi             if (c < *cp) { OK = FALSE; break; }
2093*22dc650dSSadaf Ebrahimi             if (c == *cp++) { OK = TRUE; break; }
2094*22dc650dSSadaf Ebrahimi             }
2095*22dc650dSSadaf Ebrahimi           break;
2096*22dc650dSSadaf Ebrahimi 
2097*22dc650dSSadaf Ebrahimi           case PT_UCNC:
2098*22dc650dSSadaf Ebrahimi           OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
2099*22dc650dSSadaf Ebrahimi                c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
2100*22dc650dSSadaf Ebrahimi                c >= 0xe000;
2101*22dc650dSSadaf Ebrahimi           break;
2102*22dc650dSSadaf Ebrahimi 
2103*22dc650dSSadaf Ebrahimi           case PT_BIDICL:
2104*22dc650dSSadaf Ebrahimi           OK = UCD_BIDICLASS(c) == code[1 + IMM2_SIZE + 2];
2105*22dc650dSSadaf Ebrahimi           break;
2106*22dc650dSSadaf Ebrahimi 
2107*22dc650dSSadaf Ebrahimi           case PT_BOOL:
2108*22dc650dSSadaf Ebrahimi           OK = MAPBIT(PRIV(ucd_boolprop_sets) +
2109*22dc650dSSadaf Ebrahimi             UCD_BPROPS_PROP(prop), code[1 + IMM2_SIZE + 2]) != 0;
2110*22dc650dSSadaf Ebrahimi           break;
2111*22dc650dSSadaf Ebrahimi 
2112*22dc650dSSadaf Ebrahimi           /* Should never occur, but keep compilers from grumbling. */
2113*22dc650dSSadaf Ebrahimi 
2114*22dc650dSSadaf Ebrahimi           default:
2115*22dc650dSSadaf Ebrahimi           OK = codevalue != OP_PROP;
2116*22dc650dSSadaf Ebrahimi           break;
2117*22dc650dSSadaf Ebrahimi           }
2118*22dc650dSSadaf Ebrahimi 
2119*22dc650dSSadaf Ebrahimi         if (OK == (d == OP_PROP))
2120*22dc650dSSadaf Ebrahimi           {
2121*22dc650dSSadaf Ebrahimi           if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
2122*22dc650dSSadaf Ebrahimi             {
2123*22dc650dSSadaf Ebrahimi             active_count--;           /* Remove non-match possibility */
2124*22dc650dSSadaf Ebrahimi             next_active_state--;
2125*22dc650dSSadaf Ebrahimi             }
2126*22dc650dSSadaf Ebrahimi           if (++count >= (int)GET2(code, 1))
2127*22dc650dSSadaf Ebrahimi             { ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }
2128*22dc650dSSadaf Ebrahimi           else
2129*22dc650dSSadaf Ebrahimi             { ADD_NEW(state_offset, count); }
2130*22dc650dSSadaf Ebrahimi           }
2131*22dc650dSSadaf Ebrahimi         }
2132*22dc650dSSadaf Ebrahimi       break;
2133*22dc650dSSadaf Ebrahimi 
2134*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
2135*22dc650dSSadaf Ebrahimi       case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
2136*22dc650dSSadaf Ebrahimi       case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
2137*22dc650dSSadaf Ebrahimi       case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
2138*22dc650dSSadaf Ebrahimi       case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
2139*22dc650dSSadaf Ebrahimi       if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
2140*22dc650dSSadaf Ebrahimi         { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
2141*22dc650dSSadaf Ebrahimi       count = current_state->count;  /* Number already matched */
2142*22dc650dSSadaf Ebrahimi       if (clen > 0)
2143*22dc650dSSadaf Ebrahimi         {
2144*22dc650dSSadaf Ebrahimi         PCRE2_SPTR nptr;
2145*22dc650dSSadaf Ebrahimi         int ncount = 0;
2146*22dc650dSSadaf Ebrahimi         if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
2147*22dc650dSSadaf Ebrahimi           {
2148*22dc650dSSadaf Ebrahimi           active_count--;           /* Remove non-match possibility */
2149*22dc650dSSadaf Ebrahimi           next_active_state--;
2150*22dc650dSSadaf Ebrahimi           }
2151*22dc650dSSadaf Ebrahimi         nptr = PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf,
2152*22dc650dSSadaf Ebrahimi           &ncount);
2153*22dc650dSSadaf Ebrahimi         if (nptr >= end_subject && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
2154*22dc650dSSadaf Ebrahimi             reset_could_continue = TRUE;
2155*22dc650dSSadaf Ebrahimi         if (++count >= (int)GET2(code, 1))
2156*22dc650dSSadaf Ebrahimi           { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
2157*22dc650dSSadaf Ebrahimi         else
2158*22dc650dSSadaf Ebrahimi           { ADD_NEW_DATA(-state_offset, count, ncount); }
2159*22dc650dSSadaf Ebrahimi         }
2160*22dc650dSSadaf Ebrahimi       break;
2161*22dc650dSSadaf Ebrahimi #endif
2162*22dc650dSSadaf Ebrahimi 
2163*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
2164*22dc650dSSadaf Ebrahimi       case OP_ANYNL_EXTRA + OP_TYPEEXACT:
2165*22dc650dSSadaf Ebrahimi       case OP_ANYNL_EXTRA + OP_TYPEUPTO:
2166*22dc650dSSadaf Ebrahimi       case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
2167*22dc650dSSadaf Ebrahimi       case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
2168*22dc650dSSadaf Ebrahimi       if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
2169*22dc650dSSadaf Ebrahimi         { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
2170*22dc650dSSadaf Ebrahimi       count = current_state->count;  /* Number already matched */
2171*22dc650dSSadaf Ebrahimi       if (clen > 0)
2172*22dc650dSSadaf Ebrahimi         {
2173*22dc650dSSadaf Ebrahimi         int ncount = 0;
2174*22dc650dSSadaf Ebrahimi         switch (c)
2175*22dc650dSSadaf Ebrahimi           {
2176*22dc650dSSadaf Ebrahimi           case CHAR_VT:
2177*22dc650dSSadaf Ebrahimi           case CHAR_FF:
2178*22dc650dSSadaf Ebrahimi           case CHAR_NEL:
2179*22dc650dSSadaf Ebrahimi #ifndef EBCDIC
2180*22dc650dSSadaf Ebrahimi           case 0x2028:
2181*22dc650dSSadaf Ebrahimi           case 0x2029:
2182*22dc650dSSadaf Ebrahimi #endif  /* Not EBCDIC */
2183*22dc650dSSadaf Ebrahimi           if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break;
2184*22dc650dSSadaf Ebrahimi           goto ANYNL03;
2185*22dc650dSSadaf Ebrahimi 
2186*22dc650dSSadaf Ebrahimi           case CHAR_CR:
2187*22dc650dSSadaf Ebrahimi           if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
2188*22dc650dSSadaf Ebrahimi           /* Fall through */
2189*22dc650dSSadaf Ebrahimi 
2190*22dc650dSSadaf Ebrahimi           ANYNL03:
2191*22dc650dSSadaf Ebrahimi           case CHAR_LF:
2192*22dc650dSSadaf Ebrahimi           if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
2193*22dc650dSSadaf Ebrahimi             {
2194*22dc650dSSadaf Ebrahimi             active_count--;           /* Remove non-match possibility */
2195*22dc650dSSadaf Ebrahimi             next_active_state--;
2196*22dc650dSSadaf Ebrahimi             }
2197*22dc650dSSadaf Ebrahimi           if (++count >= (int)GET2(code, 1))
2198*22dc650dSSadaf Ebrahimi             { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
2199*22dc650dSSadaf Ebrahimi           else
2200*22dc650dSSadaf Ebrahimi             { ADD_NEW_DATA(-state_offset, count, ncount); }
2201*22dc650dSSadaf Ebrahimi           break;
2202*22dc650dSSadaf Ebrahimi 
2203*22dc650dSSadaf Ebrahimi           default:
2204*22dc650dSSadaf Ebrahimi           break;
2205*22dc650dSSadaf Ebrahimi           }
2206*22dc650dSSadaf Ebrahimi         }
2207*22dc650dSSadaf Ebrahimi       break;
2208*22dc650dSSadaf Ebrahimi 
2209*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
2210*22dc650dSSadaf Ebrahimi       case OP_VSPACE_EXTRA + OP_TYPEEXACT:
2211*22dc650dSSadaf Ebrahimi       case OP_VSPACE_EXTRA + OP_TYPEUPTO:
2212*22dc650dSSadaf Ebrahimi       case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
2213*22dc650dSSadaf Ebrahimi       case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
2214*22dc650dSSadaf Ebrahimi       if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
2215*22dc650dSSadaf Ebrahimi         { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
2216*22dc650dSSadaf Ebrahimi       count = current_state->count;  /* Number already matched */
2217*22dc650dSSadaf Ebrahimi       if (clen > 0)
2218*22dc650dSSadaf Ebrahimi         {
2219*22dc650dSSadaf Ebrahimi         BOOL OK;
2220*22dc650dSSadaf Ebrahimi         switch (c)
2221*22dc650dSSadaf Ebrahimi           {
2222*22dc650dSSadaf Ebrahimi           VSPACE_CASES:
2223*22dc650dSSadaf Ebrahimi           OK = TRUE;
2224*22dc650dSSadaf Ebrahimi           break;
2225*22dc650dSSadaf Ebrahimi 
2226*22dc650dSSadaf Ebrahimi           default:
2227*22dc650dSSadaf Ebrahimi           OK = FALSE;
2228*22dc650dSSadaf Ebrahimi           }
2229*22dc650dSSadaf Ebrahimi 
2230*22dc650dSSadaf Ebrahimi         if (OK == (d == OP_VSPACE))
2231*22dc650dSSadaf Ebrahimi           {
2232*22dc650dSSadaf Ebrahimi           if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
2233*22dc650dSSadaf Ebrahimi             {
2234*22dc650dSSadaf Ebrahimi             active_count--;           /* Remove non-match possibility */
2235*22dc650dSSadaf Ebrahimi             next_active_state--;
2236*22dc650dSSadaf Ebrahimi             }
2237*22dc650dSSadaf Ebrahimi           if (++count >= (int)GET2(code, 1))
2238*22dc650dSSadaf Ebrahimi             { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
2239*22dc650dSSadaf Ebrahimi           else
2240*22dc650dSSadaf Ebrahimi             { ADD_NEW_DATA(-state_offset, count, 0); }
2241*22dc650dSSadaf Ebrahimi           }
2242*22dc650dSSadaf Ebrahimi         }
2243*22dc650dSSadaf Ebrahimi       break;
2244*22dc650dSSadaf Ebrahimi 
2245*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
2246*22dc650dSSadaf Ebrahimi       case OP_HSPACE_EXTRA + OP_TYPEEXACT:
2247*22dc650dSSadaf Ebrahimi       case OP_HSPACE_EXTRA + OP_TYPEUPTO:
2248*22dc650dSSadaf Ebrahimi       case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
2249*22dc650dSSadaf Ebrahimi       case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
2250*22dc650dSSadaf Ebrahimi       if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
2251*22dc650dSSadaf Ebrahimi         { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
2252*22dc650dSSadaf Ebrahimi       count = current_state->count;  /* Number already matched */
2253*22dc650dSSadaf Ebrahimi       if (clen > 0)
2254*22dc650dSSadaf Ebrahimi         {
2255*22dc650dSSadaf Ebrahimi         BOOL OK;
2256*22dc650dSSadaf Ebrahimi         switch (c)
2257*22dc650dSSadaf Ebrahimi           {
2258*22dc650dSSadaf Ebrahimi           HSPACE_CASES:
2259*22dc650dSSadaf Ebrahimi           OK = TRUE;
2260*22dc650dSSadaf Ebrahimi           break;
2261*22dc650dSSadaf Ebrahimi 
2262*22dc650dSSadaf Ebrahimi           default:
2263*22dc650dSSadaf Ebrahimi           OK = FALSE;
2264*22dc650dSSadaf Ebrahimi           break;
2265*22dc650dSSadaf Ebrahimi           }
2266*22dc650dSSadaf Ebrahimi 
2267*22dc650dSSadaf Ebrahimi         if (OK == (d == OP_HSPACE))
2268*22dc650dSSadaf Ebrahimi           {
2269*22dc650dSSadaf Ebrahimi           if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
2270*22dc650dSSadaf Ebrahimi             {
2271*22dc650dSSadaf Ebrahimi             active_count--;           /* Remove non-match possibility */
2272*22dc650dSSadaf Ebrahimi             next_active_state--;
2273*22dc650dSSadaf Ebrahimi             }
2274*22dc650dSSadaf Ebrahimi           if (++count >= (int)GET2(code, 1))
2275*22dc650dSSadaf Ebrahimi             { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
2276*22dc650dSSadaf Ebrahimi           else
2277*22dc650dSSadaf Ebrahimi             { ADD_NEW_DATA(-state_offset, count, 0); }
2278*22dc650dSSadaf Ebrahimi           }
2279*22dc650dSSadaf Ebrahimi         }
2280*22dc650dSSadaf Ebrahimi       break;
2281*22dc650dSSadaf Ebrahimi 
2282*22dc650dSSadaf Ebrahimi /* ========================================================================== */
2283*22dc650dSSadaf Ebrahimi       /* These opcodes are followed by a character that is usually compared
2284*22dc650dSSadaf Ebrahimi       to the current subject character; it is loaded into d. We still get
2285*22dc650dSSadaf Ebrahimi       here even if there is no subject character, because in some cases zero
2286*22dc650dSSadaf Ebrahimi       repetitions are permitted. */
2287*22dc650dSSadaf Ebrahimi 
2288*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
2289*22dc650dSSadaf Ebrahimi       case OP_CHAR:
2290*22dc650dSSadaf Ebrahimi       if (clen > 0 && c == d) { ADD_NEW(state_offset + dlen + 1, 0); }
2291*22dc650dSSadaf Ebrahimi       break;
2292*22dc650dSSadaf Ebrahimi 
2293*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
2294*22dc650dSSadaf Ebrahimi       case OP_CHARI:
2295*22dc650dSSadaf Ebrahimi       if (clen == 0) break;
2296*22dc650dSSadaf Ebrahimi 
2297*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
2298*22dc650dSSadaf Ebrahimi       if (utf_or_ucp)
2299*22dc650dSSadaf Ebrahimi         {
2300*22dc650dSSadaf Ebrahimi         if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
2301*22dc650dSSadaf Ebrahimi           {
2302*22dc650dSSadaf Ebrahimi           unsigned int othercase;
2303*22dc650dSSadaf Ebrahimi           if (c < 128)
2304*22dc650dSSadaf Ebrahimi             othercase = fcc[c];
2305*22dc650dSSadaf Ebrahimi           else
2306*22dc650dSSadaf Ebrahimi             othercase = UCD_OTHERCASE(c);
2307*22dc650dSSadaf Ebrahimi           if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
2308*22dc650dSSadaf Ebrahimi           }
2309*22dc650dSSadaf Ebrahimi         }
2310*22dc650dSSadaf Ebrahimi       else
2311*22dc650dSSadaf Ebrahimi #endif  /* SUPPORT_UNICODE */
2312*22dc650dSSadaf Ebrahimi       /* Not UTF or UCP mode */
2313*22dc650dSSadaf Ebrahimi         {
2314*22dc650dSSadaf Ebrahimi         if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
2315*22dc650dSSadaf Ebrahimi           { ADD_NEW(state_offset + 2, 0); }
2316*22dc650dSSadaf Ebrahimi         }
2317*22dc650dSSadaf Ebrahimi       break;
2318*22dc650dSSadaf Ebrahimi 
2319*22dc650dSSadaf Ebrahimi 
2320*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
2321*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
2322*22dc650dSSadaf Ebrahimi       /* This is a tricky one because it can match more than one character.
2323*22dc650dSSadaf Ebrahimi       Find out how many characters to skip, and then set up a negative state
2324*22dc650dSSadaf Ebrahimi       to wait for them to pass before continuing. */
2325*22dc650dSSadaf Ebrahimi 
2326*22dc650dSSadaf Ebrahimi       case OP_EXTUNI:
2327*22dc650dSSadaf Ebrahimi       if (clen > 0)
2328*22dc650dSSadaf Ebrahimi         {
2329*22dc650dSSadaf Ebrahimi         int ncount = 0;
2330*22dc650dSSadaf Ebrahimi         PCRE2_SPTR nptr = PRIV(extuni)(c, ptr + clen, mb->start_subject,
2331*22dc650dSSadaf Ebrahimi           end_subject, utf, &ncount);
2332*22dc650dSSadaf Ebrahimi         if (nptr >= end_subject && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
2333*22dc650dSSadaf Ebrahimi             reset_could_continue = TRUE;
2334*22dc650dSSadaf Ebrahimi         ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
2335*22dc650dSSadaf Ebrahimi         }
2336*22dc650dSSadaf Ebrahimi       break;
2337*22dc650dSSadaf Ebrahimi #endif
2338*22dc650dSSadaf Ebrahimi 
2339*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
2340*22dc650dSSadaf Ebrahimi       /* This is tricky like EXTUNI because it too can match more than one
2341*22dc650dSSadaf Ebrahimi       character (when CR is followed by LF). In this case, set up a negative
2342*22dc650dSSadaf Ebrahimi       state to wait for one character to pass before continuing. */
2343*22dc650dSSadaf Ebrahimi 
2344*22dc650dSSadaf Ebrahimi       case OP_ANYNL:
2345*22dc650dSSadaf Ebrahimi       if (clen > 0) switch(c)
2346*22dc650dSSadaf Ebrahimi         {
2347*22dc650dSSadaf Ebrahimi         case CHAR_VT:
2348*22dc650dSSadaf Ebrahimi         case CHAR_FF:
2349*22dc650dSSadaf Ebrahimi         case CHAR_NEL:
2350*22dc650dSSadaf Ebrahimi #ifndef EBCDIC
2351*22dc650dSSadaf Ebrahimi         case 0x2028:
2352*22dc650dSSadaf Ebrahimi         case 0x2029:
2353*22dc650dSSadaf Ebrahimi #endif  /* Not EBCDIC */
2354*22dc650dSSadaf Ebrahimi         if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break;
2355*22dc650dSSadaf Ebrahimi         /* Fall through */
2356*22dc650dSSadaf Ebrahimi 
2357*22dc650dSSadaf Ebrahimi         case CHAR_LF:
2358*22dc650dSSadaf Ebrahimi         ADD_NEW(state_offset + 1, 0);
2359*22dc650dSSadaf Ebrahimi         break;
2360*22dc650dSSadaf Ebrahimi 
2361*22dc650dSSadaf Ebrahimi         case CHAR_CR:
2362*22dc650dSSadaf Ebrahimi         if (ptr + 1 >= end_subject)
2363*22dc650dSSadaf Ebrahimi           {
2364*22dc650dSSadaf Ebrahimi           ADD_NEW(state_offset + 1, 0);
2365*22dc650dSSadaf Ebrahimi           if ((mb->moptions & PCRE2_PARTIAL_HARD) != 0)
2366*22dc650dSSadaf Ebrahimi             reset_could_continue = TRUE;
2367*22dc650dSSadaf Ebrahimi           }
2368*22dc650dSSadaf Ebrahimi         else if (UCHAR21TEST(ptr + 1) == CHAR_LF)
2369*22dc650dSSadaf Ebrahimi           {
2370*22dc650dSSadaf Ebrahimi           ADD_NEW_DATA(-(state_offset + 1), 0, 1);
2371*22dc650dSSadaf Ebrahimi           }
2372*22dc650dSSadaf Ebrahimi         else
2373*22dc650dSSadaf Ebrahimi           {
2374*22dc650dSSadaf Ebrahimi           ADD_NEW(state_offset + 1, 0);
2375*22dc650dSSadaf Ebrahimi           }
2376*22dc650dSSadaf Ebrahimi         break;
2377*22dc650dSSadaf Ebrahimi         }
2378*22dc650dSSadaf Ebrahimi       break;
2379*22dc650dSSadaf Ebrahimi 
2380*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
2381*22dc650dSSadaf Ebrahimi       case OP_NOT_VSPACE:
2382*22dc650dSSadaf Ebrahimi       if (clen > 0) switch(c)
2383*22dc650dSSadaf Ebrahimi         {
2384*22dc650dSSadaf Ebrahimi         VSPACE_CASES:
2385*22dc650dSSadaf Ebrahimi         break;
2386*22dc650dSSadaf Ebrahimi 
2387*22dc650dSSadaf Ebrahimi         default:
2388*22dc650dSSadaf Ebrahimi         ADD_NEW(state_offset + 1, 0);
2389*22dc650dSSadaf Ebrahimi         break;
2390*22dc650dSSadaf Ebrahimi         }
2391*22dc650dSSadaf Ebrahimi       break;
2392*22dc650dSSadaf Ebrahimi 
2393*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
2394*22dc650dSSadaf Ebrahimi       case OP_VSPACE:
2395*22dc650dSSadaf Ebrahimi       if (clen > 0) switch(c)
2396*22dc650dSSadaf Ebrahimi         {
2397*22dc650dSSadaf Ebrahimi         VSPACE_CASES:
2398*22dc650dSSadaf Ebrahimi         ADD_NEW(state_offset + 1, 0);
2399*22dc650dSSadaf Ebrahimi         break;
2400*22dc650dSSadaf Ebrahimi 
2401*22dc650dSSadaf Ebrahimi         default:
2402*22dc650dSSadaf Ebrahimi         break;
2403*22dc650dSSadaf Ebrahimi         }
2404*22dc650dSSadaf Ebrahimi       break;
2405*22dc650dSSadaf Ebrahimi 
2406*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
2407*22dc650dSSadaf Ebrahimi       case OP_NOT_HSPACE:
2408*22dc650dSSadaf Ebrahimi       if (clen > 0) switch(c)
2409*22dc650dSSadaf Ebrahimi         {
2410*22dc650dSSadaf Ebrahimi         HSPACE_CASES:
2411*22dc650dSSadaf Ebrahimi         break;
2412*22dc650dSSadaf Ebrahimi 
2413*22dc650dSSadaf Ebrahimi         default:
2414*22dc650dSSadaf Ebrahimi         ADD_NEW(state_offset + 1, 0);
2415*22dc650dSSadaf Ebrahimi         break;
2416*22dc650dSSadaf Ebrahimi         }
2417*22dc650dSSadaf Ebrahimi       break;
2418*22dc650dSSadaf Ebrahimi 
2419*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
2420*22dc650dSSadaf Ebrahimi       case OP_HSPACE:
2421*22dc650dSSadaf Ebrahimi       if (clen > 0) switch(c)
2422*22dc650dSSadaf Ebrahimi         {
2423*22dc650dSSadaf Ebrahimi         HSPACE_CASES:
2424*22dc650dSSadaf Ebrahimi         ADD_NEW(state_offset + 1, 0);
2425*22dc650dSSadaf Ebrahimi         break;
2426*22dc650dSSadaf Ebrahimi 
2427*22dc650dSSadaf Ebrahimi         default:
2428*22dc650dSSadaf Ebrahimi         break;
2429*22dc650dSSadaf Ebrahimi         }
2430*22dc650dSSadaf Ebrahimi       break;
2431*22dc650dSSadaf Ebrahimi 
2432*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
2433*22dc650dSSadaf Ebrahimi       /* Match a negated single character casefully. */
2434*22dc650dSSadaf Ebrahimi 
2435*22dc650dSSadaf Ebrahimi       case OP_NOT:
2436*22dc650dSSadaf Ebrahimi       if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
2437*22dc650dSSadaf Ebrahimi       break;
2438*22dc650dSSadaf Ebrahimi 
2439*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
2440*22dc650dSSadaf Ebrahimi       /* Match a negated single character caselessly. */
2441*22dc650dSSadaf Ebrahimi 
2442*22dc650dSSadaf Ebrahimi       case OP_NOTI:
2443*22dc650dSSadaf Ebrahimi       if (clen > 0)
2444*22dc650dSSadaf Ebrahimi         {
2445*22dc650dSSadaf Ebrahimi         uint32_t otherd;
2446*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
2447*22dc650dSSadaf Ebrahimi         if (utf_or_ucp && d >= 128)
2448*22dc650dSSadaf Ebrahimi           otherd = UCD_OTHERCASE(d);
2449*22dc650dSSadaf Ebrahimi         else
2450*22dc650dSSadaf Ebrahimi #endif  /* SUPPORT_UNICODE */
2451*22dc650dSSadaf Ebrahimi         otherd = TABLE_GET(d, fcc, d);
2452*22dc650dSSadaf Ebrahimi         if (c != d && c != otherd)
2453*22dc650dSSadaf Ebrahimi           { ADD_NEW(state_offset + dlen + 1, 0); }
2454*22dc650dSSadaf Ebrahimi         }
2455*22dc650dSSadaf Ebrahimi       break;
2456*22dc650dSSadaf Ebrahimi 
2457*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
2458*22dc650dSSadaf Ebrahimi       case OP_PLUSI:
2459*22dc650dSSadaf Ebrahimi       case OP_MINPLUSI:
2460*22dc650dSSadaf Ebrahimi       case OP_POSPLUSI:
2461*22dc650dSSadaf Ebrahimi       case OP_NOTPLUSI:
2462*22dc650dSSadaf Ebrahimi       case OP_NOTMINPLUSI:
2463*22dc650dSSadaf Ebrahimi       case OP_NOTPOSPLUSI:
2464*22dc650dSSadaf Ebrahimi       caseless = TRUE;
2465*22dc650dSSadaf Ebrahimi       codevalue -= OP_STARI - OP_STAR;
2466*22dc650dSSadaf Ebrahimi 
2467*22dc650dSSadaf Ebrahimi       /* Fall through */
2468*22dc650dSSadaf Ebrahimi       case OP_PLUS:
2469*22dc650dSSadaf Ebrahimi       case OP_MINPLUS:
2470*22dc650dSSadaf Ebrahimi       case OP_POSPLUS:
2471*22dc650dSSadaf Ebrahimi       case OP_NOTPLUS:
2472*22dc650dSSadaf Ebrahimi       case OP_NOTMINPLUS:
2473*22dc650dSSadaf Ebrahimi       case OP_NOTPOSPLUS:
2474*22dc650dSSadaf Ebrahimi       count = current_state->count;  /* Already matched */
2475*22dc650dSSadaf Ebrahimi       if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
2476*22dc650dSSadaf Ebrahimi       if (clen > 0)
2477*22dc650dSSadaf Ebrahimi         {
2478*22dc650dSSadaf Ebrahimi         uint32_t otherd = NOTACHAR;
2479*22dc650dSSadaf Ebrahimi         if (caseless)
2480*22dc650dSSadaf Ebrahimi           {
2481*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
2482*22dc650dSSadaf Ebrahimi           if (utf_or_ucp && d >= 128)
2483*22dc650dSSadaf Ebrahimi             otherd = UCD_OTHERCASE(d);
2484*22dc650dSSadaf Ebrahimi           else
2485*22dc650dSSadaf Ebrahimi #endif  /* SUPPORT_UNICODE */
2486*22dc650dSSadaf Ebrahimi           otherd = TABLE_GET(d, fcc, d);
2487*22dc650dSSadaf Ebrahimi           }
2488*22dc650dSSadaf Ebrahimi         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2489*22dc650dSSadaf Ebrahimi           {
2490*22dc650dSSadaf Ebrahimi           if (count > 0 &&
2491*22dc650dSSadaf Ebrahimi               (codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS))
2492*22dc650dSSadaf Ebrahimi             {
2493*22dc650dSSadaf Ebrahimi             active_count--;             /* Remove non-match possibility */
2494*22dc650dSSadaf Ebrahimi             next_active_state--;
2495*22dc650dSSadaf Ebrahimi             }
2496*22dc650dSSadaf Ebrahimi           count++;
2497*22dc650dSSadaf Ebrahimi           ADD_NEW(state_offset, count);
2498*22dc650dSSadaf Ebrahimi           }
2499*22dc650dSSadaf Ebrahimi         }
2500*22dc650dSSadaf Ebrahimi       break;
2501*22dc650dSSadaf Ebrahimi 
2502*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
2503*22dc650dSSadaf Ebrahimi       case OP_QUERYI:
2504*22dc650dSSadaf Ebrahimi       case OP_MINQUERYI:
2505*22dc650dSSadaf Ebrahimi       case OP_POSQUERYI:
2506*22dc650dSSadaf Ebrahimi       case OP_NOTQUERYI:
2507*22dc650dSSadaf Ebrahimi       case OP_NOTMINQUERYI:
2508*22dc650dSSadaf Ebrahimi       case OP_NOTPOSQUERYI:
2509*22dc650dSSadaf Ebrahimi       caseless = TRUE;
2510*22dc650dSSadaf Ebrahimi       codevalue -= OP_STARI - OP_STAR;
2511*22dc650dSSadaf Ebrahimi       /* Fall through */
2512*22dc650dSSadaf Ebrahimi       case OP_QUERY:
2513*22dc650dSSadaf Ebrahimi       case OP_MINQUERY:
2514*22dc650dSSadaf Ebrahimi       case OP_POSQUERY:
2515*22dc650dSSadaf Ebrahimi       case OP_NOTQUERY:
2516*22dc650dSSadaf Ebrahimi       case OP_NOTMINQUERY:
2517*22dc650dSSadaf Ebrahimi       case OP_NOTPOSQUERY:
2518*22dc650dSSadaf Ebrahimi       ADD_ACTIVE(state_offset + dlen + 1, 0);
2519*22dc650dSSadaf Ebrahimi       if (clen > 0)
2520*22dc650dSSadaf Ebrahimi         {
2521*22dc650dSSadaf Ebrahimi         uint32_t otherd = NOTACHAR;
2522*22dc650dSSadaf Ebrahimi         if (caseless)
2523*22dc650dSSadaf Ebrahimi           {
2524*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
2525*22dc650dSSadaf Ebrahimi           if (utf_or_ucp && d >= 128)
2526*22dc650dSSadaf Ebrahimi             otherd = UCD_OTHERCASE(d);
2527*22dc650dSSadaf Ebrahimi           else
2528*22dc650dSSadaf Ebrahimi #endif  /* SUPPORT_UNICODE */
2529*22dc650dSSadaf Ebrahimi           otherd = TABLE_GET(d, fcc, d);
2530*22dc650dSSadaf Ebrahimi           }
2531*22dc650dSSadaf Ebrahimi         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2532*22dc650dSSadaf Ebrahimi           {
2533*22dc650dSSadaf Ebrahimi           if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY)
2534*22dc650dSSadaf Ebrahimi             {
2535*22dc650dSSadaf Ebrahimi             active_count--;            /* Remove non-match possibility */
2536*22dc650dSSadaf Ebrahimi             next_active_state--;
2537*22dc650dSSadaf Ebrahimi             }
2538*22dc650dSSadaf Ebrahimi           ADD_NEW(state_offset + dlen + 1, 0);
2539*22dc650dSSadaf Ebrahimi           }
2540*22dc650dSSadaf Ebrahimi         }
2541*22dc650dSSadaf Ebrahimi       break;
2542*22dc650dSSadaf Ebrahimi 
2543*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
2544*22dc650dSSadaf Ebrahimi       case OP_STARI:
2545*22dc650dSSadaf Ebrahimi       case OP_MINSTARI:
2546*22dc650dSSadaf Ebrahimi       case OP_POSSTARI:
2547*22dc650dSSadaf Ebrahimi       case OP_NOTSTARI:
2548*22dc650dSSadaf Ebrahimi       case OP_NOTMINSTARI:
2549*22dc650dSSadaf Ebrahimi       case OP_NOTPOSSTARI:
2550*22dc650dSSadaf Ebrahimi       caseless = TRUE;
2551*22dc650dSSadaf Ebrahimi       codevalue -= OP_STARI - OP_STAR;
2552*22dc650dSSadaf Ebrahimi       /* Fall through */
2553*22dc650dSSadaf Ebrahimi       case OP_STAR:
2554*22dc650dSSadaf Ebrahimi       case OP_MINSTAR:
2555*22dc650dSSadaf Ebrahimi       case OP_POSSTAR:
2556*22dc650dSSadaf Ebrahimi       case OP_NOTSTAR:
2557*22dc650dSSadaf Ebrahimi       case OP_NOTMINSTAR:
2558*22dc650dSSadaf Ebrahimi       case OP_NOTPOSSTAR:
2559*22dc650dSSadaf Ebrahimi       ADD_ACTIVE(state_offset + dlen + 1, 0);
2560*22dc650dSSadaf Ebrahimi       if (clen > 0)
2561*22dc650dSSadaf Ebrahimi         {
2562*22dc650dSSadaf Ebrahimi         uint32_t otherd = NOTACHAR;
2563*22dc650dSSadaf Ebrahimi         if (caseless)
2564*22dc650dSSadaf Ebrahimi           {
2565*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
2566*22dc650dSSadaf Ebrahimi           if (utf_or_ucp && d >= 128)
2567*22dc650dSSadaf Ebrahimi             otherd = UCD_OTHERCASE(d);
2568*22dc650dSSadaf Ebrahimi           else
2569*22dc650dSSadaf Ebrahimi #endif  /* SUPPORT_UNICODE */
2570*22dc650dSSadaf Ebrahimi           otherd = TABLE_GET(d, fcc, d);
2571*22dc650dSSadaf Ebrahimi           }
2572*22dc650dSSadaf Ebrahimi         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2573*22dc650dSSadaf Ebrahimi           {
2574*22dc650dSSadaf Ebrahimi           if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR)
2575*22dc650dSSadaf Ebrahimi             {
2576*22dc650dSSadaf Ebrahimi             active_count--;            /* Remove non-match possibility */
2577*22dc650dSSadaf Ebrahimi             next_active_state--;
2578*22dc650dSSadaf Ebrahimi             }
2579*22dc650dSSadaf Ebrahimi           ADD_NEW(state_offset, 0);
2580*22dc650dSSadaf Ebrahimi           }
2581*22dc650dSSadaf Ebrahimi         }
2582*22dc650dSSadaf Ebrahimi       break;
2583*22dc650dSSadaf Ebrahimi 
2584*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
2585*22dc650dSSadaf Ebrahimi       case OP_EXACTI:
2586*22dc650dSSadaf Ebrahimi       case OP_NOTEXACTI:
2587*22dc650dSSadaf Ebrahimi       caseless = TRUE;
2588*22dc650dSSadaf Ebrahimi       codevalue -= OP_STARI - OP_STAR;
2589*22dc650dSSadaf Ebrahimi       /* Fall through */
2590*22dc650dSSadaf Ebrahimi       case OP_EXACT:
2591*22dc650dSSadaf Ebrahimi       case OP_NOTEXACT:
2592*22dc650dSSadaf Ebrahimi       count = current_state->count;  /* Number already matched */
2593*22dc650dSSadaf Ebrahimi       if (clen > 0)
2594*22dc650dSSadaf Ebrahimi         {
2595*22dc650dSSadaf Ebrahimi         uint32_t otherd = NOTACHAR;
2596*22dc650dSSadaf Ebrahimi         if (caseless)
2597*22dc650dSSadaf Ebrahimi           {
2598*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
2599*22dc650dSSadaf Ebrahimi           if (utf_or_ucp && d >= 128)
2600*22dc650dSSadaf Ebrahimi             otherd = UCD_OTHERCASE(d);
2601*22dc650dSSadaf Ebrahimi           else
2602*22dc650dSSadaf Ebrahimi #endif  /* SUPPORT_UNICODE */
2603*22dc650dSSadaf Ebrahimi           otherd = TABLE_GET(d, fcc, d);
2604*22dc650dSSadaf Ebrahimi           }
2605*22dc650dSSadaf Ebrahimi         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2606*22dc650dSSadaf Ebrahimi           {
2607*22dc650dSSadaf Ebrahimi           if (++count >= (int)GET2(code, 1))
2608*22dc650dSSadaf Ebrahimi             { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
2609*22dc650dSSadaf Ebrahimi           else
2610*22dc650dSSadaf Ebrahimi             { ADD_NEW(state_offset, count); }
2611*22dc650dSSadaf Ebrahimi           }
2612*22dc650dSSadaf Ebrahimi         }
2613*22dc650dSSadaf Ebrahimi       break;
2614*22dc650dSSadaf Ebrahimi 
2615*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
2616*22dc650dSSadaf Ebrahimi       case OP_UPTOI:
2617*22dc650dSSadaf Ebrahimi       case OP_MINUPTOI:
2618*22dc650dSSadaf Ebrahimi       case OP_POSUPTOI:
2619*22dc650dSSadaf Ebrahimi       case OP_NOTUPTOI:
2620*22dc650dSSadaf Ebrahimi       case OP_NOTMINUPTOI:
2621*22dc650dSSadaf Ebrahimi       case OP_NOTPOSUPTOI:
2622*22dc650dSSadaf Ebrahimi       caseless = TRUE;
2623*22dc650dSSadaf Ebrahimi       codevalue -= OP_STARI - OP_STAR;
2624*22dc650dSSadaf Ebrahimi       /* Fall through */
2625*22dc650dSSadaf Ebrahimi       case OP_UPTO:
2626*22dc650dSSadaf Ebrahimi       case OP_MINUPTO:
2627*22dc650dSSadaf Ebrahimi       case OP_POSUPTO:
2628*22dc650dSSadaf Ebrahimi       case OP_NOTUPTO:
2629*22dc650dSSadaf Ebrahimi       case OP_NOTMINUPTO:
2630*22dc650dSSadaf Ebrahimi       case OP_NOTPOSUPTO:
2631*22dc650dSSadaf Ebrahimi       ADD_ACTIVE(state_offset + dlen + 1 + IMM2_SIZE, 0);
2632*22dc650dSSadaf Ebrahimi       count = current_state->count;  /* Number already matched */
2633*22dc650dSSadaf Ebrahimi       if (clen > 0)
2634*22dc650dSSadaf Ebrahimi         {
2635*22dc650dSSadaf Ebrahimi         uint32_t otherd = NOTACHAR;
2636*22dc650dSSadaf Ebrahimi         if (caseless)
2637*22dc650dSSadaf Ebrahimi           {
2638*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
2639*22dc650dSSadaf Ebrahimi           if (utf_or_ucp && d >= 128)
2640*22dc650dSSadaf Ebrahimi             otherd = UCD_OTHERCASE(d);
2641*22dc650dSSadaf Ebrahimi           else
2642*22dc650dSSadaf Ebrahimi #endif  /* SUPPORT_UNICODE */
2643*22dc650dSSadaf Ebrahimi           otherd = TABLE_GET(d, fcc, d);
2644*22dc650dSSadaf Ebrahimi           }
2645*22dc650dSSadaf Ebrahimi         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2646*22dc650dSSadaf Ebrahimi           {
2647*22dc650dSSadaf Ebrahimi           if (codevalue == OP_POSUPTO || codevalue == OP_NOTPOSUPTO)
2648*22dc650dSSadaf Ebrahimi             {
2649*22dc650dSSadaf Ebrahimi             active_count--;             /* Remove non-match possibility */
2650*22dc650dSSadaf Ebrahimi             next_active_state--;
2651*22dc650dSSadaf Ebrahimi             }
2652*22dc650dSSadaf Ebrahimi           if (++count >= (int)GET2(code, 1))
2653*22dc650dSSadaf Ebrahimi             { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
2654*22dc650dSSadaf Ebrahimi           else
2655*22dc650dSSadaf Ebrahimi             { ADD_NEW(state_offset, count); }
2656*22dc650dSSadaf Ebrahimi           }
2657*22dc650dSSadaf Ebrahimi         }
2658*22dc650dSSadaf Ebrahimi       break;
2659*22dc650dSSadaf Ebrahimi 
2660*22dc650dSSadaf Ebrahimi 
2661*22dc650dSSadaf Ebrahimi /* ========================================================================== */
2662*22dc650dSSadaf Ebrahimi       /* These are the class-handling opcodes */
2663*22dc650dSSadaf Ebrahimi 
2664*22dc650dSSadaf Ebrahimi       case OP_CLASS:
2665*22dc650dSSadaf Ebrahimi       case OP_NCLASS:
2666*22dc650dSSadaf Ebrahimi       case OP_XCLASS:
2667*22dc650dSSadaf Ebrahimi         {
2668*22dc650dSSadaf Ebrahimi         BOOL isinclass = FALSE;
2669*22dc650dSSadaf Ebrahimi         int next_state_offset;
2670*22dc650dSSadaf Ebrahimi         PCRE2_SPTR ecode;
2671*22dc650dSSadaf Ebrahimi 
2672*22dc650dSSadaf Ebrahimi         /* For a simple class, there is always just a 32-byte table, and we
2673*22dc650dSSadaf Ebrahimi         can set isinclass from it. */
2674*22dc650dSSadaf Ebrahimi 
2675*22dc650dSSadaf Ebrahimi         if (codevalue != OP_XCLASS)
2676*22dc650dSSadaf Ebrahimi           {
2677*22dc650dSSadaf Ebrahimi           ecode = code + 1 + (32 / sizeof(PCRE2_UCHAR));
2678*22dc650dSSadaf Ebrahimi           if (clen > 0)
2679*22dc650dSSadaf Ebrahimi             {
2680*22dc650dSSadaf Ebrahimi             isinclass = (c > 255)? (codevalue == OP_NCLASS) :
2681*22dc650dSSadaf Ebrahimi               ((((uint8_t *)(code + 1))[c/8] & (1u << (c&7))) != 0);
2682*22dc650dSSadaf Ebrahimi             }
2683*22dc650dSSadaf Ebrahimi           }
2684*22dc650dSSadaf Ebrahimi 
2685*22dc650dSSadaf Ebrahimi         /* An extended class may have a table or a list of single characters,
2686*22dc650dSSadaf Ebrahimi         ranges, or both, and it may be positive or negative. There's a
2687*22dc650dSSadaf Ebrahimi         function that sorts all this out. */
2688*22dc650dSSadaf Ebrahimi 
2689*22dc650dSSadaf Ebrahimi         else
2690*22dc650dSSadaf Ebrahimi          {
2691*22dc650dSSadaf Ebrahimi          ecode = code + GET(code, 1);
2692*22dc650dSSadaf Ebrahimi          if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf);
2693*22dc650dSSadaf Ebrahimi          }
2694*22dc650dSSadaf Ebrahimi 
2695*22dc650dSSadaf Ebrahimi         /* At this point, isinclass is set for all kinds of class, and ecode
2696*22dc650dSSadaf Ebrahimi         points to the byte after the end of the class. If there is a
2697*22dc650dSSadaf Ebrahimi         quantifier, this is where it will be. */
2698*22dc650dSSadaf Ebrahimi 
2699*22dc650dSSadaf Ebrahimi         next_state_offset = (int)(ecode - start_code);
2700*22dc650dSSadaf Ebrahimi 
2701*22dc650dSSadaf Ebrahimi         switch (*ecode)
2702*22dc650dSSadaf Ebrahimi           {
2703*22dc650dSSadaf Ebrahimi           case OP_CRSTAR:
2704*22dc650dSSadaf Ebrahimi           case OP_CRMINSTAR:
2705*22dc650dSSadaf Ebrahimi           case OP_CRPOSSTAR:
2706*22dc650dSSadaf Ebrahimi           ADD_ACTIVE(next_state_offset + 1, 0);
2707*22dc650dSSadaf Ebrahimi           if (isinclass)
2708*22dc650dSSadaf Ebrahimi             {
2709*22dc650dSSadaf Ebrahimi             if (*ecode == OP_CRPOSSTAR)
2710*22dc650dSSadaf Ebrahimi               {
2711*22dc650dSSadaf Ebrahimi               active_count--;           /* Remove non-match possibility */
2712*22dc650dSSadaf Ebrahimi               next_active_state--;
2713*22dc650dSSadaf Ebrahimi               }
2714*22dc650dSSadaf Ebrahimi             ADD_NEW(state_offset, 0);
2715*22dc650dSSadaf Ebrahimi             }
2716*22dc650dSSadaf Ebrahimi           break;
2717*22dc650dSSadaf Ebrahimi 
2718*22dc650dSSadaf Ebrahimi           case OP_CRPLUS:
2719*22dc650dSSadaf Ebrahimi           case OP_CRMINPLUS:
2720*22dc650dSSadaf Ebrahimi           case OP_CRPOSPLUS:
2721*22dc650dSSadaf Ebrahimi           count = current_state->count;  /* Already matched */
2722*22dc650dSSadaf Ebrahimi           if (count > 0) { ADD_ACTIVE(next_state_offset + 1, 0); }
2723*22dc650dSSadaf Ebrahimi           if (isinclass)
2724*22dc650dSSadaf Ebrahimi             {
2725*22dc650dSSadaf Ebrahimi             if (count > 0 && *ecode == OP_CRPOSPLUS)
2726*22dc650dSSadaf Ebrahimi               {
2727*22dc650dSSadaf Ebrahimi               active_count--;           /* Remove non-match possibility */
2728*22dc650dSSadaf Ebrahimi               next_active_state--;
2729*22dc650dSSadaf Ebrahimi               }
2730*22dc650dSSadaf Ebrahimi             count++;
2731*22dc650dSSadaf Ebrahimi             ADD_NEW(state_offset, count);
2732*22dc650dSSadaf Ebrahimi             }
2733*22dc650dSSadaf Ebrahimi           break;
2734*22dc650dSSadaf Ebrahimi 
2735*22dc650dSSadaf Ebrahimi           case OP_CRQUERY:
2736*22dc650dSSadaf Ebrahimi           case OP_CRMINQUERY:
2737*22dc650dSSadaf Ebrahimi           case OP_CRPOSQUERY:
2738*22dc650dSSadaf Ebrahimi           ADD_ACTIVE(next_state_offset + 1, 0);
2739*22dc650dSSadaf Ebrahimi           if (isinclass)
2740*22dc650dSSadaf Ebrahimi             {
2741*22dc650dSSadaf Ebrahimi             if (*ecode == OP_CRPOSQUERY)
2742*22dc650dSSadaf Ebrahimi               {
2743*22dc650dSSadaf Ebrahimi               active_count--;           /* Remove non-match possibility */
2744*22dc650dSSadaf Ebrahimi               next_active_state--;
2745*22dc650dSSadaf Ebrahimi               }
2746*22dc650dSSadaf Ebrahimi             ADD_NEW(next_state_offset + 1, 0);
2747*22dc650dSSadaf Ebrahimi             }
2748*22dc650dSSadaf Ebrahimi           break;
2749*22dc650dSSadaf Ebrahimi 
2750*22dc650dSSadaf Ebrahimi           case OP_CRRANGE:
2751*22dc650dSSadaf Ebrahimi           case OP_CRMINRANGE:
2752*22dc650dSSadaf Ebrahimi           case OP_CRPOSRANGE:
2753*22dc650dSSadaf Ebrahimi           count = current_state->count;  /* Already matched */
2754*22dc650dSSadaf Ebrahimi           if (count >= (int)GET2(ecode, 1))
2755*22dc650dSSadaf Ebrahimi             { ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2756*22dc650dSSadaf Ebrahimi           if (isinclass)
2757*22dc650dSSadaf Ebrahimi             {
2758*22dc650dSSadaf Ebrahimi             int max = (int)GET2(ecode, 1 + IMM2_SIZE);
2759*22dc650dSSadaf Ebrahimi 
2760*22dc650dSSadaf Ebrahimi             if (*ecode == OP_CRPOSRANGE && count >= (int)GET2(ecode, 1))
2761*22dc650dSSadaf Ebrahimi               {
2762*22dc650dSSadaf Ebrahimi               active_count--;           /* Remove non-match possibility */
2763*22dc650dSSadaf Ebrahimi               next_active_state--;
2764*22dc650dSSadaf Ebrahimi               }
2765*22dc650dSSadaf Ebrahimi 
2766*22dc650dSSadaf Ebrahimi             if (++count >= max && max != 0)   /* Max 0 => no limit */
2767*22dc650dSSadaf Ebrahimi               { ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2768*22dc650dSSadaf Ebrahimi             else
2769*22dc650dSSadaf Ebrahimi               { ADD_NEW(state_offset, count); }
2770*22dc650dSSadaf Ebrahimi             }
2771*22dc650dSSadaf Ebrahimi           break;
2772*22dc650dSSadaf Ebrahimi 
2773*22dc650dSSadaf Ebrahimi           default:
2774*22dc650dSSadaf Ebrahimi           if (isinclass) { ADD_NEW(next_state_offset, 0); }
2775*22dc650dSSadaf Ebrahimi           break;
2776*22dc650dSSadaf Ebrahimi           }
2777*22dc650dSSadaf Ebrahimi         }
2778*22dc650dSSadaf Ebrahimi       break;
2779*22dc650dSSadaf Ebrahimi 
2780*22dc650dSSadaf Ebrahimi /* ========================================================================== */
2781*22dc650dSSadaf Ebrahimi       /* These are the opcodes for fancy brackets of various kinds. We have
2782*22dc650dSSadaf Ebrahimi       to use recursion in order to handle them. The "always failing" assertion
2783*22dc650dSSadaf Ebrahimi       (?!) is optimised to OP_FAIL when compiling, so we have to support that,
2784*22dc650dSSadaf Ebrahimi       though the other "backtracking verbs" are not supported. */
2785*22dc650dSSadaf Ebrahimi 
2786*22dc650dSSadaf Ebrahimi       case OP_FAIL:
2787*22dc650dSSadaf Ebrahimi       forced_fail++;    /* Count FAILs for multiple states */
2788*22dc650dSSadaf Ebrahimi       break;
2789*22dc650dSSadaf Ebrahimi 
2790*22dc650dSSadaf Ebrahimi       case OP_ASSERT:
2791*22dc650dSSadaf Ebrahimi       case OP_ASSERT_NOT:
2792*22dc650dSSadaf Ebrahimi       case OP_ASSERTBACK:
2793*22dc650dSSadaf Ebrahimi       case OP_ASSERTBACK_NOT:
2794*22dc650dSSadaf Ebrahimi         {
2795*22dc650dSSadaf Ebrahimi         int rc;
2796*22dc650dSSadaf Ebrahimi         int *local_workspace;
2797*22dc650dSSadaf Ebrahimi         PCRE2_SIZE *local_offsets;
2798*22dc650dSSadaf Ebrahimi         PCRE2_SPTR endasscode = code + GET(code, 1);
2799*22dc650dSSadaf Ebrahimi         RWS_anchor *rws = (RWS_anchor *)RWS;
2800*22dc650dSSadaf Ebrahimi 
2801*22dc650dSSadaf Ebrahimi         if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
2802*22dc650dSSadaf Ebrahimi           {
2803*22dc650dSSadaf Ebrahimi           rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
2804*22dc650dSSadaf Ebrahimi           if (rc != 0) return rc;
2805*22dc650dSSadaf Ebrahimi           RWS = (int *)rws;
2806*22dc650dSSadaf Ebrahimi           }
2807*22dc650dSSadaf Ebrahimi 
2808*22dc650dSSadaf Ebrahimi         local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
2809*22dc650dSSadaf Ebrahimi         local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
2810*22dc650dSSadaf Ebrahimi         rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
2811*22dc650dSSadaf Ebrahimi 
2812*22dc650dSSadaf Ebrahimi         while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2813*22dc650dSSadaf Ebrahimi 
2814*22dc650dSSadaf Ebrahimi         rc = internal_dfa_match(
2815*22dc650dSSadaf Ebrahimi           mb,                                   /* static match data */
2816*22dc650dSSadaf Ebrahimi           code,                                 /* this subexpression's code */
2817*22dc650dSSadaf Ebrahimi           ptr,                                  /* where we currently are */
2818*22dc650dSSadaf Ebrahimi           (PCRE2_SIZE)(ptr - start_subject),    /* start offset */
2819*22dc650dSSadaf Ebrahimi           local_offsets,                        /* offset vector */
2820*22dc650dSSadaf Ebrahimi           RWS_OVEC_OSIZE/OVEC_UNIT,             /* size of same */
2821*22dc650dSSadaf Ebrahimi           local_workspace,                      /* workspace vector */
2822*22dc650dSSadaf Ebrahimi           RWS_RSIZE,                            /* size of same */
2823*22dc650dSSadaf Ebrahimi           rlevel,                               /* function recursion level */
2824*22dc650dSSadaf Ebrahimi           RWS);                                 /* recursion workspace */
2825*22dc650dSSadaf Ebrahimi 
2826*22dc650dSSadaf Ebrahimi         rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
2827*22dc650dSSadaf Ebrahimi 
2828*22dc650dSSadaf Ebrahimi         if (rc < 0 && rc != PCRE2_ERROR_NOMATCH) return rc;
2829*22dc650dSSadaf Ebrahimi         if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
2830*22dc650dSSadaf Ebrahimi             { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
2831*22dc650dSSadaf Ebrahimi         }
2832*22dc650dSSadaf Ebrahimi       break;
2833*22dc650dSSadaf Ebrahimi 
2834*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
2835*22dc650dSSadaf Ebrahimi       case OP_COND:
2836*22dc650dSSadaf Ebrahimi       case OP_SCOND:
2837*22dc650dSSadaf Ebrahimi         {
2838*22dc650dSSadaf Ebrahimi         int codelink = (int)GET(code, 1);
2839*22dc650dSSadaf Ebrahimi         PCRE2_UCHAR condcode;
2840*22dc650dSSadaf Ebrahimi 
2841*22dc650dSSadaf Ebrahimi         /* Because of the way auto-callout works during compile, a callout item
2842*22dc650dSSadaf Ebrahimi         is inserted between OP_COND and an assertion condition. This does not
2843*22dc650dSSadaf Ebrahimi         happen for the other conditions. */
2844*22dc650dSSadaf Ebrahimi 
2845*22dc650dSSadaf Ebrahimi         if (code[LINK_SIZE + 1] == OP_CALLOUT
2846*22dc650dSSadaf Ebrahimi             || code[LINK_SIZE + 1] == OP_CALLOUT_STR)
2847*22dc650dSSadaf Ebrahimi           {
2848*22dc650dSSadaf Ebrahimi           PCRE2_SIZE callout_length;
2849*22dc650dSSadaf Ebrahimi           rrc = do_callout_dfa(code, offsets, current_subject, ptr, mb,
2850*22dc650dSSadaf Ebrahimi             1 + LINK_SIZE, &callout_length);
2851*22dc650dSSadaf Ebrahimi           if (rrc < 0) return rrc;                 /* Abandon */
2852*22dc650dSSadaf Ebrahimi           if (rrc > 0) break;                      /* Fail this thread */
2853*22dc650dSSadaf Ebrahimi           code += callout_length;                  /* Skip callout data */
2854*22dc650dSSadaf Ebrahimi           }
2855*22dc650dSSadaf Ebrahimi 
2856*22dc650dSSadaf Ebrahimi         condcode = code[LINK_SIZE+1];
2857*22dc650dSSadaf Ebrahimi 
2858*22dc650dSSadaf Ebrahimi         /* Back reference conditions and duplicate named recursion conditions
2859*22dc650dSSadaf Ebrahimi         are not supported */
2860*22dc650dSSadaf Ebrahimi 
2861*22dc650dSSadaf Ebrahimi         if (condcode == OP_CREF || condcode == OP_DNCREF ||
2862*22dc650dSSadaf Ebrahimi             condcode == OP_DNRREF)
2863*22dc650dSSadaf Ebrahimi           return PCRE2_ERROR_DFA_UCOND;
2864*22dc650dSSadaf Ebrahimi 
2865*22dc650dSSadaf Ebrahimi         /* The DEFINE condition is always false, and the assertion (?!) is
2866*22dc650dSSadaf Ebrahimi         converted to OP_FAIL. */
2867*22dc650dSSadaf Ebrahimi 
2868*22dc650dSSadaf Ebrahimi         if (condcode == OP_FALSE || condcode == OP_FAIL)
2869*22dc650dSSadaf Ebrahimi           { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2870*22dc650dSSadaf Ebrahimi 
2871*22dc650dSSadaf Ebrahimi         /* There is also an always-true condition */
2872*22dc650dSSadaf Ebrahimi 
2873*22dc650dSSadaf Ebrahimi         else if (condcode == OP_TRUE)
2874*22dc650dSSadaf Ebrahimi           { ADD_ACTIVE(state_offset + LINK_SIZE + 2, 0); }
2875*22dc650dSSadaf Ebrahimi 
2876*22dc650dSSadaf Ebrahimi         /* The only supported version of OP_RREF is for the value RREF_ANY,
2877*22dc650dSSadaf Ebrahimi         which means "test if in any recursion". We can't test for specifically
2878*22dc650dSSadaf Ebrahimi         recursed groups. */
2879*22dc650dSSadaf Ebrahimi 
2880*22dc650dSSadaf Ebrahimi         else if (condcode == OP_RREF)
2881*22dc650dSSadaf Ebrahimi           {
2882*22dc650dSSadaf Ebrahimi           unsigned int value = GET2(code, LINK_SIZE + 2);
2883*22dc650dSSadaf Ebrahimi           if (value != RREF_ANY) return PCRE2_ERROR_DFA_UCOND;
2884*22dc650dSSadaf Ebrahimi           if (mb->recursive != NULL)
2885*22dc650dSSadaf Ebrahimi             { ADD_ACTIVE(state_offset + LINK_SIZE + 2 + IMM2_SIZE, 0); }
2886*22dc650dSSadaf Ebrahimi           else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2887*22dc650dSSadaf Ebrahimi           }
2888*22dc650dSSadaf Ebrahimi 
2889*22dc650dSSadaf Ebrahimi         /* Otherwise, the condition is an assertion */
2890*22dc650dSSadaf Ebrahimi 
2891*22dc650dSSadaf Ebrahimi         else
2892*22dc650dSSadaf Ebrahimi           {
2893*22dc650dSSadaf Ebrahimi           int rc;
2894*22dc650dSSadaf Ebrahimi           int *local_workspace;
2895*22dc650dSSadaf Ebrahimi           PCRE2_SIZE *local_offsets;
2896*22dc650dSSadaf Ebrahimi           PCRE2_SPTR asscode = code + LINK_SIZE + 1;
2897*22dc650dSSadaf Ebrahimi           PCRE2_SPTR endasscode = asscode + GET(asscode, 1);
2898*22dc650dSSadaf Ebrahimi           RWS_anchor *rws = (RWS_anchor *)RWS;
2899*22dc650dSSadaf Ebrahimi 
2900*22dc650dSSadaf Ebrahimi           if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
2901*22dc650dSSadaf Ebrahimi             {
2902*22dc650dSSadaf Ebrahimi             rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
2903*22dc650dSSadaf Ebrahimi             if (rc != 0) return rc;
2904*22dc650dSSadaf Ebrahimi             RWS = (int *)rws;
2905*22dc650dSSadaf Ebrahimi             }
2906*22dc650dSSadaf Ebrahimi 
2907*22dc650dSSadaf Ebrahimi           local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
2908*22dc650dSSadaf Ebrahimi           local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
2909*22dc650dSSadaf Ebrahimi           rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
2910*22dc650dSSadaf Ebrahimi 
2911*22dc650dSSadaf Ebrahimi           while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2912*22dc650dSSadaf Ebrahimi 
2913*22dc650dSSadaf Ebrahimi           rc = internal_dfa_match(
2914*22dc650dSSadaf Ebrahimi             mb,                                   /* fixed match data */
2915*22dc650dSSadaf Ebrahimi             asscode,                              /* this subexpression's code */
2916*22dc650dSSadaf Ebrahimi             ptr,                                  /* where we currently are */
2917*22dc650dSSadaf Ebrahimi             (PCRE2_SIZE)(ptr - start_subject),    /* start offset */
2918*22dc650dSSadaf Ebrahimi             local_offsets,                        /* offset vector */
2919*22dc650dSSadaf Ebrahimi             RWS_OVEC_OSIZE/OVEC_UNIT,             /* size of same */
2920*22dc650dSSadaf Ebrahimi             local_workspace,                      /* workspace vector */
2921*22dc650dSSadaf Ebrahimi             RWS_RSIZE,                            /* size of same */
2922*22dc650dSSadaf Ebrahimi             rlevel,                               /* function recursion level */
2923*22dc650dSSadaf Ebrahimi             RWS);                                 /* recursion workspace */
2924*22dc650dSSadaf Ebrahimi 
2925*22dc650dSSadaf Ebrahimi           rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
2926*22dc650dSSadaf Ebrahimi 
2927*22dc650dSSadaf Ebrahimi           if (rc < 0 && rc != PCRE2_ERROR_NOMATCH) return rc;
2928*22dc650dSSadaf Ebrahimi           if ((rc >= 0) ==
2929*22dc650dSSadaf Ebrahimi                 (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))
2930*22dc650dSSadaf Ebrahimi             { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
2931*22dc650dSSadaf Ebrahimi           else
2932*22dc650dSSadaf Ebrahimi             { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2933*22dc650dSSadaf Ebrahimi           }
2934*22dc650dSSadaf Ebrahimi         }
2935*22dc650dSSadaf Ebrahimi       break;
2936*22dc650dSSadaf Ebrahimi 
2937*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
2938*22dc650dSSadaf Ebrahimi       case OP_RECURSE:
2939*22dc650dSSadaf Ebrahimi         {
2940*22dc650dSSadaf Ebrahimi         int rc;
2941*22dc650dSSadaf Ebrahimi         int *local_workspace;
2942*22dc650dSSadaf Ebrahimi         PCRE2_SIZE *local_offsets;
2943*22dc650dSSadaf Ebrahimi         RWS_anchor *rws = (RWS_anchor *)RWS;
2944*22dc650dSSadaf Ebrahimi         PCRE2_SPTR callpat = start_code + GET(code, 1);
2945*22dc650dSSadaf Ebrahimi         uint32_t recno = (callpat == mb->start_code)? 0 :
2946*22dc650dSSadaf Ebrahimi           GET2(callpat, 1 + LINK_SIZE);
2947*22dc650dSSadaf Ebrahimi 
2948*22dc650dSSadaf Ebrahimi         if (rws->free < RWS_RSIZE + RWS_OVEC_RSIZE)
2949*22dc650dSSadaf Ebrahimi           {
2950*22dc650dSSadaf Ebrahimi           rc = more_workspace(&rws, RWS_OVEC_RSIZE, mb);
2951*22dc650dSSadaf Ebrahimi           if (rc != 0) return rc;
2952*22dc650dSSadaf Ebrahimi           RWS = (int *)rws;
2953*22dc650dSSadaf Ebrahimi           }
2954*22dc650dSSadaf Ebrahimi 
2955*22dc650dSSadaf Ebrahimi         local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
2956*22dc650dSSadaf Ebrahimi         local_workspace = ((int *)local_offsets) + RWS_OVEC_RSIZE;
2957*22dc650dSSadaf Ebrahimi         rws->free -= RWS_RSIZE + RWS_OVEC_RSIZE;
2958*22dc650dSSadaf Ebrahimi 
2959*22dc650dSSadaf Ebrahimi         /* Check for repeating a recursion without advancing the subject
2960*22dc650dSSadaf Ebrahimi         pointer or last used character. This should catch convoluted mutual
2961*22dc650dSSadaf Ebrahimi         recursions. (Some simple cases are caught at compile time.) */
2962*22dc650dSSadaf Ebrahimi 
2963*22dc650dSSadaf Ebrahimi         for (dfa_recursion_info *ri = mb->recursive;
2964*22dc650dSSadaf Ebrahimi              ri != NULL;
2965*22dc650dSSadaf Ebrahimi              ri = ri->prevrec)
2966*22dc650dSSadaf Ebrahimi           {
2967*22dc650dSSadaf Ebrahimi           if (recno == ri->group_num && ptr == ri->subject_position &&
2968*22dc650dSSadaf Ebrahimi               mb->last_used_ptr == ri->last_used_ptr)
2969*22dc650dSSadaf Ebrahimi             return PCRE2_ERROR_RECURSELOOP;
2970*22dc650dSSadaf Ebrahimi           }
2971*22dc650dSSadaf Ebrahimi 
2972*22dc650dSSadaf Ebrahimi         /* Remember this recursion and where we started it so as to
2973*22dc650dSSadaf Ebrahimi         catch infinite loops. */
2974*22dc650dSSadaf Ebrahimi 
2975*22dc650dSSadaf Ebrahimi         new_recursive.group_num = recno;
2976*22dc650dSSadaf Ebrahimi         new_recursive.subject_position = ptr;
2977*22dc650dSSadaf Ebrahimi         new_recursive.last_used_ptr = mb->last_used_ptr;
2978*22dc650dSSadaf Ebrahimi         new_recursive.prevrec = mb->recursive;
2979*22dc650dSSadaf Ebrahimi         mb->recursive = &new_recursive;
2980*22dc650dSSadaf Ebrahimi 
2981*22dc650dSSadaf Ebrahimi         rc = internal_dfa_match(
2982*22dc650dSSadaf Ebrahimi           mb,                                   /* fixed match data */
2983*22dc650dSSadaf Ebrahimi           callpat,                              /* this subexpression's code */
2984*22dc650dSSadaf Ebrahimi           ptr,                                  /* where we currently are */
2985*22dc650dSSadaf Ebrahimi           (PCRE2_SIZE)(ptr - start_subject),    /* start offset */
2986*22dc650dSSadaf Ebrahimi           local_offsets,                        /* offset vector */
2987*22dc650dSSadaf Ebrahimi           RWS_OVEC_RSIZE/OVEC_UNIT,             /* size of same */
2988*22dc650dSSadaf Ebrahimi           local_workspace,                      /* workspace vector */
2989*22dc650dSSadaf Ebrahimi           RWS_RSIZE,                            /* size of same */
2990*22dc650dSSadaf Ebrahimi           rlevel,                               /* function recursion level */
2991*22dc650dSSadaf Ebrahimi           RWS);                                 /* recursion workspace */
2992*22dc650dSSadaf Ebrahimi 
2993*22dc650dSSadaf Ebrahimi         rws->free += RWS_RSIZE + RWS_OVEC_RSIZE;
2994*22dc650dSSadaf Ebrahimi         mb->recursive = new_recursive.prevrec;  /* Done this recursion */
2995*22dc650dSSadaf Ebrahimi 
2996*22dc650dSSadaf Ebrahimi         /* Ran out of internal offsets */
2997*22dc650dSSadaf Ebrahimi 
2998*22dc650dSSadaf Ebrahimi         if (rc == 0) return PCRE2_ERROR_DFA_RECURSE;
2999*22dc650dSSadaf Ebrahimi 
3000*22dc650dSSadaf Ebrahimi         /* For each successful matched substring, set up the next state with a
3001*22dc650dSSadaf Ebrahimi         count of characters to skip before trying it. Note that the count is in
3002*22dc650dSSadaf Ebrahimi         characters, not bytes. */
3003*22dc650dSSadaf Ebrahimi 
3004*22dc650dSSadaf Ebrahimi         if (rc > 0)
3005*22dc650dSSadaf Ebrahimi           {
3006*22dc650dSSadaf Ebrahimi           for (rc = rc*2 - 2; rc >= 0; rc -= 2)
3007*22dc650dSSadaf Ebrahimi             {
3008*22dc650dSSadaf Ebrahimi             PCRE2_SIZE charcount = local_offsets[rc+1] - local_offsets[rc];
3009*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
3010*22dc650dSSadaf Ebrahimi             if (utf)
3011*22dc650dSSadaf Ebrahimi               {
3012*22dc650dSSadaf Ebrahimi               PCRE2_SPTR p = start_subject + local_offsets[rc];
3013*22dc650dSSadaf Ebrahimi               PCRE2_SPTR pp = start_subject + local_offsets[rc+1];
3014*22dc650dSSadaf Ebrahimi               while (p < pp) if (NOT_FIRSTCU(*p++)) charcount--;
3015*22dc650dSSadaf Ebrahimi               }
3016*22dc650dSSadaf Ebrahimi #endif
3017*22dc650dSSadaf Ebrahimi             if (charcount > 0)
3018*22dc650dSSadaf Ebrahimi               {
3019*22dc650dSSadaf Ebrahimi               ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0,
3020*22dc650dSSadaf Ebrahimi                 (int)(charcount - 1));
3021*22dc650dSSadaf Ebrahimi               }
3022*22dc650dSSadaf Ebrahimi             else
3023*22dc650dSSadaf Ebrahimi               {
3024*22dc650dSSadaf Ebrahimi               ADD_ACTIVE(state_offset + LINK_SIZE + 1, 0);
3025*22dc650dSSadaf Ebrahimi               }
3026*22dc650dSSadaf Ebrahimi             }
3027*22dc650dSSadaf Ebrahimi           }
3028*22dc650dSSadaf Ebrahimi         else if (rc != PCRE2_ERROR_NOMATCH) return rc;
3029*22dc650dSSadaf Ebrahimi         }
3030*22dc650dSSadaf Ebrahimi       break;
3031*22dc650dSSadaf Ebrahimi 
3032*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
3033*22dc650dSSadaf Ebrahimi       case OP_BRAPOS:
3034*22dc650dSSadaf Ebrahimi       case OP_SBRAPOS:
3035*22dc650dSSadaf Ebrahimi       case OP_CBRAPOS:
3036*22dc650dSSadaf Ebrahimi       case OP_SCBRAPOS:
3037*22dc650dSSadaf Ebrahimi       case OP_BRAPOSZERO:
3038*22dc650dSSadaf Ebrahimi         {
3039*22dc650dSSadaf Ebrahimi         int rc;
3040*22dc650dSSadaf Ebrahimi         int *local_workspace;
3041*22dc650dSSadaf Ebrahimi         PCRE2_SIZE *local_offsets;
3042*22dc650dSSadaf Ebrahimi         PCRE2_SIZE charcount, matched_count;
3043*22dc650dSSadaf Ebrahimi         PCRE2_SPTR local_ptr = ptr;
3044*22dc650dSSadaf Ebrahimi         RWS_anchor *rws = (RWS_anchor *)RWS;
3045*22dc650dSSadaf Ebrahimi         BOOL allow_zero;
3046*22dc650dSSadaf Ebrahimi 
3047*22dc650dSSadaf Ebrahimi         if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
3048*22dc650dSSadaf Ebrahimi           {
3049*22dc650dSSadaf Ebrahimi           rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
3050*22dc650dSSadaf Ebrahimi           if (rc != 0) return rc;
3051*22dc650dSSadaf Ebrahimi           RWS = (int *)rws;
3052*22dc650dSSadaf Ebrahimi           }
3053*22dc650dSSadaf Ebrahimi 
3054*22dc650dSSadaf Ebrahimi         local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
3055*22dc650dSSadaf Ebrahimi         local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
3056*22dc650dSSadaf Ebrahimi         rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
3057*22dc650dSSadaf Ebrahimi 
3058*22dc650dSSadaf Ebrahimi         if (codevalue == OP_BRAPOSZERO)
3059*22dc650dSSadaf Ebrahimi           {
3060*22dc650dSSadaf Ebrahimi           allow_zero = TRUE;
3061*22dc650dSSadaf Ebrahimi           codevalue = *(++code);  /* Codevalue will be one of above BRAs */
3062*22dc650dSSadaf Ebrahimi           }
3063*22dc650dSSadaf Ebrahimi         else allow_zero = FALSE;
3064*22dc650dSSadaf Ebrahimi 
3065*22dc650dSSadaf Ebrahimi         /* Loop to match the subpattern as many times as possible as if it were
3066*22dc650dSSadaf Ebrahimi         a complete pattern. */
3067*22dc650dSSadaf Ebrahimi 
3068*22dc650dSSadaf Ebrahimi         for (matched_count = 0;; matched_count++)
3069*22dc650dSSadaf Ebrahimi           {
3070*22dc650dSSadaf Ebrahimi           rc = internal_dfa_match(
3071*22dc650dSSadaf Ebrahimi             mb,                                   /* fixed match data */
3072*22dc650dSSadaf Ebrahimi             code,                                 /* this subexpression's code */
3073*22dc650dSSadaf Ebrahimi             local_ptr,                            /* where we currently are */
3074*22dc650dSSadaf Ebrahimi             (PCRE2_SIZE)(ptr - start_subject),    /* start offset */
3075*22dc650dSSadaf Ebrahimi             local_offsets,                        /* offset vector */
3076*22dc650dSSadaf Ebrahimi             RWS_OVEC_OSIZE/OVEC_UNIT,             /* size of same */
3077*22dc650dSSadaf Ebrahimi             local_workspace,                      /* workspace vector */
3078*22dc650dSSadaf Ebrahimi             RWS_RSIZE,                            /* size of same */
3079*22dc650dSSadaf Ebrahimi             rlevel,                               /* function recursion level */
3080*22dc650dSSadaf Ebrahimi             RWS);                                 /* recursion workspace */
3081*22dc650dSSadaf Ebrahimi 
3082*22dc650dSSadaf Ebrahimi           /* Failed to match */
3083*22dc650dSSadaf Ebrahimi 
3084*22dc650dSSadaf Ebrahimi           if (rc < 0)
3085*22dc650dSSadaf Ebrahimi             {
3086*22dc650dSSadaf Ebrahimi             if (rc != PCRE2_ERROR_NOMATCH) return rc;
3087*22dc650dSSadaf Ebrahimi             break;
3088*22dc650dSSadaf Ebrahimi             }
3089*22dc650dSSadaf Ebrahimi 
3090*22dc650dSSadaf Ebrahimi           /* Matched: break the loop if zero characters matched. */
3091*22dc650dSSadaf Ebrahimi 
3092*22dc650dSSadaf Ebrahimi           charcount = local_offsets[1] - local_offsets[0];
3093*22dc650dSSadaf Ebrahimi           if (charcount == 0) break;
3094*22dc650dSSadaf Ebrahimi           local_ptr += charcount;    /* Advance temporary position ptr */
3095*22dc650dSSadaf Ebrahimi           }
3096*22dc650dSSadaf Ebrahimi 
3097*22dc650dSSadaf Ebrahimi         rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
3098*22dc650dSSadaf Ebrahimi 
3099*22dc650dSSadaf Ebrahimi         /* At this point we have matched the subpattern matched_count
3100*22dc650dSSadaf Ebrahimi         times, and local_ptr is pointing to the character after the end of the
3101*22dc650dSSadaf Ebrahimi         last match. */
3102*22dc650dSSadaf Ebrahimi 
3103*22dc650dSSadaf Ebrahimi         if (matched_count > 0 || allow_zero)
3104*22dc650dSSadaf Ebrahimi           {
3105*22dc650dSSadaf Ebrahimi           PCRE2_SPTR end_subpattern = code;
3106*22dc650dSSadaf Ebrahimi           int next_state_offset;
3107*22dc650dSSadaf Ebrahimi 
3108*22dc650dSSadaf Ebrahimi           do { end_subpattern += GET(end_subpattern, 1); }
3109*22dc650dSSadaf Ebrahimi             while (*end_subpattern == OP_ALT);
3110*22dc650dSSadaf Ebrahimi           next_state_offset =
3111*22dc650dSSadaf Ebrahimi             (int)(end_subpattern - start_code + LINK_SIZE + 1);
3112*22dc650dSSadaf Ebrahimi 
3113*22dc650dSSadaf Ebrahimi           /* Optimization: if there are no more active states, and there
3114*22dc650dSSadaf Ebrahimi           are no new states yet set up, then skip over the subject string
3115*22dc650dSSadaf Ebrahimi           right here, to save looping. Otherwise, set up the new state to swing
3116*22dc650dSSadaf Ebrahimi           into action when the end of the matched substring is reached. */
3117*22dc650dSSadaf Ebrahimi 
3118*22dc650dSSadaf Ebrahimi           if (i + 1 >= active_count && new_count == 0)
3119*22dc650dSSadaf Ebrahimi             {
3120*22dc650dSSadaf Ebrahimi             ptr = local_ptr;
3121*22dc650dSSadaf Ebrahimi             clen = 0;
3122*22dc650dSSadaf Ebrahimi             ADD_NEW(next_state_offset, 0);
3123*22dc650dSSadaf Ebrahimi             }
3124*22dc650dSSadaf Ebrahimi           else
3125*22dc650dSSadaf Ebrahimi             {
3126*22dc650dSSadaf Ebrahimi             PCRE2_SPTR p = ptr;
3127*22dc650dSSadaf Ebrahimi             PCRE2_SPTR pp = local_ptr;
3128*22dc650dSSadaf Ebrahimi             charcount = (PCRE2_SIZE)(pp - p);
3129*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
3130*22dc650dSSadaf Ebrahimi             if (utf) while (p < pp) if (NOT_FIRSTCU(*p++)) charcount--;
3131*22dc650dSSadaf Ebrahimi #endif
3132*22dc650dSSadaf Ebrahimi             ADD_NEW_DATA(-next_state_offset, 0, (int)(charcount - 1));
3133*22dc650dSSadaf Ebrahimi             }
3134*22dc650dSSadaf Ebrahimi           }
3135*22dc650dSSadaf Ebrahimi         }
3136*22dc650dSSadaf Ebrahimi       break;
3137*22dc650dSSadaf Ebrahimi 
3138*22dc650dSSadaf Ebrahimi       /*-----------------------------------------------------------------*/
3139*22dc650dSSadaf Ebrahimi       case OP_ONCE:
3140*22dc650dSSadaf Ebrahimi         {
3141*22dc650dSSadaf Ebrahimi         int rc;
3142*22dc650dSSadaf Ebrahimi         int *local_workspace;
3143*22dc650dSSadaf Ebrahimi         PCRE2_SIZE *local_offsets;
3144*22dc650dSSadaf Ebrahimi         RWS_anchor *rws = (RWS_anchor *)RWS;
3145*22dc650dSSadaf Ebrahimi 
3146*22dc650dSSadaf Ebrahimi         if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
3147*22dc650dSSadaf Ebrahimi           {
3148*22dc650dSSadaf Ebrahimi           rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
3149*22dc650dSSadaf Ebrahimi           if (rc != 0) return rc;
3150*22dc650dSSadaf Ebrahimi           RWS = (int *)rws;
3151*22dc650dSSadaf Ebrahimi           }
3152*22dc650dSSadaf Ebrahimi 
3153*22dc650dSSadaf Ebrahimi         local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
3154*22dc650dSSadaf Ebrahimi         local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
3155*22dc650dSSadaf Ebrahimi         rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
3156*22dc650dSSadaf Ebrahimi 
3157*22dc650dSSadaf Ebrahimi         rc = internal_dfa_match(
3158*22dc650dSSadaf Ebrahimi           mb,                                   /* fixed match data */
3159*22dc650dSSadaf Ebrahimi           code,                                 /* this subexpression's code */
3160*22dc650dSSadaf Ebrahimi           ptr,                                  /* where we currently are */
3161*22dc650dSSadaf Ebrahimi           (PCRE2_SIZE)(ptr - start_subject),    /* start offset */
3162*22dc650dSSadaf Ebrahimi           local_offsets,                        /* offset vector */
3163*22dc650dSSadaf Ebrahimi           RWS_OVEC_OSIZE/OVEC_UNIT,             /* size of same */
3164*22dc650dSSadaf Ebrahimi           local_workspace,                      /* workspace vector */
3165*22dc650dSSadaf Ebrahimi           RWS_RSIZE,                            /* size of same */
3166*22dc650dSSadaf Ebrahimi           rlevel,                               /* function recursion level */
3167*22dc650dSSadaf Ebrahimi           RWS);                                 /* recursion workspace */
3168*22dc650dSSadaf Ebrahimi 
3169*22dc650dSSadaf Ebrahimi         rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
3170*22dc650dSSadaf Ebrahimi 
3171*22dc650dSSadaf Ebrahimi         if (rc >= 0)
3172*22dc650dSSadaf Ebrahimi           {
3173*22dc650dSSadaf Ebrahimi           PCRE2_SPTR end_subpattern = code;
3174*22dc650dSSadaf Ebrahimi           PCRE2_SIZE charcount = local_offsets[1] - local_offsets[0];
3175*22dc650dSSadaf Ebrahimi           int next_state_offset, repeat_state_offset;
3176*22dc650dSSadaf Ebrahimi 
3177*22dc650dSSadaf Ebrahimi           do { end_subpattern += GET(end_subpattern, 1); }
3178*22dc650dSSadaf Ebrahimi             while (*end_subpattern == OP_ALT);
3179*22dc650dSSadaf Ebrahimi           next_state_offset =
3180*22dc650dSSadaf Ebrahimi             (int)(end_subpattern - start_code + LINK_SIZE + 1);
3181*22dc650dSSadaf Ebrahimi 
3182*22dc650dSSadaf Ebrahimi           /* If the end of this subpattern is KETRMAX or KETRMIN, we must
3183*22dc650dSSadaf Ebrahimi           arrange for the repeat state also to be added to the relevant list.
3184*22dc650dSSadaf Ebrahimi           Calculate the offset, or set -1 for no repeat. */
3185*22dc650dSSadaf Ebrahimi 
3186*22dc650dSSadaf Ebrahimi           repeat_state_offset = (*end_subpattern == OP_KETRMAX ||
3187*22dc650dSSadaf Ebrahimi                                  *end_subpattern == OP_KETRMIN)?
3188*22dc650dSSadaf Ebrahimi             (int)(end_subpattern - start_code - GET(end_subpattern, 1)) : -1;
3189*22dc650dSSadaf Ebrahimi 
3190*22dc650dSSadaf Ebrahimi           /* If we have matched an empty string, add the next state at the
3191*22dc650dSSadaf Ebrahimi           current character pointer. This is important so that the duplicate
3192*22dc650dSSadaf Ebrahimi           checking kicks in, which is what breaks infinite loops that match an
3193*22dc650dSSadaf Ebrahimi           empty string. */
3194*22dc650dSSadaf Ebrahimi 
3195*22dc650dSSadaf Ebrahimi           if (charcount == 0)
3196*22dc650dSSadaf Ebrahimi             {
3197*22dc650dSSadaf Ebrahimi             ADD_ACTIVE(next_state_offset, 0);
3198*22dc650dSSadaf Ebrahimi             }
3199*22dc650dSSadaf Ebrahimi 
3200*22dc650dSSadaf Ebrahimi           /* Optimization: if there are no more active states, and there
3201*22dc650dSSadaf Ebrahimi           are no new states yet set up, then skip over the subject string
3202*22dc650dSSadaf Ebrahimi           right here, to save looping. Otherwise, set up the new state to swing
3203*22dc650dSSadaf Ebrahimi           into action when the end of the matched substring is reached. */
3204*22dc650dSSadaf Ebrahimi 
3205*22dc650dSSadaf Ebrahimi           else if (i + 1 >= active_count && new_count == 0)
3206*22dc650dSSadaf Ebrahimi             {
3207*22dc650dSSadaf Ebrahimi             ptr += charcount;
3208*22dc650dSSadaf Ebrahimi             clen = 0;
3209*22dc650dSSadaf Ebrahimi             ADD_NEW(next_state_offset, 0);
3210*22dc650dSSadaf Ebrahimi 
3211*22dc650dSSadaf Ebrahimi             /* If we are adding a repeat state at the new character position,
3212*22dc650dSSadaf Ebrahimi             we must fudge things so that it is the only current state.
3213*22dc650dSSadaf Ebrahimi             Otherwise, it might be a duplicate of one we processed before, and
3214*22dc650dSSadaf Ebrahimi             that would cause it to be skipped. */
3215*22dc650dSSadaf Ebrahimi 
3216*22dc650dSSadaf Ebrahimi             if (repeat_state_offset >= 0)
3217*22dc650dSSadaf Ebrahimi               {
3218*22dc650dSSadaf Ebrahimi               next_active_state = active_states;
3219*22dc650dSSadaf Ebrahimi               active_count = 0;
3220*22dc650dSSadaf Ebrahimi               i = -1;
3221*22dc650dSSadaf Ebrahimi               ADD_ACTIVE(repeat_state_offset, 0);
3222*22dc650dSSadaf Ebrahimi               }
3223*22dc650dSSadaf Ebrahimi             }
3224*22dc650dSSadaf Ebrahimi           else
3225*22dc650dSSadaf Ebrahimi             {
3226*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
3227*22dc650dSSadaf Ebrahimi             if (utf)
3228*22dc650dSSadaf Ebrahimi               {
3229*22dc650dSSadaf Ebrahimi               PCRE2_SPTR p = start_subject + local_offsets[0];
3230*22dc650dSSadaf Ebrahimi               PCRE2_SPTR pp = start_subject + local_offsets[1];
3231*22dc650dSSadaf Ebrahimi               while (p < pp) if (NOT_FIRSTCU(*p++)) charcount--;
3232*22dc650dSSadaf Ebrahimi               }
3233*22dc650dSSadaf Ebrahimi #endif
3234*22dc650dSSadaf Ebrahimi             ADD_NEW_DATA(-next_state_offset, 0, (int)(charcount - 1));
3235*22dc650dSSadaf Ebrahimi             if (repeat_state_offset >= 0)
3236*22dc650dSSadaf Ebrahimi               { ADD_NEW_DATA(-repeat_state_offset, 0, (int)(charcount - 1)); }
3237*22dc650dSSadaf Ebrahimi             }
3238*22dc650dSSadaf Ebrahimi           }
3239*22dc650dSSadaf Ebrahimi         else if (rc != PCRE2_ERROR_NOMATCH) return rc;
3240*22dc650dSSadaf Ebrahimi         }
3241*22dc650dSSadaf Ebrahimi       break;
3242*22dc650dSSadaf Ebrahimi 
3243*22dc650dSSadaf Ebrahimi 
3244*22dc650dSSadaf Ebrahimi /* ========================================================================== */
3245*22dc650dSSadaf Ebrahimi       /* Handle callouts */
3246*22dc650dSSadaf Ebrahimi 
3247*22dc650dSSadaf Ebrahimi       case OP_CALLOUT:
3248*22dc650dSSadaf Ebrahimi       case OP_CALLOUT_STR:
3249*22dc650dSSadaf Ebrahimi         {
3250*22dc650dSSadaf Ebrahimi         PCRE2_SIZE callout_length;
3251*22dc650dSSadaf Ebrahimi         rrc = do_callout_dfa(code, offsets, current_subject, ptr, mb, 0,
3252*22dc650dSSadaf Ebrahimi           &callout_length);
3253*22dc650dSSadaf Ebrahimi         if (rrc < 0) return rrc;   /* Abandon */
3254*22dc650dSSadaf Ebrahimi         if (rrc == 0)
3255*22dc650dSSadaf Ebrahimi           { ADD_ACTIVE(state_offset + (int)callout_length, 0); }
3256*22dc650dSSadaf Ebrahimi         }
3257*22dc650dSSadaf Ebrahimi       break;
3258*22dc650dSSadaf Ebrahimi 
3259*22dc650dSSadaf Ebrahimi 
3260*22dc650dSSadaf Ebrahimi /* ========================================================================== */
3261*22dc650dSSadaf Ebrahimi       default:        /* Unsupported opcode */
3262*22dc650dSSadaf Ebrahimi       return PCRE2_ERROR_DFA_UITEM;
3263*22dc650dSSadaf Ebrahimi       }
3264*22dc650dSSadaf Ebrahimi 
3265*22dc650dSSadaf Ebrahimi     NEXT_ACTIVE_STATE: continue;
3266*22dc650dSSadaf Ebrahimi 
3267*22dc650dSSadaf Ebrahimi     }      /* End of loop scanning active states */
3268*22dc650dSSadaf Ebrahimi 
3269*22dc650dSSadaf Ebrahimi   /* We have finished the processing at the current subject character. If no
3270*22dc650dSSadaf Ebrahimi   new states have been set for the next character, we have found all the
3271*22dc650dSSadaf Ebrahimi   matches that we are going to find. If partial matching has been requested,
3272*22dc650dSSadaf Ebrahimi   check for appropriate conditions.
3273*22dc650dSSadaf Ebrahimi 
3274*22dc650dSSadaf Ebrahimi   The "forced_fail" variable counts the number of (*F) encountered for the
3275*22dc650dSSadaf Ebrahimi   character. If it is equal to the original active_count (saved in
3276*22dc650dSSadaf Ebrahimi   workspace[1]) it means that (*F) was found on every active state. In this
3277*22dc650dSSadaf Ebrahimi   case we don't want to give a partial match.
3278*22dc650dSSadaf Ebrahimi 
3279*22dc650dSSadaf Ebrahimi   The "could_continue" variable is true if a state could have continued but
3280*22dc650dSSadaf Ebrahimi   for the fact that the end of the subject was reached. */
3281*22dc650dSSadaf Ebrahimi 
3282*22dc650dSSadaf Ebrahimi   if (new_count <= 0)
3283*22dc650dSSadaf Ebrahimi     {
3284*22dc650dSSadaf Ebrahimi     if (could_continue &&                            /* Some could go on, and */
3285*22dc650dSSadaf Ebrahimi         forced_fail != workspace[1] &&               /* Not all forced fail & */
3286*22dc650dSSadaf Ebrahimi         (                                            /* either... */
3287*22dc650dSSadaf Ebrahimi         (mb->moptions & PCRE2_PARTIAL_HARD) != 0      /* Hard partial */
3288*22dc650dSSadaf Ebrahimi         ||                                           /* or... */
3289*22dc650dSSadaf Ebrahimi         ((mb->moptions & PCRE2_PARTIAL_SOFT) != 0 &&  /* Soft partial and */
3290*22dc650dSSadaf Ebrahimi          match_count < 0)                             /* no matches */
3291*22dc650dSSadaf Ebrahimi         ) &&                                         /* And... */
3292*22dc650dSSadaf Ebrahimi         (
3293*22dc650dSSadaf Ebrahimi         partial_newline ||                   /* Either partial NL */
3294*22dc650dSSadaf Ebrahimi           (                                  /* or ... */
3295*22dc650dSSadaf Ebrahimi           ptr >= end_subject &&              /* End of subject and */
3296*22dc650dSSadaf Ebrahimi             (                                  /* either */
3297*22dc650dSSadaf Ebrahimi             ptr > mb->start_used_ptr ||        /* Inspected non-empty string */
3298*22dc650dSSadaf Ebrahimi             mb->allowemptypartial              /* or pattern has lookbehind */
3299*22dc650dSSadaf Ebrahimi             )                                  /* or could match empty */
3300*22dc650dSSadaf Ebrahimi           )
3301*22dc650dSSadaf Ebrahimi         ))
3302*22dc650dSSadaf Ebrahimi       match_count = PCRE2_ERROR_PARTIAL;
3303*22dc650dSSadaf Ebrahimi     break;  /* Exit from loop along the subject string */
3304*22dc650dSSadaf Ebrahimi     }
3305*22dc650dSSadaf Ebrahimi 
3306*22dc650dSSadaf Ebrahimi   /* One or more states are active for the next character. */
3307*22dc650dSSadaf Ebrahimi 
3308*22dc650dSSadaf Ebrahimi   ptr += clen;    /* Advance to next subject character */
3309*22dc650dSSadaf Ebrahimi   }               /* Loop to move along the subject string */
3310*22dc650dSSadaf Ebrahimi 
3311*22dc650dSSadaf Ebrahimi /* Control gets here from "break" a few lines above. If we have a match and
3312*22dc650dSSadaf Ebrahimi PCRE2_ENDANCHORED is set, the match fails. */
3313*22dc650dSSadaf Ebrahimi 
3314*22dc650dSSadaf Ebrahimi if (match_count >= 0 &&
3315*22dc650dSSadaf Ebrahimi     ((mb->moptions | mb->poptions) & PCRE2_ENDANCHORED) != 0 &&
3316*22dc650dSSadaf Ebrahimi     ptr < end_subject)
3317*22dc650dSSadaf Ebrahimi   match_count = PCRE2_ERROR_NOMATCH;
3318*22dc650dSSadaf Ebrahimi 
3319*22dc650dSSadaf Ebrahimi return match_count;
3320*22dc650dSSadaf Ebrahimi }
3321*22dc650dSSadaf Ebrahimi 
3322*22dc650dSSadaf Ebrahimi 
3323*22dc650dSSadaf Ebrahimi 
3324*22dc650dSSadaf Ebrahimi /*************************************************
3325*22dc650dSSadaf Ebrahimi *     Match a pattern using the DFA algorithm    *
3326*22dc650dSSadaf Ebrahimi *************************************************/
3327*22dc650dSSadaf Ebrahimi 
3328*22dc650dSSadaf Ebrahimi /* This function matches a compiled pattern to a subject string, using the
3329*22dc650dSSadaf Ebrahimi alternate matching algorithm that finds all matches at once.
3330*22dc650dSSadaf Ebrahimi 
3331*22dc650dSSadaf Ebrahimi Arguments:
3332*22dc650dSSadaf Ebrahimi   code          points to the compiled pattern
3333*22dc650dSSadaf Ebrahimi   subject       subject string
3334*22dc650dSSadaf Ebrahimi   length        length of subject string
3335*22dc650dSSadaf Ebrahimi   start_offset  where to start matching in the subject
3336*22dc650dSSadaf Ebrahimi   options       option bits
3337*22dc650dSSadaf Ebrahimi   match_data    points to a match data structure
3338*22dc650dSSadaf Ebrahimi   mcontext      points to a match context
3339*22dc650dSSadaf Ebrahimi   workspace     pointer to workspace
3340*22dc650dSSadaf Ebrahimi   wscount       size of workspace
3341*22dc650dSSadaf Ebrahimi 
3342*22dc650dSSadaf Ebrahimi Returns:        > 0 => number of match offset pairs placed in offsets
3343*22dc650dSSadaf Ebrahimi                 = 0 => offsets overflowed; longest matches are present
3344*22dc650dSSadaf Ebrahimi                  -1 => failed to match
3345*22dc650dSSadaf Ebrahimi                < -1 => some kind of unexpected problem
3346*22dc650dSSadaf Ebrahimi */
3347*22dc650dSSadaf Ebrahimi 
3348*22dc650dSSadaf Ebrahimi PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_dfa_match(const pcre2_code * code,PCRE2_SPTR subject,PCRE2_SIZE length,PCRE2_SIZE start_offset,uint32_t options,pcre2_match_data * match_data,pcre2_match_context * mcontext,int * workspace,PCRE2_SIZE wscount)3349*22dc650dSSadaf Ebrahimi pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
3350*22dc650dSSadaf Ebrahimi   PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
3351*22dc650dSSadaf Ebrahimi   pcre2_match_context *mcontext, int *workspace, PCRE2_SIZE wscount)
3352*22dc650dSSadaf Ebrahimi {
3353*22dc650dSSadaf Ebrahimi int rc;
3354*22dc650dSSadaf Ebrahimi int was_zero_terminated = 0;
3355*22dc650dSSadaf Ebrahimi 
3356*22dc650dSSadaf Ebrahimi const pcre2_real_code *re = (const pcre2_real_code *)code;
3357*22dc650dSSadaf Ebrahimi 
3358*22dc650dSSadaf Ebrahimi PCRE2_SPTR start_match;
3359*22dc650dSSadaf Ebrahimi PCRE2_SPTR end_subject;
3360*22dc650dSSadaf Ebrahimi PCRE2_SPTR bumpalong_limit;
3361*22dc650dSSadaf Ebrahimi PCRE2_SPTR req_cu_ptr;
3362*22dc650dSSadaf Ebrahimi 
3363*22dc650dSSadaf Ebrahimi BOOL utf, anchored, startline, firstline;
3364*22dc650dSSadaf Ebrahimi BOOL has_first_cu = FALSE;
3365*22dc650dSSadaf Ebrahimi BOOL has_req_cu = FALSE;
3366*22dc650dSSadaf Ebrahimi 
3367*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8
3368*22dc650dSSadaf Ebrahimi PCRE2_SPTR memchr_found_first_cu = NULL;
3369*22dc650dSSadaf Ebrahimi PCRE2_SPTR memchr_found_first_cu2 = NULL;
3370*22dc650dSSadaf Ebrahimi #endif
3371*22dc650dSSadaf Ebrahimi 
3372*22dc650dSSadaf Ebrahimi PCRE2_UCHAR first_cu = 0;
3373*22dc650dSSadaf Ebrahimi PCRE2_UCHAR first_cu2 = 0;
3374*22dc650dSSadaf Ebrahimi PCRE2_UCHAR req_cu = 0;
3375*22dc650dSSadaf Ebrahimi PCRE2_UCHAR req_cu2 = 0;
3376*22dc650dSSadaf Ebrahimi 
3377*22dc650dSSadaf Ebrahimi const uint8_t *start_bits = NULL;
3378*22dc650dSSadaf Ebrahimi 
3379*22dc650dSSadaf Ebrahimi /* We need to have mb pointing to a match block, because the IS_NEWLINE macro
3380*22dc650dSSadaf Ebrahimi is used below, and it expects NLBLOCK to be defined as a pointer. */
3381*22dc650dSSadaf Ebrahimi 
3382*22dc650dSSadaf Ebrahimi pcre2_callout_block cb;
3383*22dc650dSSadaf Ebrahimi dfa_match_block actual_match_block;
3384*22dc650dSSadaf Ebrahimi dfa_match_block *mb = &actual_match_block;
3385*22dc650dSSadaf Ebrahimi 
3386*22dc650dSSadaf Ebrahimi /* Set up a starting block of memory for use during recursive calls to
3387*22dc650dSSadaf Ebrahimi internal_dfa_match(). By putting this on the stack, it minimizes resource use
3388*22dc650dSSadaf Ebrahimi in the case when it is not needed. If this is too small, more memory is
3389*22dc650dSSadaf Ebrahimi obtained from the heap. At the start of each block is an anchor structure.*/
3390*22dc650dSSadaf Ebrahimi 
3391*22dc650dSSadaf Ebrahimi int base_recursion_workspace[RWS_BASE_SIZE];
3392*22dc650dSSadaf Ebrahimi RWS_anchor *rws = (RWS_anchor *)base_recursion_workspace;
3393*22dc650dSSadaf Ebrahimi rws->next = NULL;
3394*22dc650dSSadaf Ebrahimi rws->size = RWS_BASE_SIZE;
3395*22dc650dSSadaf Ebrahimi rws->free = RWS_BASE_SIZE - RWS_ANCHOR_SIZE;
3396*22dc650dSSadaf Ebrahimi 
3397*22dc650dSSadaf Ebrahimi /* Recognize NULL, length 0 as an empty string. */
3398*22dc650dSSadaf Ebrahimi 
3399*22dc650dSSadaf Ebrahimi if (subject == NULL && length == 0) subject = (PCRE2_SPTR)"";
3400*22dc650dSSadaf Ebrahimi 
3401*22dc650dSSadaf Ebrahimi /* Plausibility checks */
3402*22dc650dSSadaf Ebrahimi 
3403*22dc650dSSadaf Ebrahimi if ((options & ~PUBLIC_DFA_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
3404*22dc650dSSadaf Ebrahimi if (re == NULL || subject == NULL || workspace == NULL || match_data == NULL)
3405*22dc650dSSadaf Ebrahimi   return PCRE2_ERROR_NULL;
3406*22dc650dSSadaf Ebrahimi 
3407*22dc650dSSadaf Ebrahimi if (length == PCRE2_ZERO_TERMINATED)
3408*22dc650dSSadaf Ebrahimi   {
3409*22dc650dSSadaf Ebrahimi   length = PRIV(strlen)(subject);
3410*22dc650dSSadaf Ebrahimi   was_zero_terminated = 1;
3411*22dc650dSSadaf Ebrahimi   }
3412*22dc650dSSadaf Ebrahimi 
3413*22dc650dSSadaf Ebrahimi if (wscount < 20) return PCRE2_ERROR_DFA_WSSIZE;
3414*22dc650dSSadaf Ebrahimi if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
3415*22dc650dSSadaf Ebrahimi 
3416*22dc650dSSadaf Ebrahimi /* Partial matching and PCRE2_ENDANCHORED are currently not allowed at the same
3417*22dc650dSSadaf Ebrahimi time. */
3418*22dc650dSSadaf Ebrahimi 
3419*22dc650dSSadaf Ebrahimi if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0 &&
3420*22dc650dSSadaf Ebrahimi    ((re->overall_options | options) & PCRE2_ENDANCHORED) != 0)
3421*22dc650dSSadaf Ebrahimi   return PCRE2_ERROR_BADOPTION;
3422*22dc650dSSadaf Ebrahimi 
3423*22dc650dSSadaf Ebrahimi /* Invalid UTF support is not available for DFA matching. */
3424*22dc650dSSadaf Ebrahimi 
3425*22dc650dSSadaf Ebrahimi if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)
3426*22dc650dSSadaf Ebrahimi   return PCRE2_ERROR_DFA_UINVALID_UTF;
3427*22dc650dSSadaf Ebrahimi 
3428*22dc650dSSadaf Ebrahimi /* Check that the first field in the block is the magic number. If it is not,
3429*22dc650dSSadaf Ebrahimi return with PCRE2_ERROR_BADMAGIC. */
3430*22dc650dSSadaf Ebrahimi 
3431*22dc650dSSadaf Ebrahimi if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
3432*22dc650dSSadaf Ebrahimi 
3433*22dc650dSSadaf Ebrahimi /* Check the code unit width. */
3434*22dc650dSSadaf Ebrahimi 
3435*22dc650dSSadaf Ebrahimi if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
3436*22dc650dSSadaf Ebrahimi   return PCRE2_ERROR_BADMODE;
3437*22dc650dSSadaf Ebrahimi 
3438*22dc650dSSadaf Ebrahimi /* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
3439*22dc650dSSadaf Ebrahimi options variable for this function. Users of PCRE2 who are not calling the
3440*22dc650dSSadaf Ebrahimi function directly would like to have a way of setting these flags, in the same
3441*22dc650dSSadaf Ebrahimi way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with
3442*22dc650dSSadaf Ebrahimi constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and
3443*22dc650dSSadaf Ebrahimi (*NOTEMPTY_ATSTART) set bits in the pattern's "flags" field which can now be
3444*22dc650dSSadaf Ebrahimi transferred to the options for this function. The bits are guaranteed to be
3445*22dc650dSSadaf Ebrahimi adjacent, but do not have the same values. This bit of Boolean trickery assumes
3446*22dc650dSSadaf Ebrahimi that the match-time bits are not more significant than the flag bits. If by
3447*22dc650dSSadaf Ebrahimi accident this is not the case, a compile-time division by zero error will
3448*22dc650dSSadaf Ebrahimi occur. */
3449*22dc650dSSadaf Ebrahimi 
3450*22dc650dSSadaf Ebrahimi #define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
3451*22dc650dSSadaf Ebrahimi #define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
3452*22dc650dSSadaf Ebrahimi options |= (re->flags & FF) / ((FF & (~FF+1)) / (OO & (~OO+1)));
3453*22dc650dSSadaf Ebrahimi #undef FF
3454*22dc650dSSadaf Ebrahimi #undef OO
3455*22dc650dSSadaf Ebrahimi 
3456*22dc650dSSadaf Ebrahimi /* If restarting after a partial match, do some sanity checks on the contents
3457*22dc650dSSadaf Ebrahimi of the workspace. */
3458*22dc650dSSadaf Ebrahimi 
3459*22dc650dSSadaf Ebrahimi if ((options & PCRE2_DFA_RESTART) != 0)
3460*22dc650dSSadaf Ebrahimi   {
3461*22dc650dSSadaf Ebrahimi   if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||
3462*22dc650dSSadaf Ebrahimi     workspace[1] > (int)((wscount - 2)/INTS_PER_STATEBLOCK))
3463*22dc650dSSadaf Ebrahimi       return PCRE2_ERROR_DFA_BADRESTART;
3464*22dc650dSSadaf Ebrahimi   }
3465*22dc650dSSadaf Ebrahimi 
3466*22dc650dSSadaf Ebrahimi /* Set some local values */
3467*22dc650dSSadaf Ebrahimi 
3468*22dc650dSSadaf Ebrahimi utf = (re->overall_options & PCRE2_UTF) != 0;
3469*22dc650dSSadaf Ebrahimi start_match = subject + start_offset;
3470*22dc650dSSadaf Ebrahimi end_subject = subject + length;
3471*22dc650dSSadaf Ebrahimi req_cu_ptr = start_match - 1;
3472*22dc650dSSadaf Ebrahimi anchored = (options & (PCRE2_ANCHORED|PCRE2_DFA_RESTART)) != 0 ||
3473*22dc650dSSadaf Ebrahimi   (re->overall_options & PCRE2_ANCHORED) != 0;
3474*22dc650dSSadaf Ebrahimi 
3475*22dc650dSSadaf Ebrahimi /* The "must be at the start of a line" flags are used in a loop when finding
3476*22dc650dSSadaf Ebrahimi where to start. */
3477*22dc650dSSadaf Ebrahimi 
3478*22dc650dSSadaf Ebrahimi startline = (re->flags & PCRE2_STARTLINE) != 0;
3479*22dc650dSSadaf Ebrahimi firstline = !anchored && (re->overall_options & PCRE2_FIRSTLINE) != 0;
3480*22dc650dSSadaf Ebrahimi bumpalong_limit = end_subject;
3481*22dc650dSSadaf Ebrahimi 
3482*22dc650dSSadaf Ebrahimi /* Initialize and set up the fixed fields in the callout block, with a pointer
3483*22dc650dSSadaf Ebrahimi in the match block. */
3484*22dc650dSSadaf Ebrahimi 
3485*22dc650dSSadaf Ebrahimi mb->cb = &cb;
3486*22dc650dSSadaf Ebrahimi cb.version = 2;
3487*22dc650dSSadaf Ebrahimi cb.subject = subject;
3488*22dc650dSSadaf Ebrahimi cb.subject_length = (PCRE2_SIZE)(end_subject - subject);
3489*22dc650dSSadaf Ebrahimi cb.callout_flags = 0;
3490*22dc650dSSadaf Ebrahimi cb.capture_top      = 1;      /* No capture support */
3491*22dc650dSSadaf Ebrahimi cb.capture_last     = 0;
3492*22dc650dSSadaf Ebrahimi cb.mark             = NULL;   /* No (*MARK) support */
3493*22dc650dSSadaf Ebrahimi 
3494*22dc650dSSadaf Ebrahimi /* Get data from the match context, if present, and fill in the remaining
3495*22dc650dSSadaf Ebrahimi fields in the match block. It is an error to set an offset limit without
3496*22dc650dSSadaf Ebrahimi setting the flag at compile time. */
3497*22dc650dSSadaf Ebrahimi 
3498*22dc650dSSadaf Ebrahimi if (mcontext == NULL)
3499*22dc650dSSadaf Ebrahimi   {
3500*22dc650dSSadaf Ebrahimi   mb->callout = NULL;
3501*22dc650dSSadaf Ebrahimi   mb->memctl = re->memctl;
3502*22dc650dSSadaf Ebrahimi   mb->match_limit = PRIV(default_match_context).match_limit;
3503*22dc650dSSadaf Ebrahimi   mb->match_limit_depth = PRIV(default_match_context).depth_limit;
3504*22dc650dSSadaf Ebrahimi   mb->heap_limit = PRIV(default_match_context).heap_limit;
3505*22dc650dSSadaf Ebrahimi   }
3506*22dc650dSSadaf Ebrahimi else
3507*22dc650dSSadaf Ebrahimi   {
3508*22dc650dSSadaf Ebrahimi   if (mcontext->offset_limit != PCRE2_UNSET)
3509*22dc650dSSadaf Ebrahimi     {
3510*22dc650dSSadaf Ebrahimi     if ((re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0)
3511*22dc650dSSadaf Ebrahimi       return PCRE2_ERROR_BADOFFSETLIMIT;
3512*22dc650dSSadaf Ebrahimi     bumpalong_limit = subject + mcontext->offset_limit;
3513*22dc650dSSadaf Ebrahimi     }
3514*22dc650dSSadaf Ebrahimi   mb->callout = mcontext->callout;
3515*22dc650dSSadaf Ebrahimi   mb->callout_data = mcontext->callout_data;
3516*22dc650dSSadaf Ebrahimi   mb->memctl = mcontext->memctl;
3517*22dc650dSSadaf Ebrahimi   mb->match_limit = mcontext->match_limit;
3518*22dc650dSSadaf Ebrahimi   mb->match_limit_depth = mcontext->depth_limit;
3519*22dc650dSSadaf Ebrahimi   mb->heap_limit = mcontext->heap_limit;
3520*22dc650dSSadaf Ebrahimi   }
3521*22dc650dSSadaf Ebrahimi 
3522*22dc650dSSadaf Ebrahimi if (mb->match_limit > re->limit_match)
3523*22dc650dSSadaf Ebrahimi   mb->match_limit = re->limit_match;
3524*22dc650dSSadaf Ebrahimi 
3525*22dc650dSSadaf Ebrahimi if (mb->match_limit_depth > re->limit_depth)
3526*22dc650dSSadaf Ebrahimi   mb->match_limit_depth = re->limit_depth;
3527*22dc650dSSadaf Ebrahimi 
3528*22dc650dSSadaf Ebrahimi if (mb->heap_limit > re->limit_heap)
3529*22dc650dSSadaf Ebrahimi   mb->heap_limit = re->limit_heap;
3530*22dc650dSSadaf Ebrahimi 
3531*22dc650dSSadaf Ebrahimi mb->start_code = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) +
3532*22dc650dSSadaf Ebrahimi   re->name_count * re->name_entry_size;
3533*22dc650dSSadaf Ebrahimi mb->tables = re->tables;
3534*22dc650dSSadaf Ebrahimi mb->start_subject = subject;
3535*22dc650dSSadaf Ebrahimi mb->end_subject = end_subject;
3536*22dc650dSSadaf Ebrahimi mb->start_offset = start_offset;
3537*22dc650dSSadaf Ebrahimi mb->allowemptypartial = (re->max_lookbehind > 0) ||
3538*22dc650dSSadaf Ebrahimi   (re->flags & PCRE2_MATCH_EMPTY) != 0;
3539*22dc650dSSadaf Ebrahimi mb->moptions = options;
3540*22dc650dSSadaf Ebrahimi mb->poptions = re->overall_options;
3541*22dc650dSSadaf Ebrahimi mb->match_call_count = 0;
3542*22dc650dSSadaf Ebrahimi mb->heap_used = 0;
3543*22dc650dSSadaf Ebrahimi 
3544*22dc650dSSadaf Ebrahimi /* Process the \R and newline settings. */
3545*22dc650dSSadaf Ebrahimi 
3546*22dc650dSSadaf Ebrahimi mb->bsr_convention = re->bsr_convention;
3547*22dc650dSSadaf Ebrahimi mb->nltype = NLTYPE_FIXED;
3548*22dc650dSSadaf Ebrahimi switch(re->newline_convention)
3549*22dc650dSSadaf Ebrahimi   {
3550*22dc650dSSadaf Ebrahimi   case PCRE2_NEWLINE_CR:
3551*22dc650dSSadaf Ebrahimi   mb->nllen = 1;
3552*22dc650dSSadaf Ebrahimi   mb->nl[0] = CHAR_CR;
3553*22dc650dSSadaf Ebrahimi   break;
3554*22dc650dSSadaf Ebrahimi 
3555*22dc650dSSadaf Ebrahimi   case PCRE2_NEWLINE_LF:
3556*22dc650dSSadaf Ebrahimi   mb->nllen = 1;
3557*22dc650dSSadaf Ebrahimi   mb->nl[0] = CHAR_NL;
3558*22dc650dSSadaf Ebrahimi   break;
3559*22dc650dSSadaf Ebrahimi 
3560*22dc650dSSadaf Ebrahimi   case PCRE2_NEWLINE_NUL:
3561*22dc650dSSadaf Ebrahimi   mb->nllen = 1;
3562*22dc650dSSadaf Ebrahimi   mb->nl[0] = CHAR_NUL;
3563*22dc650dSSadaf Ebrahimi   break;
3564*22dc650dSSadaf Ebrahimi 
3565*22dc650dSSadaf Ebrahimi   case PCRE2_NEWLINE_CRLF:
3566*22dc650dSSadaf Ebrahimi   mb->nllen = 2;
3567*22dc650dSSadaf Ebrahimi   mb->nl[0] = CHAR_CR;
3568*22dc650dSSadaf Ebrahimi   mb->nl[1] = CHAR_NL;
3569*22dc650dSSadaf Ebrahimi   break;
3570*22dc650dSSadaf Ebrahimi 
3571*22dc650dSSadaf Ebrahimi   case PCRE2_NEWLINE_ANY:
3572*22dc650dSSadaf Ebrahimi   mb->nltype = NLTYPE_ANY;
3573*22dc650dSSadaf Ebrahimi   break;
3574*22dc650dSSadaf Ebrahimi 
3575*22dc650dSSadaf Ebrahimi   case PCRE2_NEWLINE_ANYCRLF:
3576*22dc650dSSadaf Ebrahimi   mb->nltype = NLTYPE_ANYCRLF;
3577*22dc650dSSadaf Ebrahimi   break;
3578*22dc650dSSadaf Ebrahimi 
3579*22dc650dSSadaf Ebrahimi   default: return PCRE2_ERROR_INTERNAL;
3580*22dc650dSSadaf Ebrahimi   }
3581*22dc650dSSadaf Ebrahimi 
3582*22dc650dSSadaf Ebrahimi /* Check a UTF string for validity if required. For 8-bit and 16-bit strings,
3583*22dc650dSSadaf Ebrahimi we must also check that a starting offset does not point into the middle of a
3584*22dc650dSSadaf Ebrahimi multiunit character. We check only the portion of the subject that is going to
3585*22dc650dSSadaf Ebrahimi be inspected during matching - from the offset minus the maximum lookbehind
3586*22dc650dSSadaf Ebrahimi to the given length. This saves time when a small part of a large subject is
3587*22dc650dSSadaf Ebrahimi being matched by the use of a starting offset. Note that the maximum lookbehind
3588*22dc650dSSadaf Ebrahimi is a number of characters, not code units. */
3589*22dc650dSSadaf Ebrahimi 
3590*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
3591*22dc650dSSadaf Ebrahimi if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
3592*22dc650dSSadaf Ebrahimi   {
3593*22dc650dSSadaf Ebrahimi   PCRE2_SPTR check_subject = start_match;  /* start_match includes offset */
3594*22dc650dSSadaf Ebrahimi 
3595*22dc650dSSadaf Ebrahimi   if (start_offset > 0)
3596*22dc650dSSadaf Ebrahimi     {
3597*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH != 32
3598*22dc650dSSadaf Ebrahimi     unsigned int i;
3599*22dc650dSSadaf Ebrahimi     if (start_match < end_subject && NOT_FIRSTCU(*start_match))
3600*22dc650dSSadaf Ebrahimi       return PCRE2_ERROR_BADUTFOFFSET;
3601*22dc650dSSadaf Ebrahimi     for (i = re->max_lookbehind; i > 0 && check_subject > subject; i--)
3602*22dc650dSSadaf Ebrahimi       {
3603*22dc650dSSadaf Ebrahimi       check_subject--;
3604*22dc650dSSadaf Ebrahimi       while (check_subject > subject &&
3605*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8
3606*22dc650dSSadaf Ebrahimi       (*check_subject & 0xc0) == 0x80)
3607*22dc650dSSadaf Ebrahimi #else  /* 16-bit */
3608*22dc650dSSadaf Ebrahimi       (*check_subject & 0xfc00) == 0xdc00)
3609*22dc650dSSadaf Ebrahimi #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
3610*22dc650dSSadaf Ebrahimi         check_subject--;
3611*22dc650dSSadaf Ebrahimi       }
3612*22dc650dSSadaf Ebrahimi #else   /* In the 32-bit library, one code unit equals one character. */
3613*22dc650dSSadaf Ebrahimi     check_subject -= re->max_lookbehind;
3614*22dc650dSSadaf Ebrahimi     if (check_subject < subject) check_subject = subject;
3615*22dc650dSSadaf Ebrahimi #endif  /* PCRE2_CODE_UNIT_WIDTH != 32 */
3616*22dc650dSSadaf Ebrahimi     }
3617*22dc650dSSadaf Ebrahimi 
3618*22dc650dSSadaf Ebrahimi   /* Validate the relevant portion of the subject. After an error, adjust the
3619*22dc650dSSadaf Ebrahimi   offset to be an absolute offset in the whole string. */
3620*22dc650dSSadaf Ebrahimi 
3621*22dc650dSSadaf Ebrahimi   match_data->rc = PRIV(valid_utf)(check_subject,
3622*22dc650dSSadaf Ebrahimi     length - (PCRE2_SIZE)(check_subject - subject), &(match_data->startchar));
3623*22dc650dSSadaf Ebrahimi   if (match_data->rc != 0)
3624*22dc650dSSadaf Ebrahimi     {
3625*22dc650dSSadaf Ebrahimi     match_data->startchar += (PCRE2_SIZE)(check_subject - subject);
3626*22dc650dSSadaf Ebrahimi     return match_data->rc;
3627*22dc650dSSadaf Ebrahimi     }
3628*22dc650dSSadaf Ebrahimi   }
3629*22dc650dSSadaf Ebrahimi #endif  /* SUPPORT_UNICODE */
3630*22dc650dSSadaf Ebrahimi 
3631*22dc650dSSadaf Ebrahimi /* Set up the first code unit to match, if available. If there's no first code
3632*22dc650dSSadaf Ebrahimi unit there may be a bitmap of possible first characters. */
3633*22dc650dSSadaf Ebrahimi 
3634*22dc650dSSadaf Ebrahimi if ((re->flags & PCRE2_FIRSTSET) != 0)
3635*22dc650dSSadaf Ebrahimi   {
3636*22dc650dSSadaf Ebrahimi   has_first_cu = TRUE;
3637*22dc650dSSadaf Ebrahimi   first_cu = first_cu2 = (PCRE2_UCHAR)(re->first_codeunit);
3638*22dc650dSSadaf Ebrahimi   if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
3639*22dc650dSSadaf Ebrahimi     {
3640*22dc650dSSadaf Ebrahimi     first_cu2 = TABLE_GET(first_cu, mb->tables + fcc_offset, first_cu);
3641*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
3642*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8
3643*22dc650dSSadaf Ebrahimi     if (first_cu > 127 && !utf && (re->overall_options & PCRE2_UCP) != 0)
3644*22dc650dSSadaf Ebrahimi       first_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(first_cu);
3645*22dc650dSSadaf Ebrahimi #else
3646*22dc650dSSadaf Ebrahimi     if (first_cu > 127 && (utf || (re->overall_options & PCRE2_UCP) != 0))
3647*22dc650dSSadaf Ebrahimi       first_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(first_cu);
3648*22dc650dSSadaf Ebrahimi #endif
3649*22dc650dSSadaf Ebrahimi #endif  /* SUPPORT_UNICODE */
3650*22dc650dSSadaf Ebrahimi     }
3651*22dc650dSSadaf Ebrahimi   }
3652*22dc650dSSadaf Ebrahimi else
3653*22dc650dSSadaf Ebrahimi   if (!startline && (re->flags & PCRE2_FIRSTMAPSET) != 0)
3654*22dc650dSSadaf Ebrahimi     start_bits = re->start_bitmap;
3655*22dc650dSSadaf Ebrahimi 
3656*22dc650dSSadaf Ebrahimi /* There may be a "last known required code unit" set. */
3657*22dc650dSSadaf Ebrahimi 
3658*22dc650dSSadaf Ebrahimi if ((re->flags & PCRE2_LASTSET) != 0)
3659*22dc650dSSadaf Ebrahimi   {
3660*22dc650dSSadaf Ebrahimi   has_req_cu = TRUE;
3661*22dc650dSSadaf Ebrahimi   req_cu = req_cu2 = (PCRE2_UCHAR)(re->last_codeunit);
3662*22dc650dSSadaf Ebrahimi   if ((re->flags & PCRE2_LASTCASELESS) != 0)
3663*22dc650dSSadaf Ebrahimi     {
3664*22dc650dSSadaf Ebrahimi     req_cu2 = TABLE_GET(req_cu, mb->tables + fcc_offset, req_cu);
3665*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
3666*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8
3667*22dc650dSSadaf Ebrahimi     if (req_cu > 127 && !utf && (re->overall_options & PCRE2_UCP) != 0)
3668*22dc650dSSadaf Ebrahimi       req_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(req_cu);
3669*22dc650dSSadaf Ebrahimi #else
3670*22dc650dSSadaf Ebrahimi     if (req_cu > 127 && (utf || (re->overall_options & PCRE2_UCP) != 0))
3671*22dc650dSSadaf Ebrahimi       req_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(req_cu);
3672*22dc650dSSadaf Ebrahimi #endif
3673*22dc650dSSadaf Ebrahimi #endif  /* SUPPORT_UNICODE */
3674*22dc650dSSadaf Ebrahimi     }
3675*22dc650dSSadaf Ebrahimi   }
3676*22dc650dSSadaf Ebrahimi 
3677*22dc650dSSadaf Ebrahimi /* If the match data block was previously used with PCRE2_COPY_MATCHED_SUBJECT,
3678*22dc650dSSadaf Ebrahimi free the memory that was obtained. */
3679*22dc650dSSadaf Ebrahimi 
3680*22dc650dSSadaf Ebrahimi if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
3681*22dc650dSSadaf Ebrahimi   {
3682*22dc650dSSadaf Ebrahimi   match_data->memctl.free((void *)match_data->subject,
3683*22dc650dSSadaf Ebrahimi     match_data->memctl.memory_data);
3684*22dc650dSSadaf Ebrahimi   match_data->flags &= ~PCRE2_MD_COPIED_SUBJECT;
3685*22dc650dSSadaf Ebrahimi   }
3686*22dc650dSSadaf Ebrahimi 
3687*22dc650dSSadaf Ebrahimi /* Fill in fields that are always returned in the match data. */
3688*22dc650dSSadaf Ebrahimi 
3689*22dc650dSSadaf Ebrahimi match_data->code = re;
3690*22dc650dSSadaf Ebrahimi match_data->subject = NULL;  /* Default for no match */
3691*22dc650dSSadaf Ebrahimi match_data->mark = NULL;
3692*22dc650dSSadaf Ebrahimi match_data->matchedby = PCRE2_MATCHEDBY_DFA_INTERPRETER;
3693*22dc650dSSadaf Ebrahimi 
3694*22dc650dSSadaf Ebrahimi /* Call the main matching function, looping for a non-anchored regex after a
3695*22dc650dSSadaf Ebrahimi failed match. If not restarting, perform certain optimizations at the start of
3696*22dc650dSSadaf Ebrahimi a match. */
3697*22dc650dSSadaf Ebrahimi 
3698*22dc650dSSadaf Ebrahimi for (;;)
3699*22dc650dSSadaf Ebrahimi   {
3700*22dc650dSSadaf Ebrahimi   /* ----------------- Start of match optimizations ---------------- */
3701*22dc650dSSadaf Ebrahimi 
3702*22dc650dSSadaf Ebrahimi   /* There are some optimizations that avoid running the match if a known
3703*22dc650dSSadaf Ebrahimi   starting point is not found, or if a known later code unit is not present.
3704*22dc650dSSadaf Ebrahimi   However, there is an option (settable at compile time) that disables
3705*22dc650dSSadaf Ebrahimi   these, for testing and for ensuring that all callouts do actually occur.
3706*22dc650dSSadaf Ebrahimi   The optimizations must also be avoided when restarting a DFA match. */
3707*22dc650dSSadaf Ebrahimi 
3708*22dc650dSSadaf Ebrahimi   if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 &&
3709*22dc650dSSadaf Ebrahimi       (options & PCRE2_DFA_RESTART) == 0)
3710*22dc650dSSadaf Ebrahimi     {
3711*22dc650dSSadaf Ebrahimi     /* If firstline is TRUE, the start of the match is constrained to the first
3712*22dc650dSSadaf Ebrahimi     line of a multiline string. That is, the match must be before or at the
3713*22dc650dSSadaf Ebrahimi     first newline following the start of matching. Temporarily adjust
3714*22dc650dSSadaf Ebrahimi     end_subject so that we stop the optimization scans for a first code unit
3715*22dc650dSSadaf Ebrahimi     immediately after the first character of a newline (the first code unit can
3716*22dc650dSSadaf Ebrahimi     legitimately be a newline). If the match fails at the newline, later code
3717*22dc650dSSadaf Ebrahimi     breaks this loop. */
3718*22dc650dSSadaf Ebrahimi 
3719*22dc650dSSadaf Ebrahimi     if (firstline)
3720*22dc650dSSadaf Ebrahimi       {
3721*22dc650dSSadaf Ebrahimi       PCRE2_SPTR t = start_match;
3722*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
3723*22dc650dSSadaf Ebrahimi       if (utf)
3724*22dc650dSSadaf Ebrahimi         {
3725*22dc650dSSadaf Ebrahimi         while (t < end_subject && !IS_NEWLINE(t))
3726*22dc650dSSadaf Ebrahimi           {
3727*22dc650dSSadaf Ebrahimi           t++;
3728*22dc650dSSadaf Ebrahimi           ACROSSCHAR(t < end_subject, t, t++);
3729*22dc650dSSadaf Ebrahimi           }
3730*22dc650dSSadaf Ebrahimi         }
3731*22dc650dSSadaf Ebrahimi       else
3732*22dc650dSSadaf Ebrahimi #endif
3733*22dc650dSSadaf Ebrahimi       while (t < end_subject && !IS_NEWLINE(t)) t++;
3734*22dc650dSSadaf Ebrahimi       end_subject = t;
3735*22dc650dSSadaf Ebrahimi       }
3736*22dc650dSSadaf Ebrahimi 
3737*22dc650dSSadaf Ebrahimi     /* Anchored: check the first code unit if one is recorded. This may seem
3738*22dc650dSSadaf Ebrahimi     pointless but it can help in detecting a no match case without scanning for
3739*22dc650dSSadaf Ebrahimi     the required code unit. */
3740*22dc650dSSadaf Ebrahimi 
3741*22dc650dSSadaf Ebrahimi     if (anchored)
3742*22dc650dSSadaf Ebrahimi       {
3743*22dc650dSSadaf Ebrahimi       if (has_first_cu || start_bits != NULL)
3744*22dc650dSSadaf Ebrahimi         {
3745*22dc650dSSadaf Ebrahimi         BOOL ok = start_match < end_subject;
3746*22dc650dSSadaf Ebrahimi         if (ok)
3747*22dc650dSSadaf Ebrahimi           {
3748*22dc650dSSadaf Ebrahimi           PCRE2_UCHAR c = UCHAR21TEST(start_match);
3749*22dc650dSSadaf Ebrahimi           ok = has_first_cu && (c == first_cu || c == first_cu2);
3750*22dc650dSSadaf Ebrahimi           if (!ok && start_bits != NULL)
3751*22dc650dSSadaf Ebrahimi             {
3752*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH != 8
3753*22dc650dSSadaf Ebrahimi             if (c > 255) c = 255;
3754*22dc650dSSadaf Ebrahimi #endif
3755*22dc650dSSadaf Ebrahimi             ok = (start_bits[c/8] & (1u << (c&7))) != 0;
3756*22dc650dSSadaf Ebrahimi             }
3757*22dc650dSSadaf Ebrahimi           }
3758*22dc650dSSadaf Ebrahimi         if (!ok) break;
3759*22dc650dSSadaf Ebrahimi         }
3760*22dc650dSSadaf Ebrahimi       }
3761*22dc650dSSadaf Ebrahimi 
3762*22dc650dSSadaf Ebrahimi     /* Not anchored. Advance to a unique first code unit if there is one. */
3763*22dc650dSSadaf Ebrahimi 
3764*22dc650dSSadaf Ebrahimi     else
3765*22dc650dSSadaf Ebrahimi       {
3766*22dc650dSSadaf Ebrahimi       if (has_first_cu)
3767*22dc650dSSadaf Ebrahimi         {
3768*22dc650dSSadaf Ebrahimi         if (first_cu != first_cu2)  /* Caseless */
3769*22dc650dSSadaf Ebrahimi           {
3770*22dc650dSSadaf Ebrahimi           /* In 16-bit and 32-bit modes we have to do our own search, so can
3771*22dc650dSSadaf Ebrahimi           look for both cases at once. */
3772*22dc650dSSadaf Ebrahimi 
3773*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH != 8
3774*22dc650dSSadaf Ebrahimi           PCRE2_UCHAR smc;
3775*22dc650dSSadaf Ebrahimi           while (start_match < end_subject &&
3776*22dc650dSSadaf Ebrahimi                 (smc = UCHAR21TEST(start_match)) != first_cu &&
3777*22dc650dSSadaf Ebrahimi                  smc != first_cu2)
3778*22dc650dSSadaf Ebrahimi             start_match++;
3779*22dc650dSSadaf Ebrahimi #else
3780*22dc650dSSadaf Ebrahimi           /* In 8-bit mode, the use of memchr() gives a big speed up, even
3781*22dc650dSSadaf Ebrahimi           though we have to call it twice in order to find the earliest
3782*22dc650dSSadaf Ebrahimi           occurrence of the code unit in either of its cases. Caching is used
3783*22dc650dSSadaf Ebrahimi           to remember the positions of previously found code units. This can
3784*22dc650dSSadaf Ebrahimi           make a huge difference when the strings are very long and only one
3785*22dc650dSSadaf Ebrahimi           case is actually present. */
3786*22dc650dSSadaf Ebrahimi 
3787*22dc650dSSadaf Ebrahimi           PCRE2_SPTR pp1 = NULL;
3788*22dc650dSSadaf Ebrahimi           PCRE2_SPTR pp2 = NULL;
3789*22dc650dSSadaf Ebrahimi           PCRE2_SIZE searchlength = end_subject - start_match;
3790*22dc650dSSadaf Ebrahimi 
3791*22dc650dSSadaf Ebrahimi           /* If we haven't got a previously found position for first_cu, or if
3792*22dc650dSSadaf Ebrahimi           the current starting position is later, we need to do a search. If
3793*22dc650dSSadaf Ebrahimi           the code unit is not found, set it to the end. */
3794*22dc650dSSadaf Ebrahimi 
3795*22dc650dSSadaf Ebrahimi           if (memchr_found_first_cu == NULL ||
3796*22dc650dSSadaf Ebrahimi               start_match > memchr_found_first_cu)
3797*22dc650dSSadaf Ebrahimi             {
3798*22dc650dSSadaf Ebrahimi             pp1 = memchr(start_match, first_cu, searchlength);
3799*22dc650dSSadaf Ebrahimi             memchr_found_first_cu = (pp1 == NULL)? end_subject : pp1;
3800*22dc650dSSadaf Ebrahimi             }
3801*22dc650dSSadaf Ebrahimi 
3802*22dc650dSSadaf Ebrahimi           /* If the start is before a previously found position, use the
3803*22dc650dSSadaf Ebrahimi           previous position, or NULL if a previous search failed. */
3804*22dc650dSSadaf Ebrahimi 
3805*22dc650dSSadaf Ebrahimi           else pp1 = (memchr_found_first_cu == end_subject)? NULL :
3806*22dc650dSSadaf Ebrahimi             memchr_found_first_cu;
3807*22dc650dSSadaf Ebrahimi 
3808*22dc650dSSadaf Ebrahimi           /* Do the same thing for the other case. */
3809*22dc650dSSadaf Ebrahimi 
3810*22dc650dSSadaf Ebrahimi           if (memchr_found_first_cu2 == NULL ||
3811*22dc650dSSadaf Ebrahimi               start_match > memchr_found_first_cu2)
3812*22dc650dSSadaf Ebrahimi             {
3813*22dc650dSSadaf Ebrahimi             pp2 = memchr(start_match, first_cu2, searchlength);
3814*22dc650dSSadaf Ebrahimi             memchr_found_first_cu2 = (pp2 == NULL)? end_subject : pp2;
3815*22dc650dSSadaf Ebrahimi             }
3816*22dc650dSSadaf Ebrahimi 
3817*22dc650dSSadaf Ebrahimi           else pp2 = (memchr_found_first_cu2 == end_subject)? NULL :
3818*22dc650dSSadaf Ebrahimi             memchr_found_first_cu2;
3819*22dc650dSSadaf Ebrahimi 
3820*22dc650dSSadaf Ebrahimi           /* Set the start to the end of the subject if neither case was found.
3821*22dc650dSSadaf Ebrahimi           Otherwise, use the earlier found point. */
3822*22dc650dSSadaf Ebrahimi 
3823*22dc650dSSadaf Ebrahimi           if (pp1 == NULL)
3824*22dc650dSSadaf Ebrahimi             start_match = (pp2 == NULL)? end_subject : pp2;
3825*22dc650dSSadaf Ebrahimi           else
3826*22dc650dSSadaf Ebrahimi             start_match = (pp2 == NULL || pp1 < pp2)? pp1 : pp2;
3827*22dc650dSSadaf Ebrahimi 
3828*22dc650dSSadaf Ebrahimi #endif  /* 8-bit handling */
3829*22dc650dSSadaf Ebrahimi           }
3830*22dc650dSSadaf Ebrahimi 
3831*22dc650dSSadaf Ebrahimi         /* The caseful case is much simpler. */
3832*22dc650dSSadaf Ebrahimi 
3833*22dc650dSSadaf Ebrahimi         else
3834*22dc650dSSadaf Ebrahimi           {
3835*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH != 8
3836*22dc650dSSadaf Ebrahimi           while (start_match < end_subject && UCHAR21TEST(start_match) !=
3837*22dc650dSSadaf Ebrahimi                  first_cu)
3838*22dc650dSSadaf Ebrahimi             start_match++;
3839*22dc650dSSadaf Ebrahimi #else  /* 8-bit code units */
3840*22dc650dSSadaf Ebrahimi           start_match = memchr(start_match, first_cu, end_subject - start_match);
3841*22dc650dSSadaf Ebrahimi           if (start_match == NULL) start_match = end_subject;
3842*22dc650dSSadaf Ebrahimi #endif
3843*22dc650dSSadaf Ebrahimi           }
3844*22dc650dSSadaf Ebrahimi 
3845*22dc650dSSadaf Ebrahimi         /* If we can't find the first code unit, having reached the true end
3846*22dc650dSSadaf Ebrahimi         of the subject, break the bumpalong loop, to force a match failure,
3847*22dc650dSSadaf Ebrahimi         except when doing partial matching, when we let the next cycle run at
3848*22dc650dSSadaf Ebrahimi         the end of the subject. To see why, consider the pattern /(?<=abc)def/,
3849*22dc650dSSadaf Ebrahimi         which partially matches "abc", even though the string does not contain
3850*22dc650dSSadaf Ebrahimi         the starting character "d". If we have not reached the true end of the
3851*22dc650dSSadaf Ebrahimi         subject (PCRE2_FIRSTLINE caused end_subject to be temporarily modified)
3852*22dc650dSSadaf Ebrahimi         we also let the cycle run, because the matching string is legitimately
3853*22dc650dSSadaf Ebrahimi         allowed to start with the first code unit of a newline. */
3854*22dc650dSSadaf Ebrahimi 
3855*22dc650dSSadaf Ebrahimi         if ((mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) == 0 &&
3856*22dc650dSSadaf Ebrahimi             start_match >= mb->end_subject)
3857*22dc650dSSadaf Ebrahimi           break;
3858*22dc650dSSadaf Ebrahimi         }
3859*22dc650dSSadaf Ebrahimi 
3860*22dc650dSSadaf Ebrahimi       /* If there's no first code unit, advance to just after a linebreak for a
3861*22dc650dSSadaf Ebrahimi       multiline match if required. */
3862*22dc650dSSadaf Ebrahimi 
3863*22dc650dSSadaf Ebrahimi       else if (startline)
3864*22dc650dSSadaf Ebrahimi         {
3865*22dc650dSSadaf Ebrahimi         if (start_match > mb->start_subject + start_offset)
3866*22dc650dSSadaf Ebrahimi           {
3867*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
3868*22dc650dSSadaf Ebrahimi           if (utf)
3869*22dc650dSSadaf Ebrahimi             {
3870*22dc650dSSadaf Ebrahimi             while (start_match < end_subject && !WAS_NEWLINE(start_match))
3871*22dc650dSSadaf Ebrahimi               {
3872*22dc650dSSadaf Ebrahimi               start_match++;
3873*22dc650dSSadaf Ebrahimi               ACROSSCHAR(start_match < end_subject, start_match, start_match++);
3874*22dc650dSSadaf Ebrahimi               }
3875*22dc650dSSadaf Ebrahimi             }
3876*22dc650dSSadaf Ebrahimi           else
3877*22dc650dSSadaf Ebrahimi #endif
3878*22dc650dSSadaf Ebrahimi           while (start_match < end_subject && !WAS_NEWLINE(start_match))
3879*22dc650dSSadaf Ebrahimi             start_match++;
3880*22dc650dSSadaf Ebrahimi 
3881*22dc650dSSadaf Ebrahimi           /* If we have just passed a CR and the newline option is ANY or
3882*22dc650dSSadaf Ebrahimi           ANYCRLF, and we are now at a LF, advance the match position by one
3883*22dc650dSSadaf Ebrahimi           more code unit. */
3884*22dc650dSSadaf Ebrahimi 
3885*22dc650dSSadaf Ebrahimi           if (start_match[-1] == CHAR_CR &&
3886*22dc650dSSadaf Ebrahimi                (mb->nltype == NLTYPE_ANY || mb->nltype == NLTYPE_ANYCRLF) &&
3887*22dc650dSSadaf Ebrahimi                start_match < end_subject &&
3888*22dc650dSSadaf Ebrahimi                UCHAR21TEST(start_match) == CHAR_NL)
3889*22dc650dSSadaf Ebrahimi             start_match++;
3890*22dc650dSSadaf Ebrahimi           }
3891*22dc650dSSadaf Ebrahimi         }
3892*22dc650dSSadaf Ebrahimi 
3893*22dc650dSSadaf Ebrahimi       /* If there's no first code unit or a requirement for a multiline line
3894*22dc650dSSadaf Ebrahimi       start, advance to a non-unique first code unit if any have been
3895*22dc650dSSadaf Ebrahimi       identified. The bitmap contains only 256 bits. When code units are 16 or
3896*22dc650dSSadaf Ebrahimi       32 bits wide, all code units greater than 254 set the 255 bit. */
3897*22dc650dSSadaf Ebrahimi 
3898*22dc650dSSadaf Ebrahimi       else if (start_bits != NULL)
3899*22dc650dSSadaf Ebrahimi         {
3900*22dc650dSSadaf Ebrahimi         while (start_match < end_subject)
3901*22dc650dSSadaf Ebrahimi           {
3902*22dc650dSSadaf Ebrahimi           uint32_t c = UCHAR21TEST(start_match);
3903*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH != 8
3904*22dc650dSSadaf Ebrahimi           if (c > 255) c = 255;
3905*22dc650dSSadaf Ebrahimi #endif
3906*22dc650dSSadaf Ebrahimi           if ((start_bits[c/8] & (1u << (c&7))) != 0) break;
3907*22dc650dSSadaf Ebrahimi           start_match++;
3908*22dc650dSSadaf Ebrahimi           }
3909*22dc650dSSadaf Ebrahimi 
3910*22dc650dSSadaf Ebrahimi         /* See comment above in first_cu checking about the next line. */
3911*22dc650dSSadaf Ebrahimi 
3912*22dc650dSSadaf Ebrahimi         if ((mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) == 0 &&
3913*22dc650dSSadaf Ebrahimi             start_match >= mb->end_subject)
3914*22dc650dSSadaf Ebrahimi           break;
3915*22dc650dSSadaf Ebrahimi         }
3916*22dc650dSSadaf Ebrahimi       }  /* End of first code unit handling */
3917*22dc650dSSadaf Ebrahimi 
3918*22dc650dSSadaf Ebrahimi     /* Restore fudged end_subject */
3919*22dc650dSSadaf Ebrahimi 
3920*22dc650dSSadaf Ebrahimi     end_subject = mb->end_subject;
3921*22dc650dSSadaf Ebrahimi 
3922*22dc650dSSadaf Ebrahimi     /* The following two optimizations are disabled for partial matching. */
3923*22dc650dSSadaf Ebrahimi 
3924*22dc650dSSadaf Ebrahimi     if ((mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) == 0)
3925*22dc650dSSadaf Ebrahimi       {
3926*22dc650dSSadaf Ebrahimi       PCRE2_SPTR p;
3927*22dc650dSSadaf Ebrahimi 
3928*22dc650dSSadaf Ebrahimi       /* The minimum matching length is a lower bound; it is possible that no
3929*22dc650dSSadaf Ebrahimi       string of that length actually matches the pattern. Although the value
3930*22dc650dSSadaf Ebrahimi       is, strictly, in characters, we treat it as code units to avoid spending
3931*22dc650dSSadaf Ebrahimi       too much time in this optimization. */
3932*22dc650dSSadaf Ebrahimi 
3933*22dc650dSSadaf Ebrahimi       if (end_subject - start_match < re->minlength) goto NOMATCH_EXIT;
3934*22dc650dSSadaf Ebrahimi 
3935*22dc650dSSadaf Ebrahimi       /* If req_cu is set, we know that that code unit must appear in the
3936*22dc650dSSadaf Ebrahimi       subject for the match to succeed. If the first code unit is set, req_cu
3937*22dc650dSSadaf Ebrahimi       must be later in the subject; otherwise the test starts at the match
3938*22dc650dSSadaf Ebrahimi       point. This optimization can save a huge amount of backtracking in
3939*22dc650dSSadaf Ebrahimi       patterns with nested unlimited repeats that aren't going to match.
3940*22dc650dSSadaf Ebrahimi       Writing separate code for cased/caseless versions makes it go faster, as
3941*22dc650dSSadaf Ebrahimi       does using an autoincrement and backing off on a match. As in the case of
3942*22dc650dSSadaf Ebrahimi       the first code unit, using memchr() in the 8-bit library gives a big
3943*22dc650dSSadaf Ebrahimi       speed up. Unlike the first_cu check above, we do not need to call
3944*22dc650dSSadaf Ebrahimi       memchr() twice in the caseless case because we only need to check for the
3945*22dc650dSSadaf Ebrahimi       presence of the character in either case, not find the first occurrence.
3946*22dc650dSSadaf Ebrahimi 
3947*22dc650dSSadaf Ebrahimi       The search can be skipped if the code unit was found later than the
3948*22dc650dSSadaf Ebrahimi       current starting point in a previous iteration of the bumpalong loop.
3949*22dc650dSSadaf Ebrahimi 
3950*22dc650dSSadaf Ebrahimi       HOWEVER: when the subject string is very, very long, searching to its end
3951*22dc650dSSadaf Ebrahimi       can take a long time, and give bad performance on quite ordinary
3952*22dc650dSSadaf Ebrahimi       patterns. This showed up when somebody was matching something like
3953*22dc650dSSadaf Ebrahimi       /^\d+C/ on a 32-megabyte string... so we don't do this when the string is
3954*22dc650dSSadaf Ebrahimi       sufficiently long, but it's worth searching a lot more for unanchored
3955*22dc650dSSadaf Ebrahimi       patterns. */
3956*22dc650dSSadaf Ebrahimi 
3957*22dc650dSSadaf Ebrahimi       p = start_match + (has_first_cu? 1:0);
3958*22dc650dSSadaf Ebrahimi       if (has_req_cu && p > req_cu_ptr)
3959*22dc650dSSadaf Ebrahimi         {
3960*22dc650dSSadaf Ebrahimi         PCRE2_SIZE check_length = end_subject - start_match;
3961*22dc650dSSadaf Ebrahimi 
3962*22dc650dSSadaf Ebrahimi         if (check_length < REQ_CU_MAX ||
3963*22dc650dSSadaf Ebrahimi               (!anchored && check_length < REQ_CU_MAX * 1000))
3964*22dc650dSSadaf Ebrahimi           {
3965*22dc650dSSadaf Ebrahimi           if (req_cu != req_cu2)  /* Caseless */
3966*22dc650dSSadaf Ebrahimi             {
3967*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH != 8
3968*22dc650dSSadaf Ebrahimi             while (p < end_subject)
3969*22dc650dSSadaf Ebrahimi               {
3970*22dc650dSSadaf Ebrahimi               uint32_t pp = UCHAR21INCTEST(p);
3971*22dc650dSSadaf Ebrahimi               if (pp == req_cu || pp == req_cu2) { p--; break; }
3972*22dc650dSSadaf Ebrahimi               }
3973*22dc650dSSadaf Ebrahimi #else  /* 8-bit code units */
3974*22dc650dSSadaf Ebrahimi             PCRE2_SPTR pp = p;
3975*22dc650dSSadaf Ebrahimi             p = memchr(pp, req_cu, end_subject - pp);
3976*22dc650dSSadaf Ebrahimi             if (p == NULL)
3977*22dc650dSSadaf Ebrahimi               {
3978*22dc650dSSadaf Ebrahimi               p = memchr(pp, req_cu2, end_subject - pp);
3979*22dc650dSSadaf Ebrahimi               if (p == NULL) p = end_subject;
3980*22dc650dSSadaf Ebrahimi               }
3981*22dc650dSSadaf Ebrahimi #endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
3982*22dc650dSSadaf Ebrahimi             }
3983*22dc650dSSadaf Ebrahimi 
3984*22dc650dSSadaf Ebrahimi           /* The caseful case */
3985*22dc650dSSadaf Ebrahimi 
3986*22dc650dSSadaf Ebrahimi           else
3987*22dc650dSSadaf Ebrahimi             {
3988*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH != 8
3989*22dc650dSSadaf Ebrahimi             while (p < end_subject)
3990*22dc650dSSadaf Ebrahimi               {
3991*22dc650dSSadaf Ebrahimi               if (UCHAR21INCTEST(p) == req_cu) { p--; break; }
3992*22dc650dSSadaf Ebrahimi               }
3993*22dc650dSSadaf Ebrahimi 
3994*22dc650dSSadaf Ebrahimi #else  /* 8-bit code units */
3995*22dc650dSSadaf Ebrahimi             p = memchr(p, req_cu, end_subject - p);
3996*22dc650dSSadaf Ebrahimi             if (p == NULL) p = end_subject;
3997*22dc650dSSadaf Ebrahimi #endif
3998*22dc650dSSadaf Ebrahimi             }
3999*22dc650dSSadaf Ebrahimi 
4000*22dc650dSSadaf Ebrahimi           /* If we can't find the required code unit, break the matching loop,
4001*22dc650dSSadaf Ebrahimi           forcing a match failure. */
4002*22dc650dSSadaf Ebrahimi 
4003*22dc650dSSadaf Ebrahimi           if (p >= end_subject) break;
4004*22dc650dSSadaf Ebrahimi 
4005*22dc650dSSadaf Ebrahimi           /* If we have found the required code unit, save the point where we
4006*22dc650dSSadaf Ebrahimi           found it, so that we don't search again next time round the loop if
4007*22dc650dSSadaf Ebrahimi           the start hasn't passed this code unit yet. */
4008*22dc650dSSadaf Ebrahimi 
4009*22dc650dSSadaf Ebrahimi           req_cu_ptr = p;
4010*22dc650dSSadaf Ebrahimi           }
4011*22dc650dSSadaf Ebrahimi         }
4012*22dc650dSSadaf Ebrahimi       }
4013*22dc650dSSadaf Ebrahimi     }
4014*22dc650dSSadaf Ebrahimi 
4015*22dc650dSSadaf Ebrahimi   /* ------------ End of start of match optimizations ------------ */
4016*22dc650dSSadaf Ebrahimi 
4017*22dc650dSSadaf Ebrahimi   /* Give no match if we have passed the bumpalong limit. */
4018*22dc650dSSadaf Ebrahimi 
4019*22dc650dSSadaf Ebrahimi   if (start_match > bumpalong_limit) break;
4020*22dc650dSSadaf Ebrahimi 
4021*22dc650dSSadaf Ebrahimi   /* OK, now we can do the business */
4022*22dc650dSSadaf Ebrahimi 
4023*22dc650dSSadaf Ebrahimi   mb->start_used_ptr = start_match;
4024*22dc650dSSadaf Ebrahimi   mb->last_used_ptr = start_match;
4025*22dc650dSSadaf Ebrahimi   mb->recursive = NULL;
4026*22dc650dSSadaf Ebrahimi 
4027*22dc650dSSadaf Ebrahimi   rc = internal_dfa_match(
4028*22dc650dSSadaf Ebrahimi     mb,                           /* fixed match data */
4029*22dc650dSSadaf Ebrahimi     mb->start_code,               /* this subexpression's code */
4030*22dc650dSSadaf Ebrahimi     start_match,                  /* where we currently are */
4031*22dc650dSSadaf Ebrahimi     start_offset,                 /* start offset in subject */
4032*22dc650dSSadaf Ebrahimi     match_data->ovector,          /* offset vector */
4033*22dc650dSSadaf Ebrahimi     (uint32_t)match_data->oveccount * 2,  /* actual size of same */
4034*22dc650dSSadaf Ebrahimi     workspace,                    /* workspace vector */
4035*22dc650dSSadaf Ebrahimi     (int)wscount,                 /* size of same */
4036*22dc650dSSadaf Ebrahimi     0,                            /* function recurse level */
4037*22dc650dSSadaf Ebrahimi     base_recursion_workspace);    /* initial workspace for recursion */
4038*22dc650dSSadaf Ebrahimi 
4039*22dc650dSSadaf Ebrahimi   /* Anything other than "no match" means we are done, always; otherwise, carry
4040*22dc650dSSadaf Ebrahimi   on only if not anchored. */
4041*22dc650dSSadaf Ebrahimi 
4042*22dc650dSSadaf Ebrahimi   if (rc != PCRE2_ERROR_NOMATCH || anchored)
4043*22dc650dSSadaf Ebrahimi     {
4044*22dc650dSSadaf Ebrahimi     if (rc == PCRE2_ERROR_PARTIAL && match_data->oveccount > 0)
4045*22dc650dSSadaf Ebrahimi       {
4046*22dc650dSSadaf Ebrahimi       match_data->ovector[0] = (PCRE2_SIZE)(start_match - subject);
4047*22dc650dSSadaf Ebrahimi       match_data->ovector[1] = (PCRE2_SIZE)(end_subject - subject);
4048*22dc650dSSadaf Ebrahimi       }
4049*22dc650dSSadaf Ebrahimi     match_data->subject_length = length;
4050*22dc650dSSadaf Ebrahimi     match_data->leftchar = (PCRE2_SIZE)(mb->start_used_ptr - subject);
4051*22dc650dSSadaf Ebrahimi     match_data->rightchar = (PCRE2_SIZE)(mb->last_used_ptr - subject);
4052*22dc650dSSadaf Ebrahimi     match_data->startchar = (PCRE2_SIZE)(start_match - subject);
4053*22dc650dSSadaf Ebrahimi     match_data->rc = rc;
4054*22dc650dSSadaf Ebrahimi 
4055*22dc650dSSadaf Ebrahimi     if (rc >= 0 &&(options & PCRE2_COPY_MATCHED_SUBJECT) != 0)
4056*22dc650dSSadaf Ebrahimi       {
4057*22dc650dSSadaf Ebrahimi       length = CU2BYTES(length + was_zero_terminated);
4058*22dc650dSSadaf Ebrahimi       match_data->subject = match_data->memctl.malloc(length,
4059*22dc650dSSadaf Ebrahimi         match_data->memctl.memory_data);
4060*22dc650dSSadaf Ebrahimi       if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY;
4061*22dc650dSSadaf Ebrahimi       memcpy((void *)match_data->subject, subject, length);
4062*22dc650dSSadaf Ebrahimi       match_data->flags |= PCRE2_MD_COPIED_SUBJECT;
4063*22dc650dSSadaf Ebrahimi       }
4064*22dc650dSSadaf Ebrahimi     else
4065*22dc650dSSadaf Ebrahimi       {
4066*22dc650dSSadaf Ebrahimi       if (rc >= 0 || rc == PCRE2_ERROR_PARTIAL) match_data->subject = subject;
4067*22dc650dSSadaf Ebrahimi       }
4068*22dc650dSSadaf Ebrahimi     goto EXIT;
4069*22dc650dSSadaf Ebrahimi     }
4070*22dc650dSSadaf Ebrahimi 
4071*22dc650dSSadaf Ebrahimi   /* Advance to the next subject character unless we are at the end of a line
4072*22dc650dSSadaf Ebrahimi   and firstline is set. */
4073*22dc650dSSadaf Ebrahimi 
4074*22dc650dSSadaf Ebrahimi   if (firstline && IS_NEWLINE(start_match)) break;
4075*22dc650dSSadaf Ebrahimi   start_match++;
4076*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
4077*22dc650dSSadaf Ebrahimi   if (utf)
4078*22dc650dSSadaf Ebrahimi     {
4079*22dc650dSSadaf Ebrahimi     ACROSSCHAR(start_match < end_subject, start_match, start_match++);
4080*22dc650dSSadaf Ebrahimi     }
4081*22dc650dSSadaf Ebrahimi #endif
4082*22dc650dSSadaf Ebrahimi   if (start_match > end_subject) break;
4083*22dc650dSSadaf Ebrahimi 
4084*22dc650dSSadaf Ebrahimi   /* If we have just passed a CR and we are now at a LF, and the pattern does
4085*22dc650dSSadaf Ebrahimi   not contain any explicit matches for \r or \n, and the newline option is CRLF
4086*22dc650dSSadaf Ebrahimi   or ANY or ANYCRLF, advance the match position by one more character. */
4087*22dc650dSSadaf Ebrahimi 
4088*22dc650dSSadaf Ebrahimi   if (UCHAR21TEST(start_match - 1) == CHAR_CR &&
4089*22dc650dSSadaf Ebrahimi       start_match < end_subject &&
4090*22dc650dSSadaf Ebrahimi       UCHAR21TEST(start_match) == CHAR_NL &&
4091*22dc650dSSadaf Ebrahimi       (re->flags & PCRE2_HASCRORLF) == 0 &&
4092*22dc650dSSadaf Ebrahimi         (mb->nltype == NLTYPE_ANY ||
4093*22dc650dSSadaf Ebrahimi          mb->nltype == NLTYPE_ANYCRLF ||
4094*22dc650dSSadaf Ebrahimi          mb->nllen == 2))
4095*22dc650dSSadaf Ebrahimi     start_match++;
4096*22dc650dSSadaf Ebrahimi 
4097*22dc650dSSadaf Ebrahimi   }   /* "Bumpalong" loop */
4098*22dc650dSSadaf Ebrahimi 
4099*22dc650dSSadaf Ebrahimi NOMATCH_EXIT:
4100*22dc650dSSadaf Ebrahimi rc = PCRE2_ERROR_NOMATCH;
4101*22dc650dSSadaf Ebrahimi 
4102*22dc650dSSadaf Ebrahimi EXIT:
4103*22dc650dSSadaf Ebrahimi while (rws->next != NULL)
4104*22dc650dSSadaf Ebrahimi   {
4105*22dc650dSSadaf Ebrahimi   RWS_anchor *next = rws->next;
4106*22dc650dSSadaf Ebrahimi   rws->next = next->next;
4107*22dc650dSSadaf Ebrahimi   mb->memctl.free(next, mb->memctl.memory_data);
4108*22dc650dSSadaf Ebrahimi   }
4109*22dc650dSSadaf Ebrahimi 
4110*22dc650dSSadaf Ebrahimi return rc;
4111*22dc650dSSadaf Ebrahimi }
4112*22dc650dSSadaf Ebrahimi 
4113*22dc650dSSadaf Ebrahimi /* These #undefs are here to enable unity builds with CMake. A unity build
compiles several source files together as a single translation unit, so any
macro still defined at the end of this file would remain visible in whichever
source is compiled after it. Removing the macros here keeps them local to this
file. NOTE(review): the matching #defines are presumably earlier in this file;
confirm that every macro defined only for this file is listed below. */
4114*22dc650dSSadaf Ebrahimi 
4115*22dc650dSSadaf Ebrahimi #undef NLBLOCK /* Block containing newline information */
4116*22dc650dSSadaf Ebrahimi #undef PSSTART /* Field containing processed string start */
4117*22dc650dSSadaf Ebrahimi #undef PSEND   /* Field containing processed string end */
4118*22dc650dSSadaf Ebrahimi 
4119*22dc650dSSadaf Ebrahimi /* End of pcre2_dfa_match.c */
4120