1*22dc650dSSadaf Ebrahimi /*************************************************
2*22dc650dSSadaf Ebrahimi * Perl-Compatible Regular Expressions *
3*22dc650dSSadaf Ebrahimi *************************************************/
4*22dc650dSSadaf Ebrahimi
5*22dc650dSSadaf Ebrahimi /* PCRE is a library of functions to support regular expressions whose syntax
6*22dc650dSSadaf Ebrahimi and semantics are as close as possible to those of the Perl 5 language.
7*22dc650dSSadaf Ebrahimi
8*22dc650dSSadaf Ebrahimi Written by Philip Hazel
9*22dc650dSSadaf Ebrahimi This module by Zoltan Herczeg
10*22dc650dSSadaf Ebrahimi Original API code Copyright (c) 1997-2012 University of Cambridge
11*22dc650dSSadaf Ebrahimi New API code Copyright (c) 2016-2024 University of Cambridge
12*22dc650dSSadaf Ebrahimi
13*22dc650dSSadaf Ebrahimi -----------------------------------------------------------------------------
14*22dc650dSSadaf Ebrahimi Redistribution and use in source and binary forms, with or without
15*22dc650dSSadaf Ebrahimi modification, are permitted provided that the following conditions are met:
16*22dc650dSSadaf Ebrahimi
17*22dc650dSSadaf Ebrahimi * Redistributions of source code must retain the above copyright notice,
18*22dc650dSSadaf Ebrahimi this list of conditions and the following disclaimer.
19*22dc650dSSadaf Ebrahimi
20*22dc650dSSadaf Ebrahimi * Redistributions in binary form must reproduce the above copyright
21*22dc650dSSadaf Ebrahimi notice, this list of conditions and the following disclaimer in the
22*22dc650dSSadaf Ebrahimi documentation and/or other materials provided with the distribution.
23*22dc650dSSadaf Ebrahimi
24*22dc650dSSadaf Ebrahimi * Neither the name of the University of Cambridge nor the names of its
25*22dc650dSSadaf Ebrahimi contributors may be used to endorse or promote products derived from
26*22dc650dSSadaf Ebrahimi this software without specific prior written permission.
27*22dc650dSSadaf Ebrahimi
28*22dc650dSSadaf Ebrahimi THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29*22dc650dSSadaf Ebrahimi AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30*22dc650dSSadaf Ebrahimi IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31*22dc650dSSadaf Ebrahimi ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32*22dc650dSSadaf Ebrahimi LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33*22dc650dSSadaf Ebrahimi CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34*22dc650dSSadaf Ebrahimi SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35*22dc650dSSadaf Ebrahimi INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36*22dc650dSSadaf Ebrahimi CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37*22dc650dSSadaf Ebrahimi ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38*22dc650dSSadaf Ebrahimi POSSIBILITY OF SUCH DAMAGE.
39*22dc650dSSadaf Ebrahimi -----------------------------------------------------------------------------
40*22dc650dSSadaf Ebrahimi */
41*22dc650dSSadaf Ebrahimi
42*22dc650dSSadaf Ebrahimi #ifdef HAVE_CONFIG_H
43*22dc650dSSadaf Ebrahimi #include "config.h"
44*22dc650dSSadaf Ebrahimi #endif
45*22dc650dSSadaf Ebrahimi
46*22dc650dSSadaf Ebrahimi #if defined(__has_feature)
47*22dc650dSSadaf Ebrahimi #if __has_feature(memory_sanitizer)
48*22dc650dSSadaf Ebrahimi #include <sanitizer/msan_interface.h>
49*22dc650dSSadaf Ebrahimi #endif /* __has_feature(memory_sanitizer) */
50*22dc650dSSadaf Ebrahimi #endif /* defined(__has_feature) */
51*22dc650dSSadaf Ebrahimi
52*22dc650dSSadaf Ebrahimi #include "pcre2_internal.h"
53*22dc650dSSadaf Ebrahimi
54*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_JIT
55*22dc650dSSadaf Ebrahimi
56*22dc650dSSadaf Ebrahimi /* All-in-one: Since we use the JIT compiler only from here,
57*22dc650dSSadaf Ebrahimi we just include it. This way we don't need to touch the build
58*22dc650dSSadaf Ebrahimi system files. */
59*22dc650dSSadaf Ebrahimi
60*22dc650dSSadaf Ebrahimi #define SLJIT_CONFIG_AUTO 1
61*22dc650dSSadaf Ebrahimi #define SLJIT_CONFIG_STATIC 1
62*22dc650dSSadaf Ebrahimi #define SLJIT_VERBOSE 0
63*22dc650dSSadaf Ebrahimi
64*22dc650dSSadaf Ebrahimi #ifdef PCRE2_DEBUG
65*22dc650dSSadaf Ebrahimi #define SLJIT_DEBUG 1
66*22dc650dSSadaf Ebrahimi #else
67*22dc650dSSadaf Ebrahimi #define SLJIT_DEBUG 0
68*22dc650dSSadaf Ebrahimi #endif
69*22dc650dSSadaf Ebrahimi
70*22dc650dSSadaf Ebrahimi #define SLJIT_MALLOC(size, allocator_data) pcre2_jit_malloc(size, allocator_data)
71*22dc650dSSadaf Ebrahimi #define SLJIT_FREE(ptr, allocator_data) pcre2_jit_free(ptr, allocator_data)
72*22dc650dSSadaf Ebrahimi
pcre2_jit_malloc(size_t size,void * allocator_data)73*22dc650dSSadaf Ebrahimi static void * pcre2_jit_malloc(size_t size, void *allocator_data)
74*22dc650dSSadaf Ebrahimi {
75*22dc650dSSadaf Ebrahimi pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
76*22dc650dSSadaf Ebrahimi return allocator->malloc(size, allocator->memory_data);
77*22dc650dSSadaf Ebrahimi }
78*22dc650dSSadaf Ebrahimi
pcre2_jit_free(void * ptr,void * allocator_data)79*22dc650dSSadaf Ebrahimi static void pcre2_jit_free(void *ptr, void *allocator_data)
80*22dc650dSSadaf Ebrahimi {
81*22dc650dSSadaf Ebrahimi pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
82*22dc650dSSadaf Ebrahimi allocator->free(ptr, allocator->memory_data);
83*22dc650dSSadaf Ebrahimi }
84*22dc650dSSadaf Ebrahimi
85*22dc650dSSadaf Ebrahimi #include "sljit/sljitLir.c"
86*22dc650dSSadaf Ebrahimi
87*22dc650dSSadaf Ebrahimi #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
88*22dc650dSSadaf Ebrahimi #error Unsupported architecture
89*22dc650dSSadaf Ebrahimi #endif
90*22dc650dSSadaf Ebrahimi
91*22dc650dSSadaf Ebrahimi /* Defines for debugging purposes. */
92*22dc650dSSadaf Ebrahimi
93*22dc650dSSadaf Ebrahimi /* 1 - Use unoptimized capturing brackets.
94*22dc650dSSadaf Ebrahimi 2 - Enable capture_last_ptr (includes option 1). */
95*22dc650dSSadaf Ebrahimi /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
96*22dc650dSSadaf Ebrahimi
97*22dc650dSSadaf Ebrahimi /* 1 - Always have a control head. */
98*22dc650dSSadaf Ebrahimi /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
99*22dc650dSSadaf Ebrahimi
100*22dc650dSSadaf Ebrahimi /* Allocate memory for the regex stack on the real machine stack.
101*22dc650dSSadaf Ebrahimi Fast, but limited size. */
102*22dc650dSSadaf Ebrahimi #define MACHINE_STACK_SIZE 32768
103*22dc650dSSadaf Ebrahimi
104*22dc650dSSadaf Ebrahimi /* Growth rate for stack allocated by the OS. Should be a multiple
105*22dc650dSSadaf Ebrahimi of the page size. */
106*22dc650dSSadaf Ebrahimi #define STACK_GROWTH_RATE 8192
107*22dc650dSSadaf Ebrahimi
108*22dc650dSSadaf Ebrahimi /* Enable to check that the allocation could destroy temporaries. */
109*22dc650dSSadaf Ebrahimi #if defined SLJIT_DEBUG && SLJIT_DEBUG
110*22dc650dSSadaf Ebrahimi #define DESTROY_REGISTERS 1
111*22dc650dSSadaf Ebrahimi #endif
112*22dc650dSSadaf Ebrahimi
113*22dc650dSSadaf Ebrahimi /*
114*22dc650dSSadaf Ebrahimi Short summary about the backtracking mechanism employed by the jit code generator:
115*22dc650dSSadaf Ebrahimi
116*22dc650dSSadaf Ebrahimi The code generator follows the recursive nature of the PERL compatible regular
117*22dc650dSSadaf Ebrahimi expressions. The basic blocks of regular expressions are condition checkers
118*22dc650dSSadaf Ebrahimi which execute different commands depending on the result of the condition check.
119*22dc650dSSadaf Ebrahimi The relationship between the operators can be horizontal (concatenation) and
120*22dc650dSSadaf Ebrahimi vertical (sub-expression) (See struct backtrack_common for more details).
121*22dc650dSSadaf Ebrahimi
122*22dc650dSSadaf Ebrahimi 'ab' - 'a' and 'b' regexps are concatenated
123*22dc650dSSadaf Ebrahimi 'a+' - 'a' is the sub-expression of the '+' operator
124*22dc650dSSadaf Ebrahimi
125*22dc650dSSadaf Ebrahimi The condition checkers are boolean (true/false) checkers. Machine code is generated
126*22dc650dSSadaf Ebrahimi for the checker itself and for the actions depending on the result of the checker.
127*22dc650dSSadaf Ebrahimi The 'true' case is called the matching path (expected path), and the other is called
128*22dc650dSSadaf Ebrahimi the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
129*22dc650dSSadaf Ebrahimi branches on the matching path.
130*22dc650dSSadaf Ebrahimi
131*22dc650dSSadaf Ebrahimi Greedy star operator (*) :
132*22dc650dSSadaf Ebrahimi Matching path: match happens.
133*22dc650dSSadaf Ebrahimi Backtrack path: match failed.
134*22dc650dSSadaf Ebrahimi Non-greedy star operator (*?) :
135*22dc650dSSadaf Ebrahimi Matching path: no need to perform a match.
136*22dc650dSSadaf Ebrahimi Backtrack path: match is required.
137*22dc650dSSadaf Ebrahimi
138*22dc650dSSadaf Ebrahimi The following example shows how the code is generated for a capturing bracket
139*22dc650dSSadaf Ebrahimi with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
140*22dc650dSSadaf Ebrahimi we have the following regular expression:
141*22dc650dSSadaf Ebrahimi
142*22dc650dSSadaf Ebrahimi A(B|C)D
143*22dc650dSSadaf Ebrahimi
144*22dc650dSSadaf Ebrahimi The generated code will be the following:
145*22dc650dSSadaf Ebrahimi
146*22dc650dSSadaf Ebrahimi A matching path
147*22dc650dSSadaf Ebrahimi '(' matching path (pushing arguments to the stack)
148*22dc650dSSadaf Ebrahimi B matching path
149*22dc650dSSadaf Ebrahimi ')' matching path (pushing arguments to the stack)
150*22dc650dSSadaf Ebrahimi D matching path
151*22dc650dSSadaf Ebrahimi return with successful match
152*22dc650dSSadaf Ebrahimi
153*22dc650dSSadaf Ebrahimi D backtrack path
154*22dc650dSSadaf Ebrahimi ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
155*22dc650dSSadaf Ebrahimi B backtrack path
156*22dc650dSSadaf Ebrahimi C expected path
157*22dc650dSSadaf Ebrahimi jump to D matching path
158*22dc650dSSadaf Ebrahimi C backtrack path
159*22dc650dSSadaf Ebrahimi A backtrack path
160*22dc650dSSadaf Ebrahimi
161*22dc650dSSadaf Ebrahimi Notice that the order of the backtrack code paths is the opposite of the fast
162*22dc650dSSadaf Ebrahimi code paths. In this way the topmost value on the stack always belongs
163*22dc650dSSadaf Ebrahimi to the current backtrack code path. The backtrack path must check
164*22dc650dSSadaf Ebrahimi whether there is a next alternative. If so, it needs to jump back to
165*22dc650dSSadaf Ebrahimi the matching path eventually. Otherwise it needs to clear out its own stack
166*22dc650dSSadaf Ebrahimi frame and continue the execution on the backtrack code paths.
167*22dc650dSSadaf Ebrahimi */
168*22dc650dSSadaf Ebrahimi
169*22dc650dSSadaf Ebrahimi /*
170*22dc650dSSadaf Ebrahimi Saved stack frames:
171*22dc650dSSadaf Ebrahimi
172*22dc650dSSadaf Ebrahimi Atomic blocks and asserts require reloading the values of private data
173*22dc650dSSadaf Ebrahimi when the backtrack mechanism is performed. Because of OP_RECURSE, the data
174*22dc650dSSadaf Ebrahimi are not necessarily known at compile time, thus we need a dynamic restore
175*22dc650dSSadaf Ebrahimi mechanism.
176*22dc650dSSadaf Ebrahimi
177*22dc650dSSadaf Ebrahimi The stack frames are stored in a chain list, and have the following format:
178*22dc650dSSadaf Ebrahimi ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
179*22dc650dSSadaf Ebrahimi
180*22dc650dSSadaf Ebrahimi Thus we can restore the private data to a particular point in the stack.
181*22dc650dSSadaf Ebrahimi */
182*22dc650dSSadaf Ebrahimi
/* Argument block passed to functions of the jit_function type (declared later
in this file), whose only parameter is a pointer to this structure. Members
are grouped by kind: all pointer members come first, the integer members
follow. */
183*22dc650dSSadaf Ebrahimi typedef struct jit_arguments {
184*22dc650dSSadaf Ebrahimi /* Pointers first. */
185*22dc650dSSadaf Ebrahimi struct sljit_stack *stack;
186*22dc650dSSadaf Ebrahimi PCRE2_SPTR str;
187*22dc650dSSadaf Ebrahimi PCRE2_SPTR begin;
188*22dc650dSSadaf Ebrahimi PCRE2_SPTR end;
189*22dc650dSSadaf Ebrahimi pcre2_match_data *match_data;
190*22dc650dSSadaf Ebrahimi PCRE2_SPTR startchar_ptr;
191*22dc650dSSadaf Ebrahimi PCRE2_UCHAR *mark_ptr;
/* Callout callback and the opaque pointer stored next to it.
NOTE(review): callout_data is presumably what gets passed as the callback's
void * argument - confirm against the caller. */
192*22dc650dSSadaf Ebrahimi int (*callout)(pcre2_callout_block *, void *);
193*22dc650dSSadaf Ebrahimi void *callout_data;
194*22dc650dSSadaf Ebrahimi /* Everything else after. */
195*22dc650dSSadaf Ebrahimi sljit_uw offset_limit;
196*22dc650dSSadaf Ebrahimi sljit_u32 limit_match;
197*22dc650dSSadaf Ebrahimi sljit_u32 oveccount;
198*22dc650dSSadaf Ebrahimi sljit_u32 options;
199*22dc650dSSadaf Ebrahimi } jit_arguments;
200*22dc650dSSadaf Ebrahimi
201*22dc650dSSadaf Ebrahimi #define JIT_NUMBER_OF_COMPILE_MODES 3
202*22dc650dSSadaf Ebrahimi
/* Holds three parallel arrays with one slot per compile mode
(JIT_NUMBER_OF_COMPILE_MODES entries each): a function pointer, a read-only
data head and a size, plus two values that are not per-mode. */
203*22dc650dSSadaf Ebrahimi typedef struct executable_functions {
204*22dc650dSSadaf Ebrahimi void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
205*22dc650dSSadaf Ebrahimi void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
206*22dc650dSSadaf Ebrahimi sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
207*22dc650dSSadaf Ebrahimi sljit_u32 top_bracket;
208*22dc650dSSadaf Ebrahimi sljit_u32 limit_match;
209*22dc650dSSadaf Ebrahimi } executable_functions;
210*22dc650dSSadaf Ebrahimi
/* Node of a singly linked list of sljit jumps: one jump per node, chained
through 'next'. */
211*22dc650dSSadaf Ebrahimi typedef struct jump_list {
212*22dc650dSSadaf Ebrahimi struct sljit_jump *jump;
213*22dc650dSSadaf Ebrahimi struct jump_list *next;
214*22dc650dSSadaf Ebrahimi } jump_list;
215*22dc650dSSadaf Ebrahimi
/* Node of a singly linked list that pairs a jump ('start') with a label
('quit'). NOTE(review): presumably 'start' enters an out-of-line stub and
'quit' is where execution resumes afterwards - confirm against the users. */
216*22dc650dSSadaf Ebrahimi typedef struct stub_list {
217*22dc650dSSadaf Ebrahimi struct sljit_jump *start;
218*22dc650dSSadaf Ebrahimi struct sljit_label *quit;
219*22dc650dSSadaf Ebrahimi struct stub_list *next;
220*22dc650dSSadaf Ebrahimi } stub_list;
221*22dc650dSSadaf Ebrahimi
/* Negative sentinel values. NOTE(review): presumably stored in the
'framesize' members of the backtrack structures, whose comments describe
values below 0 as "frame not needed" - confirm. */
222*22dc650dSSadaf Ebrahimi enum frame_types {
223*22dc650dSSadaf Ebrahimi no_frame = -1,
224*22dc650dSSadaf Ebrahimi no_stack = -2
225*22dc650dSSadaf Ebrahimi };
226*22dc650dSSadaf Ebrahimi
/* Item types of the control verb management chain (see the comment on
control_head_ptr in compiler_common). */
227*22dc650dSSadaf Ebrahimi enum control_types {
228*22dc650dSSadaf Ebrahimi type_mark = 0,
229*22dc650dSSadaf Ebrahimi type_then_trap = 1
230*22dc650dSSadaf Ebrahimi };
231*22dc650dSSadaf Ebrahimi
/* NOTE(review): presumably the kinds handled by the fast fail optimization
(see early_fail_start_ptr / early_fail_end_ptr in compiler_common) -
confirm against the users of these constants. */
232*22dc650dSSadaf Ebrahimi enum early_fail_types {
233*22dc650dSSadaf Ebrahimi type_skip = 0,
234*22dc650dSSadaf Ebrahimi type_fail = 1,
235*22dc650dSSadaf Ebrahimi type_fail_range = 2
236*22dc650dSSadaf Ebrahimi };
237*22dc650dSSadaf Ebrahimi
/* Function pointer type declared with SLJIT_FUNC: takes a pointer to a
jit_arguments block and returns an int. */
238*22dc650dSSadaf Ebrahimi typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
239*22dc650dSSadaf Ebrahimi
240*22dc650dSSadaf Ebrahimi /* The following structure is the key data type for the recursive
241*22dc650dSSadaf Ebrahimi code generator. It is allocated by compile_matchingpath, and contains
242*22dc650dSSadaf Ebrahimi the arguments for compile_backtrackingpath. Must be the first member
243*22dc650dSSadaf Ebrahimi of its descendants (the *_backtrack structures in this file embed it as
their first member, named 'common'). */
244*22dc650dSSadaf Ebrahimi typedef struct backtrack_common {
245*22dc650dSSadaf Ebrahimi /* Backtracking path of an opcode, which falls back
246*22dc650dSSadaf Ebrahimi to our opcode, if it cannot resume matching. */
247*22dc650dSSadaf Ebrahimi struct backtrack_common *prev;
248*22dc650dSSadaf Ebrahimi /* Backtracks for opcodes without a backtracking path.
249*22dc650dSSadaf Ebrahimi These opcodes are between 'prev' and the current
250*22dc650dSSadaf Ebrahimi opcode, and they never resume the match. */
251*22dc650dSSadaf Ebrahimi jump_list *simple_backtracks;
252*22dc650dSSadaf Ebrahimi /* Internal backtracking list for block constructs
253*22dc650dSSadaf Ebrahimi which contain other opcodes, such as brackets,
254*22dc650dSSadaf Ebrahimi asserts, conditionals, etc. */
255*22dc650dSSadaf Ebrahimi struct backtrack_common *top;
256*22dc650dSSadaf Ebrahimi /* Backtracks used internally by the opcode. For component
257*22dc650dSSadaf Ebrahimi opcodes, this list is also used by those opcodes without
258*22dc650dSSadaf Ebrahimi a backtracking path which follow the 'top' backtrack. */
259*22dc650dSSadaf Ebrahimi jump_list *own_backtracks;
260*22dc650dSSadaf Ebrahimi /* Opcode pointer. */
261*22dc650dSSadaf Ebrahimi PCRE2_SPTR cc;
262*22dc650dSSadaf Ebrahimi } backtrack_common;
263*22dc650dSSadaf Ebrahimi
/* Backtrack data that extends backtrack_common (first member) with a
failed-condition jump list, a frame size, a private data location and a
matching path label. */
264*22dc650dSSadaf Ebrahimi typedef struct assert_backtrack {
265*22dc650dSSadaf Ebrahimi backtrack_common common;
266*22dc650dSSadaf Ebrahimi jump_list *condfailed;
267*22dc650dSSadaf Ebrahimi /* Less than 0 if a frame is not needed. */
268*22dc650dSSadaf Ebrahimi int framesize;
269*22dc650dSSadaf Ebrahimi /* Points to our private memory word on the stack. */
270*22dc650dSSadaf Ebrahimi int private_data_ptr;
271*22dc650dSSadaf Ebrahimi /* For iterators. */
272*22dc650dSSadaf Ebrahimi struct sljit_label *matchingpath;
273*22dc650dSSadaf Ebrahimi } assert_backtrack;
274*22dc650dSSadaf Ebrahimi
/* Backtrack data that extends backtrack_common (first member) with three
matching path labels, a union whose active member depends on the opcode
(see the member comments), and a private data location. */
275*22dc650dSSadaf Ebrahimi typedef struct bracket_backtrack {
276*22dc650dSSadaf Ebrahimi backtrack_common common;
277*22dc650dSSadaf Ebrahimi /* Where to continue if an alternative is successfully matched. */
278*22dc650dSSadaf Ebrahimi struct sljit_label *alternative_matchingpath;
279*22dc650dSSadaf Ebrahimi /* For rmin and rmax iterators. */
280*22dc650dSSadaf Ebrahimi struct sljit_label *recursive_matchingpath;
281*22dc650dSSadaf Ebrahimi /* For greedy ? operator. */
282*22dc650dSSadaf Ebrahimi struct sljit_label *zero_matchingpath;
283*22dc650dSSadaf Ebrahimi /* Contains the branches of a failed condition. */
284*22dc650dSSadaf Ebrahimi union {
285*22dc650dSSadaf Ebrahimi /* Both for OP_COND, OP_SCOND. */
286*22dc650dSSadaf Ebrahimi jump_list *condfailed;
287*22dc650dSSadaf Ebrahimi assert_backtrack *assert;
288*22dc650dSSadaf Ebrahimi /* For OP_ONCE. Less than 0 if not needed. */
289*22dc650dSSadaf Ebrahimi int framesize;
290*22dc650dSSadaf Ebrahimi /* For brackets with >3 alternatives. */
291*22dc650dSSadaf Ebrahimi struct sljit_jump *matching_mov_addr;
292*22dc650dSSadaf Ebrahimi } u;
293*22dc650dSSadaf Ebrahimi /* Points to our private memory word on the stack. */
294*22dc650dSSadaf Ebrahimi int private_data_ptr;
295*22dc650dSSadaf Ebrahimi } bracket_backtrack;
296*22dc650dSSadaf Ebrahimi
/* Backtrack data that extends backtrack_common (first member) with a
private data location, a frame size and the allocated stack size. */
297*22dc650dSSadaf Ebrahimi typedef struct bracketpos_backtrack {
298*22dc650dSSadaf Ebrahimi backtrack_common common;
299*22dc650dSSadaf Ebrahimi /* Points to our private memory word on the stack. */
300*22dc650dSSadaf Ebrahimi int private_data_ptr;
301*22dc650dSSadaf Ebrahimi /* Reverting stack is needed. */
302*22dc650dSSadaf Ebrahimi int framesize;
303*22dc650dSSadaf Ebrahimi /* Allocated stack size. */
304*22dc650dSSadaf Ebrahimi int stacksize;
305*22dc650dSSadaf Ebrahimi } bracketpos_backtrack;
306*22dc650dSSadaf Ebrahimi
/* Backtrack data that extends backtrack_common (first member) with a single
matching path label. */
307*22dc650dSSadaf Ebrahimi typedef struct braminzero_backtrack {
308*22dc650dSSadaf Ebrahimi backtrack_common common;
309*22dc650dSSadaf Ebrahimi struct sljit_label *matchingpath;
310*22dc650dSSadaf Ebrahimi } braminzero_backtrack;
311*22dc650dSSadaf Ebrahimi
/* Backtrack data that extends backtrack_common (first member) with the label
of the next iteration and a union holding either a jump list or the
'charpos' data. */
312*22dc650dSSadaf Ebrahimi typedef struct char_iterator_backtrack {
313*22dc650dSSadaf Ebrahimi backtrack_common common;
314*22dc650dSSadaf Ebrahimi /* Next iteration. */
315*22dc650dSSadaf Ebrahimi struct sljit_label *matchingpath;
/* The two members overlap; which one is active is decided by code that is
not part of this declaration. */
316*22dc650dSSadaf Ebrahimi union {
317*22dc650dSSadaf Ebrahimi jump_list *backtracks;
318*22dc650dSSadaf Ebrahimi struct {
319*22dc650dSSadaf Ebrahimi unsigned int othercasebit;
320*22dc650dSSadaf Ebrahimi PCRE2_UCHAR chr;
321*22dc650dSSadaf Ebrahimi BOOL enabled;
322*22dc650dSSadaf Ebrahimi } charpos;
323*22dc650dSSadaf Ebrahimi } u;
324*22dc650dSSadaf Ebrahimi } char_iterator_backtrack;
325*22dc650dSSadaf Ebrahimi
/* Backtrack data that extends backtrack_common (first member) with the label
of the next iteration. */
326*22dc650dSSadaf Ebrahimi typedef struct ref_iterator_backtrack {
327*22dc650dSSadaf Ebrahimi backtrack_common common;
328*22dc650dSSadaf Ebrahimi /* Next iteration. */
329*22dc650dSSadaf Ebrahimi struct sljit_label *matchingpath;
330*22dc650dSSadaf Ebrahimi } ref_iterator_backtrack;
331*22dc650dSSadaf Ebrahimi
/* Node of a singly linked list describing one recursive function: its two
labels, the calls collected before the function exists, and the starting
opcode. */
332*22dc650dSSadaf Ebrahimi typedef struct recurse_entry {
333*22dc650dSSadaf Ebrahimi struct recurse_entry *next;
334*22dc650dSSadaf Ebrahimi /* Contains the function entry label. */
335*22dc650dSSadaf Ebrahimi struct sljit_label *entry_label;
336*22dc650dSSadaf Ebrahimi /* Contains the function backtrack label. NOTE(review): the original
comment repeated the entry label text word for word - confirm. */
337*22dc650dSSadaf Ebrahimi struct sljit_label *backtrack_label;
338*22dc650dSSadaf Ebrahimi /* Collects the entry calls until the function is created. */
339*22dc650dSSadaf Ebrahimi jump_list *entry_calls;
340*22dc650dSSadaf Ebrahimi /* Collects the backtrack calls until the function is created. */
341*22dc650dSSadaf Ebrahimi jump_list *backtrack_calls;
342*22dc650dSSadaf Ebrahimi /* Points to the starting opcode. */
343*22dc650dSSadaf Ebrahimi sljit_sw start;
344*22dc650dSSadaf Ebrahimi } recurse_entry;
345*22dc650dSSadaf Ebrahimi
/* Backtrack data that extends backtrack_common (first member) with the label
to return to, the recurse_entry being called, and a flag telling whether the
pattern is inlined. */
346*22dc650dSSadaf Ebrahimi typedef struct recurse_backtrack {
347*22dc650dSSadaf Ebrahimi backtrack_common common;
348*22dc650dSSadaf Ebrahimi /* Return to the matching path. */
349*22dc650dSSadaf Ebrahimi struct sljit_label *matchingpath;
350*22dc650dSSadaf Ebrahimi /* Recursive pattern. */
351*22dc650dSSadaf Ebrahimi recurse_entry *entry;
352*22dc650dSSadaf Ebrahimi /* Pattern is inlined. */
353*22dc650dSSadaf Ebrahimi BOOL inlined_pattern;
354*22dc650dSSadaf Ebrahimi } recurse_backtrack;
355*22dc650dSSadaf Ebrahimi
/* Backtrack data that extends backtrack_common (first member) with the label
used to return to the matching path. */
356*22dc650dSSadaf Ebrahimi typedef struct vreverse_backtrack {
357*22dc650dSSadaf Ebrahimi backtrack_common common;
358*22dc650dSSadaf Ebrahimi /* Return to the matching path. */
359*22dc650dSSadaf Ebrahimi struct sljit_label *matchingpath;
360*22dc650dSSadaf Ebrahimi } vreverse_backtrack;
361*22dc650dSSadaf Ebrahimi
362*22dc650dSSadaf Ebrahimi #define OP_THEN_TRAP OP_TABLE_LENGTH
363*22dc650dSSadaf Ebrahimi
/* Backtrack data that extends backtrack_common (first member) with the
state of a then trap: an optional link to the real trap, the starting
opcode, the exit jump list and the frame size of the alternative.
NOTE(review): presumably tied to the OP_THEN_TRAP pseudo-opcode defined just
before this structure - confirm. */
364*22dc650dSSadaf Ebrahimi typedef struct then_trap_backtrack {
365*22dc650dSSadaf Ebrahimi backtrack_common common;
366*22dc650dSSadaf Ebrahimi /* If then_trap is not NULL, this structure contains the real
367*22dc650dSSadaf Ebrahimi then_trap for the backtracking path. */
368*22dc650dSSadaf Ebrahimi struct then_trap_backtrack *then_trap;
369*22dc650dSSadaf Ebrahimi /* Points to the starting opcode. */
370*22dc650dSSadaf Ebrahimi sljit_sw start;
371*22dc650dSSadaf Ebrahimi /* Exit point for the then opcodes of this alternative. */
372*22dc650dSSadaf Ebrahimi jump_list *quit;
373*22dc650dSSadaf Ebrahimi /* Frame size of the current alternative. */
374*22dc650dSSadaf Ebrahimi int framesize;
375*22dc650dSSadaf Ebrahimi } then_trap_backtrack;
376*22dc650dSSadaf Ebrahimi
377*22dc650dSSadaf Ebrahimi #define MAX_N_CHARS 12
378*22dc650dSSadaf Ebrahimi #define MAX_DIFF_CHARS 5
379*22dc650dSSadaf Ebrahimi
/* Set of candidate characters for one position. The chars array has room
for MAX_DIFF_CHARS entries; a count of 255 means any character. */
380*22dc650dSSadaf Ebrahimi typedef struct fast_forward_char_data {
381*22dc650dSSadaf Ebrahimi /* Number of characters in the chars array, 255 for any character. */
382*22dc650dSSadaf Ebrahimi sljit_u8 count;
383*22dc650dSSadaf Ebrahimi /* Number of last UTF-8 characters in the chars array. */
384*22dc650dSSadaf Ebrahimi sljit_u8 last_count;
385*22dc650dSSadaf Ebrahimi /* Available characters in the current position. */
386*22dc650dSSadaf Ebrahimi PCRE2_UCHAR chars[MAX_DIFF_CHARS];
387*22dc650dSSadaf Ebrahimi } fast_forward_char_data;
388*22dc650dSSadaf Ebrahimi
389*22dc650dSSadaf Ebrahimi #define MAX_CLASS_RANGE_SIZE 4
390*22dc650dSSadaf Ebrahimi #define MAX_CLASS_CHARS_SIZE 3
391*22dc650dSSadaf Ebrahimi
/* State shared by the code generator. It groups the sljit compiler handle
and the compiled pattern, the sljit_s32 '*_ptr' members described by their
individual comments, pattern feature flags, newline settings, and the labels
and jump lists collected while code is generated. The Unicode related
members exist only when SUPPORT_UNICODE is defined, and some of those only
for particular PCRE2_CODE_UNIT_WIDTH values. */
392*22dc650dSSadaf Ebrahimi typedef struct compiler_common {
393*22dc650dSSadaf Ebrahimi /* The sljit generic compiler. */
394*22dc650dSSadaf Ebrahimi struct sljit_compiler *compiler;
395*22dc650dSSadaf Ebrahimi /* Compiled regular expression. */
396*22dc650dSSadaf Ebrahimi pcre2_real_code *re;
397*22dc650dSSadaf Ebrahimi /* First byte code. */
398*22dc650dSSadaf Ebrahimi PCRE2_SPTR start;
399*22dc650dSSadaf Ebrahimi /* Maps private data offset to each opcode. */
400*22dc650dSSadaf Ebrahimi sljit_s32 *private_data_ptrs;
401*22dc650dSSadaf Ebrahimi /* Chain list of read-only data ptrs. */
402*22dc650dSSadaf Ebrahimi void *read_only_data_head;
403*22dc650dSSadaf Ebrahimi /* Tells whether the capturing bracket is optimized. */
404*22dc650dSSadaf Ebrahimi sljit_u8 *optimized_cbracket;
405*22dc650dSSadaf Ebrahimi /* Tells whether the starting offset is a target of then. */
406*22dc650dSSadaf Ebrahimi sljit_u8 *then_offsets;
407*22dc650dSSadaf Ebrahimi /* Current position where a THEN must jump. */
408*22dc650dSSadaf Ebrahimi then_trap_backtrack *then_trap;
409*22dc650dSSadaf Ebrahimi /* Starting offset of private data for capturing brackets. */
410*22dc650dSSadaf Ebrahimi sljit_s32 cbra_ptr;
411*22dc650dSSadaf Ebrahimi /* Output vector starting point. Must be divisible by 2. */
412*22dc650dSSadaf Ebrahimi sljit_s32 ovector_start;
413*22dc650dSSadaf Ebrahimi /* Points to the starting character of the current match. */
414*22dc650dSSadaf Ebrahimi sljit_s32 start_ptr;
415*22dc650dSSadaf Ebrahimi /* Last known position of the requested byte. */
416*22dc650dSSadaf Ebrahimi sljit_s32 req_char_ptr;
417*22dc650dSSadaf Ebrahimi /* Head of the last recursion. */
418*22dc650dSSadaf Ebrahimi sljit_s32 recursive_head_ptr;
419*22dc650dSSadaf Ebrahimi /* First inspected character for partial matching.
420*22dc650dSSadaf Ebrahimi (Needed for avoiding zero length partial matches.) */
421*22dc650dSSadaf Ebrahimi sljit_s32 start_used_ptr;
422*22dc650dSSadaf Ebrahimi /* Starting pointer for partial soft matches. */
423*22dc650dSSadaf Ebrahimi sljit_s32 hit_start;
424*22dc650dSSadaf Ebrahimi /* Pointer of the match end position. */
425*22dc650dSSadaf Ebrahimi sljit_s32 match_end_ptr;
426*22dc650dSSadaf Ebrahimi /* Points to the marked string. */
427*22dc650dSSadaf Ebrahimi sljit_s32 mark_ptr;
428*22dc650dSSadaf Ebrahimi /* Head of the recursive control verb management chain.
429*22dc650dSSadaf Ebrahimi Each item must have a previous offset and type
430*22dc650dSSadaf Ebrahimi (see control_types) values. See do_search_mark. */
431*22dc650dSSadaf Ebrahimi sljit_s32 control_head_ptr;
432*22dc650dSSadaf Ebrahimi /* Points to the last matched capture block index. */
433*22dc650dSSadaf Ebrahimi sljit_s32 capture_last_ptr;
434*22dc650dSSadaf Ebrahimi /* Fast forward skipping byte code pointer. */
435*22dc650dSSadaf Ebrahimi PCRE2_SPTR fast_forward_bc_ptr;
436*22dc650dSSadaf Ebrahimi /* Locals used by fast fail optimization. */
437*22dc650dSSadaf Ebrahimi sljit_s32 early_fail_start_ptr;
438*22dc650dSSadaf Ebrahimi sljit_s32 early_fail_end_ptr;
439*22dc650dSSadaf Ebrahimi /* Variables used by recursive call generator. */
440*22dc650dSSadaf Ebrahimi sljit_s32 recurse_bitset_size;
441*22dc650dSSadaf Ebrahimi uint8_t *recurse_bitset;
442*22dc650dSSadaf Ebrahimi
443*22dc650dSSadaf Ebrahimi /* Flipped and lower case tables. */
444*22dc650dSSadaf Ebrahimi const sljit_u8 *fcc;
445*22dc650dSSadaf Ebrahimi sljit_sw lcc;
446*22dc650dSSadaf Ebrahimi /* Mode can be PCRE2_JIT_COMPLETE and others. */
447*22dc650dSSadaf Ebrahimi int mode;
448*22dc650dSSadaf Ebrahimi /* TRUE, when empty match is accepted for partial matching. */
449*22dc650dSSadaf Ebrahimi BOOL allow_empty_partial;
450*22dc650dSSadaf Ebrahimi /* NOTE(review): the original comment read "TRUE, when minlength is
greater than 0", which contradicts the member name. Presumably TRUE when the
pattern might match an empty string - confirm where this is assigned. */
451*22dc650dSSadaf Ebrahimi BOOL might_be_empty;
452*22dc650dSSadaf Ebrahimi /* \K is found in the pattern. */
453*22dc650dSSadaf Ebrahimi BOOL has_set_som;
454*22dc650dSSadaf Ebrahimi /* (*SKIP:arg) is found in the pattern. */
455*22dc650dSSadaf Ebrahimi BOOL has_skip_arg;
456*22dc650dSSadaf Ebrahimi /* (*THEN) is found in the pattern. */
457*22dc650dSSadaf Ebrahimi BOOL has_then;
458*22dc650dSSadaf Ebrahimi /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
459*22dc650dSSadaf Ebrahimi BOOL has_skip_in_assert_back;
460*22dc650dSSadaf Ebrahimi /* Quit is redirected by recurse, negative assertion, or positive assertion in conditional block. */
461*22dc650dSSadaf Ebrahimi BOOL local_quit_available;
462*22dc650dSSadaf Ebrahimi /* Currently in a positive assertion. */
463*22dc650dSSadaf Ebrahimi BOOL in_positive_assertion;
464*22dc650dSSadaf Ebrahimi /* Newline control. */
465*22dc650dSSadaf Ebrahimi int nltype;
466*22dc650dSSadaf Ebrahimi sljit_u32 nlmax;
467*22dc650dSSadaf Ebrahimi sljit_u32 nlmin;
468*22dc650dSSadaf Ebrahimi int newline;
469*22dc650dSSadaf Ebrahimi int bsr_nltype;
470*22dc650dSSadaf Ebrahimi sljit_u32 bsr_nlmax;
471*22dc650dSSadaf Ebrahimi sljit_u32 bsr_nlmin;
472*22dc650dSSadaf Ebrahimi /* Dollar endonly. */
473*22dc650dSSadaf Ebrahimi int endonly;
474*22dc650dSSadaf Ebrahimi /* Tables. */
475*22dc650dSSadaf Ebrahimi sljit_sw ctypes;
476*22dc650dSSadaf Ebrahimi /* Named capturing brackets. */
477*22dc650dSSadaf Ebrahimi PCRE2_SPTR name_table;
478*22dc650dSSadaf Ebrahimi sljit_sw name_count;
479*22dc650dSSadaf Ebrahimi sljit_sw name_entry_size;
480*22dc650dSSadaf Ebrahimi
481*22dc650dSSadaf Ebrahimi /* Labels and jump lists. */
482*22dc650dSSadaf Ebrahimi struct sljit_label *partialmatchlabel;
483*22dc650dSSadaf Ebrahimi struct sljit_label *quit_label;
484*22dc650dSSadaf Ebrahimi struct sljit_label *abort_label;
485*22dc650dSSadaf Ebrahimi struct sljit_label *accept_label;
486*22dc650dSSadaf Ebrahimi struct sljit_label *ff_newline_shortcut;
487*22dc650dSSadaf Ebrahimi stub_list *stubs;
488*22dc650dSSadaf Ebrahimi recurse_entry *entries;
489*22dc650dSSadaf Ebrahimi recurse_entry *currententry;
490*22dc650dSSadaf Ebrahimi jump_list *partialmatch;
491*22dc650dSSadaf Ebrahimi jump_list *quit;
492*22dc650dSSadaf Ebrahimi jump_list *positive_assertion_quit;
493*22dc650dSSadaf Ebrahimi jump_list *abort;
494*22dc650dSSadaf Ebrahimi jump_list *failed_match;
495*22dc650dSSadaf Ebrahimi jump_list *accept;
496*22dc650dSSadaf Ebrahimi jump_list *calllimit;
497*22dc650dSSadaf Ebrahimi jump_list *stackalloc;
498*22dc650dSSadaf Ebrahimi jump_list *revertframes;
499*22dc650dSSadaf Ebrahimi jump_list *wordboundary;
500*22dc650dSSadaf Ebrahimi jump_list *ucp_wordboundary;
501*22dc650dSSadaf Ebrahimi jump_list *anynewline;
502*22dc650dSSadaf Ebrahimi jump_list *hspace;
503*22dc650dSSadaf Ebrahimi jump_list *vspace;
504*22dc650dSSadaf Ebrahimi jump_list *casefulcmp;
505*22dc650dSSadaf Ebrahimi jump_list *caselesscmp;
506*22dc650dSSadaf Ebrahimi jump_list *reset_match;
507*22dc650dSSadaf Ebrahimi /* Same as reset_match, but resets the STR_PTR as well. */
508*22dc650dSSadaf Ebrahimi jump_list *restart_match;
509*22dc650dSSadaf Ebrahimi BOOL unset_backref;
510*22dc650dSSadaf Ebrahimi BOOL alt_circumflex;
511*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
512*22dc650dSSadaf Ebrahimi BOOL utf;
513*22dc650dSSadaf Ebrahimi BOOL invalid_utf;
514*22dc650dSSadaf Ebrahimi BOOL ucp;
515*22dc650dSSadaf Ebrahimi /* Points to saving area for iref. */
516*22dc650dSSadaf Ebrahimi sljit_s32 iref_ptr;
517*22dc650dSSadaf Ebrahimi jump_list *getucd;
518*22dc650dSSadaf Ebrahimi jump_list *getucdtype;
519*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8
520*22dc650dSSadaf Ebrahimi jump_list *utfreadchar;
521*22dc650dSSadaf Ebrahimi jump_list *utfreadtype8;
522*22dc650dSSadaf Ebrahimi jump_list *utfpeakcharback;
523*22dc650dSSadaf Ebrahimi #endif
524*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
525*22dc650dSSadaf Ebrahimi jump_list *utfreadchar_invalid;
526*22dc650dSSadaf Ebrahimi jump_list *utfreadnewline_invalid;
527*22dc650dSSadaf Ebrahimi jump_list *utfmoveback_invalid;
528*22dc650dSSadaf Ebrahimi jump_list *utfpeakcharback_invalid;
529*22dc650dSSadaf Ebrahimi #endif
530*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE */
531*22dc650dSSadaf Ebrahimi } compiler_common;
532*22dc650dSSadaf Ebrahimi
533*22dc650dSSadaf Ebrahimi /* For byte_sequence_compare. Always holds a length and a source register.
When SLJIT_UNALIGNED is defined and non-zero it also holds 'ucharptr' and the
two unions 'c' and 'oc', which have the same layout: each overlays a 32-bit
integer, a 16-bit integer and an array of code units whose element size
follows PCRE2_CODE_UNIT_WIDTH (4 x 8-bit, 2 x 16-bit or 1 x 32-bit). */
534*22dc650dSSadaf Ebrahimi
535*22dc650dSSadaf Ebrahimi typedef struct compare_context {
536*22dc650dSSadaf Ebrahimi int length;
537*22dc650dSSadaf Ebrahimi int sourcereg;
538*22dc650dSSadaf Ebrahimi #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
539*22dc650dSSadaf Ebrahimi int ucharptr;
540*22dc650dSSadaf Ebrahimi union {
541*22dc650dSSadaf Ebrahimi sljit_s32 asint;
542*22dc650dSSadaf Ebrahimi sljit_u16 asushort;
543*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8
544*22dc650dSSadaf Ebrahimi sljit_u8 asbyte;
545*22dc650dSSadaf Ebrahimi sljit_u8 asuchars[4];
546*22dc650dSSadaf Ebrahimi #elif PCRE2_CODE_UNIT_WIDTH == 16
547*22dc650dSSadaf Ebrahimi sljit_u16 asuchars[2];
548*22dc650dSSadaf Ebrahimi #elif PCRE2_CODE_UNIT_WIDTH == 32
549*22dc650dSSadaf Ebrahimi sljit_u32 asuchars[1];
550*22dc650dSSadaf Ebrahimi #endif
551*22dc650dSSadaf Ebrahimi } c;
552*22dc650dSSadaf Ebrahimi union {
553*22dc650dSSadaf Ebrahimi sljit_s32 asint;
554*22dc650dSSadaf Ebrahimi sljit_u16 asushort;
555*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8
556*22dc650dSSadaf Ebrahimi sljit_u8 asbyte;
557*22dc650dSSadaf Ebrahimi sljit_u8 asuchars[4];
558*22dc650dSSadaf Ebrahimi #elif PCRE2_CODE_UNIT_WIDTH == 16
559*22dc650dSSadaf Ebrahimi sljit_u16 asuchars[2];
560*22dc650dSSadaf Ebrahimi #elif PCRE2_CODE_UNIT_WIDTH == 32
561*22dc650dSSadaf Ebrahimi sljit_u32 asuchars[1];
562*22dc650dSSadaf Ebrahimi #endif
563*22dc650dSSadaf Ebrahimi } oc;
564*22dc650dSSadaf Ebrahimi #endif
565*22dc650dSSadaf Ebrahimi } compare_context;
566*22dc650dSSadaf Ebrahimi
/* Undefine sljit macros. CMP is defined again further down in this file as a
shortcut for sljit_emit_cmp(). */
#undef CMP

/* Used for accessing the elements of the stack: scales the slot index i by
SSIZE_OF(sw) to obtain the offset of that slot. */
#define STACK(i) ((i) * SSIZE_OF(sw))
572*22dc650dSSadaf Ebrahimi
/* When sljit declares a preferred shift register it must be either SLJIT_R2
or SLJIT_R3; anything else stops the build. SHIFT_REG_IS_R3 records the R3
case for the register assignment below. */
#ifdef SLJIT_PREF_SHIFT_REG
#if SLJIT_PREF_SHIFT_REG == SLJIT_R2
/* Nothing. */
#elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
#define SHIFT_REG_IS_R3
#else
#error "Unsupported shift register"
#endif
#endif

/* Symbolic names for the sljit registers. TMP2 and TMP3 trade places when
SHIFT_REG_IS_R3 is defined, so TMP2 is the preferred shift register whenever
sljit declares one. */
#define TMP1 SLJIT_R0
#ifdef SHIFT_REG_IS_R3
#define TMP2 SLJIT_R3
#define TMP3 SLJIT_R2
#else
#define TMP2 SLJIT_R2
#define TMP3 SLJIT_R3
#endif
#define STR_PTR SLJIT_R1
#define STR_END SLJIT_S0
#define STACK_TOP SLJIT_S1
#define STACK_LIMIT SLJIT_S2
#define COUNT_MATCH SLJIT_S3
#define ARGUMENTS SLJIT_S4
#define RETURN_ADDR SLJIT_R4
598*22dc650dSSadaf Ebrahimi
/* HAS_VIRTUAL_REGISTERS is 1 only when SLJIT_CONFIG_X86_32 is defined and
non-zero; it is 0 for every other configuration. */
#if !defined(SLJIT_CONFIG_X86_32) || !SLJIT_CONFIG_X86_32
#define HAS_VIRTUAL_REGISTERS 0
#else
#define HAS_VIRTUAL_REGISTERS 1
#endif
604*22dc650dSSadaf Ebrahimi
/* Local space layout. The first five sljit_sw sized slots (indexes 0 to 4)
have fixed roles; the macros below give their offsets. */
/* These two locals can be used by the current opcode. */
#define LOCALS0 (0 * sizeof(sljit_sw))
#define LOCALS1 (1 * sizeof(sljit_sw))
/* Two local variables for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0 (2 * sizeof(sljit_sw))
#define POSSESSIVE1 (3 * sizeof(sljit_sw))
/* Max limit of recursions. */
#define LIMIT_MATCH (4 * sizeof(sljit_sw))
/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second is
the start pointers when the end of the capturing group has not yet been
reached. */
/* The macros below read fields of a variable named common, which must be in
scope wherever they are expanded. */
#define OVECTOR_START (common->ovector_start)
#define OVECTOR(i) (OVECTOR_START + (i) * SSIZE_OF(sw))
#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * SSIZE_OF(sw))
/* Indexes common->private_data_ptrs by the distance of cc from common->start. */
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
622*22dc650dSSadaf Ebrahimi
/* Code unit width dependent helpers.
  MOV_UCHAR    the sljit move opcode whose operand size matches the code unit
  IN_UCHARS(x) x multiplied by the code unit size (1, 2 or 4)
  UCHAR_SHIFT  the shift count equivalent to that multiplication; note that
               it is NOT defined in 8-bit mode
Any other width stops the build. */
#if PCRE2_CODE_UNIT_WIDTH == 8
#define MOV_UCHAR SLJIT_MOV_U8
#define IN_UCHARS(x) (x)
#elif PCRE2_CODE_UNIT_WIDTH == 16
#define MOV_UCHAR SLJIT_MOV_U16
#define UCHAR_SHIFT (1)
#define IN_UCHARS(x) ((x) * 2)
#elif PCRE2_CODE_UNIT_WIDTH == 32
#define MOV_UCHAR SLJIT_MOV_U32
#define UCHAR_SHIFT (2)
#define IN_UCHARS(x) ((x) * 4)
#else
#error Unsupported compiling mode
#endif
637*22dc650dSSadaf Ebrahimi
/* Shortcuts. DEFINE_COMPILER declares a local variable named compiler that
is initialised from common->compiler (so a variable named common must be in
scope). Every other macro here forwards its arguments to the matching sljit
function and passes that compiler variable as the first argument. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define OP2U(op, src1, src1w, src2, src2w) \
  sljit_emit_op2u(compiler, (op), (src1), (src1w), (src2), (src2w))
#define OP_SRC(op, src, srcw) \
  sljit_emit_op_src(compiler, (op), (src), (srcw))
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an already existing label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Binds an existing jump to a label emitted at the current position. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emits a compare and binds the result to an already existing label. */
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define OP_FLAGS(op, dst, dstw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
#define SELECT(type, dst_reg, src1, src1w, src2_reg) \
  sljit_emit_select(compiler, (type), (dst_reg), (src1), (src1w), (src2_reg))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))
669*22dc650dSSadaf Ebrahimi
/* Numeric constants used by the character reading code. The expansions are
parenthesized so that they always behave as a single operand. */
#define READ_CHAR_MAX (0x7fffffff)

#define INVALID_UTF_CHAR (-1)
#define UNASSIGNED_UTF_CHAR (888)
674*22dc650dSSadaf Ebrahimi
675*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE
676*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8
677*22dc650dSSadaf Ebrahimi
/* Decodes and validates the UTF-8 character that starts at ptr (8-bit mode).

  c               lvalue that receives the code point
  ptr             lvalue pointer to the first code unit; advanced by 1 to 4
                  code units when a character is consumed
  end             pointer just past the last readable code unit
  invalid_action  statement executed when the sequence is not valid UTF-8

ptr[0] is read without comparing ptr to end, so the caller has to guarantee
that at least one code unit is available. Every following code unit is bounds
checked against end and must be a continuation byte (0x80..0xbf). Lead bytes
outside 0xc2..0xdf, 0xe0..0xef and 0xf0..0xf4 are rejected, as are three byte
forms that encode a value below 0x800 or a surrogate (0xd800..0xdfff), and
four byte forms that encode a value below 0x10000 or above 0x10ffff.

NOTE: for those last two rejections ptr has already been advanced and c
already holds the decoded value when invalid_action runs; for every other
rejection ptr is unchanged, but c may hold a partially decoded value. The
arguments are expanded several times, so they must not have side effects. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] <= 0x7f) \
    c = *ptr++; \
  else if (ptr + 1 < end && ptr[1] >= 0x80 && ptr[1] < 0xc0) \
    { \
    c = ptr[1] - 0x80; \
    \
    if (ptr[0] >= 0xc2 && ptr[0] <= 0xdf) \
      { \
      c |= (ptr[0] - 0xc0) << 6; \
      ptr += 2; \
      } \
    else if (ptr + 2 < end && ptr[2] >= 0x80 && ptr[2] < 0xc0) \
      { \
      c = c << 6 | (ptr[2] - 0x80); \
      \
      if (ptr[0] >= 0xe0 && ptr[0] <= 0xef) \
        { \
        c |= (ptr[0] - 0xe0) << 12; \
        ptr += 3; \
        \
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if (ptr + 3 < end && ptr[3] >= 0x80 && ptr[3] < 0xc0) \
        { \
        c = c << 6 | (ptr[3] - 0x80); \
        \
        if (ptr[0] >= 0xf0 && ptr[0] <= 0xf4) \
          { \
          c |= (ptr[0] - 0xf0) << 18; \
          ptr += 4; \
          \
          if (c >= 0x110000 || c < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
739*22dc650dSSadaf Ebrahimi
/* Decodes and validates the UTF-8 character that ends just before ptr
(8-bit mode), walking backwards.

  c               lvalue that receives the code point
  ptr             lvalue pointer just past the character; moved back by 1 to
                  4 code units when a character is consumed
  start           pointer to the first readable code unit
  invalid_action  statement executed when the sequence is not valid UTF-8

ptr[-1] is read without comparing ptr to start, so the caller has to
guarantee that at least one code unit precedes ptr. Every earlier code unit
is bounds checked against start. The same lead byte, overlong, surrogate and
range rules apply as in GETCHARINC_INVALID.

Because the bytes are visited from last to first, the value is assembled
from the low bits upwards: the final byte supplies bits 0..5 and each earlier
continuation byte is OR-ed in 6 bits higher (shift 6, then 12), followed by
the lead byte. Shifting the accumulated value left instead (as the forward
decoder does) would place the 6-bit groups in reverse order.

NOTE: for the overlong / surrogate / out of range rejections ptr has already
been moved back and c already holds the decoded value when invalid_action
runs; for every other rejection ptr is unchanged, but c may hold a partially
decoded value. The arguments are expanded several times, so they must not
have side effects. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (c <= 0x7f) \
    ptr--; \
  else if (ptr - 1 > start && ptr[-1] >= 0x80 && ptr[-1] < 0xc0) \
    { \
    c -= 0x80; \
    \
    if (ptr[-2] >= 0xc2 && ptr[-2] <= 0xdf) \
      { \
      c |= (ptr[-2] - 0xc0) << 6; \
      ptr -= 2; \
      } \
    else if (ptr - 2 > start && ptr[-2] >= 0x80 && ptr[-2] < 0xc0) \
      { \
      c |= (ptr[-2] - 0x80) << 6; \
      \
      if (ptr[-3] >= 0xe0 && ptr[-3] <= 0xef) \
        { \
        c |= (ptr[-3] - 0xe0) << 12; \
        ptr -= 3; \
        \
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if (ptr - 3 > start && ptr[-3] >= 0x80 && ptr[-3] < 0xc0) \
        { \
        c |= (ptr[-3] - 0x80) << 12; \
        \
        if (ptr[-4] >= 0xf0 && ptr[-4] <= 0xf4) \
          { \
          c |= (ptr[-4] - 0xf0) << 18; \
          ptr -= 4; \
          \
          if (c >= 0x110000 || c < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
802*22dc650dSSadaf Ebrahimi
803*22dc650dSSadaf Ebrahimi #elif PCRE2_CODE_UNIT_WIDTH == 16
804*22dc650dSSadaf Ebrahimi
/* Decodes and validates the UTF-16 character that starts at ptr (16-bit
mode). A code unit outside 0xd800..0xdfff is a character on its own. A high
surrogate (0xd800..0xdbff) must be followed, before end, by a low surrogate
(0xdc00..0xdfff); the pair is combined into a value of 0x10000 or above and
ptr moves forward by two code units. Anything else executes invalid_action
with c and ptr left unchanged. ptr[0] is read without comparing ptr to end,
and the arguments are expanded several times. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] >= 0xd800 && ptr[0] < 0xe000) \
    { \
    if (ptr[0] < 0xdc00 && ptr + 1 < end && ptr[1] >= 0xdc00 && ptr[1] < 0xe000) \
      { \
      c = 0x10000 + (((ptr[0] - 0xd800) << 10) | (ptr[1] - 0xdc00)); \
      ptr += 2; \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    c = *ptr++; \
  }
819*22dc650dSSadaf Ebrahimi
/* Decodes and validates the UTF-16 character that ends just before ptr
(16-bit mode). c is always loaded from ptr[-1] first. A code unit outside
0xd800..0xdfff is a character on its own and ptr moves back by one. A low
surrogate (0xdc00..0xdfff) must be preceded, at or after start, by a high
surrogate (0xd800..0xdbff); the pair is combined and ptr moves back by two.
Anything else executes invalid_action with ptr unchanged and c still holding
ptr[-1]. ptr[-1] is read without comparing ptr to start, and the arguments
are expanded several times. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (c >= 0xd800 && c < 0xe000) \
    { \
    if (c >= 0xdc00 && ptr - 1 > start && ptr[-2] >= 0xd800 && ptr[-2] < 0xdc00) \
      { \
      c = 0x10000 + (((ptr[-2] - 0xd800) << 10) | (c - 0xdc00)); \
      ptr -= 2; \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    ptr--; \
  }
835*22dc650dSSadaf Ebrahimi
836*22dc650dSSadaf Ebrahimi
837*22dc650dSSadaf Ebrahimi #elif PCRE2_CODE_UNIT_WIDTH == 32
838*22dc650dSSadaf Ebrahimi
/* Reads and validates the UTF-32 character at ptr (32-bit mode). Values in
the surrogate range 0xd800..0xdfff and values of 0x110000 or above execute
invalid_action with c and ptr unchanged; any other value is stored in c and
ptr moves forward by one. The end argument is accepted for symmetry with the
other widths but is not used. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if ((ptr[0] >= 0xd800 && ptr[0] < 0xe000) || ptr[0] >= 0x110000) \
    { \
    invalid_action; \
    } \
  else \
    c = *ptr++; \
  }
848*22dc650dSSadaf Ebrahimi
/* Reads and validates the UTF-32 character just before ptr (32-bit mode).
c is always loaded from ptr[-1]. Values in the surrogate range
0xd800..0xdfff and values of 0x110000 or above execute invalid_action with
ptr unchanged; otherwise ptr moves back by one. The start argument is
accepted for symmetry with the other widths but is not used. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if ((ptr[-1] >= 0xd800 && ptr[-1] < 0xe000) || ptr[-1] >= 0x110000) \
    { \
    invalid_action; \
    } \
  else \
    ptr--; \
  }
859*22dc650dSSadaf Ebrahimi
860*22dc650dSSadaf Ebrahimi #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
861*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE */
862*22dc650dSSadaf Ebrahimi
/* Returns the address of the first code unit after the bracket that starts
at cc. The link stored at offset 1 is followed from the opening opcode
through every OP_ALT until the closing KET opcode is reached; the result
points just past that KET and its link field. cc must point to an assertion
opcode (OP_ASSERT .. OP_ASSERTBACK_NA) or to an opcode in the range
OP_ONCE .. OP_SCOND. */
static PCRE2_SPTR bracketend(PCRE2_SPTR cc)
{
PCRE2_SPTR ket = cc;

SLJIT_ASSERT((*ket >= OP_ASSERT && *ket <= OP_ASSERTBACK_NA) || (*ket >= OP_ONCE && *ket <= OP_SCOND));

/* The first hop is unconditional; later hops happen only from OP_ALT. */
ket += GET(ket, 1);
while (*ket == OP_ALT)
  ket += GET(ket, 1);

SLJIT_ASSERT(*ket >= OP_KET && *ket <= OP_KETRPOS);
return ket + 1 + LINK_SIZE;
}
871*22dc650dSSadaf Ebrahimi
/* Returns the number of alternatives of the bracket that starts at cc: one
for the first branch plus one for every OP_ALT found while following the
links stored at offset 1. cc must point to an assertion opcode
(OP_ASSERT .. OP_ASSERTBACK_NA) or to an opcode in the range
OP_ONCE .. OP_SCOND. */
static int no_alternatives(PCRE2_SPTR cc)
{
int alternatives = 1;

SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

for (cc += GET(cc, 1); *cc == OP_ALT; cc += GET(cc, 1))
  alternatives++;

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return alternatives;
}
885*22dc650dSSadaf Ebrahimi
find_vreverse(PCRE2_SPTR cc)886*22dc650dSSadaf Ebrahimi static BOOL find_vreverse(PCRE2_SPTR cc)
887*22dc650dSSadaf Ebrahimi {
888*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(*cc == OP_ASSERTBACK || *cc == OP_ASSERTBACK_NOT || *cc == OP_ASSERTBACK_NA);
889*22dc650dSSadaf Ebrahimi
890*22dc650dSSadaf Ebrahimi do
891*22dc650dSSadaf Ebrahimi {
892*22dc650dSSadaf Ebrahimi if (cc[1 + LINK_SIZE] == OP_VREVERSE)
893*22dc650dSSadaf Ebrahimi return TRUE;
894*22dc650dSSadaf Ebrahimi cc += GET(cc, 1);
895*22dc650dSSadaf Ebrahimi }
896*22dc650dSSadaf Ebrahimi while (*cc == OP_ALT);
897*22dc650dSSadaf Ebrahimi
898*22dc650dSSadaf Ebrahimi return FALSE;
899*22dc650dSSadaf Ebrahimi }
900*22dc650dSSadaf Ebrahimi
/* Functions that might need modification whenever a new opcode is supported:
902*22dc650dSSadaf Ebrahimi next_opcode
903*22dc650dSSadaf Ebrahimi check_opcode_types
904*22dc650dSSadaf Ebrahimi set_private_data_ptrs
905*22dc650dSSadaf Ebrahimi get_framesize
906*22dc650dSSadaf Ebrahimi init_frame
907*22dc650dSSadaf Ebrahimi get_recurse_data_length
908*22dc650dSSadaf Ebrahimi copy_recurse_data
909*22dc650dSSadaf Ebrahimi compile_matchingpath
910*22dc650dSSadaf Ebrahimi compile_backtrackingpath
911*22dc650dSSadaf Ebrahimi */
912*22dc650dSSadaf Ebrahimi
/* Returns a pointer to the opcode that follows the one at cc, or NULL when
the opcode cannot be handled.

  common  compiler state; only common->utf is read, and only when
          SUPPORT_UNICODE is defined
  cc      pointer to the current opcode

The opcodes fall into these groups:
  - fixed length opcodes: advance by PRIV(OP_lengths)[*cc]
  - single character opcodes: as above, plus the extra code units of the
    character when common->utf is set
  - OP_TYPE* repeats: one less than the table length
  - OP_ANYBYTE: one code unit, but NULL in UTF mode
  - OP_CALLOUT_STR and OP_XCLASS: the length is stored inside the opcode
  - OP_MARK and the *_ARG verbs: 1 + 2 + cc[1]
Any other opcode hits SLJIT_UNREACHABLE() and yields NULL. */
static PCRE2_SPTR next_opcode(compiler_common *common, PCRE2_SPTR cc)
{
PCRE2_SPTR next = NULL;

SLJIT_UNUSED_ARG(common);

switch(*cc)
  {
  /* Fixed length opcodes. */
  case OP_SOD: case OP_SOM: case OP_SET_SOM:
  case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY:
  case OP_NOT_DIGIT: case OP_DIGIT:
  case OP_NOT_WHITESPACE: case OP_WHITESPACE:
  case OP_NOT_WORDCHAR: case OP_WORDCHAR:
  case OP_ANY: case OP_ALLANY:
  case OP_NOTPROP: case OP_PROP:
  case OP_ANYNL:
  case OP_NOT_HSPACE: case OP_HSPACE:
  case OP_NOT_VSPACE: case OP_VSPACE:
  case OP_EXTUNI:
  case OP_EODN: case OP_EOD:
  case OP_CIRC: case OP_CIRCM:
  case OP_DOLL: case OP_DOLLM:
  case OP_CRSTAR: case OP_CRMINSTAR:
  case OP_CRPLUS: case OP_CRMINPLUS:
  case OP_CRQUERY: case OP_CRMINQUERY:
  case OP_CRRANGE: case OP_CRMINRANGE:
  case OP_CRPOSSTAR: case OP_CRPOSPLUS: case OP_CRPOSQUERY: case OP_CRPOSRANGE:
  case OP_CLASS: case OP_NCLASS:
  case OP_REF: case OP_REFI: case OP_DNREF: case OP_DNREFI:
  case OP_RECURSE: case OP_CALLOUT:
  case OP_ALT: case OP_KET: case OP_KETRMAX: case OP_KETRMIN: case OP_KETRPOS:
  case OP_REVERSE: case OP_VREVERSE:
  case OP_ASSERT: case OP_ASSERT_NOT:
  case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
  case OP_ASSERT_NA: case OP_ASSERTBACK_NA:
  case OP_ONCE: case OP_SCRIPT_RUN:
  case OP_BRA: case OP_BRAPOS: case OP_CBRA: case OP_CBRAPOS: case OP_COND:
  case OP_SBRA: case OP_SBRAPOS: case OP_SCBRA: case OP_SCBRAPOS: case OP_SCOND:
  case OP_CREF: case OP_DNCREF: case OP_RREF: case OP_DNRREF:
  case OP_FALSE: case OP_TRUE:
  case OP_BRAZERO: case OP_BRAMINZERO: case OP_BRAPOSZERO:
  case OP_PRUNE: case OP_SKIP: case OP_THEN: case OP_COMMIT:
  case OP_FAIL: case OP_ACCEPT: case OP_ASSERT_ACCEPT:
  case OP_CLOSE: case OP_SKIPZERO:
  case OP_NOT_UCP_WORD_BOUNDARY: case OP_UCP_WORD_BOUNDARY:
  next = cc + PRIV(OP_lengths)[*cc];
  break;

  /* Opcodes that end with a single character. */
  case OP_CHAR: case OP_CHARI: case OP_NOT: case OP_NOTI:
  case OP_STAR: case OP_MINSTAR: case OP_PLUS: case OP_MINPLUS:
  case OP_QUERY: case OP_MINQUERY: case OP_UPTO: case OP_MINUPTO:
  case OP_EXACT:
  case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:
  case OP_STARI: case OP_MINSTARI: case OP_PLUSI: case OP_MINPLUSI:
  case OP_QUERYI: case OP_MINQUERYI: case OP_UPTOI: case OP_MINUPTOI:
  case OP_EXACTI:
  case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:
  case OP_NOTSTAR: case OP_NOTMINSTAR: case OP_NOTPLUS: case OP_NOTMINPLUS:
  case OP_NOTQUERY: case OP_NOTMINQUERY: case OP_NOTUPTO: case OP_NOTMINUPTO:
  case OP_NOTEXACT:
  case OP_NOTPOSSTAR: case OP_NOTPOSPLUS: case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
  case OP_NOTSTARI: case OP_NOTMINSTARI: case OP_NOTPLUSI: case OP_NOTMINPLUSI:
  case OP_NOTQUERYI: case OP_NOTMINQUERYI: case OP_NOTUPTOI: case OP_NOTMINUPTOI:
  case OP_NOTEXACTI:
  case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI: case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
  next = cc + PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UNICODE
  /* The table length covers one code unit of the character; skip the rest. */
  if (common->utf && HAS_EXTRALEN(next[-1])) next += GET_EXTRALEN(next[-1]);
#endif
  break;

  /* Special cases. */
  case OP_TYPESTAR: case OP_TYPEMINSTAR:
  case OP_TYPEPLUS: case OP_TYPEMINPLUS:
  case OP_TYPEQUERY: case OP_TYPEMINQUERY:
  case OP_TYPEUPTO: case OP_TYPEMINUPTO:
  case OP_TYPEEXACT:
  case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS: case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
  next = cc + PRIV(OP_lengths)[*cc] - 1;
  break;

  case OP_ANYBYTE:
  next = cc + 1;
#ifdef SUPPORT_UNICODE
  if (common->utf) next = NULL;
#endif
  break;

  case OP_CALLOUT_STR:
  next = cc + GET(cc, 1 + 2*LINK_SIZE);
  break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  case OP_XCLASS:
  next = cc + GET(cc, 1);
  break;
#endif

  case OP_MARK:
  case OP_COMMIT_ARG: case OP_PRUNE_ARG: case OP_SKIP_ARG: case OP_THEN_ARG:
  next = cc + 1 + 2 + cc[1];
  break;

  default:
  SLJIT_UNREACHABLE();
  next = NULL;
  break;
  }

return next;
}
1116*22dc650dSSadaf Ebrahimi
check_opcode_types(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend)1117*22dc650dSSadaf Ebrahimi static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend)
1118*22dc650dSSadaf Ebrahimi {
1119*22dc650dSSadaf Ebrahimi int count;
1120*22dc650dSSadaf Ebrahimi PCRE2_SPTR slot;
1121*22dc650dSSadaf Ebrahimi PCRE2_SPTR assert_back_end = cc - 1;
1122*22dc650dSSadaf Ebrahimi PCRE2_SPTR assert_na_end = cc - 1;
1123*22dc650dSSadaf Ebrahimi
1124*22dc650dSSadaf Ebrahimi /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
1125*22dc650dSSadaf Ebrahimi while (cc < ccend)
1126*22dc650dSSadaf Ebrahimi {
1127*22dc650dSSadaf Ebrahimi switch(*cc)
1128*22dc650dSSadaf Ebrahimi {
1129*22dc650dSSadaf Ebrahimi case OP_SET_SOM:
1130*22dc650dSSadaf Ebrahimi common->has_set_som = TRUE;
1131*22dc650dSSadaf Ebrahimi common->might_be_empty = TRUE;
1132*22dc650dSSadaf Ebrahimi cc += 1;
1133*22dc650dSSadaf Ebrahimi break;
1134*22dc650dSSadaf Ebrahimi
1135*22dc650dSSadaf Ebrahimi case OP_REFI:
1136*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
1137*22dc650dSSadaf Ebrahimi if (common->iref_ptr == 0)
1138*22dc650dSSadaf Ebrahimi {
1139*22dc650dSSadaf Ebrahimi common->iref_ptr = common->ovector_start;
1140*22dc650dSSadaf Ebrahimi common->ovector_start += 3 * sizeof(sljit_sw);
1141*22dc650dSSadaf Ebrahimi }
1142*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE */
1143*22dc650dSSadaf Ebrahimi /* Fall through. */
1144*22dc650dSSadaf Ebrahimi case OP_REF:
1145*22dc650dSSadaf Ebrahimi common->optimized_cbracket[GET2(cc, 1)] = 0;
1146*22dc650dSSadaf Ebrahimi cc += 1 + IMM2_SIZE;
1147*22dc650dSSadaf Ebrahimi break;
1148*22dc650dSSadaf Ebrahimi
1149*22dc650dSSadaf Ebrahimi case OP_ASSERT_NA:
1150*22dc650dSSadaf Ebrahimi case OP_ASSERTBACK_NA:
1151*22dc650dSSadaf Ebrahimi slot = bracketend(cc);
1152*22dc650dSSadaf Ebrahimi if (slot > assert_na_end)
1153*22dc650dSSadaf Ebrahimi assert_na_end = slot;
1154*22dc650dSSadaf Ebrahimi cc += 1 + LINK_SIZE;
1155*22dc650dSSadaf Ebrahimi break;
1156*22dc650dSSadaf Ebrahimi
1157*22dc650dSSadaf Ebrahimi case OP_CBRAPOS:
1158*22dc650dSSadaf Ebrahimi case OP_SCBRAPOS:
1159*22dc650dSSadaf Ebrahimi common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
1160*22dc650dSSadaf Ebrahimi cc += 1 + LINK_SIZE + IMM2_SIZE;
1161*22dc650dSSadaf Ebrahimi break;
1162*22dc650dSSadaf Ebrahimi
1163*22dc650dSSadaf Ebrahimi case OP_COND:
1164*22dc650dSSadaf Ebrahimi case OP_SCOND:
1165*22dc650dSSadaf Ebrahimi /* Only AUTO_CALLOUT can insert this opcode. We do
1166*22dc650dSSadaf Ebrahimi not intend to support this case. */
1167*22dc650dSSadaf Ebrahimi if (cc[1 + LINK_SIZE] == OP_CALLOUT || cc[1 + LINK_SIZE] == OP_CALLOUT_STR)
1168*22dc650dSSadaf Ebrahimi return FALSE;
1169*22dc650dSSadaf Ebrahimi cc += 1 + LINK_SIZE;
1170*22dc650dSSadaf Ebrahimi break;
1171*22dc650dSSadaf Ebrahimi
1172*22dc650dSSadaf Ebrahimi case OP_CREF:
1173*22dc650dSSadaf Ebrahimi common->optimized_cbracket[GET2(cc, 1)] = 0;
1174*22dc650dSSadaf Ebrahimi cc += 1 + IMM2_SIZE;
1175*22dc650dSSadaf Ebrahimi break;
1176*22dc650dSSadaf Ebrahimi
1177*22dc650dSSadaf Ebrahimi case OP_DNREF:
1178*22dc650dSSadaf Ebrahimi case OP_DNREFI:
1179*22dc650dSSadaf Ebrahimi case OP_DNCREF:
1180*22dc650dSSadaf Ebrahimi count = GET2(cc, 1 + IMM2_SIZE);
1181*22dc650dSSadaf Ebrahimi slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
1182*22dc650dSSadaf Ebrahimi while (count-- > 0)
1183*22dc650dSSadaf Ebrahimi {
1184*22dc650dSSadaf Ebrahimi common->optimized_cbracket[GET2(slot, 0)] = 0;
1185*22dc650dSSadaf Ebrahimi slot += common->name_entry_size;
1186*22dc650dSSadaf Ebrahimi }
1187*22dc650dSSadaf Ebrahimi cc += 1 + 2 * IMM2_SIZE;
1188*22dc650dSSadaf Ebrahimi break;
1189*22dc650dSSadaf Ebrahimi
1190*22dc650dSSadaf Ebrahimi case OP_RECURSE:
1191*22dc650dSSadaf Ebrahimi /* Set its value only once. */
1192*22dc650dSSadaf Ebrahimi if (common->recursive_head_ptr == 0)
1193*22dc650dSSadaf Ebrahimi {
1194*22dc650dSSadaf Ebrahimi common->recursive_head_ptr = common->ovector_start;
1195*22dc650dSSadaf Ebrahimi common->ovector_start += sizeof(sljit_sw);
1196*22dc650dSSadaf Ebrahimi }
1197*22dc650dSSadaf Ebrahimi cc += 1 + LINK_SIZE;
1198*22dc650dSSadaf Ebrahimi break;
1199*22dc650dSSadaf Ebrahimi
1200*22dc650dSSadaf Ebrahimi case OP_CALLOUT:
1201*22dc650dSSadaf Ebrahimi case OP_CALLOUT_STR:
1202*22dc650dSSadaf Ebrahimi if (common->capture_last_ptr == 0)
1203*22dc650dSSadaf Ebrahimi {
1204*22dc650dSSadaf Ebrahimi common->capture_last_ptr = common->ovector_start;
1205*22dc650dSSadaf Ebrahimi common->ovector_start += sizeof(sljit_sw);
1206*22dc650dSSadaf Ebrahimi }
1207*22dc650dSSadaf Ebrahimi cc += (*cc == OP_CALLOUT) ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2*LINK_SIZE);
1208*22dc650dSSadaf Ebrahimi break;
1209*22dc650dSSadaf Ebrahimi
1210*22dc650dSSadaf Ebrahimi case OP_ASSERTBACK:
1211*22dc650dSSadaf Ebrahimi slot = bracketend(cc);
1212*22dc650dSSadaf Ebrahimi if (slot > assert_back_end)
1213*22dc650dSSadaf Ebrahimi assert_back_end = slot;
1214*22dc650dSSadaf Ebrahimi cc += 1 + LINK_SIZE;
1215*22dc650dSSadaf Ebrahimi break;
1216*22dc650dSSadaf Ebrahimi
1217*22dc650dSSadaf Ebrahimi case OP_THEN_ARG:
1218*22dc650dSSadaf Ebrahimi common->has_then = TRUE;
1219*22dc650dSSadaf Ebrahimi common->control_head_ptr = 1;
1220*22dc650dSSadaf Ebrahimi /* Fall through. */
1221*22dc650dSSadaf Ebrahimi
1222*22dc650dSSadaf Ebrahimi case OP_COMMIT_ARG:
1223*22dc650dSSadaf Ebrahimi case OP_PRUNE_ARG:
1224*22dc650dSSadaf Ebrahimi if (cc < assert_na_end)
1225*22dc650dSSadaf Ebrahimi return FALSE;
1226*22dc650dSSadaf Ebrahimi /* Fall through */
1227*22dc650dSSadaf Ebrahimi case OP_MARK:
1228*22dc650dSSadaf Ebrahimi if (common->mark_ptr == 0)
1229*22dc650dSSadaf Ebrahimi {
1230*22dc650dSSadaf Ebrahimi common->mark_ptr = common->ovector_start;
1231*22dc650dSSadaf Ebrahimi common->ovector_start += sizeof(sljit_sw);
1232*22dc650dSSadaf Ebrahimi }
1233*22dc650dSSadaf Ebrahimi cc += 1 + 2 + cc[1];
1234*22dc650dSSadaf Ebrahimi break;
1235*22dc650dSSadaf Ebrahimi
1236*22dc650dSSadaf Ebrahimi case OP_THEN:
1237*22dc650dSSadaf Ebrahimi common->has_then = TRUE;
1238*22dc650dSSadaf Ebrahimi common->control_head_ptr = 1;
1239*22dc650dSSadaf Ebrahimi cc += 1;
1240*22dc650dSSadaf Ebrahimi break;
1241*22dc650dSSadaf Ebrahimi
1242*22dc650dSSadaf Ebrahimi case OP_SKIP:
1243*22dc650dSSadaf Ebrahimi if (cc < assert_back_end)
1244*22dc650dSSadaf Ebrahimi common->has_skip_in_assert_back = TRUE;
1245*22dc650dSSadaf Ebrahimi if (cc < assert_na_end)
1246*22dc650dSSadaf Ebrahimi return FALSE;
1247*22dc650dSSadaf Ebrahimi cc += 1;
1248*22dc650dSSadaf Ebrahimi break;
1249*22dc650dSSadaf Ebrahimi
1250*22dc650dSSadaf Ebrahimi case OP_SKIP_ARG:
1251*22dc650dSSadaf Ebrahimi common->control_head_ptr = 1;
1252*22dc650dSSadaf Ebrahimi common->has_skip_arg = TRUE;
1253*22dc650dSSadaf Ebrahimi if (cc < assert_back_end)
1254*22dc650dSSadaf Ebrahimi common->has_skip_in_assert_back = TRUE;
1255*22dc650dSSadaf Ebrahimi if (cc < assert_na_end)
1256*22dc650dSSadaf Ebrahimi return FALSE;
1257*22dc650dSSadaf Ebrahimi cc += 1 + 2 + cc[1];
1258*22dc650dSSadaf Ebrahimi break;
1259*22dc650dSSadaf Ebrahimi
1260*22dc650dSSadaf Ebrahimi case OP_PRUNE:
1261*22dc650dSSadaf Ebrahimi case OP_COMMIT:
1262*22dc650dSSadaf Ebrahimi case OP_ASSERT_ACCEPT:
1263*22dc650dSSadaf Ebrahimi if (cc < assert_na_end)
1264*22dc650dSSadaf Ebrahimi return FALSE;
1265*22dc650dSSadaf Ebrahimi cc++;
1266*22dc650dSSadaf Ebrahimi break;
1267*22dc650dSSadaf Ebrahimi
1268*22dc650dSSadaf Ebrahimi default:
1269*22dc650dSSadaf Ebrahimi cc = next_opcode(common, cc);
1270*22dc650dSSadaf Ebrahimi if (cc == NULL)
1271*22dc650dSSadaf Ebrahimi return FALSE;
1272*22dc650dSSadaf Ebrahimi break;
1273*22dc650dSSadaf Ebrahimi }
1274*22dc650dSSadaf Ebrahimi }
1275*22dc650dSSadaf Ebrahimi return TRUE;
1276*22dc650dSSadaf Ebrahimi }
1277*22dc650dSSadaf Ebrahimi
/* Upper bound of the early fail state counter used by detect_early_fail().
The scan of an alternative stops as soon as the counter reaches this value,
and the function also returns it to report "nothing more can be enhanced". */
#define EARLY_FAIL_ENHANCE_MAX (3 + 3)

/*
Scans every alternative of the bracket at cc and, for the single character,
character type and character class star/plus iterators it can reach, stores
a tagged private data offset (type_skip, type_fail or type_fail_range) in
common->private_data_ptrs. Nested OP_BRA / OP_CBRA brackets are scanned
recursively up to a fixed depth.

Arguments:
  common              compiler state; private_data_ptrs, fast_forward_bc_ptr,
                      early_fail_start_ptr and early_fail_end_ptr are updated
  cc                  points to an OP_ONCE, OP_BRA or OP_CBRA opcode (asserted)
  private_data_start  in/out: next free private data offset; advanced by one
                      sljit_sw for skip/fail and by two for ranged fail
  depth               current bracket nesting depth of the recursion
  start               the state the scan starts with, see below

The `start` argument (and the per-alternative counter derived from it)
represents which early fail enhancements are still allowed.

The values 0-3 have a special meaning:
   0 - skip is allowed for all iterators
   1 - fail is allowed for all iterators
   2 - fail is allowed for greedy iterators
   3 - only ranged early fail is allowed
  >3 - only ranged early fail is allowed and part of its budget is used up.
       NOTE(review): the original comment called this "(start - 3) number of
       remaining ranged early fails"; in the code each ranged early fail
       increments the counter and scanning stops at EARLY_FAIL_ENHANCE_MAX,
       so the remaining budget looks like EARLY_FAIL_ENHANCE_MAX - start.
       Confirm the intended wording.

Returns: the updated value of start, i.e. the largest counter reached by any
alternative. EARLY_FAIL_ENHANCE_MAX is returned when an alternative stopped
on an opcode other than OP_ALT / OP_KET, or when *private_data_start grew
beyond SLJIT_MAX_LOCAL_SIZE.
*/
static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc,
  int *private_data_start, sljit_s32 depth, int start)
{
  PCRE2_SPTR begin = cc;
  PCRE2_SPTR next_alt;
  PCRE2_SPTR end;
  PCRE2_SPTR accelerated_start;
  int result = 0;
  int count, prev_count;

  SLJIT_ASSERT(*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA);
  SLJIT_ASSERT(*cc != OP_CBRA || common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] != 0);
  SLJIT_ASSERT(start < EARLY_FAIL_ENHANCE_MAX);

  /* When the bracket has more than one alternative, state 0 is raised to 1,
  so no iterator inside it can receive the type_skip tag. */
  next_alt = cc + GET(cc, 1);
  if (*next_alt == OP_ALT && start < 1)
    start = 1;

  /* One pass of this loop per alternative. */
  do
    {
    /* Every alternative starts again from the incoming state. */
    count = start;
    /* Step over the opcode and its link; OP_CBRA also carries an
    IMM2_SIZE operand. */
    cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);

    /* Inside the switch below:
       - `continue` moves on to the next opcode of the alternative;
       - `break` with accelerated_start set falls through to the slot
         allocation code after the switch;
       - `break` with accelerated_start == NULL (this includes every opcode
         that has no case label) ends the scan of this alternative. */
    while (TRUE)
      {
      accelerated_start = NULL;

      switch(*cc)
        {
        case OP_SOD:
        case OP_SOM:
        case OP_SET_SOM:
        case OP_NOT_WORD_BOUNDARY:
        case OP_WORD_BOUNDARY:
        case OP_EODN:
        case OP_EOD:
        case OP_CIRC:
        case OP_CIRCM:
        case OP_DOLL:
        case OP_DOLLM:
        case OP_NOT_UCP_WORD_BOUNDARY:
        case OP_UCP_WORD_BOUNDARY:
          /* Zero width assertions. The state is left unchanged. */
          cc++;
          continue;

        case OP_NOT_DIGIT:
        case OP_DIGIT:
        case OP_NOT_WHITESPACE:
        case OP_WHITESPACE:
        case OP_NOT_WORDCHAR:
        case OP_WORDCHAR:
        case OP_ANY:
        case OP_ALLANY:
        case OP_ANYBYTE:
        case OP_NOT_HSPACE:
        case OP_HSPACE:
        case OP_NOT_VSPACE:
        case OP_VSPACE:
          /* One-unit type opcodes: state 0 (skip) is no longer possible. */
          if (count < 1)
            count = 1;
          cc++;
          continue;

        case OP_ANYNL:
        case OP_EXTUNI:
          /* After these opcodes only ranged early fail is allowed. */
          if (count < 3)
            count = 3;
          cc++;
          continue;

        case OP_NOTPROP:
        case OP_PROP:
          if (count < 1)
            count = 1;
          /* Opcode plus two operand units. */
          cc += 1 + 2;
          continue;

        case OP_CHAR:
        case OP_CHARI:
        case OP_NOT:
        case OP_NOTI:
          if (count < 1)
            count = 1;
          /* Opcode plus the character; in UTF mode the character may occupy
          further code units. */
          cc += 2;
#ifdef SUPPORT_UNICODE
          if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
          continue;

        case OP_TYPEMINSTAR:
        case OP_TYPEMINPLUS:
          /* State 2 allows fail only for greedy iterators, so a minimizing
          iterator moves it to ranged-only. */
          if (count == 2)
            count = 3;
          /* Fall through */

        case OP_TYPESTAR:
        case OP_TYPEPLUS:
        case OP_TYPEPOSSTAR:
        case OP_TYPEPOSPLUS:
          /* The type or prop opcode is skipped in the next iteration. */
          cc += 1;

          if (cc[0] != OP_ANYNL && cc[0] != OP_EXTUNI)
            {
            /* cc - 1 is the iterator opcode itself. */
            accelerated_start = cc - 1;
            break;
            }

          /* Iterators over OP_ANYNL / OP_EXTUNI get no slot. */
          if (count < 3)
            count = 3;
          continue;

        case OP_TYPEEXACT:
          if (count < 1)
            count = 1;
          /* Opcode plus repeat count; the type opcode that follows is
          consumed by the next iteration. */
          cc += 1 + IMM2_SIZE;
          continue;

        case OP_TYPEUPTO:
        case OP_TYPEMINUPTO:
        case OP_TYPEPOSUPTO:
          /* Skip the repeat count operand. */
          cc += IMM2_SIZE;
          /* Fall through */

        case OP_TYPEQUERY:
        case OP_TYPEMINQUERY:
        case OP_TYPEPOSQUERY:
          /* The type or prop opcode is skipped in the next iteration. */
          if (count < 3)
            count = 3;
          cc += 1;
          continue;

        case OP_MINSTAR:
        case OP_MINPLUS:
        case OP_MINSTARI:
        case OP_MINPLUSI:
        case OP_NOTMINSTAR:
        case OP_NOTMINPLUS:
        case OP_NOTMINSTARI:
        case OP_NOTMINPLUSI:
          /* Same rule as for OP_TYPEMINSTAR / OP_TYPEMINPLUS above. */
          if (count == 2)
            count = 3;
          /* Fall through */

        case OP_STAR:
        case OP_PLUS:
        case OP_POSSTAR:
        case OP_POSPLUS:

        case OP_STARI:
        case OP_PLUSI:
        case OP_POSSTARI:
        case OP_POSPLUSI:

        case OP_NOTSTAR:
        case OP_NOTPLUS:
        case OP_NOTPOSSTAR:
        case OP_NOTPOSPLUS:

        case OP_NOTSTARI:
        case OP_NOTPLUSI:
        case OP_NOTPOSSTARI:
        case OP_NOTPOSPLUSI:
          /* Single character star / plus iterator: candidate for a slot. */
          accelerated_start = cc;
          cc += 2;
#ifdef SUPPORT_UNICODE
          if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
          break;

        case OP_EXACT:
          if (count < 1)
            count = 1;
          /* Opcode, repeat count and the character. */
          cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
          if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
          continue;

        /* Note: OP_EXACTI, OP_NOTEXACT and OP_NOTEXACTI are grouped with the
        bounded repeats here, so unlike OP_EXACT they raise the state to 3. */
        case OP_UPTO:
        case OP_MINUPTO:
        case OP_POSUPTO:
        case OP_UPTOI:
        case OP_MINUPTOI:
        case OP_EXACTI:
        case OP_POSUPTOI:
        case OP_NOTUPTO:
        case OP_NOTMINUPTO:
        case OP_NOTEXACT:
        case OP_NOTPOSUPTO:
        case OP_NOTUPTOI:
        case OP_NOTMINUPTOI:
        case OP_NOTEXACTI:
        case OP_NOTPOSUPTOI:
          /* Skip the repeat count operand. */
          cc += IMM2_SIZE;
          /* Fall through */

        case OP_QUERY:
        case OP_MINQUERY:
        case OP_POSQUERY:
        case OP_QUERYI:
        case OP_MINQUERYI:
        case OP_POSQUERYI:
        case OP_NOTQUERY:
        case OP_NOTMINQUERY:
        case OP_NOTPOSQUERY:
        case OP_NOTQUERYI:
        case OP_NOTMINQUERYI:
        case OP_NOTPOSQUERYI:
          /* These iterators get no slot; only ranged early fail is allowed
          after them. */
          if (count < 3)
            count = 3;
          cc += 2;
#ifdef SUPPORT_UNICODE
          if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
          continue;

        case OP_CLASS:
        case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
        case OP_XCLASS:
          /* Remember the class opcode, then step over the class: OP_XCLASS
          stores its own length, the others are an opcode followed by a
          32 byte table. */
          accelerated_start = cc;
          cc += ((*cc == OP_XCLASS) ? GET(cc, 1) : (unsigned int)(1 + (32 / sizeof(PCRE2_UCHAR))));
#else
          accelerated_start = cc;
          cc += (1 + (32 / sizeof(PCRE2_UCHAR)));
#endif

          /* cc now points to the opcode after the class, which may be a
          class repeat opcode. */
          switch (*cc)
            {
            case OP_CRMINSTAR:
            case OP_CRMINPLUS:
              if (count == 2)
                count = 3;
              /* Fall through */

            case OP_CRSTAR:
            case OP_CRPLUS:
            case OP_CRPOSSTAR:
            case OP_CRPOSPLUS:
              /* Leaves the inner switch only; the break after it then
              leaves the outer switch with accelerated_start set. */
              cc++;
              break;

            case OP_CRRANGE:
            case OP_CRMINRANGE:
            case OP_CRPOSRANGE:
              if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
                {
                /* Exact repeat. */
                cc += 1 + 2 * IMM2_SIZE;
                if (count < 1)
                  count = 1;
                continue;
                }

              /* Skip the two range operands; the opcode itself is skipped
              by the cc++ below. */
              cc += 2 * IMM2_SIZE;
              /* Fall through */
            case OP_CRQUERY:
            case OP_CRMINQUERY:
            case OP_CRPOSQUERY:
              cc++;
              if (count < 3)
                count = 3;
              continue;

            default:
              /* No repeat. */
              if (count < 1)
                count = 1;
              continue;
            }
          break;

        case OP_BRA:
        case OP_CBRA:
          /* prev_count keeps the state as it was before this bracket; it is
          the start value passed to the recursive call. */
          prev_count = count;
          if (count < 1)
            count = 1;

          /* Recursion limit: stop scanning this alternative. */
          if (depth >= 4)
            break;

          /* The nested bracket has more than one alternative. */
          if (count < 3 && cc[GET(cc, 1)] == OP_ALT)
            count = 3;

          /* Give up when the bracket does not end with a plain OP_KET, or
          when it is a capturing bracket whose optimized_cbracket entry is
          zero. */
          end = bracketend(cc);
          if (end[-1 - LINK_SIZE] != OP_KET || (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0))
            break;

          prev_count = detect_early_fail(common, cc, private_data_start, depth + 1, prev_count);

          if (prev_count > count)
            count = prev_count;

          /* The recursive call may have marked the nested bracket (see
          "Cannot be part of a repeat" below); in that case the enclosing
          bracket is marked as well. */
          if (PRIVATE_DATA(cc) != 0)
            common->private_data_ptrs[begin - common->start] = 1;

          /* Resume after the nested bracket while budget remains. */
          if (count < EARLY_FAIL_ENHANCE_MAX)
            {
            cc = end;
            continue;
            }
          break;

        case OP_KET:
          SLJIT_ASSERT(PRIVATE_DATA(cc) == 0);
          /* A KET at or beyond next_alt ends this alternative; an earlier
          one is stepped over. */
          if (cc >= next_alt)
            break;
          cc += 1 + LINK_SIZE;
          continue;
        }

      /* No iterator candidate was found: the scan of this alternative is
      over. */
      if (accelerated_start == NULL)
        break;

      /* The slot is recorded at the code unit following the iterator (or
      class) opcode: the private data offset shifted left by three, or'ed
      with the type tag. */
      if (count == 0)
        {
        /* State 0: skip. The iterator is also recorded in
        fast_forward_bc_ptr. One sljit_sw is reserved. */
        common->fast_forward_bc_ptr = accelerated_start;
        common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_skip;
        *private_data_start += sizeof(sljit_sw);
        count = 4;
        }
      else if (count < 3)
        {
        /* State 1 or 2: fail. One sljit_sw is reserved. */
        common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail;

        /* early_fail_start_ptr / early_fail_end_ptr delimit the private
        data area used by the fail and ranged fail slots. */
        if (common->early_fail_start_ptr == 0)
          common->early_fail_start_ptr = *private_data_start;

        *private_data_start += sizeof(sljit_sw);
        common->early_fail_end_ptr = *private_data_start;

        if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
          return EARLY_FAIL_ENHANCE_MAX;

        count = 4;
        }
      else
        {
        /* State 3 or above: ranged fail. Two sljit_sw are reserved. */
        common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail_range;

        if (common->early_fail_start_ptr == 0)
          common->early_fail_start_ptr = *private_data_start;

        *private_data_start += 2 * sizeof(sljit_sw);
        common->early_fail_end_ptr = *private_data_start;

        if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
          return EARLY_FAIL_ENHANCE_MAX;

        /* Each ranged slot uses up one unit of the remaining budget. */
        count++;
        }

      /* Cannot be part of a repeat. */
      common->private_data_ptrs[begin - common->start] = 1;

      if (count >= EARLY_FAIL_ENHANCE_MAX)
        break;
      }

    /* An alternative that was not scanned up to its OP_ALT / OP_KET forces
    the maximum result; otherwise the largest counter is kept. */
    if (*cc != OP_ALT && *cc != OP_KET)
      result = EARLY_FAIL_ENHANCE_MAX;
    else if (result < count)
      result = count;

    /* Move to the next alternative (or to the closing KET, which ends the
    loop). */
    cc = next_alt;
    next_alt = cc + GET(cc, 1);
    }
  while (*cc == OP_ALT);

  return result;
}
1666*22dc650dSSadaf Ebrahimi
/* Maps the class repeat opcode at cc to a size value of 0, 1 or 2.

   OP_CRSTAR / OP_CRPLUS                          -> 2
   OP_CRMINSTAR / OP_CRMINPLUS / OP_CR[MIN]QUERY  -> 1
   OP_CRRANGE / OP_CRMINRANGE                     -> see below
   anything else                                  -> 0

For the range opcodes the two IMM2 operands (minimum, then maximum) are
read. A maximum of zero gives 2 for OP_CRRANGE and 1 for OP_CRMINRANGE;
otherwise the result is (maximum - minimum) capped at 2.

NOTE(review): a zero maximum presumably encodes "no upper limit", and the
result is presumably a count of private data words - confirm at the caller. */
static int get_class_iterator_size(PCRE2_SPTR cc)
{
  const PCRE2_UCHAR repeat_op = *cc;
  sljit_u32 lower;
  sljit_u32 upper;
  sljit_u32 span;

  if (repeat_op == OP_CRSTAR || repeat_op == OP_CRPLUS)
    return 2;

  if (repeat_op == OP_CRMINSTAR || repeat_op == OP_CRMINPLUS
      || repeat_op == OP_CRQUERY || repeat_op == OP_CRMINQUERY)
    return 1;

  if (repeat_op != OP_CRRANGE && repeat_op != OP_CRMINRANGE)
    return 0;

  lower = GET2(cc, 1);
  upper = GET2(cc, 1 + IMM2_SIZE);

  if (upper == 0)
    return (repeat_op == OP_CRRANGE) ? 2 : 1;

  /* Unsigned arithmetic, exactly as in the switch based form. */
  span = upper - lower;
  return (span > 2) ? 2 : (int)span;
}
1698*22dc650dSSadaf Ebrahimi
detect_repeat(compiler_common * common,PCRE2_SPTR begin)1699*22dc650dSSadaf Ebrahimi static BOOL detect_repeat(compiler_common *common, PCRE2_SPTR begin)
1700*22dc650dSSadaf Ebrahimi {
1701*22dc650dSSadaf Ebrahimi PCRE2_SPTR end = bracketend(begin);
1702*22dc650dSSadaf Ebrahimi PCRE2_SPTR next;
1703*22dc650dSSadaf Ebrahimi PCRE2_SPTR next_end;
1704*22dc650dSSadaf Ebrahimi PCRE2_SPTR max_end;
1705*22dc650dSSadaf Ebrahimi PCRE2_UCHAR type;
1706*22dc650dSSadaf Ebrahimi sljit_sw length = end - begin;
1707*22dc650dSSadaf Ebrahimi sljit_s32 min, max, i;
1708*22dc650dSSadaf Ebrahimi
1709*22dc650dSSadaf Ebrahimi /* Detect fixed iterations first. */
1710*22dc650dSSadaf Ebrahimi if (end[-(1 + LINK_SIZE)] != OP_KET || PRIVATE_DATA(begin) != 0)
1711*22dc650dSSadaf Ebrahimi return FALSE;
1712*22dc650dSSadaf Ebrahimi
1713*22dc650dSSadaf Ebrahimi /* /(?:AB){4,6}/ is currently converted to /(?:AB){3}(?AB){1,3}/
1714*22dc650dSSadaf Ebrahimi * Skip the check of the second part. */
1715*22dc650dSSadaf Ebrahimi if (PRIVATE_DATA(end - LINK_SIZE) != 0)
1716*22dc650dSSadaf Ebrahimi return TRUE;
1717*22dc650dSSadaf Ebrahimi
1718*22dc650dSSadaf Ebrahimi next = end;
1719*22dc650dSSadaf Ebrahimi min = 1;
1720*22dc650dSSadaf Ebrahimi while (1)
1721*22dc650dSSadaf Ebrahimi {
1722*22dc650dSSadaf Ebrahimi if (*next != *begin)
1723*22dc650dSSadaf Ebrahimi break;
1724*22dc650dSSadaf Ebrahimi next_end = bracketend(next);
1725*22dc650dSSadaf Ebrahimi if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
1726*22dc650dSSadaf Ebrahimi break;
1727*22dc650dSSadaf Ebrahimi next = next_end;
1728*22dc650dSSadaf Ebrahimi min++;
1729*22dc650dSSadaf Ebrahimi }
1730*22dc650dSSadaf Ebrahimi
1731*22dc650dSSadaf Ebrahimi if (min == 2)
1732*22dc650dSSadaf Ebrahimi return FALSE;
1733*22dc650dSSadaf Ebrahimi
1734*22dc650dSSadaf Ebrahimi max = 0;
1735*22dc650dSSadaf Ebrahimi max_end = next;
1736*22dc650dSSadaf Ebrahimi if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
1737*22dc650dSSadaf Ebrahimi {
1738*22dc650dSSadaf Ebrahimi type = *next;
1739*22dc650dSSadaf Ebrahimi while (1)
1740*22dc650dSSadaf Ebrahimi {
1741*22dc650dSSadaf Ebrahimi if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
1742*22dc650dSSadaf Ebrahimi break;
1743*22dc650dSSadaf Ebrahimi next_end = bracketend(next + 2 + LINK_SIZE);
1744*22dc650dSSadaf Ebrahimi if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
1745*22dc650dSSadaf Ebrahimi break;
1746*22dc650dSSadaf Ebrahimi next = next_end;
1747*22dc650dSSadaf Ebrahimi max++;
1748*22dc650dSSadaf Ebrahimi }
1749*22dc650dSSadaf Ebrahimi
1750*22dc650dSSadaf Ebrahimi if (next[0] == type && next[1] == *begin && max >= 1)
1751*22dc650dSSadaf Ebrahimi {
1752*22dc650dSSadaf Ebrahimi next_end = bracketend(next + 1);
1753*22dc650dSSadaf Ebrahimi if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
1754*22dc650dSSadaf Ebrahimi {
1755*22dc650dSSadaf Ebrahimi for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
1756*22dc650dSSadaf Ebrahimi if (*next_end != OP_KET)
1757*22dc650dSSadaf Ebrahimi break;
1758*22dc650dSSadaf Ebrahimi
1759*22dc650dSSadaf Ebrahimi if (i == max)
1760*22dc650dSSadaf Ebrahimi {
1761*22dc650dSSadaf Ebrahimi common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
1762*22dc650dSSadaf Ebrahimi common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
1763*22dc650dSSadaf Ebrahimi /* +2 the original and the last. */
1764*22dc650dSSadaf Ebrahimi common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1765*22dc650dSSadaf Ebrahimi if (min == 1)
1766*22dc650dSSadaf Ebrahimi return TRUE;
1767*22dc650dSSadaf Ebrahimi min--;
1768*22dc650dSSadaf Ebrahimi max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1769*22dc650dSSadaf Ebrahimi }
1770*22dc650dSSadaf Ebrahimi }
1771*22dc650dSSadaf Ebrahimi }
1772*22dc650dSSadaf Ebrahimi }
1773*22dc650dSSadaf Ebrahimi
1774*22dc650dSSadaf Ebrahimi if (min >= 3)
1775*22dc650dSSadaf Ebrahimi {
1776*22dc650dSSadaf Ebrahimi common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1777*22dc650dSSadaf Ebrahimi common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1778*22dc650dSSadaf Ebrahimi common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1779*22dc650dSSadaf Ebrahimi return TRUE;
1780*22dc650dSSadaf Ebrahimi }
1781*22dc650dSSadaf Ebrahimi
1782*22dc650dSSadaf Ebrahimi return FALSE;
1783*22dc650dSSadaf Ebrahimi }
1784*22dc650dSSadaf Ebrahimi
/* Case label groups for iterator opcodes. Each macro expands to a run of
`case` labels only (no statements), so it has to be placed inside a switch
and be followed by the code shared by all of its opcodes. The possessive
(OP_POS...) and exact (OP_EXACT...) opcodes are in none of these groups.
NOTE(review): the _1 / _2A / _2B suffixes presumably name the private data
layout that the opcodes of a group need; confirm at the switch statements
that use these macros. */

/* OP_MINSTAR, OP_MINPLUS, OP_QUERY, OP_MINQUERY and their I, NOT and NOT...I
counterparts. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINPLUS: \
    case OP_QUERY: \
    case OP_MINQUERY: \
    case OP_MINSTARI: \
    case OP_MINPLUSI: \
    case OP_QUERYI: \
    case OP_MINQUERYI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINPLUS: \
    case OP_NOTQUERY: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: \
    case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: \
    case OP_NOTMINQUERYI:

/* OP_STAR, OP_PLUS and their I, NOT and NOT...I counterparts. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_PLUS: \
    case OP_STARI: \
    case OP_PLUSI: \
    case OP_NOTSTAR: \
    case OP_NOTPLUS: \
    case OP_NOTSTARI: \
    case OP_NOTPLUSI:

/* OP_UPTO, OP_MINUPTO and their I, NOT and NOT...I counterparts. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_MINUPTO: \
    case OP_UPTOI: \
    case OP_MINUPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: \
    case OP_NOTMINUPTOI:

/* The OP_TYPE... opcodes that correspond to CASE_ITERATOR_PRIVATE_DATA_1. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

/* The OP_TYPE... opcodes that correspond to CASE_ITERATOR_PRIVATE_DATA_2A. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

/* The OP_TYPE... opcodes that correspond to CASE_ITERATOR_PRIVATE_DATA_2B. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
1836*22dc650dSSadaf Ebrahimi
/* Assigns private data offsets to the opcodes of the compiled pattern.

Scans the byte code from common->start up to ccend and records, in
common->private_data_ptrs (indexed by the distance of the opcode from
common->start), the offset reserved for every opcode that needs one. Offsets
are handed out sequentially, starting at *private_data_start, in units of
sizeof(sljit_sw); the first unused offset is written back to
*private_data_start on return. The scan stops early once the running offset
exceeds SLJIT_MAX_LOCAL_SIZE.

Per-iteration locals:
  space      - number of sljit_sw slots wanted by a character iterator
  size       - code units to skip; when negative, -size units are skipped
               and, if common->utf is set, also the extra code units
               announced by the last skipped unit
  bracketlen - length of a bracket header to step over
  end        - while cc < end, character iterators get no private slot */
set_private_data_ptrs(compiler_common * common,int * private_data_start,PCRE2_SPTR ccend)1837*22dc650dSSadaf Ebrahimi static void set_private_data_ptrs(compiler_common *common, int *private_data_start, PCRE2_SPTR ccend)
1838*22dc650dSSadaf Ebrahimi {
1839*22dc650dSSadaf Ebrahimi PCRE2_SPTR cc = common->start;
1840*22dc650dSSadaf Ebrahimi PCRE2_SPTR alternative;
1841*22dc650dSSadaf Ebrahimi PCRE2_SPTR end = NULL;
1842*22dc650dSSadaf Ebrahimi int private_data_ptr = *private_data_start;
1843*22dc650dSSadaf Ebrahimi int space, size, bracketlen;
1844*22dc650dSSadaf Ebrahimi BOOL repeat_check = TRUE;
1845*22dc650dSSadaf Ebrahimi
1846*22dc650dSSadaf Ebrahimi while (cc < ccend)
1847*22dc650dSSadaf Ebrahimi {
1848*22dc650dSSadaf Ebrahimi space = 0;
1849*22dc650dSSadaf Ebrahimi size = 0;
1850*22dc650dSSadaf Ebrahimi bracketlen = 0;
/* Give up once the assigned area grows beyond the local size limit. */
1851*22dc650dSSadaf Ebrahimi if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1852*22dc650dSSadaf Ebrahimi break;
1853*22dc650dSSadaf Ebrahimi
1854*22dc650dSSadaf Ebrahimi /* When the bracket is prefixed by a zero iteration, skip the repeat check (at this point). */
1855*22dc650dSSadaf Ebrahimi if (repeat_check && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
1856*22dc650dSSadaf Ebrahimi {
1857*22dc650dSSadaf Ebrahimi if (detect_repeat(common, cc))
1858*22dc650dSSadaf Ebrahimi {
1859*22dc650dSSadaf Ebrahimi /* These brackets are converted to repeats, so no global
1860*22dc650dSSadaf Ebrahimi based single character repeat is allowed. */
1861*22dc650dSSadaf Ebrahimi if (cc >= end)
1862*22dc650dSSadaf Ebrahimi end = bracketend(cc);
1863*22dc650dSSadaf Ebrahimi }
1864*22dc650dSSadaf Ebrahimi }
/* The check is suppressed only for the opcode directly after a BRAZERO-type prefix. */
1865*22dc650dSSadaf Ebrahimi repeat_check = TRUE;
1866*22dc650dSSadaf Ebrahimi
1867*22dc650dSSadaf Ebrahimi switch(*cc)
1868*22dc650dSSadaf Ebrahimi {
1869*22dc650dSSadaf Ebrahimi case OP_KET:
/* A non-zero entry at cc + 1 means this ket needs a slot; the entry is also
an extra distance to skip. NOTE(review): presumably stored by detect_repeat -
confirm. */
1870*22dc650dSSadaf Ebrahimi if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1871*22dc650dSSadaf Ebrahimi {
1872*22dc650dSSadaf Ebrahimi common->private_data_ptrs[cc - common->start] = private_data_ptr;
1873*22dc650dSSadaf Ebrahimi private_data_ptr += sizeof(sljit_sw);
1874*22dc650dSSadaf Ebrahimi cc += common->private_data_ptrs[cc + 1 - common->start];
1875*22dc650dSSadaf Ebrahimi }
1876*22dc650dSSadaf Ebrahimi cc += 1 + LINK_SIZE;
1877*22dc650dSSadaf Ebrahimi break;
1878*22dc650dSSadaf Ebrahimi
/* These brackets always receive exactly one slot. */
1879*22dc650dSSadaf Ebrahimi case OP_ASSERT:
1880*22dc650dSSadaf Ebrahimi case OP_ASSERT_NOT:
1881*22dc650dSSadaf Ebrahimi case OP_ASSERTBACK:
1882*22dc650dSSadaf Ebrahimi case OP_ASSERTBACK_NOT:
1883*22dc650dSSadaf Ebrahimi case OP_ASSERT_NA:
1884*22dc650dSSadaf Ebrahimi case OP_ONCE:
1885*22dc650dSSadaf Ebrahimi case OP_SCRIPT_RUN:
1886*22dc650dSSadaf Ebrahimi case OP_BRAPOS:
1887*22dc650dSSadaf Ebrahimi case OP_SBRA:
1888*22dc650dSSadaf Ebrahimi case OP_SBRAPOS:
1889*22dc650dSSadaf Ebrahimi case OP_SCOND:
1890*22dc650dSSadaf Ebrahimi common->private_data_ptrs[cc - common->start] = private_data_ptr;
1891*22dc650dSSadaf Ebrahimi private_data_ptr += sizeof(sljit_sw);
1892*22dc650dSSadaf Ebrahimi bracketlen = 1 + LINK_SIZE;
1893*22dc650dSSadaf Ebrahimi break;
1894*22dc650dSSadaf Ebrahimi
1895*22dc650dSSadaf Ebrahimi case OP_ASSERTBACK_NA:
1896*22dc650dSSadaf Ebrahimi common->private_data_ptrs[cc - common->start] = private_data_ptr;
1897*22dc650dSSadaf Ebrahimi private_data_ptr += sizeof(sljit_sw);
1898*22dc650dSSadaf Ebrahimi
/* When find_vreverse reports a hit, the entry at cc + 1 is flagged with 1 and
a second slot is reserved. */
1899*22dc650dSSadaf Ebrahimi if (find_vreverse(cc))
1900*22dc650dSSadaf Ebrahimi {
1901*22dc650dSSadaf Ebrahimi common->private_data_ptrs[cc + 1 - common->start] = 1;
1902*22dc650dSSadaf Ebrahimi private_data_ptr += sizeof(sljit_sw);
1903*22dc650dSSadaf Ebrahimi }
1904*22dc650dSSadaf Ebrahimi
1905*22dc650dSSadaf Ebrahimi bracketlen = 1 + LINK_SIZE;
1906*22dc650dSSadaf Ebrahimi break;
1907*22dc650dSSadaf Ebrahimi
/* Same as the one-slot brackets above, but the header is IMM2_SIZE longer. */
1908*22dc650dSSadaf Ebrahimi case OP_CBRAPOS:
1909*22dc650dSSadaf Ebrahimi case OP_SCBRAPOS:
1910*22dc650dSSadaf Ebrahimi common->private_data_ptrs[cc - common->start] = private_data_ptr;
1911*22dc650dSSadaf Ebrahimi private_data_ptr += sizeof(sljit_sw);
1912*22dc650dSSadaf Ebrahimi bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1913*22dc650dSSadaf Ebrahimi break;
1914*22dc650dSSadaf Ebrahimi
1915*22dc650dSSadaf Ebrahimi case OP_COND:
1916*22dc650dSSadaf Ebrahimi /* Might be a hidden SCOND. */
1917*22dc650dSSadaf Ebrahimi common->private_data_ptrs[cc - common->start] = 0;
/* A slot is reserved only if the opcode reached through the link at cc + 1
is a repeating ket. */
1918*22dc650dSSadaf Ebrahimi alternative = cc + GET(cc, 1);
1919*22dc650dSSadaf Ebrahimi if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1920*22dc650dSSadaf Ebrahimi {
1921*22dc650dSSadaf Ebrahimi common->private_data_ptrs[cc - common->start] = private_data_ptr;
1922*22dc650dSSadaf Ebrahimi private_data_ptr += sizeof(sljit_sw);
1923*22dc650dSSadaf Ebrahimi }
1924*22dc650dSSadaf Ebrahimi bracketlen = 1 + LINK_SIZE;
1925*22dc650dSSadaf Ebrahimi break;
1926*22dc650dSSadaf Ebrahimi
/* Plain and capturing brackets get no slot here; only their header is skipped. */
1927*22dc650dSSadaf Ebrahimi case OP_BRA:
1928*22dc650dSSadaf Ebrahimi bracketlen = 1 + LINK_SIZE;
1929*22dc650dSSadaf Ebrahimi break;
1930*22dc650dSSadaf Ebrahimi
1931*22dc650dSSadaf Ebrahimi case OP_CBRA:
1932*22dc650dSSadaf Ebrahimi case OP_SCBRA:
1933*22dc650dSSadaf Ebrahimi bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1934*22dc650dSSadaf Ebrahimi break;
1935*22dc650dSSadaf Ebrahimi
/* Zero-iteration prefixes: step over them and disable the repeat check for
the bracket that follows. */
1936*22dc650dSSadaf Ebrahimi case OP_BRAZERO:
1937*22dc650dSSadaf Ebrahimi case OP_BRAMINZERO:
1938*22dc650dSSadaf Ebrahimi case OP_BRAPOSZERO:
1939*22dc650dSSadaf Ebrahimi size = 1;
1940*22dc650dSSadaf Ebrahimi repeat_check = FALSE;
1941*22dc650dSSadaf Ebrahimi break;
1942*22dc650dSSadaf Ebrahimi
/* Character iterators: a negative size requests the UTF-aware skip below. */
1943*22dc650dSSadaf Ebrahimi CASE_ITERATOR_PRIVATE_DATA_1
1944*22dc650dSSadaf Ebrahimi size = -2;
1945*22dc650dSSadaf Ebrahimi space = 1;
1946*22dc650dSSadaf Ebrahimi break;
1947*22dc650dSSadaf Ebrahimi
1948*22dc650dSSadaf Ebrahimi CASE_ITERATOR_PRIVATE_DATA_2A
1949*22dc650dSSadaf Ebrahimi size = -2;
1950*22dc650dSSadaf Ebrahimi space = 2;
1951*22dc650dSSadaf Ebrahimi break;
1952*22dc650dSSadaf Ebrahimi
1953*22dc650dSSadaf Ebrahimi CASE_ITERATOR_PRIVATE_DATA_2B
1954*22dc650dSSadaf Ebrahimi size = -(2 + IMM2_SIZE);
1955*22dc650dSSadaf Ebrahimi space = 2;
1956*22dc650dSSadaf Ebrahimi break;
1957*22dc650dSSadaf Ebrahimi
/* Type iterators: only the iterator opcode (and its count, if any) is skipped
here; the type opcode that follows is visited by the next loop iteration. */
1958*22dc650dSSadaf Ebrahimi CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1959*22dc650dSSadaf Ebrahimi size = 1;
1960*22dc650dSSadaf Ebrahimi space = 1;
1961*22dc650dSSadaf Ebrahimi break;
1962*22dc650dSSadaf Ebrahimi
1963*22dc650dSSadaf Ebrahimi CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1964*22dc650dSSadaf Ebrahimi size = 1;
/* No slot is reserved when the repeated type is OP_ANYNL or OP_EXTUNI. */
1965*22dc650dSSadaf Ebrahimi if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1966*22dc650dSSadaf Ebrahimi space = 2;
1967*22dc650dSSadaf Ebrahimi break;
1968*22dc650dSSadaf Ebrahimi
1969*22dc650dSSadaf Ebrahimi case OP_TYPEUPTO:
1970*22dc650dSSadaf Ebrahimi size = 1 + IMM2_SIZE;
1971*22dc650dSSadaf Ebrahimi if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1972*22dc650dSSadaf Ebrahimi space = 2;
1973*22dc650dSSadaf Ebrahimi break;
1974*22dc650dSSadaf Ebrahimi
1975*22dc650dSSadaf Ebrahimi case OP_TYPEMINUPTO:
1976*22dc650dSSadaf Ebrahimi size = 1 + IMM2_SIZE;
1977*22dc650dSSadaf Ebrahimi space = 2;
1978*22dc650dSSadaf Ebrahimi break;
1979*22dc650dSSadaf Ebrahimi
/* Character classes: the slot count depends on the opcode following the
class data, as decided by get_class_iterator_size. */
1980*22dc650dSSadaf Ebrahimi case OP_CLASS:
1981*22dc650dSSadaf Ebrahimi case OP_NCLASS:
1982*22dc650dSSadaf Ebrahimi size = 1 + 32 / sizeof(PCRE2_UCHAR);
1983*22dc650dSSadaf Ebrahimi space = get_class_iterator_size(cc + size);
1984*22dc650dSSadaf Ebrahimi break;
1985*22dc650dSSadaf Ebrahimi
1986*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
1987*22dc650dSSadaf Ebrahimi case OP_XCLASS:
1988*22dc650dSSadaf Ebrahimi size = GET(cc, 1);
1989*22dc650dSSadaf Ebrahimi space = get_class_iterator_size(cc + size);
1990*22dc650dSSadaf Ebrahimi break;
1991*22dc650dSSadaf Ebrahimi #endif
1992*22dc650dSSadaf Ebrahimi
/* Everything else needs no private data: just move to the next opcode. */
1993*22dc650dSSadaf Ebrahimi default:
1994*22dc650dSSadaf Ebrahimi cc = next_opcode(common, cc);
1995*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(cc != NULL);
1996*22dc650dSSadaf Ebrahimi break;
1997*22dc650dSSadaf Ebrahimi }
1998*22dc650dSSadaf Ebrahimi
1999*22dc650dSSadaf Ebrahimi /* Character iterators, which are not inside a repeated bracket,
2000*22dc650dSSadaf Ebrahimi gets a private slot instead of allocating it on the stack. */
2001*22dc650dSSadaf Ebrahimi if (space > 0 && cc >= end)
2002*22dc650dSSadaf Ebrahimi {
2003*22dc650dSSadaf Ebrahimi common->private_data_ptrs[cc - common->start] = private_data_ptr;
2004*22dc650dSSadaf Ebrahimi private_data_ptr += sizeof(sljit_sw) * space;
2005*22dc650dSSadaf Ebrahimi }
2006*22dc650dSSadaf Ebrahimi
/* Advance over the item; this must happen after the slot was recorded at cc. */
2007*22dc650dSSadaf Ebrahimi if (size != 0)
2008*22dc650dSSadaf Ebrahimi {
2009*22dc650dSSadaf Ebrahimi if (size < 0)
2010*22dc650dSSadaf Ebrahimi {
2011*22dc650dSSadaf Ebrahimi cc += -size;
2012*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
2013*22dc650dSSadaf Ebrahimi if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2014*22dc650dSSadaf Ebrahimi #endif
2015*22dc650dSSadaf Ebrahimi }
2016*22dc650dSSadaf Ebrahimi else
2017*22dc650dSSadaf Ebrahimi cc += size;
2018*22dc650dSSadaf Ebrahimi }
2019*22dc650dSSadaf Ebrahimi
2020*22dc650dSSadaf Ebrahimi if (bracketlen > 0)
2021*22dc650dSSadaf Ebrahimi {
/* Outside any tracked bracket: track this one, unless it is closed by a
plain OP_KET, in which case tracking is cleared. */
2022*22dc650dSSadaf Ebrahimi if (cc >= end)
2023*22dc650dSSadaf Ebrahimi {
2024*22dc650dSSadaf Ebrahimi end = bracketend(cc);
2025*22dc650dSSadaf Ebrahimi if (end[-1 - LINK_SIZE] == OP_KET)
2026*22dc650dSSadaf Ebrahimi end = NULL;
2027*22dc650dSSadaf Ebrahimi }
2028*22dc650dSSadaf Ebrahimi cc += bracketlen;
2029*22dc650dSSadaf Ebrahimi }
2030*22dc650dSSadaf Ebrahimi }
/* Report the first unused offset back to the caller. */
2031*22dc650dSSadaf Ebrahimi *private_data_start = private_data_ptr;
2032*22dc650dSSadaf Ebrahimi }
2033*22dc650dSSadaf Ebrahimi
2034*22dc650dSSadaf Ebrahimi /* Returns with a frame_types (always < 0) if no need for frame. */
/* Computes the size of the frame needed for the given opcode range.

cc and ccend delimit the range. If ccend is NULL, the range runs from
next_opcode(common, cc) up to bracketend(cc) - (1 + LINK_SIZE). When recursive
is TRUE, the start-of-match and mark items are treated as already accounted
for. *needs_control_head is set to TRUE when OP_MARK, OP_COMMIT_ARG,
OP_PRUNE_ARG, OP_THEN_ARG or OP_THEN is found while common->control_head_ptr
is non-zero (and unconditionally when DEBUG_FORCE_CONTROL_HEAD is enabled).

Returns length + 1 when something has to be saved, where length counts 2 for
each of start-of-match, mark and last-capture (each at most once) and 3 for
every capturing bracket. Otherwise returns no_frame if an opcode that sets
stack_restore was seen, else no_stack. */
get_framesize(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,BOOL recursive,BOOL * needs_control_head)2035*22dc650dSSadaf Ebrahimi static int get_framesize(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL recursive, BOOL *needs_control_head)
2036*22dc650dSSadaf Ebrahimi {
2037*22dc650dSSadaf Ebrahimi int length = 0;
2038*22dc650dSSadaf Ebrahimi int possessive = 0;
2039*22dc650dSSadaf Ebrahimi BOOL stack_restore = FALSE;
2040*22dc650dSSadaf Ebrahimi BOOL setsom_found = recursive;
2041*22dc650dSSadaf Ebrahimi BOOL setmark_found = recursive;
2042*22dc650dSSadaf Ebrahimi /* The last capture is a local variable even for recursions. */
2043*22dc650dSSadaf Ebrahimi BOOL capture_last_found = FALSE;
2044*22dc650dSSadaf Ebrahimi
2045*22dc650dSSadaf Ebrahimi #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2046*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(common->control_head_ptr != 0);
2047*22dc650dSSadaf Ebrahimi *needs_control_head = TRUE;
2048*22dc650dSSadaf Ebrahimi #else
2049*22dc650dSSadaf Ebrahimi *needs_control_head = FALSE;
2050*22dc650dSSadaf Ebrahimi #endif
2051*22dc650dSSadaf Ebrahimi
2052*22dc650dSSadaf Ebrahimi if (ccend == NULL)
2053*22dc650dSSadaf Ebrahimi {
2054*22dc650dSSadaf Ebrahimi ccend = bracketend(cc) - (1 + LINK_SIZE);
/* For a non-recursive possessive capturing bracket the length starts at 3
(or 5 when the last capture is tracked) and the same value is remembered in
possessive; if nothing else is added, no frame is requested below. */
2055*22dc650dSSadaf Ebrahimi if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
2056*22dc650dSSadaf Ebrahimi {
2057*22dc650dSSadaf Ebrahimi possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
2058*22dc650dSSadaf Ebrahimi /* This is correct regardless of common->capture_last_ptr. */
2059*22dc650dSSadaf Ebrahimi capture_last_found = TRUE;
2060*22dc650dSSadaf Ebrahimi }
2061*22dc650dSSadaf Ebrahimi cc = next_opcode(common, cc);
2062*22dc650dSSadaf Ebrahimi }
2063*22dc650dSSadaf Ebrahimi
2064*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(cc != NULL);
2065*22dc650dSSadaf Ebrahimi while (cc < ccend)
2066*22dc650dSSadaf Ebrahimi switch(*cc)
2067*22dc650dSSadaf Ebrahimi {
2068*22dc650dSSadaf Ebrahimi case OP_SET_SOM:
2069*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(common->has_set_som);
2070*22dc650dSSadaf Ebrahimi stack_restore = TRUE;
2071*22dc650dSSadaf Ebrahimi if (!setsom_found)
2072*22dc650dSSadaf Ebrahimi {
2073*22dc650dSSadaf Ebrahimi length += 2;
2074*22dc650dSSadaf Ebrahimi setsom_found = TRUE;
2075*22dc650dSSadaf Ebrahimi }
2076*22dc650dSSadaf Ebrahimi cc += 1;
2077*22dc650dSSadaf Ebrahimi break;
2078*22dc650dSSadaf Ebrahimi
2079*22dc650dSSadaf Ebrahimi case OP_MARK:
2080*22dc650dSSadaf Ebrahimi case OP_COMMIT_ARG:
2081*22dc650dSSadaf Ebrahimi case OP_PRUNE_ARG:
2082*22dc650dSSadaf Ebrahimi case OP_THEN_ARG:
2083*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(common->mark_ptr != 0);
2084*22dc650dSSadaf Ebrahimi stack_restore = TRUE;
2085*22dc650dSSadaf Ebrahimi if (!setmark_found)
2086*22dc650dSSadaf Ebrahimi {
2087*22dc650dSSadaf Ebrahimi length += 2;
2088*22dc650dSSadaf Ebrahimi setmark_found = TRUE;
2089*22dc650dSSadaf Ebrahimi }
2090*22dc650dSSadaf Ebrahimi if (common->control_head_ptr != 0)
2091*22dc650dSSadaf Ebrahimi *needs_control_head = TRUE;
/* cc[1] is used as the length of the argument that follows.
NOTE(review): the extra 2 presumably covers the length unit and a terminator
- confirm. */
2092*22dc650dSSadaf Ebrahimi cc += 1 + 2 + cc[1];
2093*22dc650dSSadaf Ebrahimi break;
2094*22dc650dSSadaf Ebrahimi
/* A recursion counts every tracked item that has not been counted yet. */
2095*22dc650dSSadaf Ebrahimi case OP_RECURSE:
2096*22dc650dSSadaf Ebrahimi stack_restore = TRUE;
2097*22dc650dSSadaf Ebrahimi if (common->has_set_som && !setsom_found)
2098*22dc650dSSadaf Ebrahimi {
2099*22dc650dSSadaf Ebrahimi length += 2;
2100*22dc650dSSadaf Ebrahimi setsom_found = TRUE;
2101*22dc650dSSadaf Ebrahimi }
2102*22dc650dSSadaf Ebrahimi if (common->mark_ptr != 0 && !setmark_found)
2103*22dc650dSSadaf Ebrahimi {
2104*22dc650dSSadaf Ebrahimi length += 2;
2105*22dc650dSSadaf Ebrahimi setmark_found = TRUE;
2106*22dc650dSSadaf Ebrahimi }
2107*22dc650dSSadaf Ebrahimi if (common->capture_last_ptr != 0 && !capture_last_found)
2108*22dc650dSSadaf Ebrahimi {
2109*22dc650dSSadaf Ebrahimi length += 2;
2110*22dc650dSSadaf Ebrahimi capture_last_found = TRUE;
2111*22dc650dSSadaf Ebrahimi }
2112*22dc650dSSadaf Ebrahimi cc += 1 + LINK_SIZE;
2113*22dc650dSSadaf Ebrahimi break;
2114*22dc650dSSadaf Ebrahimi
/* Every capturing bracket adds 3, plus 2 once for the last capture. */
2115*22dc650dSSadaf Ebrahimi case OP_CBRA:
2116*22dc650dSSadaf Ebrahimi case OP_CBRAPOS:
2117*22dc650dSSadaf Ebrahimi case OP_SCBRA:
2118*22dc650dSSadaf Ebrahimi case OP_SCBRAPOS:
2119*22dc650dSSadaf Ebrahimi stack_restore = TRUE;
2120*22dc650dSSadaf Ebrahimi if (common->capture_last_ptr != 0 && !capture_last_found)
2121*22dc650dSSadaf Ebrahimi {
2122*22dc650dSSadaf Ebrahimi length += 2;
2123*22dc650dSSadaf Ebrahimi capture_last_found = TRUE;
2124*22dc650dSSadaf Ebrahimi }
2125*22dc650dSSadaf Ebrahimi length += 3;
2126*22dc650dSSadaf Ebrahimi cc += 1 + LINK_SIZE + IMM2_SIZE;
2127*22dc650dSSadaf Ebrahimi break;
2128*22dc650dSSadaf Ebrahimi
2129*22dc650dSSadaf Ebrahimi case OP_THEN:
2130*22dc650dSSadaf Ebrahimi stack_restore = TRUE;
2131*22dc650dSSadaf Ebrahimi if (common->control_head_ptr != 0)
2132*22dc650dSSadaf Ebrahimi *needs_control_head = TRUE;
2133*22dc650dSSadaf Ebrahimi cc ++;
2134*22dc650dSSadaf Ebrahimi break;
2135*22dc650dSSadaf Ebrahimi
/* Any opcode not listed anywhere in this switch sets stack_restore and then
shares the skip code with the opcodes listed below, which leave stack_restore
untouched. */
2136*22dc650dSSadaf Ebrahimi default:
2137*22dc650dSSadaf Ebrahimi stack_restore = TRUE;
2138*22dc650dSSadaf Ebrahimi /* Fall through. */
2139*22dc650dSSadaf Ebrahimi
2140*22dc650dSSadaf Ebrahimi case OP_NOT_WORD_BOUNDARY:
2141*22dc650dSSadaf Ebrahimi case OP_WORD_BOUNDARY:
2142*22dc650dSSadaf Ebrahimi case OP_NOT_DIGIT:
2143*22dc650dSSadaf Ebrahimi case OP_DIGIT:
2144*22dc650dSSadaf Ebrahimi case OP_NOT_WHITESPACE:
2145*22dc650dSSadaf Ebrahimi case OP_WHITESPACE:
2146*22dc650dSSadaf Ebrahimi case OP_NOT_WORDCHAR:
2147*22dc650dSSadaf Ebrahimi case OP_WORDCHAR:
2148*22dc650dSSadaf Ebrahimi case OP_ANY:
2149*22dc650dSSadaf Ebrahimi case OP_ALLANY:
2150*22dc650dSSadaf Ebrahimi case OP_ANYBYTE:
2151*22dc650dSSadaf Ebrahimi case OP_NOTPROP:
2152*22dc650dSSadaf Ebrahimi case OP_PROP:
2153*22dc650dSSadaf Ebrahimi case OP_ANYNL:
2154*22dc650dSSadaf Ebrahimi case OP_NOT_HSPACE:
2155*22dc650dSSadaf Ebrahimi case OP_HSPACE:
2156*22dc650dSSadaf Ebrahimi case OP_NOT_VSPACE:
2157*22dc650dSSadaf Ebrahimi case OP_VSPACE:
2158*22dc650dSSadaf Ebrahimi case OP_EXTUNI:
2159*22dc650dSSadaf Ebrahimi case OP_EODN:
2160*22dc650dSSadaf Ebrahimi case OP_EOD:
2161*22dc650dSSadaf Ebrahimi case OP_CIRC:
2162*22dc650dSSadaf Ebrahimi case OP_CIRCM:
2163*22dc650dSSadaf Ebrahimi case OP_DOLL:
2164*22dc650dSSadaf Ebrahimi case OP_DOLLM:
2165*22dc650dSSadaf Ebrahimi case OP_CHAR:
2166*22dc650dSSadaf Ebrahimi case OP_CHARI:
2167*22dc650dSSadaf Ebrahimi case OP_NOT:
2168*22dc650dSSadaf Ebrahimi case OP_NOTI:
2169*22dc650dSSadaf Ebrahimi
2170*22dc650dSSadaf Ebrahimi case OP_EXACT:
2171*22dc650dSSadaf Ebrahimi case OP_POSSTAR:
2172*22dc650dSSadaf Ebrahimi case OP_POSPLUS:
2173*22dc650dSSadaf Ebrahimi case OP_POSQUERY:
2174*22dc650dSSadaf Ebrahimi case OP_POSUPTO:
2175*22dc650dSSadaf Ebrahimi
2176*22dc650dSSadaf Ebrahimi case OP_EXACTI:
2177*22dc650dSSadaf Ebrahimi case OP_POSSTARI:
2178*22dc650dSSadaf Ebrahimi case OP_POSPLUSI:
2179*22dc650dSSadaf Ebrahimi case OP_POSQUERYI:
2180*22dc650dSSadaf Ebrahimi case OP_POSUPTOI:
2181*22dc650dSSadaf Ebrahimi
2182*22dc650dSSadaf Ebrahimi case OP_NOTEXACT:
2183*22dc650dSSadaf Ebrahimi case OP_NOTPOSSTAR:
2184*22dc650dSSadaf Ebrahimi case OP_NOTPOSPLUS:
2185*22dc650dSSadaf Ebrahimi case OP_NOTPOSQUERY:
2186*22dc650dSSadaf Ebrahimi case OP_NOTPOSUPTO:
2187*22dc650dSSadaf Ebrahimi
2188*22dc650dSSadaf Ebrahimi case OP_NOTEXACTI:
2189*22dc650dSSadaf Ebrahimi case OP_NOTPOSSTARI:
2190*22dc650dSSadaf Ebrahimi case OP_NOTPOSPLUSI:
2191*22dc650dSSadaf Ebrahimi case OP_NOTPOSQUERYI:
2192*22dc650dSSadaf Ebrahimi case OP_NOTPOSUPTOI:
2193*22dc650dSSadaf Ebrahimi
2194*22dc650dSSadaf Ebrahimi case OP_TYPEEXACT:
2195*22dc650dSSadaf Ebrahimi case OP_TYPEPOSSTAR:
2196*22dc650dSSadaf Ebrahimi case OP_TYPEPOSPLUS:
2197*22dc650dSSadaf Ebrahimi case OP_TYPEPOSQUERY:
2198*22dc650dSSadaf Ebrahimi case OP_TYPEPOSUPTO:
2199*22dc650dSSadaf Ebrahimi
2200*22dc650dSSadaf Ebrahimi case OP_CLASS:
2201*22dc650dSSadaf Ebrahimi case OP_NCLASS:
2202*22dc650dSSadaf Ebrahimi case OP_XCLASS:
2203*22dc650dSSadaf Ebrahimi
2204*22dc650dSSadaf Ebrahimi case OP_CALLOUT:
2205*22dc650dSSadaf Ebrahimi case OP_CALLOUT_STR:
2206*22dc650dSSadaf Ebrahimi
2207*22dc650dSSadaf Ebrahimi case OP_NOT_UCP_WORD_BOUNDARY:
2208*22dc650dSSadaf Ebrahimi case OP_UCP_WORD_BOUNDARY:
2209*22dc650dSSadaf Ebrahimi
2210*22dc650dSSadaf Ebrahimi cc = next_opcode(common, cc);
2211*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(cc != NULL);
2212*22dc650dSSadaf Ebrahimi break;
2213*22dc650dSSadaf Ebrahimi }
2214*22dc650dSSadaf Ebrahimi
2215*22dc650dSSadaf Ebrahimi /* Possessive quantifiers can use a special case. */
2216*22dc650dSSadaf Ebrahimi if (SLJIT_UNLIKELY(possessive == length))
2217*22dc650dSSadaf Ebrahimi return stack_restore ? no_frame : no_stack;
2218*22dc650dSSadaf Ebrahimi
/* One is added to the number of counted entries; init_frame stores a single
zero entry after the saved items. */
2219*22dc650dSSadaf Ebrahimi if (length > 0)
2220*22dc650dSSadaf Ebrahimi return length + 1;
2221*22dc650dSSadaf Ebrahimi return stack_restore ? no_frame : no_stack;
2222*22dc650dSSadaf Ebrahimi }
2223*22dc650dSSadaf Ebrahimi
/* Emits the code that stores a frame on the JIT stack.

Walks the opcodes from cc to ccend (when ccend is NULL, up to
bracketend(cc) - (1 + LINK_SIZE)) and emits stores relative to STACK_TOP. The
store offset starts at STACK(stackpos) and decreases by SSIZE_OF(sw) after
every entry:
  - start of match, mark, last capture: the negated location followed by its
    current value, each saved at most once;
  - capturing bracket: OVECTOR(offset) followed by the two ovector values.
A zero entry is stored last, and the final offset must equal
STACK(stacktop). The entries mirror the items counted by get_framesize. */
init_frame(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,int stackpos,int stacktop)2224*22dc650dSSadaf Ebrahimi static void init_frame(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, int stackpos, int stacktop)
2225*22dc650dSSadaf Ebrahimi {
2226*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
2227*22dc650dSSadaf Ebrahimi BOOL setsom_found = FALSE;
2228*22dc650dSSadaf Ebrahimi BOOL setmark_found = FALSE;
2229*22dc650dSSadaf Ebrahimi /* The last capture is a local variable even for recursions. */
2230*22dc650dSSadaf Ebrahimi BOOL capture_last_found = FALSE;
2231*22dc650dSSadaf Ebrahimi int offset;
2232*22dc650dSSadaf Ebrahimi
2233*22dc650dSSadaf Ebrahimi /* >= 1 + shortest item size (2) */
/* stacktop is only used by assertions, hence the unused-argument marker. */
2234*22dc650dSSadaf Ebrahimi SLJIT_UNUSED_ARG(stacktop);
2235*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(stackpos >= stacktop + 2);
2236*22dc650dSSadaf Ebrahimi
/* From here on stackpos is the store offset computed by STACK(), not an index. */
2237*22dc650dSSadaf Ebrahimi stackpos = STACK(stackpos);
2238*22dc650dSSadaf Ebrahimi if (ccend == NULL)
2239*22dc650dSSadaf Ebrahimi {
2240*22dc650dSSadaf Ebrahimi ccend = bracketend(cc) - (1 + LINK_SIZE);
/* A possessive capturing bracket is not skipped, so the loop below saves its
own capture as well. */
2241*22dc650dSSadaf Ebrahimi if (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)
2242*22dc650dSSadaf Ebrahimi cc = next_opcode(common, cc);
2243*22dc650dSSadaf Ebrahimi }
2244*22dc650dSSadaf Ebrahimi
2245*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(cc != NULL);
2246*22dc650dSSadaf Ebrahimi while (cc < ccend)
2247*22dc650dSSadaf Ebrahimi switch(*cc)
2248*22dc650dSSadaf Ebrahimi {
2249*22dc650dSSadaf Ebrahimi case OP_SET_SOM:
2250*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(common->has_set_som);
2251*22dc650dSSadaf Ebrahimi if (!setsom_found)
2252*22dc650dSSadaf Ebrahimi {
/* Two entries: the negated OVECTOR(0) location, then its current value. */
2253*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
2254*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
2255*22dc650dSSadaf Ebrahimi stackpos -= SSIZE_OF(sw);
2256*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2257*22dc650dSSadaf Ebrahimi stackpos -= SSIZE_OF(sw);
2258*22dc650dSSadaf Ebrahimi setsom_found = TRUE;
2259*22dc650dSSadaf Ebrahimi }
2260*22dc650dSSadaf Ebrahimi cc += 1;
2261*22dc650dSSadaf Ebrahimi break;
2262*22dc650dSSadaf Ebrahimi
2263*22dc650dSSadaf Ebrahimi case OP_MARK:
2264*22dc650dSSadaf Ebrahimi case OP_COMMIT_ARG:
2265*22dc650dSSadaf Ebrahimi case OP_PRUNE_ARG:
2266*22dc650dSSadaf Ebrahimi case OP_THEN_ARG:
2267*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(common->mark_ptr != 0);
2268*22dc650dSSadaf Ebrahimi if (!setmark_found)
2269*22dc650dSSadaf Ebrahimi {
/* Two entries: the negated mark location, then the current mark value. */
2270*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2271*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
2272*22dc650dSSadaf Ebrahimi stackpos -= SSIZE_OF(sw);
2273*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2274*22dc650dSSadaf Ebrahimi stackpos -= SSIZE_OF(sw);
2275*22dc650dSSadaf Ebrahimi setmark_found = TRUE;
2276*22dc650dSSadaf Ebrahimi }
2277*22dc650dSSadaf Ebrahimi cc += 1 + 2 + cc[1];
2278*22dc650dSSadaf Ebrahimi break;
2279*22dc650dSSadaf Ebrahimi
/* A recursion saves every tracked item that has not been saved yet. */
2280*22dc650dSSadaf Ebrahimi case OP_RECURSE:
2281*22dc650dSSadaf Ebrahimi if (common->has_set_som && !setsom_found)
2282*22dc650dSSadaf Ebrahimi {
2283*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
2284*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
2285*22dc650dSSadaf Ebrahimi stackpos -= SSIZE_OF(sw);
2286*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2287*22dc650dSSadaf Ebrahimi stackpos -= SSIZE_OF(sw);
2288*22dc650dSSadaf Ebrahimi setsom_found = TRUE;
2289*22dc650dSSadaf Ebrahimi }
2290*22dc650dSSadaf Ebrahimi if (common->mark_ptr != 0 && !setmark_found)
2291*22dc650dSSadaf Ebrahimi {
2292*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2293*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
2294*22dc650dSSadaf Ebrahimi stackpos -= SSIZE_OF(sw);
2295*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2296*22dc650dSSadaf Ebrahimi stackpos -= SSIZE_OF(sw);
2297*22dc650dSSadaf Ebrahimi setmark_found = TRUE;
2298*22dc650dSSadaf Ebrahimi }
2299*22dc650dSSadaf Ebrahimi if (common->capture_last_ptr != 0 && !capture_last_found)
2300*22dc650dSSadaf Ebrahimi {
2301*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
2302*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
2303*22dc650dSSadaf Ebrahimi stackpos -= SSIZE_OF(sw);
2304*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2305*22dc650dSSadaf Ebrahimi stackpos -= SSIZE_OF(sw);
2306*22dc650dSSadaf Ebrahimi capture_last_found = TRUE;
2307*22dc650dSSadaf Ebrahimi }
2308*22dc650dSSadaf Ebrahimi cc += 1 + LINK_SIZE;
2309*22dc650dSSadaf Ebrahimi break;
2310*22dc650dSSadaf Ebrahimi
2311*22dc650dSSadaf Ebrahimi case OP_CBRA:
2312*22dc650dSSadaf Ebrahimi case OP_CBRAPOS:
2313*22dc650dSSadaf Ebrahimi case OP_SCBRA:
2314*22dc650dSSadaf Ebrahimi case OP_SCBRAPOS:
2315*22dc650dSSadaf Ebrahimi if (common->capture_last_ptr != 0 && !capture_last_found)
2316*22dc650dSSadaf Ebrahimi {
2317*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
2318*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
2319*22dc650dSSadaf Ebrahimi stackpos -= SSIZE_OF(sw);
2320*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2321*22dc650dSSadaf Ebrahimi stackpos -= SSIZE_OF(sw);
2322*22dc650dSSadaf Ebrahimi capture_last_found = TRUE;
2323*22dc650dSSadaf Ebrahimi }
/* The capture number read from the bracket header is doubled to index the
ovector pair; three entries follow: the pair location and both values. */
2324*22dc650dSSadaf Ebrahimi offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
2325*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
2326*22dc650dSSadaf Ebrahimi stackpos -= SSIZE_OF(sw);
2327*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
2328*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
2329*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2330*22dc650dSSadaf Ebrahimi stackpos -= SSIZE_OF(sw);
2331*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
2332*22dc650dSSadaf Ebrahimi stackpos -= SSIZE_OF(sw);
2333*22dc650dSSadaf Ebrahimi
2334*22dc650dSSadaf Ebrahimi cc += 1 + LINK_SIZE + IMM2_SIZE;
2335*22dc650dSSadaf Ebrahimi break;
2336*22dc650dSSadaf Ebrahimi
/* Nothing to save for any other opcode. */
2337*22dc650dSSadaf Ebrahimi default:
2338*22dc650dSSadaf Ebrahimi cc = next_opcode(common, cc);
2339*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(cc != NULL);
2340*22dc650dSSadaf Ebrahimi break;
2341*22dc650dSSadaf Ebrahimi }
2342*22dc650dSSadaf Ebrahimi
/* Terminating zero entry; the walk must have consumed exactly the reserved space. */
2343*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
2344*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(stackpos == STACK(stacktop));
2345*22dc650dSSadaf Ebrahimi }
2346*22dc650dSSadaf Ebrahimi
/* Number of temporary register slots cycled through by the delayed memory
copy helpers below. */
2347*22dc650dSSadaf Ebrahimi #define RECURSE_TMP_REG_COUNT 3
2348*22dc650dSSadaf Ebrahimi
/* State of a delayed memory-to-memory copy sequence. Each slot pairs a
temporary register with the store that still has to be emitted for the value
loaded into it. */
2349*22dc650dSSadaf Ebrahimi typedef struct delayed_mem_copy_status {
/* Compiler used for emitting the instructions (set by delayed_mem_copy_init). */
2350*22dc650dSSadaf Ebrahimi struct sljit_compiler *compiler;
/* Base register of the pending store of each slot; -1 means no pending store. */
2351*22dc650dSSadaf Ebrahimi int store_bases[RECURSE_TMP_REG_COUNT];
/* Offset of the pending store of each slot. */
2352*22dc650dSSadaf Ebrahimi int store_offsets[RECURSE_TMP_REG_COUNT];
/* Registers the values are loaded into; supplied by the caller before init. */
2353*22dc650dSSadaf Ebrahimi int tmp_regs[RECURSE_TMP_REG_COUNT];
/* Where the original content of tmp_regs[i] is kept while the slot is in use;
used only when sljit_get_register_index reports a negative index for it. */
2354*22dc650dSSadaf Ebrahimi int saved_tmp_regs[RECURSE_TMP_REG_COUNT];
/* Slot used by the next delayed_mem_copy_move call (round robin). */
2355*22dc650dSSadaf Ebrahimi int next_tmp_reg;
2356*22dc650dSSadaf Ebrahimi } delayed_mem_copy_status;
2357*22dc650dSSadaf Ebrahimi
/* Resets a delayed copy state before its first use. The caller must have
filled tmp_regs[] and saved_tmp_regs[] already: they are only validated here.
Every slot is marked as having no pending store, the round robin cursor is
rewound to slot 0, and the compiler is taken from common. */
static void delayed_mem_copy_init(delayed_mem_copy_status *status, compiler_common *common)
{
int slot = 0;

while (slot < RECURSE_TMP_REG_COUNT)
  {
  SLJIT_ASSERT(status->tmp_regs[slot] >= 0);
  /* A saved register with a non-negative register index must be the
  temporary register itself. */
  SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, status->saved_tmp_regs[slot]) < 0
    || status->tmp_regs[slot] == status->saved_tmp_regs[slot]);

  /* -1 means: nothing is waiting to be stored from this slot. */
  status->store_bases[slot] = -1;
  slot++;
  }

status->compiler = common->compiler;
status->next_tmp_reg = 0;
}
2372*22dc650dSSadaf Ebrahimi
/* Queues a copy of one machine word from [load_base + load_offset] to
[store_base + store_offset]. The load is emitted immediately into the next
temporary register of the round robin; the matching store is postponed until
the same slot is reused (or until delayed_mem_copy_finish is called). */
static void delayed_mem_copy_move(delayed_mem_copy_status *status, int load_base, sljit_sw load_offset,
  int store_base, sljit_sw store_offset)
{
/* The local must be called "compiler": NOTE(review) the OP1 macro appears to
reference it implicitly - confirm against its definition. */
struct sljit_compiler *compiler = status->compiler;
const int slot = status->next_tmp_reg;
const int reg = status->tmp_regs[slot];
const int pending_base = status->store_bases[slot];

SLJIT_ASSERT(load_base > 0 && store_base > 0);

if (pending_base != -1)
  {
  /* The slot still holds a loaded value: write it out before reusing the register. */
  OP1(SLJIT_MOV, SLJIT_MEM1(pending_base), status->store_offsets[slot], reg, 0);
  }
else if (sljit_get_register_index(SLJIT_GP_REGISTER, status->saved_tmp_regs[slot]) < 0)
  {
  /* First use of the slot: preserve virtual registers. */
  OP1(SLJIT_MOV, status->saved_tmp_regs[slot], 0, reg, 0);
  }

OP1(SLJIT_MOV, reg, 0, SLJIT_MEM1(load_base), load_offset);

/* Remember where the freshly loaded value has to go. */
status->store_offsets[slot] = store_offset;
status->store_bases[slot] = store_base;

status->next_tmp_reg = (slot + 1) % RECURSE_TMP_REG_COUNT;
}
2397*22dc650dSSadaf Ebrahimi
/* Emits all stores that are still pending. The slots are visited in round
robin order starting at next_tmp_reg. For each used slot the postponed store
is emitted and, when the slot has a virtual saved register, the temporary
register gets its original content back. */
static void delayed_mem_copy_finish(delayed_mem_copy_status *status)
{
/* The local must be called "compiler": NOTE(review) the OP1 macro appears to
reference it implicitly - confirm against its definition. */
struct sljit_compiler *compiler = status->compiler;
int cursor = status->next_tmp_reg;
int remaining;

for (remaining = RECURSE_TMP_REG_COUNT; remaining > 0; remaining--)
  {
  const int slot = cursor;

  cursor = (cursor + 1) % RECURSE_TMP_REG_COUNT;

  /* Unused slot: nothing was loaded, nothing to restore. */
  if (status->store_bases[slot] == -1)
    continue;

  OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[slot]), status->store_offsets[slot], status->tmp_regs[slot], 0);

  /* Restore virtual registers. */
  if (sljit_get_register_index(SLJIT_GP_REGISTER, status->saved_tmp_regs[slot]) < 0)
    OP1(SLJIT_MOV, status->tmp_regs[slot], 0, status->saved_tmp_regs[slot], 0);
  }
}
2421*22dc650dSSadaf Ebrahimi
2422*22dc650dSSadaf Ebrahimi #undef RECURSE_TMP_REG_COUNT
2423*22dc650dSSadaf Ebrahimi
recurse_check_bit(compiler_common * common,sljit_sw bit_index)2424*22dc650dSSadaf Ebrahimi static BOOL recurse_check_bit(compiler_common *common, sljit_sw bit_index)
2425*22dc650dSSadaf Ebrahimi {
2426*22dc650dSSadaf Ebrahimi uint8_t *byte;
2427*22dc650dSSadaf Ebrahimi uint8_t mask;
2428*22dc650dSSadaf Ebrahimi
2429*22dc650dSSadaf Ebrahimi SLJIT_ASSERT((bit_index & (sizeof(sljit_sw) - 1)) == 0);
2430*22dc650dSSadaf Ebrahimi
2431*22dc650dSSadaf Ebrahimi bit_index >>= SLJIT_WORD_SHIFT;
2432*22dc650dSSadaf Ebrahimi
2433*22dc650dSSadaf Ebrahimi SLJIT_ASSERT((bit_index >> 3) < common->recurse_bitset_size);
2434*22dc650dSSadaf Ebrahimi
2435*22dc650dSSadaf Ebrahimi mask = 1 << (bit_index & 0x7);
2436*22dc650dSSadaf Ebrahimi byte = common->recurse_bitset + (bit_index >> 3);
2437*22dc650dSSadaf Ebrahimi
2438*22dc650dSSadaf Ebrahimi if (*byte & mask)
2439*22dc650dSSadaf Ebrahimi return FALSE;
2440*22dc650dSSadaf Ebrahimi
2441*22dc650dSSadaf Ebrahimi *byte |= mask;
2442*22dc650dSSadaf Ebrahimi return TRUE;
2443*22dc650dSSadaf Ebrahimi }
2444*22dc650dSSadaf Ebrahimi
/* Bit flags collected while scanning the body of a recursion.
get_recurse_data_length ORs them into its recurse_flags value. */
2445*22dc650dSSadaf Ebrahimi enum get_recurse_flags {
2446*22dc650dSSadaf Ebrahimi recurse_flag_quit_found = (1 << 0),
2447*22dc650dSSadaf Ebrahimi recurse_flag_accept_found = (1 << 1),
/* Set for OP_SET_SOM, and for OP_RECURSE when common->has_set_som is set. */
2448*22dc650dSSadaf Ebrahimi recurse_flag_setsom_found = (1 << 2),
/* Set for OP_RECURSE when common->mark_ptr is non-zero. */
2449*22dc650dSSadaf Ebrahimi recurse_flag_setmark_found = (1 << 3),
/* Set unconditionally when DEBUG_FORCE_CONTROL_HEAD is enabled. */
2450*22dc650dSSadaf Ebrahimi recurse_flag_control_head_found = (1 << 4),
2451*22dc650dSSadaf Ebrahimi };
2452*22dc650dSSadaf Ebrahimi
get_recurse_data_length(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,uint32_t * result_flags)2453*22dc650dSSadaf Ebrahimi static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, uint32_t *result_flags)
2454*22dc650dSSadaf Ebrahimi {
2455*22dc650dSSadaf Ebrahimi int length = 1;
2456*22dc650dSSadaf Ebrahimi int size, offset;
2457*22dc650dSSadaf Ebrahimi PCRE2_SPTR alternative;
2458*22dc650dSSadaf Ebrahimi uint32_t recurse_flags = 0;
2459*22dc650dSSadaf Ebrahimi
2460*22dc650dSSadaf Ebrahimi memset(common->recurse_bitset, 0, common->recurse_bitset_size);
2461*22dc650dSSadaf Ebrahimi
2462*22dc650dSSadaf Ebrahimi #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2463*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(common->control_head_ptr != 0);
2464*22dc650dSSadaf Ebrahimi recurse_flags |= recurse_flag_control_head_found;
2465*22dc650dSSadaf Ebrahimi #endif
2466*22dc650dSSadaf Ebrahimi
2467*22dc650dSSadaf Ebrahimi /* Calculate the sum of the private machine words. */
2468*22dc650dSSadaf Ebrahimi while (cc < ccend)
2469*22dc650dSSadaf Ebrahimi {
2470*22dc650dSSadaf Ebrahimi size = 0;
2471*22dc650dSSadaf Ebrahimi switch(*cc)
2472*22dc650dSSadaf Ebrahimi {
2473*22dc650dSSadaf Ebrahimi case OP_SET_SOM:
2474*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(common->has_set_som);
2475*22dc650dSSadaf Ebrahimi recurse_flags |= recurse_flag_setsom_found;
2476*22dc650dSSadaf Ebrahimi cc += 1;
2477*22dc650dSSadaf Ebrahimi break;
2478*22dc650dSSadaf Ebrahimi
2479*22dc650dSSadaf Ebrahimi case OP_RECURSE:
2480*22dc650dSSadaf Ebrahimi if (common->has_set_som)
2481*22dc650dSSadaf Ebrahimi recurse_flags |= recurse_flag_setsom_found;
2482*22dc650dSSadaf Ebrahimi if (common->mark_ptr != 0)
2483*22dc650dSSadaf Ebrahimi recurse_flags |= recurse_flag_setmark_found;
2484*22dc650dSSadaf Ebrahimi if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2485*22dc650dSSadaf Ebrahimi length++;
2486*22dc650dSSadaf Ebrahimi cc += 1 + LINK_SIZE;
2487*22dc650dSSadaf Ebrahimi break;
2488*22dc650dSSadaf Ebrahimi
2489*22dc650dSSadaf Ebrahimi case OP_KET:
2490*22dc650dSSadaf Ebrahimi offset = PRIVATE_DATA(cc);
2491*22dc650dSSadaf Ebrahimi if (offset != 0)
2492*22dc650dSSadaf Ebrahimi {
2493*22dc650dSSadaf Ebrahimi if (recurse_check_bit(common, offset))
2494*22dc650dSSadaf Ebrahimi length++;
2495*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
2496*22dc650dSSadaf Ebrahimi cc += PRIVATE_DATA(cc + 1);
2497*22dc650dSSadaf Ebrahimi }
2498*22dc650dSSadaf Ebrahimi cc += 1 + LINK_SIZE;
2499*22dc650dSSadaf Ebrahimi break;
2500*22dc650dSSadaf Ebrahimi
2501*22dc650dSSadaf Ebrahimi case OP_ASSERT:
2502*22dc650dSSadaf Ebrahimi case OP_ASSERT_NOT:
2503*22dc650dSSadaf Ebrahimi case OP_ASSERTBACK:
2504*22dc650dSSadaf Ebrahimi case OP_ASSERTBACK_NOT:
2505*22dc650dSSadaf Ebrahimi case OP_ASSERT_NA:
2506*22dc650dSSadaf Ebrahimi case OP_ASSERTBACK_NA:
2507*22dc650dSSadaf Ebrahimi case OP_ONCE:
2508*22dc650dSSadaf Ebrahimi case OP_SCRIPT_RUN:
2509*22dc650dSSadaf Ebrahimi case OP_BRAPOS:
2510*22dc650dSSadaf Ebrahimi case OP_SBRA:
2511*22dc650dSSadaf Ebrahimi case OP_SBRAPOS:
2512*22dc650dSSadaf Ebrahimi case OP_SCOND:
2513*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
2514*22dc650dSSadaf Ebrahimi if (recurse_check_bit(common, PRIVATE_DATA(cc)))
2515*22dc650dSSadaf Ebrahimi length++;
2516*22dc650dSSadaf Ebrahimi cc += 1 + LINK_SIZE;
2517*22dc650dSSadaf Ebrahimi break;
2518*22dc650dSSadaf Ebrahimi
2519*22dc650dSSadaf Ebrahimi case OP_CBRA:
2520*22dc650dSSadaf Ebrahimi case OP_SCBRA:
2521*22dc650dSSadaf Ebrahimi offset = GET2(cc, 1 + LINK_SIZE);
2522*22dc650dSSadaf Ebrahimi if (recurse_check_bit(common, OVECTOR(offset << 1)))
2523*22dc650dSSadaf Ebrahimi {
2524*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1)));
2525*22dc650dSSadaf Ebrahimi length += 2;
2526*22dc650dSSadaf Ebrahimi }
2527*22dc650dSSadaf Ebrahimi if (common->optimized_cbracket[offset] == 0 && recurse_check_bit(common, OVECTOR_PRIV(offset)))
2528*22dc650dSSadaf Ebrahimi length++;
2529*22dc650dSSadaf Ebrahimi if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2530*22dc650dSSadaf Ebrahimi length++;
2531*22dc650dSSadaf Ebrahimi cc += 1 + LINK_SIZE + IMM2_SIZE;
2532*22dc650dSSadaf Ebrahimi break;
2533*22dc650dSSadaf Ebrahimi
2534*22dc650dSSadaf Ebrahimi case OP_CBRAPOS:
2535*22dc650dSSadaf Ebrahimi case OP_SCBRAPOS:
2536*22dc650dSSadaf Ebrahimi offset = GET2(cc, 1 + LINK_SIZE);
2537*22dc650dSSadaf Ebrahimi if (recurse_check_bit(common, OVECTOR(offset << 1)))
2538*22dc650dSSadaf Ebrahimi {
2539*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1)));
2540*22dc650dSSadaf Ebrahimi length += 2;
2541*22dc650dSSadaf Ebrahimi }
2542*22dc650dSSadaf Ebrahimi if (recurse_check_bit(common, OVECTOR_PRIV(offset)))
2543*22dc650dSSadaf Ebrahimi length++;
2544*22dc650dSSadaf Ebrahimi if (recurse_check_bit(common, PRIVATE_DATA(cc)))
2545*22dc650dSSadaf Ebrahimi length++;
2546*22dc650dSSadaf Ebrahimi if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2547*22dc650dSSadaf Ebrahimi length++;
2548*22dc650dSSadaf Ebrahimi cc += 1 + LINK_SIZE + IMM2_SIZE;
2549*22dc650dSSadaf Ebrahimi break;
2550*22dc650dSSadaf Ebrahimi
2551*22dc650dSSadaf Ebrahimi case OP_COND:
2552*22dc650dSSadaf Ebrahimi /* Might be a hidden SCOND. */
2553*22dc650dSSadaf Ebrahimi alternative = cc + GET(cc, 1);
2554*22dc650dSSadaf Ebrahimi if ((*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) && recurse_check_bit(common, PRIVATE_DATA(cc)))
2555*22dc650dSSadaf Ebrahimi length++;
2556*22dc650dSSadaf Ebrahimi cc += 1 + LINK_SIZE;
2557*22dc650dSSadaf Ebrahimi break;
2558*22dc650dSSadaf Ebrahimi
2559*22dc650dSSadaf Ebrahimi CASE_ITERATOR_PRIVATE_DATA_1
2560*22dc650dSSadaf Ebrahimi offset = PRIVATE_DATA(cc);
2561*22dc650dSSadaf Ebrahimi if (offset != 0 && recurse_check_bit(common, offset))
2562*22dc650dSSadaf Ebrahimi length++;
2563*22dc650dSSadaf Ebrahimi cc += 2;
2564*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
2565*22dc650dSSadaf Ebrahimi if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2566*22dc650dSSadaf Ebrahimi #endif
2567*22dc650dSSadaf Ebrahimi break;
2568*22dc650dSSadaf Ebrahimi
2569*22dc650dSSadaf Ebrahimi CASE_ITERATOR_PRIVATE_DATA_2A
2570*22dc650dSSadaf Ebrahimi offset = PRIVATE_DATA(cc);
2571*22dc650dSSadaf Ebrahimi if (offset != 0 && recurse_check_bit(common, offset))
2572*22dc650dSSadaf Ebrahimi {
2573*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
2574*22dc650dSSadaf Ebrahimi length += 2;
2575*22dc650dSSadaf Ebrahimi }
2576*22dc650dSSadaf Ebrahimi cc += 2;
2577*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
2578*22dc650dSSadaf Ebrahimi if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2579*22dc650dSSadaf Ebrahimi #endif
2580*22dc650dSSadaf Ebrahimi break;
2581*22dc650dSSadaf Ebrahimi
2582*22dc650dSSadaf Ebrahimi CASE_ITERATOR_PRIVATE_DATA_2B
2583*22dc650dSSadaf Ebrahimi offset = PRIVATE_DATA(cc);
2584*22dc650dSSadaf Ebrahimi if (offset != 0 && recurse_check_bit(common, offset))
2585*22dc650dSSadaf Ebrahimi {
2586*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
2587*22dc650dSSadaf Ebrahimi length += 2;
2588*22dc650dSSadaf Ebrahimi }
2589*22dc650dSSadaf Ebrahimi cc += 2 + IMM2_SIZE;
2590*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
2591*22dc650dSSadaf Ebrahimi if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2592*22dc650dSSadaf Ebrahimi #endif
2593*22dc650dSSadaf Ebrahimi break;
2594*22dc650dSSadaf Ebrahimi
2595*22dc650dSSadaf Ebrahimi CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2596*22dc650dSSadaf Ebrahimi offset = PRIVATE_DATA(cc);
2597*22dc650dSSadaf Ebrahimi if (offset != 0 && recurse_check_bit(common, offset))
2598*22dc650dSSadaf Ebrahimi length++;
2599*22dc650dSSadaf Ebrahimi cc += 1;
2600*22dc650dSSadaf Ebrahimi break;
2601*22dc650dSSadaf Ebrahimi
2602*22dc650dSSadaf Ebrahimi CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2603*22dc650dSSadaf Ebrahimi offset = PRIVATE_DATA(cc);
2604*22dc650dSSadaf Ebrahimi if (offset != 0 && recurse_check_bit(common, offset))
2605*22dc650dSSadaf Ebrahimi {
2606*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
2607*22dc650dSSadaf Ebrahimi length += 2;
2608*22dc650dSSadaf Ebrahimi }
2609*22dc650dSSadaf Ebrahimi cc += 1;
2610*22dc650dSSadaf Ebrahimi break;
2611*22dc650dSSadaf Ebrahimi
2612*22dc650dSSadaf Ebrahimi CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2613*22dc650dSSadaf Ebrahimi offset = PRIVATE_DATA(cc);
2614*22dc650dSSadaf Ebrahimi if (offset != 0 && recurse_check_bit(common, offset))
2615*22dc650dSSadaf Ebrahimi {
2616*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
2617*22dc650dSSadaf Ebrahimi length += 2;
2618*22dc650dSSadaf Ebrahimi }
2619*22dc650dSSadaf Ebrahimi cc += 1 + IMM2_SIZE;
2620*22dc650dSSadaf Ebrahimi break;
2621*22dc650dSSadaf Ebrahimi
2622*22dc650dSSadaf Ebrahimi case OP_CLASS:
2623*22dc650dSSadaf Ebrahimi case OP_NCLASS:
2624*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
2625*22dc650dSSadaf Ebrahimi case OP_XCLASS:
2626*22dc650dSSadaf Ebrahimi size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2627*22dc650dSSadaf Ebrahimi #else
2628*22dc650dSSadaf Ebrahimi size = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2629*22dc650dSSadaf Ebrahimi #endif
2630*22dc650dSSadaf Ebrahimi
2631*22dc650dSSadaf Ebrahimi offset = PRIVATE_DATA(cc);
2632*22dc650dSSadaf Ebrahimi if (offset != 0 && recurse_check_bit(common, offset))
2633*22dc650dSSadaf Ebrahimi length += get_class_iterator_size(cc + size);
2634*22dc650dSSadaf Ebrahimi cc += size;
2635*22dc650dSSadaf Ebrahimi break;
2636*22dc650dSSadaf Ebrahimi
2637*22dc650dSSadaf Ebrahimi case OP_MARK:
2638*22dc650dSSadaf Ebrahimi case OP_COMMIT_ARG:
2639*22dc650dSSadaf Ebrahimi case OP_PRUNE_ARG:
2640*22dc650dSSadaf Ebrahimi case OP_THEN_ARG:
2641*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(common->mark_ptr != 0);
2642*22dc650dSSadaf Ebrahimi recurse_flags |= recurse_flag_setmark_found;
2643*22dc650dSSadaf Ebrahimi if (common->control_head_ptr != 0)
2644*22dc650dSSadaf Ebrahimi recurse_flags |= recurse_flag_control_head_found;
2645*22dc650dSSadaf Ebrahimi if (*cc != OP_MARK)
2646*22dc650dSSadaf Ebrahimi recurse_flags |= recurse_flag_quit_found;
2647*22dc650dSSadaf Ebrahimi
2648*22dc650dSSadaf Ebrahimi cc += 1 + 2 + cc[1];
2649*22dc650dSSadaf Ebrahimi break;
2650*22dc650dSSadaf Ebrahimi
2651*22dc650dSSadaf Ebrahimi case OP_PRUNE:
2652*22dc650dSSadaf Ebrahimi case OP_SKIP:
2653*22dc650dSSadaf Ebrahimi case OP_COMMIT:
2654*22dc650dSSadaf Ebrahimi recurse_flags |= recurse_flag_quit_found;
2655*22dc650dSSadaf Ebrahimi cc++;
2656*22dc650dSSadaf Ebrahimi break;
2657*22dc650dSSadaf Ebrahimi
2658*22dc650dSSadaf Ebrahimi case OP_SKIP_ARG:
2659*22dc650dSSadaf Ebrahimi recurse_flags |= recurse_flag_quit_found;
2660*22dc650dSSadaf Ebrahimi cc += 1 + 2 + cc[1];
2661*22dc650dSSadaf Ebrahimi break;
2662*22dc650dSSadaf Ebrahimi
2663*22dc650dSSadaf Ebrahimi case OP_THEN:
2664*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(common->control_head_ptr != 0);
2665*22dc650dSSadaf Ebrahimi recurse_flags |= recurse_flag_quit_found | recurse_flag_control_head_found;
2666*22dc650dSSadaf Ebrahimi cc++;
2667*22dc650dSSadaf Ebrahimi break;
2668*22dc650dSSadaf Ebrahimi
2669*22dc650dSSadaf Ebrahimi case OP_ACCEPT:
2670*22dc650dSSadaf Ebrahimi case OP_ASSERT_ACCEPT:
2671*22dc650dSSadaf Ebrahimi recurse_flags |= recurse_flag_accept_found;
2672*22dc650dSSadaf Ebrahimi cc++;
2673*22dc650dSSadaf Ebrahimi break;
2674*22dc650dSSadaf Ebrahimi
2675*22dc650dSSadaf Ebrahimi default:
2676*22dc650dSSadaf Ebrahimi cc = next_opcode(common, cc);
2677*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(cc != NULL);
2678*22dc650dSSadaf Ebrahimi break;
2679*22dc650dSSadaf Ebrahimi }
2680*22dc650dSSadaf Ebrahimi }
2681*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(cc == ccend);
2682*22dc650dSSadaf Ebrahimi
2683*22dc650dSSadaf Ebrahimi if (recurse_flags & recurse_flag_control_head_found)
2684*22dc650dSSadaf Ebrahimi length++;
2685*22dc650dSSadaf Ebrahimi if (recurse_flags & recurse_flag_quit_found)
2686*22dc650dSSadaf Ebrahimi {
2687*22dc650dSSadaf Ebrahimi if (recurse_flags & recurse_flag_setsom_found)
2688*22dc650dSSadaf Ebrahimi length++;
2689*22dc650dSSadaf Ebrahimi if (recurse_flags & recurse_flag_setmark_found)
2690*22dc650dSSadaf Ebrahimi length++;
2691*22dc650dSSadaf Ebrahimi }
2692*22dc650dSSadaf Ebrahimi
2693*22dc650dSSadaf Ebrahimi *result_flags = recurse_flags;
2694*22dc650dSSadaf Ebrahimi return length;
2695*22dc650dSSadaf Ebrahimi }
2696*22dc650dSSadaf Ebrahimi
/* Operation selector of copy_recurse_data() (its "type" argument). The
private, shared and kept-shared names refer to the three groups of words
which copy_recurse_data() collects for each opcode. */
enum copy_recurse_data_types {
  recurse_copy_from_global = 0,
  recurse_copy_private_to_global = 1,
  recurse_copy_shared_to_global = 2,
  recurse_copy_kept_shared_to_global = 3,
  recurse_swap_global = 4
};
2704*22dc650dSSadaf Ebrahimi
copy_recurse_data(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,int type,int stackptr,int stacktop,uint32_t recurse_flags)2705*22dc650dSSadaf Ebrahimi static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
2706*22dc650dSSadaf Ebrahimi int type, int stackptr, int stacktop, uint32_t recurse_flags)
2707*22dc650dSSadaf Ebrahimi {
2708*22dc650dSSadaf Ebrahimi delayed_mem_copy_status status;
2709*22dc650dSSadaf Ebrahimi PCRE2_SPTR alternative;
2710*22dc650dSSadaf Ebrahimi sljit_sw private_srcw[2];
2711*22dc650dSSadaf Ebrahimi sljit_sw shared_srcw[3];
2712*22dc650dSSadaf Ebrahimi sljit_sw kept_shared_srcw[2];
2713*22dc650dSSadaf Ebrahimi int private_count, shared_count, kept_shared_count;
2714*22dc650dSSadaf Ebrahimi int from_sp, base_reg, offset, i;
2715*22dc650dSSadaf Ebrahimi
2716*22dc650dSSadaf Ebrahimi memset(common->recurse_bitset, 0, common->recurse_bitset_size);
2717*22dc650dSSadaf Ebrahimi
2718*22dc650dSSadaf Ebrahimi #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2719*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(common->control_head_ptr != 0);
2720*22dc650dSSadaf Ebrahimi recurse_check_bit(common, common->control_head_ptr);
2721*22dc650dSSadaf Ebrahimi #endif
2722*22dc650dSSadaf Ebrahimi
2723*22dc650dSSadaf Ebrahimi switch (type)
2724*22dc650dSSadaf Ebrahimi {
2725*22dc650dSSadaf Ebrahimi case recurse_copy_from_global:
2726*22dc650dSSadaf Ebrahimi from_sp = TRUE;
2727*22dc650dSSadaf Ebrahimi base_reg = STACK_TOP;
2728*22dc650dSSadaf Ebrahimi break;
2729*22dc650dSSadaf Ebrahimi
2730*22dc650dSSadaf Ebrahimi case recurse_copy_private_to_global:
2731*22dc650dSSadaf Ebrahimi case recurse_copy_shared_to_global:
2732*22dc650dSSadaf Ebrahimi case recurse_copy_kept_shared_to_global:
2733*22dc650dSSadaf Ebrahimi from_sp = FALSE;
2734*22dc650dSSadaf Ebrahimi base_reg = STACK_TOP;
2735*22dc650dSSadaf Ebrahimi break;
2736*22dc650dSSadaf Ebrahimi
2737*22dc650dSSadaf Ebrahimi default:
2738*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(type == recurse_swap_global);
2739*22dc650dSSadaf Ebrahimi from_sp = FALSE;
2740*22dc650dSSadaf Ebrahimi base_reg = TMP2;
2741*22dc650dSSadaf Ebrahimi break;
2742*22dc650dSSadaf Ebrahimi }
2743*22dc650dSSadaf Ebrahimi
2744*22dc650dSSadaf Ebrahimi stackptr = STACK(stackptr);
2745*22dc650dSSadaf Ebrahimi stacktop = STACK(stacktop);
2746*22dc650dSSadaf Ebrahimi
2747*22dc650dSSadaf Ebrahimi status.tmp_regs[0] = TMP1;
2748*22dc650dSSadaf Ebrahimi status.saved_tmp_regs[0] = TMP1;
2749*22dc650dSSadaf Ebrahimi
2750*22dc650dSSadaf Ebrahimi if (base_reg != TMP2)
2751*22dc650dSSadaf Ebrahimi {
2752*22dc650dSSadaf Ebrahimi status.tmp_regs[1] = TMP2;
2753*22dc650dSSadaf Ebrahimi status.saved_tmp_regs[1] = TMP2;
2754*22dc650dSSadaf Ebrahimi }
2755*22dc650dSSadaf Ebrahimi else
2756*22dc650dSSadaf Ebrahimi {
2757*22dc650dSSadaf Ebrahimi status.saved_tmp_regs[1] = RETURN_ADDR;
2758*22dc650dSSadaf Ebrahimi if (HAS_VIRTUAL_REGISTERS)
2759*22dc650dSSadaf Ebrahimi status.tmp_regs[1] = STR_PTR;
2760*22dc650dSSadaf Ebrahimi else
2761*22dc650dSSadaf Ebrahimi status.tmp_regs[1] = RETURN_ADDR;
2762*22dc650dSSadaf Ebrahimi }
2763*22dc650dSSadaf Ebrahimi
2764*22dc650dSSadaf Ebrahimi status.saved_tmp_regs[2] = TMP3;
2765*22dc650dSSadaf Ebrahimi if (HAS_VIRTUAL_REGISTERS)
2766*22dc650dSSadaf Ebrahimi status.tmp_regs[2] = STR_END;
2767*22dc650dSSadaf Ebrahimi else
2768*22dc650dSSadaf Ebrahimi status.tmp_regs[2] = TMP3;
2769*22dc650dSSadaf Ebrahimi
2770*22dc650dSSadaf Ebrahimi delayed_mem_copy_init(&status, common);
2771*22dc650dSSadaf Ebrahimi
2772*22dc650dSSadaf Ebrahimi if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
2773*22dc650dSSadaf Ebrahimi {
2774*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
2775*22dc650dSSadaf Ebrahimi
2776*22dc650dSSadaf Ebrahimi if (!from_sp)
2777*22dc650dSSadaf Ebrahimi delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->recursive_head_ptr);
2778*22dc650dSSadaf Ebrahimi
2779*22dc650dSSadaf Ebrahimi if (from_sp || type == recurse_swap_global)
2780*22dc650dSSadaf Ebrahimi delayed_mem_copy_move(&status, SLJIT_SP, common->recursive_head_ptr, base_reg, stackptr);
2781*22dc650dSSadaf Ebrahimi }
2782*22dc650dSSadaf Ebrahimi
2783*22dc650dSSadaf Ebrahimi stackptr += sizeof(sljit_sw);
2784*22dc650dSSadaf Ebrahimi
2785*22dc650dSSadaf Ebrahimi #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2786*22dc650dSSadaf Ebrahimi if (type != recurse_copy_shared_to_global)
2787*22dc650dSSadaf Ebrahimi {
2788*22dc650dSSadaf Ebrahimi if (!from_sp)
2789*22dc650dSSadaf Ebrahimi delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->control_head_ptr);
2790*22dc650dSSadaf Ebrahimi
2791*22dc650dSSadaf Ebrahimi if (from_sp || type == recurse_swap_global)
2792*22dc650dSSadaf Ebrahimi delayed_mem_copy_move(&status, SLJIT_SP, common->control_head_ptr, base_reg, stackptr);
2793*22dc650dSSadaf Ebrahimi }
2794*22dc650dSSadaf Ebrahimi
2795*22dc650dSSadaf Ebrahimi stackptr += sizeof(sljit_sw);
2796*22dc650dSSadaf Ebrahimi #endif
2797*22dc650dSSadaf Ebrahimi
2798*22dc650dSSadaf Ebrahimi while (cc < ccend)
2799*22dc650dSSadaf Ebrahimi {
2800*22dc650dSSadaf Ebrahimi private_count = 0;
2801*22dc650dSSadaf Ebrahimi shared_count = 0;
2802*22dc650dSSadaf Ebrahimi kept_shared_count = 0;
2803*22dc650dSSadaf Ebrahimi
2804*22dc650dSSadaf Ebrahimi switch(*cc)
2805*22dc650dSSadaf Ebrahimi {
2806*22dc650dSSadaf Ebrahimi case OP_SET_SOM:
2807*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(common->has_set_som);
2808*22dc650dSSadaf Ebrahimi if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, OVECTOR(0)))
2809*22dc650dSSadaf Ebrahimi {
2810*22dc650dSSadaf Ebrahimi kept_shared_srcw[0] = OVECTOR(0);
2811*22dc650dSSadaf Ebrahimi kept_shared_count = 1;
2812*22dc650dSSadaf Ebrahimi }
2813*22dc650dSSadaf Ebrahimi cc += 1;
2814*22dc650dSSadaf Ebrahimi break;
2815*22dc650dSSadaf Ebrahimi
2816*22dc650dSSadaf Ebrahimi case OP_RECURSE:
2817*22dc650dSSadaf Ebrahimi if (recurse_flags & recurse_flag_quit_found)
2818*22dc650dSSadaf Ebrahimi {
2819*22dc650dSSadaf Ebrahimi if (common->has_set_som && recurse_check_bit(common, OVECTOR(0)))
2820*22dc650dSSadaf Ebrahimi {
2821*22dc650dSSadaf Ebrahimi kept_shared_srcw[0] = OVECTOR(0);
2822*22dc650dSSadaf Ebrahimi kept_shared_count = 1;
2823*22dc650dSSadaf Ebrahimi }
2824*22dc650dSSadaf Ebrahimi if (common->mark_ptr != 0 && recurse_check_bit(common, common->mark_ptr))
2825*22dc650dSSadaf Ebrahimi {
2826*22dc650dSSadaf Ebrahimi kept_shared_srcw[kept_shared_count] = common->mark_ptr;
2827*22dc650dSSadaf Ebrahimi kept_shared_count++;
2828*22dc650dSSadaf Ebrahimi }
2829*22dc650dSSadaf Ebrahimi }
2830*22dc650dSSadaf Ebrahimi if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2831*22dc650dSSadaf Ebrahimi {
2832*22dc650dSSadaf Ebrahimi shared_srcw[0] = common->capture_last_ptr;
2833*22dc650dSSadaf Ebrahimi shared_count = 1;
2834*22dc650dSSadaf Ebrahimi }
2835*22dc650dSSadaf Ebrahimi cc += 1 + LINK_SIZE;
2836*22dc650dSSadaf Ebrahimi break;
2837*22dc650dSSadaf Ebrahimi
2838*22dc650dSSadaf Ebrahimi case OP_KET:
2839*22dc650dSSadaf Ebrahimi private_srcw[0] = PRIVATE_DATA(cc);
2840*22dc650dSSadaf Ebrahimi if (private_srcw[0] != 0)
2841*22dc650dSSadaf Ebrahimi {
2842*22dc650dSSadaf Ebrahimi if (recurse_check_bit(common, private_srcw[0]))
2843*22dc650dSSadaf Ebrahimi private_count = 1;
2844*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
2845*22dc650dSSadaf Ebrahimi cc += PRIVATE_DATA(cc + 1);
2846*22dc650dSSadaf Ebrahimi }
2847*22dc650dSSadaf Ebrahimi cc += 1 + LINK_SIZE;
2848*22dc650dSSadaf Ebrahimi break;
2849*22dc650dSSadaf Ebrahimi
2850*22dc650dSSadaf Ebrahimi case OP_ASSERT:
2851*22dc650dSSadaf Ebrahimi case OP_ASSERT_NOT:
2852*22dc650dSSadaf Ebrahimi case OP_ASSERTBACK:
2853*22dc650dSSadaf Ebrahimi case OP_ASSERTBACK_NOT:
2854*22dc650dSSadaf Ebrahimi case OP_ASSERT_NA:
2855*22dc650dSSadaf Ebrahimi case OP_ASSERTBACK_NA:
2856*22dc650dSSadaf Ebrahimi case OP_ONCE:
2857*22dc650dSSadaf Ebrahimi case OP_SCRIPT_RUN:
2858*22dc650dSSadaf Ebrahimi case OP_BRAPOS:
2859*22dc650dSSadaf Ebrahimi case OP_SBRA:
2860*22dc650dSSadaf Ebrahimi case OP_SBRAPOS:
2861*22dc650dSSadaf Ebrahimi case OP_SCOND:
2862*22dc650dSSadaf Ebrahimi private_srcw[0] = PRIVATE_DATA(cc);
2863*22dc650dSSadaf Ebrahimi if (recurse_check_bit(common, private_srcw[0]))
2864*22dc650dSSadaf Ebrahimi private_count = 1;
2865*22dc650dSSadaf Ebrahimi cc += 1 + LINK_SIZE;
2866*22dc650dSSadaf Ebrahimi break;
2867*22dc650dSSadaf Ebrahimi
2868*22dc650dSSadaf Ebrahimi case OP_CBRA:
2869*22dc650dSSadaf Ebrahimi case OP_SCBRA:
2870*22dc650dSSadaf Ebrahimi offset = GET2(cc, 1 + LINK_SIZE);
2871*22dc650dSSadaf Ebrahimi shared_srcw[0] = OVECTOR(offset << 1);
2872*22dc650dSSadaf Ebrahimi if (recurse_check_bit(common, shared_srcw[0]))
2873*22dc650dSSadaf Ebrahimi {
2874*22dc650dSSadaf Ebrahimi shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
2875*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
2876*22dc650dSSadaf Ebrahimi shared_count = 2;
2877*22dc650dSSadaf Ebrahimi }
2878*22dc650dSSadaf Ebrahimi
2879*22dc650dSSadaf Ebrahimi if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2880*22dc650dSSadaf Ebrahimi {
2881*22dc650dSSadaf Ebrahimi shared_srcw[shared_count] = common->capture_last_ptr;
2882*22dc650dSSadaf Ebrahimi shared_count++;
2883*22dc650dSSadaf Ebrahimi }
2884*22dc650dSSadaf Ebrahimi
2885*22dc650dSSadaf Ebrahimi if (common->optimized_cbracket[offset] == 0)
2886*22dc650dSSadaf Ebrahimi {
2887*22dc650dSSadaf Ebrahimi private_srcw[0] = OVECTOR_PRIV(offset);
2888*22dc650dSSadaf Ebrahimi if (recurse_check_bit(common, private_srcw[0]))
2889*22dc650dSSadaf Ebrahimi private_count = 1;
2890*22dc650dSSadaf Ebrahimi }
2891*22dc650dSSadaf Ebrahimi
2892*22dc650dSSadaf Ebrahimi cc += 1 + LINK_SIZE + IMM2_SIZE;
2893*22dc650dSSadaf Ebrahimi break;
2894*22dc650dSSadaf Ebrahimi
2895*22dc650dSSadaf Ebrahimi case OP_CBRAPOS:
2896*22dc650dSSadaf Ebrahimi case OP_SCBRAPOS:
2897*22dc650dSSadaf Ebrahimi offset = GET2(cc, 1 + LINK_SIZE);
2898*22dc650dSSadaf Ebrahimi shared_srcw[0] = OVECTOR(offset << 1);
2899*22dc650dSSadaf Ebrahimi if (recurse_check_bit(common, shared_srcw[0]))
2900*22dc650dSSadaf Ebrahimi {
2901*22dc650dSSadaf Ebrahimi shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
2902*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
2903*22dc650dSSadaf Ebrahimi shared_count = 2;
2904*22dc650dSSadaf Ebrahimi }
2905*22dc650dSSadaf Ebrahimi
2906*22dc650dSSadaf Ebrahimi if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2907*22dc650dSSadaf Ebrahimi {
2908*22dc650dSSadaf Ebrahimi shared_srcw[shared_count] = common->capture_last_ptr;
2909*22dc650dSSadaf Ebrahimi shared_count++;
2910*22dc650dSSadaf Ebrahimi }
2911*22dc650dSSadaf Ebrahimi
2912*22dc650dSSadaf Ebrahimi private_srcw[0] = PRIVATE_DATA(cc);
2913*22dc650dSSadaf Ebrahimi if (recurse_check_bit(common, private_srcw[0]))
2914*22dc650dSSadaf Ebrahimi private_count = 1;
2915*22dc650dSSadaf Ebrahimi
2916*22dc650dSSadaf Ebrahimi offset = OVECTOR_PRIV(offset);
2917*22dc650dSSadaf Ebrahimi if (recurse_check_bit(common, offset))
2918*22dc650dSSadaf Ebrahimi {
2919*22dc650dSSadaf Ebrahimi private_srcw[private_count] = offset;
2920*22dc650dSSadaf Ebrahimi private_count++;
2921*22dc650dSSadaf Ebrahimi }
2922*22dc650dSSadaf Ebrahimi cc += 1 + LINK_SIZE + IMM2_SIZE;
2923*22dc650dSSadaf Ebrahimi break;
2924*22dc650dSSadaf Ebrahimi
2925*22dc650dSSadaf Ebrahimi case OP_COND:
2926*22dc650dSSadaf Ebrahimi /* Might be a hidden SCOND. */
2927*22dc650dSSadaf Ebrahimi alternative = cc + GET(cc, 1);
2928*22dc650dSSadaf Ebrahimi if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
2929*22dc650dSSadaf Ebrahimi {
2930*22dc650dSSadaf Ebrahimi private_srcw[0] = PRIVATE_DATA(cc);
2931*22dc650dSSadaf Ebrahimi if (recurse_check_bit(common, private_srcw[0]))
2932*22dc650dSSadaf Ebrahimi private_count = 1;
2933*22dc650dSSadaf Ebrahimi }
2934*22dc650dSSadaf Ebrahimi cc += 1 + LINK_SIZE;
2935*22dc650dSSadaf Ebrahimi break;
2936*22dc650dSSadaf Ebrahimi
2937*22dc650dSSadaf Ebrahimi CASE_ITERATOR_PRIVATE_DATA_1
2938*22dc650dSSadaf Ebrahimi private_srcw[0] = PRIVATE_DATA(cc);
2939*22dc650dSSadaf Ebrahimi if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2940*22dc650dSSadaf Ebrahimi private_count = 1;
2941*22dc650dSSadaf Ebrahimi cc += 2;
2942*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
2943*22dc650dSSadaf Ebrahimi if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2944*22dc650dSSadaf Ebrahimi #endif
2945*22dc650dSSadaf Ebrahimi break;
2946*22dc650dSSadaf Ebrahimi
2947*22dc650dSSadaf Ebrahimi CASE_ITERATOR_PRIVATE_DATA_2A
2948*22dc650dSSadaf Ebrahimi private_srcw[0] = PRIVATE_DATA(cc);
2949*22dc650dSSadaf Ebrahimi if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2950*22dc650dSSadaf Ebrahimi {
2951*22dc650dSSadaf Ebrahimi private_count = 2;
2952*22dc650dSSadaf Ebrahimi private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2953*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
2954*22dc650dSSadaf Ebrahimi }
2955*22dc650dSSadaf Ebrahimi cc += 2;
2956*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
2957*22dc650dSSadaf Ebrahimi if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2958*22dc650dSSadaf Ebrahimi #endif
2959*22dc650dSSadaf Ebrahimi break;
2960*22dc650dSSadaf Ebrahimi
2961*22dc650dSSadaf Ebrahimi CASE_ITERATOR_PRIVATE_DATA_2B
2962*22dc650dSSadaf Ebrahimi private_srcw[0] = PRIVATE_DATA(cc);
2963*22dc650dSSadaf Ebrahimi if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2964*22dc650dSSadaf Ebrahimi {
2965*22dc650dSSadaf Ebrahimi private_count = 2;
2966*22dc650dSSadaf Ebrahimi private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2967*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
2968*22dc650dSSadaf Ebrahimi }
2969*22dc650dSSadaf Ebrahimi cc += 2 + IMM2_SIZE;
2970*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
2971*22dc650dSSadaf Ebrahimi if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2972*22dc650dSSadaf Ebrahimi #endif
2973*22dc650dSSadaf Ebrahimi break;
2974*22dc650dSSadaf Ebrahimi
2975*22dc650dSSadaf Ebrahimi CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2976*22dc650dSSadaf Ebrahimi private_srcw[0] = PRIVATE_DATA(cc);
2977*22dc650dSSadaf Ebrahimi if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2978*22dc650dSSadaf Ebrahimi private_count = 1;
2979*22dc650dSSadaf Ebrahimi cc += 1;
2980*22dc650dSSadaf Ebrahimi break;
2981*22dc650dSSadaf Ebrahimi
2982*22dc650dSSadaf Ebrahimi CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2983*22dc650dSSadaf Ebrahimi private_srcw[0] = PRIVATE_DATA(cc);
2984*22dc650dSSadaf Ebrahimi if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2985*22dc650dSSadaf Ebrahimi {
2986*22dc650dSSadaf Ebrahimi private_count = 2;
2987*22dc650dSSadaf Ebrahimi private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2988*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
2989*22dc650dSSadaf Ebrahimi }
2990*22dc650dSSadaf Ebrahimi cc += 1;
2991*22dc650dSSadaf Ebrahimi break;
2992*22dc650dSSadaf Ebrahimi
2993*22dc650dSSadaf Ebrahimi CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2994*22dc650dSSadaf Ebrahimi private_srcw[0] = PRIVATE_DATA(cc);
2995*22dc650dSSadaf Ebrahimi if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2996*22dc650dSSadaf Ebrahimi {
2997*22dc650dSSadaf Ebrahimi private_count = 2;
2998*22dc650dSSadaf Ebrahimi private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2999*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
3000*22dc650dSSadaf Ebrahimi }
3001*22dc650dSSadaf Ebrahimi cc += 1 + IMM2_SIZE;
3002*22dc650dSSadaf Ebrahimi break;
3003*22dc650dSSadaf Ebrahimi
3004*22dc650dSSadaf Ebrahimi case OP_CLASS:
3005*22dc650dSSadaf Ebrahimi case OP_NCLASS:
3006*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
3007*22dc650dSSadaf Ebrahimi case OP_XCLASS:
3008*22dc650dSSadaf Ebrahimi i = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
3009*22dc650dSSadaf Ebrahimi #else
3010*22dc650dSSadaf Ebrahimi i = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
3011*22dc650dSSadaf Ebrahimi #endif
3012*22dc650dSSadaf Ebrahimi if (PRIVATE_DATA(cc) != 0)
3013*22dc650dSSadaf Ebrahimi {
3014*22dc650dSSadaf Ebrahimi private_count = 1;
3015*22dc650dSSadaf Ebrahimi private_srcw[0] = PRIVATE_DATA(cc);
3016*22dc650dSSadaf Ebrahimi switch(get_class_iterator_size(cc + i))
3017*22dc650dSSadaf Ebrahimi {
3018*22dc650dSSadaf Ebrahimi case 1:
3019*22dc650dSSadaf Ebrahimi break;
3020*22dc650dSSadaf Ebrahimi
3021*22dc650dSSadaf Ebrahimi case 2:
3022*22dc650dSSadaf Ebrahimi if (recurse_check_bit(common, private_srcw[0]))
3023*22dc650dSSadaf Ebrahimi {
3024*22dc650dSSadaf Ebrahimi private_count = 2;
3025*22dc650dSSadaf Ebrahimi private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
3026*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
3027*22dc650dSSadaf Ebrahimi }
3028*22dc650dSSadaf Ebrahimi break;
3029*22dc650dSSadaf Ebrahimi
3030*22dc650dSSadaf Ebrahimi default:
3031*22dc650dSSadaf Ebrahimi SLJIT_UNREACHABLE();
3032*22dc650dSSadaf Ebrahimi break;
3033*22dc650dSSadaf Ebrahimi }
3034*22dc650dSSadaf Ebrahimi }
3035*22dc650dSSadaf Ebrahimi cc += i;
3036*22dc650dSSadaf Ebrahimi break;
3037*22dc650dSSadaf Ebrahimi
3038*22dc650dSSadaf Ebrahimi case OP_MARK:
3039*22dc650dSSadaf Ebrahimi case OP_COMMIT_ARG:
3040*22dc650dSSadaf Ebrahimi case OP_PRUNE_ARG:
3041*22dc650dSSadaf Ebrahimi case OP_THEN_ARG:
3042*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(common->mark_ptr != 0);
3043*22dc650dSSadaf Ebrahimi if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, common->mark_ptr))
3044*22dc650dSSadaf Ebrahimi {
3045*22dc650dSSadaf Ebrahimi kept_shared_srcw[0] = common->mark_ptr;
3046*22dc650dSSadaf Ebrahimi kept_shared_count = 1;
3047*22dc650dSSadaf Ebrahimi }
3048*22dc650dSSadaf Ebrahimi if (common->control_head_ptr != 0 && recurse_check_bit(common, common->control_head_ptr))
3049*22dc650dSSadaf Ebrahimi {
3050*22dc650dSSadaf Ebrahimi private_srcw[0] = common->control_head_ptr;
3051*22dc650dSSadaf Ebrahimi private_count = 1;
3052*22dc650dSSadaf Ebrahimi }
3053*22dc650dSSadaf Ebrahimi cc += 1 + 2 + cc[1];
3054*22dc650dSSadaf Ebrahimi break;
3055*22dc650dSSadaf Ebrahimi
3056*22dc650dSSadaf Ebrahimi case OP_THEN:
3057*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(common->control_head_ptr != 0);
3058*22dc650dSSadaf Ebrahimi if (recurse_check_bit(common, common->control_head_ptr))
3059*22dc650dSSadaf Ebrahimi {
3060*22dc650dSSadaf Ebrahimi private_srcw[0] = common->control_head_ptr;
3061*22dc650dSSadaf Ebrahimi private_count = 1;
3062*22dc650dSSadaf Ebrahimi }
3063*22dc650dSSadaf Ebrahimi cc++;
3064*22dc650dSSadaf Ebrahimi break;
3065*22dc650dSSadaf Ebrahimi
3066*22dc650dSSadaf Ebrahimi default:
3067*22dc650dSSadaf Ebrahimi cc = next_opcode(common, cc);
3068*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(cc != NULL);
3069*22dc650dSSadaf Ebrahimi continue;
3070*22dc650dSSadaf Ebrahimi }
3071*22dc650dSSadaf Ebrahimi
3072*22dc650dSSadaf Ebrahimi if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
3073*22dc650dSSadaf Ebrahimi {
3074*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
3075*22dc650dSSadaf Ebrahimi
3076*22dc650dSSadaf Ebrahimi for (i = 0; i < private_count; i++)
3077*22dc650dSSadaf Ebrahimi {
3078*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(private_srcw[i] != 0);
3079*22dc650dSSadaf Ebrahimi
3080*22dc650dSSadaf Ebrahimi if (!from_sp)
3081*22dc650dSSadaf Ebrahimi delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, private_srcw[i]);
3082*22dc650dSSadaf Ebrahimi
3083*22dc650dSSadaf Ebrahimi if (from_sp || type == recurse_swap_global)
3084*22dc650dSSadaf Ebrahimi delayed_mem_copy_move(&status, SLJIT_SP, private_srcw[i], base_reg, stackptr);
3085*22dc650dSSadaf Ebrahimi
3086*22dc650dSSadaf Ebrahimi stackptr += sizeof(sljit_sw);
3087*22dc650dSSadaf Ebrahimi }
3088*22dc650dSSadaf Ebrahimi }
3089*22dc650dSSadaf Ebrahimi else
3090*22dc650dSSadaf Ebrahimi stackptr += sizeof(sljit_sw) * private_count;
3091*22dc650dSSadaf Ebrahimi
3092*22dc650dSSadaf Ebrahimi if (type != recurse_copy_private_to_global && type != recurse_copy_kept_shared_to_global)
3093*22dc650dSSadaf Ebrahimi {
3094*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_swap_global);
3095*22dc650dSSadaf Ebrahimi
3096*22dc650dSSadaf Ebrahimi for (i = 0; i < shared_count; i++)
3097*22dc650dSSadaf Ebrahimi {
3098*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(shared_srcw[i] != 0);
3099*22dc650dSSadaf Ebrahimi
3100*22dc650dSSadaf Ebrahimi if (!from_sp)
3101*22dc650dSSadaf Ebrahimi delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, shared_srcw[i]);
3102*22dc650dSSadaf Ebrahimi
3103*22dc650dSSadaf Ebrahimi if (from_sp || type == recurse_swap_global)
3104*22dc650dSSadaf Ebrahimi delayed_mem_copy_move(&status, SLJIT_SP, shared_srcw[i], base_reg, stackptr);
3105*22dc650dSSadaf Ebrahimi
3106*22dc650dSSadaf Ebrahimi stackptr += sizeof(sljit_sw);
3107*22dc650dSSadaf Ebrahimi }
3108*22dc650dSSadaf Ebrahimi }
3109*22dc650dSSadaf Ebrahimi else
3110*22dc650dSSadaf Ebrahimi stackptr += sizeof(sljit_sw) * shared_count;
3111*22dc650dSSadaf Ebrahimi
3112*22dc650dSSadaf Ebrahimi if (type != recurse_copy_private_to_global && type != recurse_swap_global)
3113*22dc650dSSadaf Ebrahimi {
3114*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_copy_kept_shared_to_global);
3115*22dc650dSSadaf Ebrahimi
3116*22dc650dSSadaf Ebrahimi for (i = 0; i < kept_shared_count; i++)
3117*22dc650dSSadaf Ebrahimi {
3118*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(kept_shared_srcw[i] != 0);
3119*22dc650dSSadaf Ebrahimi
3120*22dc650dSSadaf Ebrahimi if (!from_sp)
3121*22dc650dSSadaf Ebrahimi delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, kept_shared_srcw[i]);
3122*22dc650dSSadaf Ebrahimi
3123*22dc650dSSadaf Ebrahimi if (from_sp || type == recurse_swap_global)
3124*22dc650dSSadaf Ebrahimi delayed_mem_copy_move(&status, SLJIT_SP, kept_shared_srcw[i], base_reg, stackptr);
3125*22dc650dSSadaf Ebrahimi
3126*22dc650dSSadaf Ebrahimi stackptr += sizeof(sljit_sw);
3127*22dc650dSSadaf Ebrahimi }
3128*22dc650dSSadaf Ebrahimi }
3129*22dc650dSSadaf Ebrahimi else
3130*22dc650dSSadaf Ebrahimi stackptr += sizeof(sljit_sw) * kept_shared_count;
3131*22dc650dSSadaf Ebrahimi }
3132*22dc650dSSadaf Ebrahimi
3133*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(cc == ccend && stackptr == stacktop);
3134*22dc650dSSadaf Ebrahimi
3135*22dc650dSSadaf Ebrahimi delayed_mem_copy_finish(&status);
3136*22dc650dSSadaf Ebrahimi }
3137*22dc650dSSadaf Ebrahimi
set_then_offsets(compiler_common * common,PCRE2_SPTR cc,sljit_u8 * current_offset)3138*22dc650dSSadaf Ebrahimi static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_SPTR cc, sljit_u8 *current_offset)
3139*22dc650dSSadaf Ebrahimi {
3140*22dc650dSSadaf Ebrahimi PCRE2_SPTR end = bracketend(cc);
3141*22dc650dSSadaf Ebrahimi BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
3142*22dc650dSSadaf Ebrahimi
3143*22dc650dSSadaf Ebrahimi /* Assert captures then. */
3144*22dc650dSSadaf Ebrahimi if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA)
3145*22dc650dSSadaf Ebrahimi current_offset = NULL;
3146*22dc650dSSadaf Ebrahimi /* Conditional block does not. */
3147*22dc650dSSadaf Ebrahimi if (*cc == OP_COND || *cc == OP_SCOND)
3148*22dc650dSSadaf Ebrahimi has_alternatives = FALSE;
3149*22dc650dSSadaf Ebrahimi
3150*22dc650dSSadaf Ebrahimi cc = next_opcode(common, cc);
3151*22dc650dSSadaf Ebrahimi
3152*22dc650dSSadaf Ebrahimi if (has_alternatives)
3153*22dc650dSSadaf Ebrahimi {
3154*22dc650dSSadaf Ebrahimi if (*cc == OP_REVERSE)
3155*22dc650dSSadaf Ebrahimi cc += 1 + IMM2_SIZE;
3156*22dc650dSSadaf Ebrahimi else if (*cc == OP_VREVERSE)
3157*22dc650dSSadaf Ebrahimi cc += 1 + 2 * IMM2_SIZE;
3158*22dc650dSSadaf Ebrahimi
3159*22dc650dSSadaf Ebrahimi current_offset = common->then_offsets + (cc - common->start);
3160*22dc650dSSadaf Ebrahimi }
3161*22dc650dSSadaf Ebrahimi
3162*22dc650dSSadaf Ebrahimi while (cc < end)
3163*22dc650dSSadaf Ebrahimi {
3164*22dc650dSSadaf Ebrahimi if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
3165*22dc650dSSadaf Ebrahimi cc = set_then_offsets(common, cc, current_offset);
3166*22dc650dSSadaf Ebrahimi else
3167*22dc650dSSadaf Ebrahimi {
3168*22dc650dSSadaf Ebrahimi if (*cc == OP_ALT && has_alternatives)
3169*22dc650dSSadaf Ebrahimi {
3170*22dc650dSSadaf Ebrahimi cc += 1 + LINK_SIZE;
3171*22dc650dSSadaf Ebrahimi
3172*22dc650dSSadaf Ebrahimi if (*cc == OP_REVERSE)
3173*22dc650dSSadaf Ebrahimi cc += 1 + IMM2_SIZE;
3174*22dc650dSSadaf Ebrahimi else if (*cc == OP_VREVERSE)
3175*22dc650dSSadaf Ebrahimi cc += 1 + 2 * IMM2_SIZE;
3176*22dc650dSSadaf Ebrahimi
3177*22dc650dSSadaf Ebrahimi current_offset = common->then_offsets + (cc - common->start);
3178*22dc650dSSadaf Ebrahimi continue;
3179*22dc650dSSadaf Ebrahimi }
3180*22dc650dSSadaf Ebrahimi
3181*22dc650dSSadaf Ebrahimi if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
3182*22dc650dSSadaf Ebrahimi *current_offset = 1;
3183*22dc650dSSadaf Ebrahimi cc = next_opcode(common, cc);
3184*22dc650dSSadaf Ebrahimi }
3185*22dc650dSSadaf Ebrahimi }
3186*22dc650dSSadaf Ebrahimi
3187*22dc650dSSadaf Ebrahimi return end;
3188*22dc650dSSadaf Ebrahimi }
3189*22dc650dSSadaf Ebrahimi
3190*22dc650dSSadaf Ebrahimi #undef CASE_ITERATOR_PRIVATE_DATA_1
3191*22dc650dSSadaf Ebrahimi #undef CASE_ITERATOR_PRIVATE_DATA_2A
3192*22dc650dSSadaf Ebrahimi #undef CASE_ITERATOR_PRIVATE_DATA_2B
3193*22dc650dSSadaf Ebrahimi #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
3194*22dc650dSSadaf Ebrahimi #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
3195*22dc650dSSadaf Ebrahimi #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
3196*22dc650dSSadaf Ebrahimi
is_powerof2(unsigned int value)3197*22dc650dSSadaf Ebrahimi static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
3198*22dc650dSSadaf Ebrahimi {
3199*22dc650dSSadaf Ebrahimi return (value & (value - 1)) == 0;
3200*22dc650dSSadaf Ebrahimi }
3201*22dc650dSSadaf Ebrahimi
set_jumps(jump_list * list,struct sljit_label * label)3202*22dc650dSSadaf Ebrahimi static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
3203*22dc650dSSadaf Ebrahimi {
3204*22dc650dSSadaf Ebrahimi while (list != NULL)
3205*22dc650dSSadaf Ebrahimi {
3206*22dc650dSSadaf Ebrahimi /* sljit_set_label is clever enough to do nothing
3207*22dc650dSSadaf Ebrahimi if either the jump or the label is NULL. */
3208*22dc650dSSadaf Ebrahimi SET_LABEL(list->jump, label);
3209*22dc650dSSadaf Ebrahimi list = list->next;
3210*22dc650dSSadaf Ebrahimi }
3211*22dc650dSSadaf Ebrahimi }
3212*22dc650dSSadaf Ebrahimi
add_jump(struct sljit_compiler * compiler,jump_list ** list,struct sljit_jump * jump)3213*22dc650dSSadaf Ebrahimi static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
3214*22dc650dSSadaf Ebrahimi {
3215*22dc650dSSadaf Ebrahimi jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
3216*22dc650dSSadaf Ebrahimi if (list_item)
3217*22dc650dSSadaf Ebrahimi {
3218*22dc650dSSadaf Ebrahimi list_item->next = *list;
3219*22dc650dSSadaf Ebrahimi list_item->jump = jump;
3220*22dc650dSSadaf Ebrahimi *list = list_item;
3221*22dc650dSSadaf Ebrahimi }
3222*22dc650dSSadaf Ebrahimi }
3223*22dc650dSSadaf Ebrahimi
/* Records a stub on common->stubs. The stub remembers the jump which
enters it (start) and a label emitted at the current position (quit).
Nothing is recorded, and no label is emitted, when sljit_alloc_memory()
returns NULL. */
static void add_stub(compiler_common *common, struct sljit_jump *start)
{
DEFINE_COMPILER;
stub_list *stub = sljit_alloc_memory(compiler, sizeof(stub_list));

if (!stub)
  return;

stub->start = start;
stub->quit = LABEL();
stub->next = common->stubs;
common->stubs = stub;
}
3237*22dc650dSSadaf Ebrahimi
/* Emits the code of every stub recorded on common->stubs and then empties
the list. Each stub binds its start jump to the current position, emits a
fast call whose jump is added to common->stackalloc, and finally jumps to
the quit label of the stub. */
static void flush_stubs(compiler_common *common)
{
DEFINE_COMPILER;
stub_list *stub;

for (stub = common->stubs; stub != NULL; stub = stub->next)
  {
  JUMPHERE(stub->start);
  add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
  JUMPTO(SLJIT_JUMP, stub->quit);
  }

common->stubs = NULL;
}
3252*22dc650dSSadaf Ebrahimi
count_match(compiler_common * common)3253*22dc650dSSadaf Ebrahimi static SLJIT_INLINE void count_match(compiler_common *common)
3254*22dc650dSSadaf Ebrahimi {
3255*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
3256*22dc650dSSadaf Ebrahimi
3257*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
3258*22dc650dSSadaf Ebrahimi add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
3259*22dc650dSSadaf Ebrahimi }
3260*22dc650dSSadaf Ebrahimi
allocate_stack(compiler_common * common,int size)3261*22dc650dSSadaf Ebrahimi static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
3262*22dc650dSSadaf Ebrahimi {
3263*22dc650dSSadaf Ebrahimi /* May destroy all locals and registers except TMP2. */
3264*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
3265*22dc650dSSadaf Ebrahimi
3266*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(size > 0);
3267*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw));
3268*22dc650dSSadaf Ebrahimi #ifdef DESTROY_REGISTERS
3269*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
3270*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3271*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3272*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
3273*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
3274*22dc650dSSadaf Ebrahimi #endif
3275*22dc650dSSadaf Ebrahimi add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0));
3276*22dc650dSSadaf Ebrahimi }
3277*22dc650dSSadaf Ebrahimi
free_stack(compiler_common * common,int size)3278*22dc650dSSadaf Ebrahimi static SLJIT_INLINE void free_stack(compiler_common *common, int size)
3279*22dc650dSSadaf Ebrahimi {
3280*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
3281*22dc650dSSadaf Ebrahimi
3282*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(size > 0);
3283*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw));
3284*22dc650dSSadaf Ebrahimi }
3285*22dc650dSSadaf Ebrahimi
/* Allocates a block with room for size plus one extra sljit_uw. The extra
leading word stores the previous common->read_only_data_head, so all blocks
form a singly linked list, and the new block becomes the head. The returned
pointer addresses the area just after the link word.

Returns NULL when the compiler is already in an error state, or when the
allocation fails (in which case the compiler memory error is set). */
static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
{
DEFINE_COMPILER;
sljit_uw *block;

if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

block = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);

if (SLJIT_UNLIKELY(block == NULL))
  {
  sljit_set_compiler_memory_error(compiler);
  return NULL;
  }

/* Link the new block in front of the existing ones. */
*(void**)block = common->read_only_data_head;
common->read_only_data_head = (void *)block;

return &block[1];
}
3305*22dc650dSSadaf Ebrahimi
reset_ovector(compiler_common * common,int length)3306*22dc650dSSadaf Ebrahimi static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
3307*22dc650dSSadaf Ebrahimi {
3308*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
3309*22dc650dSSadaf Ebrahimi struct sljit_label *loop;
3310*22dc650dSSadaf Ebrahimi sljit_s32 i;
3311*22dc650dSSadaf Ebrahimi
3312*22dc650dSSadaf Ebrahimi /* At this point we can freely use all temporary registers. */
3313*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(length > 1);
3314*22dc650dSSadaf Ebrahimi /* TMP1 returns with begin - 1. */
3315*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
3316*22dc650dSSadaf Ebrahimi if (length < 8)
3317*22dc650dSSadaf Ebrahimi {
3318*22dc650dSSadaf Ebrahimi for (i = 1; i < length; i++)
3319*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
3320*22dc650dSSadaf Ebrahimi }
3321*22dc650dSSadaf Ebrahimi else
3322*22dc650dSSadaf Ebrahimi {
3323*22dc650dSSadaf Ebrahimi if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
3324*22dc650dSSadaf Ebrahimi {
3325*22dc650dSSadaf Ebrahimi GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
3326*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
3327*22dc650dSSadaf Ebrahimi loop = LABEL();
3328*22dc650dSSadaf Ebrahimi sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
3329*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
3330*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_NOT_ZERO, loop);
3331*22dc650dSSadaf Ebrahimi }
3332*22dc650dSSadaf Ebrahimi else
3333*22dc650dSSadaf Ebrahimi {
3334*22dc650dSSadaf Ebrahimi GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw));
3335*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
3336*22dc650dSSadaf Ebrahimi loop = LABEL();
3337*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
3338*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
3339*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
3340*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_NOT_ZERO, loop);
3341*22dc650dSSadaf Ebrahimi }
3342*22dc650dSSadaf Ebrahimi }
3343*22dc650dSSadaf Ebrahimi }
3344*22dc650dSSadaf Ebrahimi
reset_early_fail(compiler_common * common)3345*22dc650dSSadaf Ebrahimi static SLJIT_INLINE void reset_early_fail(compiler_common *common)
3346*22dc650dSSadaf Ebrahimi {
3347*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
3348*22dc650dSSadaf Ebrahimi sljit_u32 size = (sljit_u32)(common->early_fail_end_ptr - common->early_fail_start_ptr);
3349*22dc650dSSadaf Ebrahimi sljit_u32 uncleared_size;
3350*22dc650dSSadaf Ebrahimi sljit_s32 src = SLJIT_IMM;
3351*22dc650dSSadaf Ebrahimi sljit_s32 i;
3352*22dc650dSSadaf Ebrahimi struct sljit_label *loop;
3353*22dc650dSSadaf Ebrahimi
3354*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(common->early_fail_start_ptr < common->early_fail_end_ptr);
3355*22dc650dSSadaf Ebrahimi
3356*22dc650dSSadaf Ebrahimi if (size == sizeof(sljit_sw))
3357*22dc650dSSadaf Ebrahimi {
3358*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->early_fail_start_ptr, SLJIT_IMM, 0);
3359*22dc650dSSadaf Ebrahimi return;
3360*22dc650dSSadaf Ebrahimi }
3361*22dc650dSSadaf Ebrahimi
3362*22dc650dSSadaf Ebrahimi if (sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) >= 0 && !sljit_has_cpu_feature(SLJIT_HAS_ZERO_REGISTER))
3363*22dc650dSSadaf Ebrahimi {
3364*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
3365*22dc650dSSadaf Ebrahimi src = TMP3;
3366*22dc650dSSadaf Ebrahimi }
3367*22dc650dSSadaf Ebrahimi
3368*22dc650dSSadaf Ebrahimi if (size <= 6 * sizeof(sljit_sw))
3369*22dc650dSSadaf Ebrahimi {
3370*22dc650dSSadaf Ebrahimi for (i = common->early_fail_start_ptr; i < common->early_fail_end_ptr; i += sizeof(sljit_sw))
3371*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, src, 0);
3372*22dc650dSSadaf Ebrahimi return;
3373*22dc650dSSadaf Ebrahimi }
3374*22dc650dSSadaf Ebrahimi
3375*22dc650dSSadaf Ebrahimi GET_LOCAL_BASE(TMP1, 0, common->early_fail_start_ptr);
3376*22dc650dSSadaf Ebrahimi
3377*22dc650dSSadaf Ebrahimi uncleared_size = ((size / sizeof(sljit_sw)) % 3) * sizeof(sljit_sw);
3378*22dc650dSSadaf Ebrahimi
3379*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, size - uncleared_size);
3380*22dc650dSSadaf Ebrahimi
3381*22dc650dSSadaf Ebrahimi loop = LABEL();
3382*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
3383*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3384*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * SSIZE_OF(sw), src, 0);
3385*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * SSIZE_OF(sw), src, 0);
3386*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_LESS, TMP1, 0, TMP2, 0, loop);
3387*22dc650dSSadaf Ebrahimi
3388*22dc650dSSadaf Ebrahimi if (uncleared_size >= sizeof(sljit_sw))
3389*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
3390*22dc650dSSadaf Ebrahimi
3391*22dc650dSSadaf Ebrahimi if (uncleared_size >= 2 * sizeof(sljit_sw))
3392*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), sizeof(sljit_sw), src, 0);
3393*22dc650dSSadaf Ebrahimi }
3394*22dc650dSSadaf Ebrahimi
do_reset_match(compiler_common * common,int length)3395*22dc650dSSadaf Ebrahimi static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
3396*22dc650dSSadaf Ebrahimi {
3397*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
3398*22dc650dSSadaf Ebrahimi struct sljit_label *loop;
3399*22dc650dSSadaf Ebrahimi int i;
3400*22dc650dSSadaf Ebrahimi
3401*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(length > 1);
3402*22dc650dSSadaf Ebrahimi /* OVECTOR(1) contains the "string begin - 1" constant. */
3403*22dc650dSSadaf Ebrahimi if (length > 2)
3404*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
3405*22dc650dSSadaf Ebrahimi if (length < 8)
3406*22dc650dSSadaf Ebrahimi {
3407*22dc650dSSadaf Ebrahimi for (i = 2; i < length; i++)
3408*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
3409*22dc650dSSadaf Ebrahimi }
3410*22dc650dSSadaf Ebrahimi else
3411*22dc650dSSadaf Ebrahimi {
3412*22dc650dSSadaf Ebrahimi if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
3413*22dc650dSSadaf Ebrahimi {
3414*22dc650dSSadaf Ebrahimi GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
3415*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
3416*22dc650dSSadaf Ebrahimi loop = LABEL();
3417*22dc650dSSadaf Ebrahimi sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
3418*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
3419*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_NOT_ZERO, loop);
3420*22dc650dSSadaf Ebrahimi }
3421*22dc650dSSadaf Ebrahimi else
3422*22dc650dSSadaf Ebrahimi {
3423*22dc650dSSadaf Ebrahimi GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));
3424*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
3425*22dc650dSSadaf Ebrahimi loop = LABEL();
3426*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
3427*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
3428*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
3429*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_NOT_ZERO, loop);
3430*22dc650dSSadaf Ebrahimi }
3431*22dc650dSSadaf Ebrahimi }
3432*22dc650dSSadaf Ebrahimi
3433*22dc650dSSadaf Ebrahimi if (!HAS_VIRTUAL_REGISTERS)
3434*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, stack));
3435*22dc650dSSadaf Ebrahimi else
3436*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
3437*22dc650dSSadaf Ebrahimi
3438*22dc650dSSadaf Ebrahimi if (common->mark_ptr != 0)
3439*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
3440*22dc650dSSadaf Ebrahimi if (common->control_head_ptr != 0)
3441*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
3442*22dc650dSSadaf Ebrahimi if (HAS_VIRTUAL_REGISTERS)
3443*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
3444*22dc650dSSadaf Ebrahimi
3445*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3446*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
3447*22dc650dSSadaf Ebrahimi }
3448*22dc650dSSadaf Ebrahimi
do_search_mark(sljit_sw * current,PCRE2_SPTR skip_arg)3449*22dc650dSSadaf Ebrahimi static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg)
3450*22dc650dSSadaf Ebrahimi {
3451*22dc650dSSadaf Ebrahimi while (current != NULL)
3452*22dc650dSSadaf Ebrahimi {
3453*22dc650dSSadaf Ebrahimi switch (current[1])
3454*22dc650dSSadaf Ebrahimi {
3455*22dc650dSSadaf Ebrahimi case type_then_trap:
3456*22dc650dSSadaf Ebrahimi break;
3457*22dc650dSSadaf Ebrahimi
3458*22dc650dSSadaf Ebrahimi case type_mark:
3459*22dc650dSSadaf Ebrahimi if (PRIV(strcmp)(skip_arg, (PCRE2_SPTR)current[2]) == 0)
3460*22dc650dSSadaf Ebrahimi return current[3];
3461*22dc650dSSadaf Ebrahimi break;
3462*22dc650dSSadaf Ebrahimi
3463*22dc650dSSadaf Ebrahimi default:
3464*22dc650dSSadaf Ebrahimi SLJIT_UNREACHABLE();
3465*22dc650dSSadaf Ebrahimi break;
3466*22dc650dSSadaf Ebrahimi }
3467*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);
3468*22dc650dSSadaf Ebrahimi current = (sljit_sw*)current[0];
3469*22dc650dSSadaf Ebrahimi }
3470*22dc650dSSadaf Ebrahimi return 0;
3471*22dc650dSSadaf Ebrahimi }
3472*22dc650dSSadaf Ebrahimi
copy_ovector(compiler_common * common,int topbracket)3473*22dc650dSSadaf Ebrahimi static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
3474*22dc650dSSadaf Ebrahimi {
3475*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
3476*22dc650dSSadaf Ebrahimi struct sljit_label *loop;
3477*22dc650dSSadaf Ebrahimi BOOL has_pre;
3478*22dc650dSSadaf Ebrahimi
3479*22dc650dSSadaf Ebrahimi /* At this point we can freely use all registers. */
3480*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
3481*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
3482*22dc650dSSadaf Ebrahimi
3483*22dc650dSSadaf Ebrahimi if (HAS_VIRTUAL_REGISTERS)
3484*22dc650dSSadaf Ebrahimi {
3485*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
3486*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3487*22dc650dSSadaf Ebrahimi if (common->mark_ptr != 0)
3488*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
3489*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount));
3490*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
3491*22dc650dSSadaf Ebrahimi if (common->mark_ptr != 0)
3492*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
3493*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data),
3494*22dc650dSSadaf Ebrahimi SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
3495*22dc650dSSadaf Ebrahimi }
3496*22dc650dSSadaf Ebrahimi else
3497*22dc650dSSadaf Ebrahimi {
3498*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3499*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, match_data));
3500*22dc650dSSadaf Ebrahimi if (common->mark_ptr != 0)
3501*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
3502*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, oveccount));
3503*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
3504*22dc650dSSadaf Ebrahimi if (common->mark_ptr != 0)
3505*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R0, 0);
3506*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
3507*22dc650dSSadaf Ebrahimi }
3508*22dc650dSSadaf Ebrahimi
3509*22dc650dSSadaf Ebrahimi has_pre = sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;
3510*22dc650dSSadaf Ebrahimi
3511*22dc650dSSadaf Ebrahimi GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));
3512*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
3513*22dc650dSSadaf Ebrahimi
3514*22dc650dSSadaf Ebrahimi loop = LABEL();
3515*22dc650dSSadaf Ebrahimi
3516*22dc650dSSadaf Ebrahimi if (has_pre)
3517*22dc650dSSadaf Ebrahimi sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
3518*22dc650dSSadaf Ebrahimi else
3519*22dc650dSSadaf Ebrahimi {
3520*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
3521*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
3522*22dc650dSSadaf Ebrahimi }
3523*22dc650dSSadaf Ebrahimi
3524*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(PCRE2_SIZE));
3525*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
3526*22dc650dSSadaf Ebrahimi /* Copy the integer value to the output buffer */
3527*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3528*22dc650dSSadaf Ebrahimi OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
3529*22dc650dSSadaf Ebrahimi #endif
3530*22dc650dSSadaf Ebrahimi
3531*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(sizeof(PCRE2_SIZE) == 4 || sizeof(PCRE2_SIZE) == 8);
3532*22dc650dSSadaf Ebrahimi OP1(((sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV), SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);
3533*22dc650dSSadaf Ebrahimi
3534*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
3535*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_NOT_ZERO, loop);
3536*22dc650dSSadaf Ebrahimi
3537*22dc650dSSadaf Ebrahimi /* Calculate the return value, which is the maximum ovector value. */
3538*22dc650dSSadaf Ebrahimi if (topbracket > 1)
3539*22dc650dSSadaf Ebrahimi {
3540*22dc650dSSadaf Ebrahimi if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw))) == SLJIT_SUCCESS)
3541*22dc650dSSadaf Ebrahimi {
3542*22dc650dSSadaf Ebrahimi GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
3543*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
3544*22dc650dSSadaf Ebrahimi
3545*22dc650dSSadaf Ebrahimi /* OVECTOR(0) is never equal to SLJIT_S2. */
3546*22dc650dSSadaf Ebrahimi loop = LABEL();
3547*22dc650dSSadaf Ebrahimi sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw)));
3548*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
3549*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
3550*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
3551*22dc650dSSadaf Ebrahimi }
3552*22dc650dSSadaf Ebrahimi else
3553*22dc650dSSadaf Ebrahimi {
3554*22dc650dSSadaf Ebrahimi GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));
3555*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
3556*22dc650dSSadaf Ebrahimi
3557*22dc650dSSadaf Ebrahimi /* OVECTOR(0) is never equal to SLJIT_S2. */
3558*22dc650dSSadaf Ebrahimi loop = LABEL();
3559*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
3560*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
3561*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
3562*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
3563*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
3564*22dc650dSSadaf Ebrahimi }
3565*22dc650dSSadaf Ebrahimi }
3566*22dc650dSSadaf Ebrahimi else
3567*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
3568*22dc650dSSadaf Ebrahimi }
3569*22dc650dSSadaf Ebrahimi
return_with_partial_match(compiler_common * common,struct sljit_label * quit)3570*22dc650dSSadaf Ebrahimi static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
3571*22dc650dSSadaf Ebrahimi {
3572*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
3573*22dc650dSSadaf Ebrahimi sljit_s32 mov_opcode;
3574*22dc650dSSadaf Ebrahimi sljit_s32 arguments_reg = !HAS_VIRTUAL_REGISTERS ? ARGUMENTS : SLJIT_R1;
3575*22dc650dSSadaf Ebrahimi
3576*22dc650dSSadaf Ebrahimi SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S0, str_end_must_be_saved_reg0);
3577*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
3578*22dc650dSSadaf Ebrahimi && (common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start != 0 : common->hit_start == 0));
3579*22dc650dSSadaf Ebrahimi
3580*22dc650dSSadaf Ebrahimi if (arguments_reg != ARGUMENTS)
3581*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, arguments_reg, 0, ARGUMENTS, 0);
3582*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP),
3583*22dc650dSSadaf Ebrahimi common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start : common->start_ptr);
3584*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_PARTIAL);
3585*22dc650dSSadaf Ebrahimi
3586*22dc650dSSadaf Ebrahimi /* Store match begin and end. */
3587*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, begin));
3588*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0);
3589*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, match_data));
3590*22dc650dSSadaf Ebrahimi
3591*22dc650dSSadaf Ebrahimi mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
3592*22dc650dSSadaf Ebrahimi
3593*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S1, 0);
3594*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3595*22dc650dSSadaf Ebrahimi OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
3596*22dc650dSSadaf Ebrahimi #endif
3597*22dc650dSSadaf Ebrahimi OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector), SLJIT_R2, 0);
3598*22dc650dSSadaf Ebrahimi
3599*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_S1, 0);
3600*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3601*22dc650dSSadaf Ebrahimi OP2(SLJIT_ASHR, STR_END, 0, STR_END, 0, SLJIT_IMM, UCHAR_SHIFT);
3602*22dc650dSSadaf Ebrahimi #endif
3603*22dc650dSSadaf Ebrahimi OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector) + sizeof(PCRE2_SIZE), STR_END, 0);
3604*22dc650dSSadaf Ebrahimi
3605*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_JUMP, quit);
3606*22dc650dSSadaf Ebrahimi }
3607*22dc650dSSadaf Ebrahimi
check_start_used_ptr(compiler_common * common)3608*22dc650dSSadaf Ebrahimi static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
3609*22dc650dSSadaf Ebrahimi {
3610*22dc650dSSadaf Ebrahimi /* May destroy TMP1. */
3611*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
3612*22dc650dSSadaf Ebrahimi struct sljit_jump *jump;
3613*22dc650dSSadaf Ebrahimi
3614*22dc650dSSadaf Ebrahimi if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
3615*22dc650dSSadaf Ebrahimi {
3616*22dc650dSSadaf Ebrahimi /* The value of -1 must be kept for start_used_ptr! */
3617*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
3618*22dc650dSSadaf Ebrahimi /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
3619*22dc650dSSadaf Ebrahimi is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
3620*22dc650dSSadaf Ebrahimi jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
3621*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3622*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
3623*22dc650dSSadaf Ebrahimi }
3624*22dc650dSSadaf Ebrahimi else if (common->mode == PCRE2_JIT_PARTIAL_HARD)
3625*22dc650dSSadaf Ebrahimi {
3626*22dc650dSSadaf Ebrahimi jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3627*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3628*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
3629*22dc650dSSadaf Ebrahimi }
3630*22dc650dSSadaf Ebrahimi }
3631*22dc650dSSadaf Ebrahimi
char_has_othercase(compiler_common * common,PCRE2_SPTR cc)3632*22dc650dSSadaf Ebrahimi static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR cc)
3633*22dc650dSSadaf Ebrahimi {
3634*22dc650dSSadaf Ebrahimi /* Detects if the character has an othercase. */
3635*22dc650dSSadaf Ebrahimi unsigned int c;
3636*22dc650dSSadaf Ebrahimi
3637*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
3638*22dc650dSSadaf Ebrahimi if (common->utf || common->ucp)
3639*22dc650dSSadaf Ebrahimi {
3640*22dc650dSSadaf Ebrahimi if (common->utf)
3641*22dc650dSSadaf Ebrahimi {
3642*22dc650dSSadaf Ebrahimi GETCHAR(c, cc);
3643*22dc650dSSadaf Ebrahimi }
3644*22dc650dSSadaf Ebrahimi else
3645*22dc650dSSadaf Ebrahimi c = *cc;
3646*22dc650dSSadaf Ebrahimi
3647*22dc650dSSadaf Ebrahimi if (c > 127)
3648*22dc650dSSadaf Ebrahimi return c != UCD_OTHERCASE(c);
3649*22dc650dSSadaf Ebrahimi
3650*22dc650dSSadaf Ebrahimi return common->fcc[c] != c;
3651*22dc650dSSadaf Ebrahimi }
3652*22dc650dSSadaf Ebrahimi else
3653*22dc650dSSadaf Ebrahimi #endif
3654*22dc650dSSadaf Ebrahimi c = *cc;
3655*22dc650dSSadaf Ebrahimi return MAX_255(c) ? common->fcc[c] != c : FALSE;
3656*22dc650dSSadaf Ebrahimi }
3657*22dc650dSSadaf Ebrahimi
char_othercase(compiler_common * common,unsigned int c)3658*22dc650dSSadaf Ebrahimi static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
3659*22dc650dSSadaf Ebrahimi {
3660*22dc650dSSadaf Ebrahimi /* Returns with the othercase. */
3661*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
3662*22dc650dSSadaf Ebrahimi if ((common->utf || common->ucp) && c > 127)
3663*22dc650dSSadaf Ebrahimi return UCD_OTHERCASE(c);
3664*22dc650dSSadaf Ebrahimi #endif
3665*22dc650dSSadaf Ebrahimi return TABLE_GET(c, common->fcc, c);
3666*22dc650dSSadaf Ebrahimi }
3667*22dc650dSSadaf Ebrahimi
static unsigned int char_get_othercase_bit(compiler_common *common, PCRE2_SPTR cc)
{
/* Checks whether the character at cc and its other case differ in exactly
one bit. Returns 0 if they do not. Otherwise the low 8 bits of the result
hold the differing bit as a mask, and the bits above them hold a position
value (0 when the bit is in the first unit) selecting where that mask
applies inside the encoded character. */
unsigned int c, oc, bit;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
int n;
#endif

#ifdef SUPPORT_UNICODE
if (common->utf || common->ucp)
  {
  if (!common->utf)
    c = *cc;
  else
    {
    GETCHAR(c, cc);
    }

  oc = (c > 127) ? UCD_OTHERCASE(c) : common->fcc[c];
  }
else
#endif
  {
  c = *cc;
  oc = TABLE_GET(c, common->fcc, c);
  }

SLJIT_ASSERT(c != oc);

bit = c ^ oc;
/* Fast path for the English alphabet. */
if (c <= 127 && bit == 0x20)
  return 0x20;

/* c != oc guarantees at least one differing bit; reject more than one. */
if (!is_powerof2(bit))
  return 0;

#if PCRE2_CODE_UNIT_WIDTH == 8

#ifdef SUPPORT_UNICODE
if (common->utf && c > 127)
  {
  /* Start from the last code unit and step towards the first one, six
  payload bits at a time, until the unit holding the bit is reached. */
  for (n = GET_EXTRALEN(*cc); (bit & 0x3f) == 0; n--)
    bit >>= 6;
  return (n << 8) | bit;
  }
#endif /* SUPPORT_UNICODE */
return bit;

#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32

#ifdef SUPPORT_UNICODE
if (common->utf && c > 65535)
  {
  /* A difference in the low ten bits is reported with position 2 or 3;
  a higher one is shifted down and handled by the generic return below. */
  if (bit < (1u << 10))
    return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
  bit >>= 10;
  }
#endif /* SUPPORT_UNICODE */
return (bit < 256) ? bit : ((1u << 8) | (bit >> 8));

#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
}
3743*22dc650dSSadaf Ebrahimi
static void check_partial(compiler_common *common, BOOL force)
{
/* Emits the check for a partial match at the current position. Nothing is
emitted in complete mode. In soft partial mode a hit stores 0 into the
hit_start slot; in hard partial mode a hit jumps to the partial match
handler. Unless force is set (or empty partial matches are allowed) the hit
is skipped when start_used_ptr >= STR_PTR. Does not modify registers. */
DEFINE_COMPILER;
struct sljit_jump *skip = NULL;
const int soft_partial = (common->mode == PCRE2_JIT_PARTIAL_SOFT);

SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE);

if (common->mode == PCRE2_JIT_COMPLETE)
  return;

if (force || common->allow_empty_partial)
  {
  /* Only the "start_used_ptr is still -1" case is filtered out, and only
  in soft mode. */
  if (soft_partial)
    skip = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
  }
else
  skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);

if (soft_partial)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
else if (common->partialmatchlabel == NULL)
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
else
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);

if (skip != NULL)
  JUMPHERE(skip);
}
3773*22dc650dSSadaf Ebrahimi
static void check_str_end(compiler_common *common, jump_list **end_reached)
{
/* Emits the end of subject test. Jumps collected in end_reached are taken
when STR_PTR >= STR_END, except that in hard partial mode the partial match
handler is entered instead whenever start_used_ptr < STR_PTR. In soft
partial mode that same condition stores 0 into the hit_start slot before
jumping to end_reached. Does not affect registers. */
DEFINE_COMPILER;
struct sljit_jump *not_at_end;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

/* Both partial modes treat start_used_ptr >= STR_PTR as a plain end. */
add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel == NULL)
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
else
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);

JUMPHERE(not_at_end);
}
3803*22dc650dSSadaf Ebrahimi
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
{
/* Emits the end of subject test for a matching path. In complete mode the
code backtracks when STR_PTR >= STR_END. In the partial modes reaching the
end either backtracks (possibly after storing 0 into the hit_start slot in
soft mode) or enters the partial match handler (hard mode). */
DEFINE_COMPILER;
struct sljit_jump *not_at_end;
const int soft_partial = (common->mode == PCRE2_JIT_PARTIAL_SOFT);

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

/* Partial matching mode. */
not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

if (common->allow_empty_partial)
  {
  /* Only an untouched (-1) start_used_ptr is filtered out in soft mode. */
  if (soft_partial)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1));
  }
else
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (soft_partial)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel == NULL)
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
else
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);

JUMPHERE(not_at_end);
}
3836*22dc650dSSadaf Ebrahimi
static void process_partial_match(compiler_common *common)
{
/* Emits the partial match bookkeeping without any end of subject test. In
soft partial mode 0 is stored into the hit_start slot unless
start_used_ptr >= STR_PTR. In hard partial mode the partial match handler
is entered when start_used_ptr < STR_PTR. Nothing is emitted otherwise. */
DEFINE_COMPILER;
struct sljit_jump *skip;

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  JUMPHERE(skip);
  return;
  }

if (common->mode != PCRE2_JIT_PARTIAL_HARD)
  return;

if (common->partialmatchlabel == NULL)
  add_jump(compiler, &common->partialmatch, CMP(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
else
  CMPTO(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0, common->partialmatchlabel);
}
3857*22dc650dSSadaf Ebrahimi
static void detect_partial_match_to(compiler_common *common, struct sljit_label *label)
{
/* Emits a jump to label while STR_PTR < STR_END. The code that follows the
jump is therefore reached only at the end of the subject, where the partial
match bookkeeping of process_partial_match() is emitted. */
DEFINE_COMPILER;

/* Not at the end yet: continue at label. */
CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, label);

/* End of subject reached. */
process_partial_match(common);
}
3865*22dc650dSSadaf Ebrahimi
static void peek_char(compiler_common *common, sljit_u32 max, sljit_s32 dst, sljit_sw dstw, jump_list **backtracks)
{
/* Emits code that reads the character at STR_PTR into TMP1 and leaves
STR_PTR unchanged. STR_END is not checked. When max is below the first
value that needs further processing (128 for UTF-8, 0xd800 otherwise) only
the raw code unit is loaded. dst/dstw is used as temporary storage for
STR_PTR while a decoder helper runs. When invalid UTF checking is active
and backtracks is not NULL, an invalid character jumps to backtracks.
TMP2, dst and RETURN_ADDR are destroyed. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *done;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(dst);
SLJIT_UNUSED_ARG(dstw);
SLJIT_UNUSED_ARG(backtracks);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (!common->utf || max < 128)
  return;

/* Single byte characters are complete already. */
done = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);

/* Save STR_PTR, step over the first byte, call the decoder helper, and
restore STR_PTR afterwards. */
OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
if (common->invalid_utf)
  add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
else
  add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);

if (common->invalid_utf && backtracks != NULL)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
JUMPHERE(done);
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (!common->utf || max < 0xd800)
  return;

OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

if (!common->invalid_utf)
  {
  /* Valid input: anything outside 0xd800-0xdbff is a complete character.
  Otherwise TMP2 contains the high surrogate, which is combined with the
  following code unit. */
  done = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  }
else
  {
  /* Any code unit in 0xd800-0xdfff goes through the checking helper, with
  STR_PTR saved in dst around the call. */
  done = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
  if (backtracks != NULL)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  }

JUMPHERE(done);
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (!common->invalid_utf || max < 0xd800)
  return;

/* TMP2 is used for the 0xd800-0xdfff range test. */
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

if (backtracks != NULL)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
  }
else
  {
  /* No backtrack list: replace an invalid value by INVALID_UTF_CHAR. */
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
3949*22dc650dSSadaf Ebrahimi
static void peek_char_back(compiler_common *common, sljit_u32 max, jump_list **backtracks)
{
/* Emits code that reads the character which ends right before STR_PTR into
TMP1, without moving STR_PTR. TMP2 must contain the start of the subject
buffer. In the UTF-8 and UTF-16 paths, when max is below the first value
that needs further processing (128 for UTF-8, 0xd800 for UTF-16) only the
raw code unit is loaded. With invalid UTF checking, an invalid character
jumps to backtracks if it is not NULL; with a NULL backtracks the helpers
(8/16 bit) or the inline check (32 bit) leave the result in TMP1 for the
caller to inspect. Affects TMP1, TMP2, and RETURN_ADDR. */
DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(backtracks);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  if (max < 128) return;

  /* Single byte characters are complete already. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
  if (common->invalid_utf)
    {
    add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  else
    add_jump(compiler, &common->utfpeakcharback, JUMP(SLJIT_FAST_CALL));
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  if (max < 0xd800) return;

  if (common->invalid_utf)
    {
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
    add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  else
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xdc00);
    /* TMP2 contains the low surrogate. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
    }
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf)
  {
  /* TMP2 is used for the 0xd800-0xdfff range test. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  if (backtracks != NULL)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
    }
  else
    {
    /* A NULL list must not be passed to add_jump (the other code unit
    widths guard against it as well). Replace an invalid value by
    INVALID_UTF_CHAR instead, the same way peek_char does. */
    OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);
    SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
    SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
    }
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
4017*22dc650dSSadaf Ebrahimi
/* Option bits accepted by the options argument of read_char(). */

/* STR_PTR must be moved past the whole character, so read_char() cannot
return early even when the requested range needs no decoding. */
#define READ_CHAR_UPDATE_STR_PTR 0x1
/* With invalid UTF checking in UTF-8 mode, read_char() calls the
utfreadnewline_invalid helper instead of utfreadchar_invalid. */
#define READ_CHAR_UTF8_NEWLINE 0x2
/* Both of the above: the combination used when reading newlines. */
#define READ_CHAR_NEWLINE (READ_CHAR_UPDATE_STR_PTR | READ_CHAR_UTF8_NEWLINE)
/* Skip the invalid UTF checking path of read_char() even when
common->invalid_utf is set (presumably because the caller knows the
input is valid at this point). */
#define READ_CHAR_VALID_UTF 0x4
4022*22dc650dSSadaf Ebrahimi
read_char(compiler_common * common,sljit_u32 min,sljit_u32 max,jump_list ** backtracks,sljit_u32 options)4023*22dc650dSSadaf Ebrahimi static void read_char(compiler_common *common, sljit_u32 min, sljit_u32 max,
4024*22dc650dSSadaf Ebrahimi jump_list **backtracks, sljit_u32 options)
4025*22dc650dSSadaf Ebrahimi {
4026*22dc650dSSadaf Ebrahimi /* Reads the precise value of a character into TMP1, if the character is
4027*22dc650dSSadaf Ebrahimi between min and max (c >= min && c <= max). Otherwise it returns with a value
4028*22dc650dSSadaf Ebrahimi outside the range. Does not check STR_END. */
4029*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
4030*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
4031*22dc650dSSadaf Ebrahimi struct sljit_jump *jump;
4032*22dc650dSSadaf Ebrahimi #endif
4033*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4034*22dc650dSSadaf Ebrahimi struct sljit_jump *jump2;
4035*22dc650dSSadaf Ebrahimi #endif
4036*22dc650dSSadaf Ebrahimi
4037*22dc650dSSadaf Ebrahimi SLJIT_UNUSED_ARG(min);
4038*22dc650dSSadaf Ebrahimi SLJIT_UNUSED_ARG(max);
4039*22dc650dSSadaf Ebrahimi SLJIT_UNUSED_ARG(backtracks);
4040*22dc650dSSadaf Ebrahimi SLJIT_UNUSED_ARG(options);
4041*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(min <= max);
4042*22dc650dSSadaf Ebrahimi
4043*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4044*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4045*22dc650dSSadaf Ebrahimi
4046*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
4047*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8
4048*22dc650dSSadaf Ebrahimi if (common->utf)
4049*22dc650dSSadaf Ebrahimi {
4050*22dc650dSSadaf Ebrahimi if (max < 128 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;
4051*22dc650dSSadaf Ebrahimi
4052*22dc650dSSadaf Ebrahimi if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
4053*22dc650dSSadaf Ebrahimi {
4054*22dc650dSSadaf Ebrahimi jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
4055*22dc650dSSadaf Ebrahimi
4056*22dc650dSSadaf Ebrahimi if (options & READ_CHAR_UTF8_NEWLINE)
4057*22dc650dSSadaf Ebrahimi add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
4058*22dc650dSSadaf Ebrahimi else
4059*22dc650dSSadaf Ebrahimi add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
4060*22dc650dSSadaf Ebrahimi
4061*22dc650dSSadaf Ebrahimi if (backtracks != NULL)
4062*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
4063*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
4064*22dc650dSSadaf Ebrahimi return;
4065*22dc650dSSadaf Ebrahimi }
4066*22dc650dSSadaf Ebrahimi
4067*22dc650dSSadaf Ebrahimi jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4068*22dc650dSSadaf Ebrahimi if (min >= 0x10000)
4069*22dc650dSSadaf Ebrahimi {
4070*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
4071*22dc650dSSadaf Ebrahimi if (options & READ_CHAR_UPDATE_STR_PTR)
4072*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4073*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4074*22dc650dSSadaf Ebrahimi jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
4075*22dc650dSSadaf Ebrahimi OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
4076*22dc650dSSadaf Ebrahimi OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
4077*22dc650dSSadaf Ebrahimi OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4078*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4079*22dc650dSSadaf Ebrahimi OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4080*22dc650dSSadaf Ebrahimi OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4081*22dc650dSSadaf Ebrahimi OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4082*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
4083*22dc650dSSadaf Ebrahimi if (!(options & READ_CHAR_UPDATE_STR_PTR))
4084*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
4085*22dc650dSSadaf Ebrahimi OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4086*22dc650dSSadaf Ebrahimi OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4087*22dc650dSSadaf Ebrahimi OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4088*22dc650dSSadaf Ebrahimi JUMPHERE(jump2);
4089*22dc650dSSadaf Ebrahimi if (options & READ_CHAR_UPDATE_STR_PTR)
4090*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
4091*22dc650dSSadaf Ebrahimi }
4092*22dc650dSSadaf Ebrahimi else if (min >= 0x800 && max <= 0xffff)
4093*22dc650dSSadaf Ebrahimi {
4094*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
4095*22dc650dSSadaf Ebrahimi if (options & READ_CHAR_UPDATE_STR_PTR)
4096*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4097*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4098*22dc650dSSadaf Ebrahimi jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
4099*22dc650dSSadaf Ebrahimi OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
4100*22dc650dSSadaf Ebrahimi OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
4101*22dc650dSSadaf Ebrahimi OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4102*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4103*22dc650dSSadaf Ebrahimi if (!(options & READ_CHAR_UPDATE_STR_PTR))
4104*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4105*22dc650dSSadaf Ebrahimi OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4106*22dc650dSSadaf Ebrahimi OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4107*22dc650dSSadaf Ebrahimi OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4108*22dc650dSSadaf Ebrahimi JUMPHERE(jump2);
4109*22dc650dSSadaf Ebrahimi if (options & READ_CHAR_UPDATE_STR_PTR)
4110*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
4111*22dc650dSSadaf Ebrahimi }
4112*22dc650dSSadaf Ebrahimi else if (max >= 0x800)
4113*22dc650dSSadaf Ebrahimi {
4114*22dc650dSSadaf Ebrahimi add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
4115*22dc650dSSadaf Ebrahimi }
4116*22dc650dSSadaf Ebrahimi else if (max < 128)
4117*22dc650dSSadaf Ebrahimi {
4118*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4119*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4120*22dc650dSSadaf Ebrahimi }
4121*22dc650dSSadaf Ebrahimi else
4122*22dc650dSSadaf Ebrahimi {
4123*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4124*22dc650dSSadaf Ebrahimi if (!(options & READ_CHAR_UPDATE_STR_PTR))
4125*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4126*22dc650dSSadaf Ebrahimi else
4127*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4128*22dc650dSSadaf Ebrahimi OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
4129*22dc650dSSadaf Ebrahimi OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4130*22dc650dSSadaf Ebrahimi OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4131*22dc650dSSadaf Ebrahimi OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4132*22dc650dSSadaf Ebrahimi if (options & READ_CHAR_UPDATE_STR_PTR)
4133*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
4134*22dc650dSSadaf Ebrahimi }
4135*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
4136*22dc650dSSadaf Ebrahimi }
4137*22dc650dSSadaf Ebrahimi #elif PCRE2_CODE_UNIT_WIDTH == 16
4138*22dc650dSSadaf Ebrahimi if (common->utf)
4139*22dc650dSSadaf Ebrahimi {
4140*22dc650dSSadaf Ebrahimi if (max < 0xd800 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;
4141*22dc650dSSadaf Ebrahimi
4142*22dc650dSSadaf Ebrahimi if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
4143*22dc650dSSadaf Ebrahimi {
4144*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4145*22dc650dSSadaf Ebrahimi jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
4146*22dc650dSSadaf Ebrahimi
4147*22dc650dSSadaf Ebrahimi if (options & READ_CHAR_UTF8_NEWLINE)
4148*22dc650dSSadaf Ebrahimi add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
4149*22dc650dSSadaf Ebrahimi else
4150*22dc650dSSadaf Ebrahimi add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
4151*22dc650dSSadaf Ebrahimi
4152*22dc650dSSadaf Ebrahimi if (backtracks != NULL)
4153*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
4154*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
4155*22dc650dSSadaf Ebrahimi return;
4156*22dc650dSSadaf Ebrahimi }
4157*22dc650dSSadaf Ebrahimi
4158*22dc650dSSadaf Ebrahimi if (max >= 0x10000)
4159*22dc650dSSadaf Ebrahimi {
4160*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4161*22dc650dSSadaf Ebrahimi jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
4162*22dc650dSSadaf Ebrahimi /* TMP2 contains the high surrogate. */
4163*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4164*22dc650dSSadaf Ebrahimi OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
4165*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4166*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
4167*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
4168*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
4169*22dc650dSSadaf Ebrahimi return;
4170*22dc650dSSadaf Ebrahimi }
4171*22dc650dSSadaf Ebrahimi
4172*22dc650dSSadaf Ebrahimi /* Skip low surrogate if necessary. */
4173*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4174*22dc650dSSadaf Ebrahimi
4175*22dc650dSSadaf Ebrahimi if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
4176*22dc650dSSadaf Ebrahimi {
4177*22dc650dSSadaf Ebrahimi if (options & READ_CHAR_UPDATE_STR_PTR)
4178*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4179*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
4180*22dc650dSSadaf Ebrahimi if (options & READ_CHAR_UPDATE_STR_PTR)
4181*22dc650dSSadaf Ebrahimi SELECT(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0, STR_PTR);
4182*22dc650dSSadaf Ebrahimi if (max >= 0xd800)
4183*22dc650dSSadaf Ebrahimi SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, 0x10000, TMP1);
4184*22dc650dSSadaf Ebrahimi }
4185*22dc650dSSadaf Ebrahimi else
4186*22dc650dSSadaf Ebrahimi {
4187*22dc650dSSadaf Ebrahimi jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
4188*22dc650dSSadaf Ebrahimi if (options & READ_CHAR_UPDATE_STR_PTR)
4189*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4190*22dc650dSSadaf Ebrahimi if (max >= 0xd800)
4191*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
4192*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
4193*22dc650dSSadaf Ebrahimi }
4194*22dc650dSSadaf Ebrahimi }
4195*22dc650dSSadaf Ebrahimi #elif PCRE2_CODE_UNIT_WIDTH == 32
4196*22dc650dSSadaf Ebrahimi if (common->invalid_utf)
4197*22dc650dSSadaf Ebrahimi {
4198*22dc650dSSadaf Ebrahimi if (backtracks != NULL)
4199*22dc650dSSadaf Ebrahimi {
4200*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4201*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
4202*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
4203*22dc650dSSadaf Ebrahimi }
4204*22dc650dSSadaf Ebrahimi else
4205*22dc650dSSadaf Ebrahimi {
4206*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4207*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);
4208*22dc650dSSadaf Ebrahimi SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
4209*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
4210*22dc650dSSadaf Ebrahimi SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
4211*22dc650dSSadaf Ebrahimi }
4212*22dc650dSSadaf Ebrahimi }
4213*22dc650dSSadaf Ebrahimi #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
4214*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE */
4215*22dc650dSSadaf Ebrahimi }
4216*22dc650dSSadaf Ebrahimi
static void skip_valid_char(compiler_common *common)
{
/* Emits code that moves STR_PTR forward over one character. In UTF-8 and
UTF-16 mode the first code unit is loaded into TMP1 (TMP1 is overwritten) and
the remaining code units of the character are skipped as well; the emitted
code performs no validity check on the sequence. In every other configuration
STR_PTR is simply advanced by one code unit. No STR_END check is emitted. */
DEFINE_COMPILER;
#if (defined SUPPORT_UNICODE) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
struct sljit_jump *jump;
#endif

#if (defined SUPPORT_UNICODE) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if PCRE2_CODE_UNIT_WIDTH == 8
  /* A first byte below 0xc0 needs no further skipping. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  /* Load utf8_table4[first byte - 0xc0] and add it to STR_PTR (presumably the
  number of additional bytes of the sequence - confirm against the table). */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#elif PCRE2_CODE_UNIT_WIDTH == 16
  /* Code units below 0xd800 need no further skipping. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
  /* TMP1 becomes 1 when the code unit is in the 0xd800-0xdbff range (a high
  surrogate) and 0 otherwise. Shifted left by one, it is added to STR_PTR so
  that the code unit following a high surrogate is skipped too. */
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xd800);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
4247*22dc650dSSadaf Ebrahimi
4248*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4249*22dc650dSSadaf Ebrahimi
static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
{
/* Tells whether the character codes below 128 are enough
to determine a match.

Arguments:
  bitset   start of a 32 byte long bit set; only bytes 16..31 are inspected
  nclass   selects the value every inspected byte must have:
           0xff when non-zero, 0 otherwise

Returns:   TRUE when all 16 inspected bytes have the expected value,
           FALSE as soon as one of them differs */
const sljit_u8 value = nclass ? 0xff : 0;
const sljit_u8 *end = bitset + 32;

/* Skip the first half of the set (presumably the bits of codes 0..127). */
bitset += 16;
do
  {
  if (*bitset++ != value)
    return FALSE;
  }
while (bitset < end);
return TRUE;
}
4266*22dc650dSSadaf Ebrahimi
static void read_char7_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Reads the precise character type of a character into TMP1, if the character
is less than 128. Otherwise it returns with zero. Does not check STR_END.

The first code unit is loaded into TMP2 and STR_PTR is advanced past it. The
negated argument selects what is emitted for code units of 0x80 and above:

  FALSE  nothing more is emitted, STR_PTR stays right after the first code unit
  TRUE   the rest of the character is consumed as well. When invalid UTF
         support is enabled the character is decoded by the utfreadchar_invalid
         helper, a jump is added to backtracks for INVALID_UTF_CHAR, and TMP1
         is set to zero afterwards. Otherwise the utf8_table4 entry of the
         first byte is added to STR_PTR.

Only used in UTF mode (asserted below). */
DEFINE_COMPILER;
struct sljit_jump *jump;

SLJIT_ASSERT(common->utf);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* All values > 127 are zero in ctypes. */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);

if (negated)
  {
  /* Single byte characters are complete, skip the multi-byte handling. */
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

  if (common->invalid_utf)
    {
    /* The helper takes the first byte in TMP1. */
    OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    /* TMP1 held the decoded character, the type to return is zero. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    }
  else
    {
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
    }
  JUMPHERE(jump);
  }
}
4302*22dc650dSSadaf Ebrahimi
4303*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
4304*22dc650dSSadaf Ebrahimi
static void read_char8_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END.

The first code unit is loaded into TMP2 (TMP2 is destroyed later on some
paths). The type comes from common->ctypes for values up to 255 and is zero
for anything above. The backtracks list only receives jumps when invalid UTF
support is enabled. The negated argument selects how characters that are
longer than one code unit are handled:

  UTF-8,  !negated  exactly one more byte is read and combined with the first
                    one as a two byte sequence; with invalid UTF support the
                    first byte must be in the 0xc2-0xdf range and the second
                    one in the 0x80-0xbf range, otherwise a backtrack is taken
  UTF-8,  negated   the whole character is consumed (utfreadchar_invalid with
                    invalid UTF support, utfreadtype8 otherwise)
  UTF-16, negated   the second half of a surrogate pair is skipped; with
                    invalid UTF support the pair is validated as well
  UTF-32, negated   with invalid UTF support values >= 0x110000 backtrack */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(negated);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  /* The result of this read may be unused, but saves an "else" part. */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

  if (!negated)
    {
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    /* Only first bytes in the 0xc2-0xdf range are accepted when the input
    may be invalid. */
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe0 - 0xc2));

    /* TMP2 = ((first byte - 0xc2) << 6) + second byte, which is the value of
    the two byte sequence. */
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
    /* The second byte must be in the 0x80-0xbf range, so (byte - 0x80) has to
    be below 0x40. The test must use TMP1: TMP2 already holds the combined
    value, which is never below 0x80 for a well formed sequence. */
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40));

    /* The ctypes array contains only 256 values. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else if (common->invalid_utf)
    {
    /* utfreadchar_invalid takes the first byte in TMP1 (read_char7_type hands
    it over the same way). TMP1 still holds the ctypes entry at this point, so
    the byte is copied over from TMP2 before the call. */
    OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    /* Keep the decoded character in TMP2 for the table lookup below. */
    OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else
    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf && negated)
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x110000));
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32 */

#if PCRE2_CODE_UNIT_WIDTH != 8
/* The ctypes array contains only 256 values. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf && negated)
  {
  /* Skip low surrogate if necessary. */
  if (!common->invalid_utf)
    {
    /* After the subtraction, values below 0x400 are high surrogates. */
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);

    if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
      {
      /* Branchless form: RETURN_ADDR is used as a scratch register for the
      advanced pointer, which is selected when TMP2 < 0x400. */
      OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
      SELECT(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0, STR_PTR);
      }
    else
      {
      jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      JUMPHERE(jump);
      }
    return;
    }

  /* Invalid UTF support: code units outside 0xd800-0xdfff are complete. A
  code unit in 0xdc00-0xdfff (low surrogate first) or a high surrogate at the
  end of the subject is rejected. */
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  /* The next code unit must be in the 0xdc00-0xdfff range. */
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */
}
4423*22dc650dSSadaf Ebrahimi
static void move_back(compiler_common *common, jump_list **backtracks, BOOL must_be_valid)
{
/* Goes one character back. Affects STR_PTR and TMP1. If must_be_valid is TRUE,
TMP2 is not used. Otherwise TMP2 must contain the start of the subject buffer,
and it is destroyed. Does not modify STR_PTR for invalid character sequences.

Arguments:
  common          the compiler state; its utf and invalid_utf flags select
                  the emitted variant
  backtracks      when not NULL, a jump is added to this list for the case
                  when the preceding character is invalid (only with invalid
                  UTF support and must_be_valid == FALSE)
  must_be_valid   TRUE when the preceding character is known to be valid, so
                  the validating code paths are not emitted

Without UTF the emitted code just decreases STR_PTR by one code unit. */
DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_label *label;

if (common->utf)
  {
  if (!must_be_valid && common->invalid_utf)
    {
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    /* Bytes below 0x80 are complete characters. Everything else is handed
    over to the utfmoveback_invalid helper, and a zero TMP1 after the call is
    treated as failure. */
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(jump);
    return;
    }

  /* Step back byte by byte while the byte just passed has the 10xxxxxx form
  (a continuation byte). */
  label = LABEL();
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  if (!must_be_valid && common->invalid_utf)
    {
    /* Code units outside the 0xd800-0xdfff range are complete characters.
    Surrogates are handed over to the utfmoveback_invalid helper, and a zero
    TMP1 after the call is treated as failure. */
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0xd800);
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(jump);
    return;
    }

  /* Skip low surrogate if necessary. */
  /* TMP1 becomes 1 when the code unit just passed is in the 0xdc00-0xdfff
  range and 0 otherwise; scaled by UCHAR_SHIFT it is subtracted from STR_PTR. */
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xdc00);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf && !must_be_valid)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  if (backtracks != NULL)
    {
    /* Values of 0x110000 and above backtrack before STR_PTR is changed. */
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    return;
    }

  /* No backtrack list: TMP1 becomes 1 for values below 0x110000 and 0
  otherwise, so STR_PTR only moves when the value is in range. */
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x110000);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(must_be_valid);

OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
4510*22dc650dSSadaf Ebrahimi
static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
{
/* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed.

Arguments:
  common        the compiler state
  nltype        NLTYPE_ANY, NLTYPE_ANYCRLF or NLTYPE_FIXED; for the last one
                the character is compared to common->newline, which must be
                below 256
  backtracks    the list that receives the emitted jump(s)
  jumpifmatch   TRUE:  jump when the character is a newline
                FALSE: jump when the character is not a newline */
DEFINE_COMPILER;
struct sljit_jump *jump;

if (nltype == NLTYPE_ANY)
  {
  /* The anynewline helper reports its result in the zero flag; the flag state
  is declared to the compiler before the conditional jump is emitted. */
  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  }
else if (nltype == NLTYPE_ANYCRLF)
  {
  if (jumpifmatch)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
    }
  else
    {
    /* CR falls through (via the local jump), anything other than NL goes to
    backtracks. */
    jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
    JUMPHERE(jump);
    }
  }
else
  {
  SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
  add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  }
}
4543*22dc650dSSadaf Ebrahimi
4544*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
4545*22dc650dSSadaf Ebrahimi
4546*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8
static void do_utfreadchar(compiler_common *common)
{
/* Fast decoding a UTF-8 character. TMP1 contains the first byte
of the character (>= 0xc0). Return char value in TMP1.

This emits a helper that is entered with a fast call; the return address is
kept in RETURN_ADDR. The following bytes are read from STR_PTR + 0, + 1 and
+ 2, and STR_PTR is advanced by the number of bytes consumed (1, 2 or 3).
TMP2 is destroyed. No validity or STR_END check is emitted. */
DEFINE_COMPILER;
struct sljit_jump *jump;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Searching for the first zero. */
/* 0x800 is bit 0x20 of the first byte after the shift by 6. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
/* 0x3000 is the 0xc0 marker of the first byte shifted by 6. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(jump);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* 0x10000 is bit 0x10 of the first byte after the two shifts. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000);
jump = JUMP(SLJIT_NOT_ZERO);
/* Three byte sequence. */
/* 0xe0000 is the 0xe0 marker of the first byte shifted by 12. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Four byte sequence. */
JUMPHERE(jump);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
/* 0xf0000 is the 0xf0 marker of the first byte shifted by 12; it is removed
before the last shift. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4591*22dc650dSSadaf Ebrahimi
static void do_utfreadtype8(compiler_common *common)
{
/* Fast decoding a UTF-8 character type. TMP2 contains the first byte
of the character (>= 0xc0). Return value in TMP1.

This emits a helper that is entered with a fast call; the return address is
kept in RETURN_ADDR. The rest of the character is consumed: STR_PTR is
advanced by one for a two byte sequence and by the utf8_table4 entry of the
first byte otherwise. TMP2 is destroyed. The returned type is read from
common->ctypes for two byte sequences whose first byte has a 5 bit payload of
at most 3, and is zero in every other case. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_jump *compare;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Bit 0x20 is clear only in the first byte of a two byte sequence. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0x20);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
/* The upper 5 bits are known at this point. */
/* A payload above 3 gives a character above 255, which has no type. */
compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(compare);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* We only have types for characters less than 256. */
JUMPHERE(jump);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4627*22dc650dSSadaf Ebrahimi
static void do_utfreadchar_invalid(compiler_common *common)
{
/* Emits the slow helper that decodes one multi-byte UTF-8 character and
validates it at the same time. On entry TMP1 holds the lead byte (>= 0xc0).
On return TMP1 holds the character value, or INVALID_UTF_CHAR when the
sequence is malformed or truncated. STR_PTR is undefined after an invalid
character. */
DEFINE_COMPILER;
sljit_s32 cmov_ok = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
sljit_s32 n;
struct sljit_jump *bad[11];
struct sljit_jump *near_end;
struct sljit_jump *three_or_more;
struct sljit_jump *four_byte;
struct sljit_jump *short_three;
struct sljit_label *three_byte_tail;
struct sljit_label *bad_exit;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Rebase the lead byte on 0xc2. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc2);

/* Step three code units ahead right away: usually the subject still has
more than three left, and the bytes are then read at negative offsets. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));

/* A lead byte of 0xf5 or above cannot start a sequence; nothing else is
read in that case. */
bad[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xf5 - 0xc2);

/* Fewer than three code units remain: use the bounds checked path at the
end of this function. */
near_end = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);

/* Second byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
/* The byte is added before its 0x80 marker is removed: for a byte in the
0x80-0xbf range this also adds (0x2 << 6) to TMP1. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
bad[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

/* Bit 0x800 tells two byte sequences apart from longer ones. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
three_or_more = JUMP(SLJIT_NOT_ZERO);

/* Two byte sequence: take back the two extra code units and return. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(three_or_more);

/* Third byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
/* When conditional moves are available, a bad byte replaces TMP1 with a
constant through SELECT instead of taking a branch; the slot in bad[] is
then left empty. The same pattern is repeated below. */
if (cmov_ok)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0x20000, TMP1);
  bad[2] = NULL;
  }
else
  {
  bad[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
  }

/* Bit 0x10000 tells three byte sequences apart from four byte ones. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000);
four_byte = JUMP(SLJIT_NOT_ZERO);

/* Shared tail of the three byte case; the near-end path joins here. */
three_byte_tail = LABEL();

/* First range check: results in 0..0x7ff after removing 0x2d800 are
rejected. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2d800);
if (cmov_ok)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0xd800, TMP1);
  bad[3] = NULL;
  }
else
  {
  bad[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  }
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
/* Take back the one extra code unit. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Second range check: a three byte sequence must not produce a value
below 0x800. */
if (cmov_ok)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
  bad[4] = NULL;
  }
else
  {
  bad[4] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  }
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(four_byte);

/* Fourth byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (cmov_ok)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0, TMP1);
  bad[5] = NULL;
  }
else
  {
  bad[5] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
  }

/* Range check: after removing 0xc10000 the result has to be below
0x100000; 0x10000 is added back afterwards. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc10000);
if (cmov_ok)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000, TMP1);
  bad[6] = NULL;
  }
else
  {
  bad[6] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
  }

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Close to the end of the subject: every read is preceded by a bounds
check. STR_PTR is first reduced so that it is one code unit above its
entry value. */
JUMPHERE(near_end);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
bad[7] = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);

/* Second byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
/* As above: a byte in the 0x80-0xbf range also adds (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
bad[8] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
short_three = JUMP(SLJIT_NOT_ZERO);

/* Two byte sequence: STR_PTR is already in place. */
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three byte sequence: one more code unit has to be available. */
JUMPHERE(short_three);
bad[9] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (cmov_ok)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
  bad[10] = NULL;
  }
else
  {
  bad[10] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
  }

/* Two is added because the shared three byte tail subtracts one later. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

/* A four byte sequence cannot fit here, so only values below 0x30000
continue in the shared tail; the rest falls into the invalid exit. */
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x30000, three_byte_tail);

/* Common invalid exit: bind every recorded jump (empty slots are passed
through unchanged, exactly as collected above). */
bad_exit = LABEL();
for (n = 0; n < (sljit_s32)(sizeof(bad) / sizeof(bad[0])); n++)
  sljit_set_label(bad[n], bad_exit);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4786*22dc650dSSadaf Ebrahimi
static void do_utfreadnewline_invalid(compiler_common *common)
{
/* Emits the slow UTF-8 decoder that is specialized for newline tests.
On entry TMP1 holds the lead byte (>= 0xc0). On return TMP1 holds 0x85,
a value of the form 0x2000 | x (six bit x), or INVALID_UTF_CHAR for
everything else. */
DEFINE_COMPILER;
struct sljit_jump *at_end;
struct sljit_jump *lead_c2;
struct sljit_jump *lead_e2;
struct sljit_jump *not_trailing;
struct sljit_jump *ran_out;
struct sljit_label *skip_loop;
struct sljit_label *skip_entry;
struct sljit_label *invalid_exit;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

if (common->nltype != NLTYPE_ANY)
  {
  SLJIT_ASSERT(common->nltype != NLTYPE_FIXED || common->newline < 128);

  /* Every possible newline is ASCII here, so the character can never
  match: skip its trailing octets and report INVALID_UTF_CHAR. */
  at_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

  /* NOTE(review): STR_END is tested only once, before the loop - confirm
  that the subject is guaranteed to terminate the run of trailing octets. */
  skip_loop = LABEL();
  /* Use a post-increment load when the target supports it. */
  if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST,
        TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) == SLJIT_SUCCESS)
    {
    sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST,
      TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    }
  else
    {
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }

  /* Keep going while the octet has the 10xxxxxx form. */
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_loop);
  /* The octet that ended the loop is not part of this character. */
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  JUMPHERE(at_end);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
  OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  return;
  }

/* Any-newline mode: read the second octet, then look for the lead bytes
of the multi-byte newlines. */
at_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

lead_c2 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xc2);
lead_e2 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xe2);

/* Not a newline: the octet in TMP2 has just been consumed. If it is not
a trailing octet it is handed back, otherwise all following trailing
octets are skipped as well. */
skip_entry = LABEL();
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
not_trailing = CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80);

/* Skip the remaining trailing octets, checking the subject end each
time. */
skip_loop = LABEL();
ran_out = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_loop);

JUMPHERE(not_trailing);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Common "not a newline" exit. */
invalid_exit = LABEL();
JUMPHERE(at_end);
JUMPHERE(ran_out);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Lead byte 0xc2: the only two byte newline is 0x85. */
JUMPHERE(lead_c2);
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x85, skip_entry);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x85);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Lead byte 0xe2: the three byte newlines are 0x2028 and 0x2029, so the
second octet has to be 0x80 and a third octet must be available. */
JUMPHERE(lead_e2);
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_entry);
CMPTO(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0, invalid_exit);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* The third octet must be a trailing octet as well. */
OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 0x80);
CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40, skip_entry);

/* Return 0x2000 combined with the low six bits of the third octet. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0x2000);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4877*22dc650dSSadaf Ebrahimi
static void do_utfmoveback_invalid(compiler_common *common)
{
/* Emits the helper that moves STR_PTR back by one UTF-8 character while
validating the lead byte position. TMP2 is used as the lowest address
STR_PTR may reach (presumably the start of the subject - confirm against
the caller) and TMP1 is tested against 0xc0 on entry. On success TMP1 is
set to 1 and STR_PTR addresses the lead byte that was found. On failure
TMP1 is set to 0 and STR_PTR ends up one code unit above its entry value. */
DEFINE_COMPILER;
sljit_s32 n;
struct sljit_jump *fail[7];
struct sljit_jump *near_start;
struct sljit_jump *not_two_byte;
struct sljit_jump *not_three_byte;
struct sljit_label *success_exit;
struct sljit_label *fail_add3_exit;
struct sljit_label *fail_add4_exit;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Go three code units back at once; candidates are read at positive
offsets from there. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
fail[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);

/* Fewer than three code units are available below the entry position:
continue on the bounds checked path. */
near_start = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

/* Two byte sequence: is there a 0xc0-0xdf lead byte one unit back? */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
not_two_byte = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x20);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three byte sequence: the byte just examined has to be a trailing
byte, and the one before it a 0xe0-0xef lead byte. */
JUMPHERE(not_two_byte);
fail[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
not_three_byte = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x10);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Four byte sequence: again a trailing byte is required, followed by a
0xf0-0xf4 lead byte at the current STR_PTR. */
JUMPHERE(not_three_byte);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
fail[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0);
fail[3] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x05);

/* Success exit that leaves STR_PTR as it is. */
success_exit = LABEL();
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Near the lower bound. Two byte sequence first: STR_PTR is moved to the
candidate lead byte and checked against the bound before each read. */
JUMPHERE(near_start);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

fail[4] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20, success_exit);

/* Three byte sequence. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
fail[5] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);
fail[6] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10, success_exit);

/* A four byte sequence cannot fit on this path, so falling through is a
failure. STR_PTR is two units below its entry value here: add three. */
fail_add3_exit = LABEL();
sljit_set_label(fail[5], fail_add3_exit);
sljit_set_label(fail[6], fail_add3_exit);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* The bound was hit before the first read. STR_PTR is one unit below its
entry value; subtracting two lets the shared exit below add four
(-2 + 4 = 2). */
JUMPHERE(fail[4]);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

/* Failure exit for the paths where STR_PTR is three units below its
entry value. */
fail_add4_exit = LABEL();
for (n = 0; n < 4; n++)
  sljit_set_label(fail[n], fail_add4_exit);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(4));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4973*22dc650dSSadaf Ebrahimi
static void do_utfpeakcharback(compiler_common *common)
{
/* Emits the helper that peeks at the multi-byte UTF-8 character ending
just before STR_PTR. The decoded value is returned in TMP1 and STR_PTR is
not modified. No validity or bounds checks are emitted. */
DEFINE_COMPILER;
struct sljit_jump *two_byte;
struct sljit_jump *three_byte;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* A 0xc0-0xdf byte two units back is the lead of a two byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
two_byte = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20);

/* A 0xe0-0xef byte three units back is the lead of a three byte one. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
three_byte = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10);

/* Otherwise four bytes: the byte at -3 is turned into trailing byte
payload (its 0x80 marker removed) and merged with the lead byte at -4. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Each stage below appends six more bits; longer sequences fall through
all of the stages that follow their entry point. */
JUMPHERE(three_byte);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

JUMPHERE(two_byte);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5010*22dc650dSSadaf Ebrahimi
static void do_utfpeakcharback_invalid(compiler_common *common)
{
/* Emits the validating helper that peeks at the multi-byte UTF-8
character ending just before STR_PTR, without modifying STR_PTR. On entry
TMP1 is tested against 0xc0 and then used as the last byte of the
sequence; TMP2 is used as the lowest readable address (presumably the
start of the subject - confirm against the caller). The character value,
or INVALID_UTF_CHAR, is returned in TMP1. */
DEFINE_COMPILER;
sljit_s32 cmov_ok = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
sljit_s32 n;
struct sljit_jump *bad[8];
struct sljit_jump *at_most_three_back;
struct sljit_jump *at_most_two_back;
struct sljit_jump *not_two_byte;
struct sljit_jump *not_three_byte;
struct sljit_label *two_byte_tail;
struct sljit_label *three_byte_tail;
struct sljit_label *bad_exit;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* TMP2 + 3 >= STR_PTR means a four byte sequence cannot fit. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
bad[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
at_most_three_back = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Two byte sequence: a 0xc2-0xdf lead byte two units back. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
not_two_byte = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x1e);

/* Shared two byte tail, also entered from the bounds checked paths. */
two_byte_tail = LABEL();
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
/* TMP1 still carries its 0x80 marker: for a byte in the 0x80-0xbf range
the sum is therefore also increased by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* The byte at -2 has to be a trailing byte; merge its six bits. */
JUMPHERE(not_two_byte);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
bad[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Three byte sequence: a 0xe0-0xef lead byte three units back. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
not_three_byte = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x10);

/* Shared three byte tail. */
three_byte_tail = LABEL();
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Reject 0xd800-0xdfff. When conditional moves are available the value
is replaced through SELECT instead of branching and the slot in bad[] is
left empty; the same pattern is used for the checks below. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
if (cmov_ok)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, -0xd800, TMP1);
  bad[2] = NULL;
  }
else
  {
  bad[2] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  }

/* Reject values below 0x800. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
if (cmov_ok)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
  bad[3] = NULL;
  }
else
  {
  bad[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  }

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* The byte at -3 has to be a trailing byte; merge its six bits. */
JUMPHERE(not_three_byte);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0 - 0x80);
bad[4] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Four byte sequence: lead byte four units back. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18);
/* Because 0x10000 was subtracted above, ADD has to be used here rather
than OR. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

/* The rebased value must be below 0x100000. */
if (cmov_ok)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000, TMP1);
  bad[5] = NULL;
  }
else
  {
  bad[5] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
  }

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* At most three code units can be read: TMP2 + 2 >= STR_PTR means a
three byte sequence cannot fit either. */
JUMPHERE(at_most_three_back);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
at_most_two_back = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Two byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_tail);

OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
bad[6] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Three byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x10, three_byte_tail);

/* No room for a four byte sequence. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* At most two code units can be read; with fewer than two even a two
byte sequence is impossible. */
JUMPHERE(at_most_two_back);
bad[7] = CMP(SLJIT_GREATER, TMP2, 0, STR_PTR, 0);

/* Two byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_tail);

/* Common invalid exit: bind every recorded jump (empty slots are passed
through unchanged, exactly as collected above). */
bad_exit = LABEL();
for (n = 0; n < (sljit_s32)(sizeof(bad) / sizeof(bad[0])); n++)
  sljit_set_label(bad[n], bad_exit);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5142*22dc650dSSadaf Ebrahimi
5143*22dc650dSSadaf Ebrahimi #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
5144*22dc650dSSadaf Ebrahimi
5145*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 16
5146*22dc650dSSadaf Ebrahimi
static void do_utfreadchar_invalid(compiler_common *common)
{
/* Out-of-line slow path for decoding a UTF-16 character when invalid UTF
is tolerated. On entry TMP1 holds the first code unit of the character
(>= 0xd800). On exit TMP1 holds the decoded character value, or
INVALID_UTF_CHAR when the input is rejected. STR_PTR is undefined for
invalid characters.
NOTE(review): TMP2 is expected to arrive holding the high surrogate in a
form that only needs the shift and the 0x10000 bias applied below; the
code that prepares it is outside this function - confirm at the call site. */
DEFINE_COMPILER;
struct sljit_jump *reject[3];
int n;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Reject a first unit of 0xdc00 or above, and a character cut off by the
end of the subject. */
reject[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
reject[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

/* Fetch the second code unit into TMP1 and step over it; meanwhile the
high part in TMP2 is moved ten bits up. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* After removing the 0xdc00 base, a second unit that is not below 0x400
is rejected. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
reject[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);

/* Combine both halves and return the character. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Shared failure exit. */
for (n = 0; n < 3; n++)
  JUMPHERE(reject[n]);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5178*22dc650dSSadaf Ebrahimi
static void do_utfreadnewline_invalid(compiler_common *common)
{
/* Out-of-line slow path for decoding a UTF-16 character, specialized for
newline checks. On entry TMP1 holds the first code unit of the character
(>= 0xd800). On exit TMP1 holds either the constant 0x10000 or
INVALID_UTF_CHAR; the exact character value is not computed here. */

DEFINE_COMPILER;
struct sljit_jump *reject[2];
int n;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Nothing follows the first unit: reject. */
reject[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

/* Load the following code unit, then reject a first unit of 0xdc00 or
above. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
reject[1] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);

/* TMP2 becomes 1 when the following unit, less 0xdc00, is below 0x400,
and 0 otherwise. Scaled to a code unit size, it is added to STR_PTR, so
the following unit is stepped over only in the former case. TMP1 is set
to 0x10000 either way. */
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Shared failure exit. */
for (n = 0; n < 2; n++)
  JUMPHERE(reject[n]);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5210*22dc650dSSadaf Ebrahimi
static void do_utfmoveback_invalid(compiler_common *common)
{
/* Out-of-line helper that goes one character back in a UTF-16 subject
which may contain invalid sequences. On exit TMP1 is 1 when STR_PTR was
moved one further code unit back, and 0 when the step was rejected; in
the latter case STR_PTR is advanced by one code unit instead.
NOTE(review): on entry TMP1 appears to hold the code unit just stepped
over with 0xd800 already subtracted, and TMP2 the lowest address that may
be read; both are prepared by the caller, which is not visible here. */
DEFINE_COMPILER;
struct sljit_jump *reject[3];
int n;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Reject when TMP1 is below 0x400, or when TMP2 has already been
reached by STR_PTR. */
reject[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400);
reject[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* The preceding code unit, less 0xd800, has to be below 0x400. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
reject[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);

/* Success: step over the preceding unit as well. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Shared failure exit. */
for (n = 0; n < 3; n++)
  JUMPHERE(reject[n]);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5238*22dc650dSSadaf Ebrahimi
static void do_utfpeakcharback_invalid(compiler_common *common)
{
/* Out-of-line helper that peeks at the character before STR_PTR in a
UTF-16 subject which may contain invalid sequences. STR_PTR is never
written. On entry TMP1 holds the last code unit of that character; on
exit it holds the character value, or INVALID_UTF_CHAR.
NOTE(review): TMP2 appears to hold the lowest readable address on entry
(it is advanced by one code unit and compared with STR_PTR); the caller
that sets it is not visible here. */
DEFINE_COMPILER;
struct sljit_jump *done;
struct sljit_jump *reject[3];
int n;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Values of 0xe000 and above are returned unchanged. */
done = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
/* Reject a last unit below 0xdc00, and the case where no second unit
can be read in front of it. */
reject[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
reject[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Read the unit two positions back; less 0xd800, it has to be below
0x400. TMP1 is rebased from 0xdc00 to 0x10000 at the same time. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
reject[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
/* Merge the upper ten bits into the result. */
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

JUMPHERE(done);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Shared failure exit. */
for (n = 0; n < 3; n++)
  JUMPHERE(reject[n]);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5270*22dc650dSSadaf Ebrahimi
5271*22dc650dSSadaf Ebrahimi #endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
5272*22dc650dSSadaf Ebrahimi
5273*22dc650dSSadaf Ebrahimi /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
5274*22dc650dSSadaf Ebrahimi #define UCD_BLOCK_MASK 127
5275*22dc650dSSadaf Ebrahimi #define UCD_BLOCK_SHIFT 7
5276*22dc650dSSadaf Ebrahimi
static void do_getucd(compiler_common *common)
{
/* Out-of-line helper that looks up the UCD entry of the character passed
in TMP1. On exit TMP2 holds the 16-bit value read from the second stage
table. TMP1 is overwritten with the index that was used for the second
stage table; no chartype is loaded by this function. */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 32
struct sljit_jump *in_range;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* The record of UNASSIGNED_UTF_CHAR must be the dummy_ucd_record. */
const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
#endif

/* The shifts and masks below depend on these sizes. */
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
  {
  /* Without UTF, values above MAX_UTF_CODE_POINT are replaced by
  UNASSIGNED_UTF_CHAR before the tables are indexed. */
  in_range = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  JUMPHERE(in_range);
  }
#endif

/* First stage: 16-bit entry selected by the block number of the
character. */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* Second stage: 16-bit entry at (first stage value * block size) plus the
offset of the character inside its block. */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5316*22dc650dSSadaf Ebrahimi
static void do_getucdtype(compiler_common *common)
{
/* Out-of-line helper that looks up the UCD record of the character passed
in TMP1 and loads its chartype. On exit TMP1 holds the chartype byte.
TMP2 is left holding the second stage table value multiplied by four. */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 32
struct sljit_jump *in_range;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* The record of UNASSIGNED_UTF_CHAR must be the dummy_ucd_record. */
const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
#endif

/* The shifts and masks below depend on these sizes. */
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
  {
  /* Without UTF, values above MAX_UTF_CODE_POINT are replaced by
  UNASSIGNED_UTF_CHAR before the tables are indexed. */
  in_range = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  JUMPHERE(in_range);
  }
#endif

/* First stage: 16-bit entry selected by the block number of the
character. */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* Second stage: 16-bit entry at (first stage value * block size) plus the
offset of the character inside its block. */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);

/* A record is 12 bytes long, so the byte offset is index * 12. It is
formed as (index << 2) + ((index << 2) << 1): the first term is added to
the base explicitly, the second one by the scaled addressing mode. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 1);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5363*22dc650dSSadaf Ebrahimi
5364*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE */
5365*22dc650dSSadaf Ebrahimi
mainloop_entry(compiler_common * common)5366*22dc650dSSadaf Ebrahimi static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common)
5367*22dc650dSSadaf Ebrahimi {
5368*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
5369*22dc650dSSadaf Ebrahimi struct sljit_label *mainloop;
5370*22dc650dSSadaf Ebrahimi struct sljit_label *newlinelabel = NULL;
5371*22dc650dSSadaf Ebrahimi struct sljit_jump *start;
5372*22dc650dSSadaf Ebrahimi struct sljit_jump *end = NULL;
5373*22dc650dSSadaf Ebrahimi struct sljit_jump *end2 = NULL;
5374*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5375*22dc650dSSadaf Ebrahimi struct sljit_label *loop;
5376*22dc650dSSadaf Ebrahimi struct sljit_jump *jump;
5377*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5378*22dc650dSSadaf Ebrahimi jump_list *newline = NULL;
5379*22dc650dSSadaf Ebrahimi sljit_u32 overall_options = common->re->overall_options;
5380*22dc650dSSadaf Ebrahimi BOOL hascrorlf = (common->re->flags & PCRE2_HASCRORLF) != 0;
5381*22dc650dSSadaf Ebrahimi BOOL newlinecheck = FALSE;
5382*22dc650dSSadaf Ebrahimi BOOL readuchar = FALSE;
5383*22dc650dSSadaf Ebrahimi
5384*22dc650dSSadaf Ebrahimi if (!(hascrorlf || (overall_options & PCRE2_FIRSTLINE) != 0)
5385*22dc650dSSadaf Ebrahimi && (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
5386*22dc650dSSadaf Ebrahimi newlinecheck = TRUE;
5387*22dc650dSSadaf Ebrahimi
5388*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(common->abort_label == NULL);
5389*22dc650dSSadaf Ebrahimi
5390*22dc650dSSadaf Ebrahimi if ((overall_options & PCRE2_FIRSTLINE) != 0)
5391*22dc650dSSadaf Ebrahimi {
5392*22dc650dSSadaf Ebrahimi /* Search for the end of the first line. */
5393*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(common->match_end_ptr != 0);
5394*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5395*22dc650dSSadaf Ebrahimi
5396*22dc650dSSadaf Ebrahimi if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5397*22dc650dSSadaf Ebrahimi {
5398*22dc650dSSadaf Ebrahimi mainloop = LABEL();
5399*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5400*22dc650dSSadaf Ebrahimi end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5401*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5402*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5403*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
5404*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
5405*22dc650dSSadaf Ebrahimi JUMPHERE(end);
5406*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5407*22dc650dSSadaf Ebrahimi }
5408*22dc650dSSadaf Ebrahimi else
5409*22dc650dSSadaf Ebrahimi {
5410*22dc650dSSadaf Ebrahimi end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5411*22dc650dSSadaf Ebrahimi mainloop = LABEL();
5412*22dc650dSSadaf Ebrahimi /* Continual stores does not cause data dependency. */
5413*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
5414*22dc650dSSadaf Ebrahimi read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
5415*22dc650dSSadaf Ebrahimi check_newlinechar(common, common->nltype, &newline, TRUE);
5416*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
5417*22dc650dSSadaf Ebrahimi JUMPHERE(end);
5418*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
5419*22dc650dSSadaf Ebrahimi set_jumps(newline, LABEL());
5420*22dc650dSSadaf Ebrahimi }
5421*22dc650dSSadaf Ebrahimi
5422*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5423*22dc650dSSadaf Ebrahimi }
5424*22dc650dSSadaf Ebrahimi else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0)
5425*22dc650dSSadaf Ebrahimi {
5426*22dc650dSSadaf Ebrahimi /* Check whether offset limit is set and valid. */
5427*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(common->match_end_ptr != 0);
5428*22dc650dSSadaf Ebrahimi
5429*22dc650dSSadaf Ebrahimi if (HAS_VIRTUAL_REGISTERS)
5430*22dc650dSSadaf Ebrahimi {
5431*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5432*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit));
5433*22dc650dSSadaf Ebrahimi }
5434*22dc650dSSadaf Ebrahimi else
5435*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, offset_limit));
5436*22dc650dSSadaf Ebrahimi
5437*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
5438*22dc650dSSadaf Ebrahimi end = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET);
5439*22dc650dSSadaf Ebrahimi if (HAS_VIRTUAL_REGISTERS)
5440*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5441*22dc650dSSadaf Ebrahimi else
5442*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
5443*22dc650dSSadaf Ebrahimi
5444*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
5445*22dc650dSSadaf Ebrahimi OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5446*22dc650dSSadaf Ebrahimi #endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
5447*22dc650dSSadaf Ebrahimi if (HAS_VIRTUAL_REGISTERS)
5448*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5449*22dc650dSSadaf Ebrahimi
5450*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
5451*22dc650dSSadaf Ebrahimi end2 = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
5452*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
5453*22dc650dSSadaf Ebrahimi JUMPHERE(end2);
5454*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
5455*22dc650dSSadaf Ebrahimi add_jump(compiler, &common->abort, CMP(SLJIT_LESS, TMP2, 0, STR_PTR, 0));
5456*22dc650dSSadaf Ebrahimi JUMPHERE(end);
5457*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, TMP2, 0);
5458*22dc650dSSadaf Ebrahimi }
5459*22dc650dSSadaf Ebrahimi
5460*22dc650dSSadaf Ebrahimi start = JUMP(SLJIT_JUMP);
5461*22dc650dSSadaf Ebrahimi
5462*22dc650dSSadaf Ebrahimi if (newlinecheck)
5463*22dc650dSSadaf Ebrahimi {
5464*22dc650dSSadaf Ebrahimi newlinelabel = LABEL();
5465*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5466*22dc650dSSadaf Ebrahimi end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5467*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5468*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
5469*22dc650dSSadaf Ebrahimi OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
5470*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
5471*22dc650dSSadaf Ebrahimi OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5472*22dc650dSSadaf Ebrahimi #endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
5473*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5474*22dc650dSSadaf Ebrahimi end2 = JUMP(SLJIT_JUMP);
5475*22dc650dSSadaf Ebrahimi }
5476*22dc650dSSadaf Ebrahimi
5477*22dc650dSSadaf Ebrahimi mainloop = LABEL();
5478*22dc650dSSadaf Ebrahimi
5479*22dc650dSSadaf Ebrahimi /* Increasing the STR_PTR here requires one less jump in the most common case. */
5480*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5481*22dc650dSSadaf Ebrahimi if (common->utf && !common->invalid_utf) readuchar = TRUE;
5482*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5483*22dc650dSSadaf Ebrahimi if (newlinecheck) readuchar = TRUE;
5484*22dc650dSSadaf Ebrahimi
5485*22dc650dSSadaf Ebrahimi if (readuchar)
5486*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5487*22dc650dSSadaf Ebrahimi
5488*22dc650dSSadaf Ebrahimi if (newlinecheck)
5489*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
5490*22dc650dSSadaf Ebrahimi
5491*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5492*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5493*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8
5494*22dc650dSSadaf Ebrahimi if (common->invalid_utf)
5495*22dc650dSSadaf Ebrahimi {
5496*22dc650dSSadaf Ebrahimi /* Skip continuation code units. */
5497*22dc650dSSadaf Ebrahimi loop = LABEL();
5498*22dc650dSSadaf Ebrahimi jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5499*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5500*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5501*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
5502*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x40, loop);
5503*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5504*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
5505*22dc650dSSadaf Ebrahimi }
5506*22dc650dSSadaf Ebrahimi else if (common->utf)
5507*22dc650dSSadaf Ebrahimi {
5508*22dc650dSSadaf Ebrahimi jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5509*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5510*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5511*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
5512*22dc650dSSadaf Ebrahimi }
5513*22dc650dSSadaf Ebrahimi #elif PCRE2_CODE_UNIT_WIDTH == 16
5514*22dc650dSSadaf Ebrahimi if (common->invalid_utf)
5515*22dc650dSSadaf Ebrahimi {
5516*22dc650dSSadaf Ebrahimi /* Skip continuation code units. */
5517*22dc650dSSadaf Ebrahimi loop = LABEL();
5518*22dc650dSSadaf Ebrahimi jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5519*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5520*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5521*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
5522*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400, loop);
5523*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5524*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
5525*22dc650dSSadaf Ebrahimi }
5526*22dc650dSSadaf Ebrahimi else if (common->utf)
5527*22dc650dSSadaf Ebrahimi {
5528*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5529*22dc650dSSadaf Ebrahimi
5530*22dc650dSSadaf Ebrahimi if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
5531*22dc650dSSadaf Ebrahimi {
5532*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5533*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400);
5534*22dc650dSSadaf Ebrahimi SELECT(SLJIT_LESS, STR_PTR, TMP2, 0, STR_PTR);
5535*22dc650dSSadaf Ebrahimi }
5536*22dc650dSSadaf Ebrahimi else
5537*22dc650dSSadaf Ebrahimi {
5538*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400);
5539*22dc650dSSadaf Ebrahimi OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
5540*22dc650dSSadaf Ebrahimi OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5541*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5542*22dc650dSSadaf Ebrahimi }
5543*22dc650dSSadaf Ebrahimi }
5544*22dc650dSSadaf Ebrahimi #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
5545*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5546*22dc650dSSadaf Ebrahimi JUMPHERE(start);
5547*22dc650dSSadaf Ebrahimi
5548*22dc650dSSadaf Ebrahimi if (newlinecheck)
5549*22dc650dSSadaf Ebrahimi {
5550*22dc650dSSadaf Ebrahimi JUMPHERE(end);
5551*22dc650dSSadaf Ebrahimi JUMPHERE(end2);
5552*22dc650dSSadaf Ebrahimi }
5553*22dc650dSSadaf Ebrahimi
5554*22dc650dSSadaf Ebrahimi return mainloop;
5555*22dc650dSSadaf Ebrahimi }
5556*22dc650dSSadaf Ebrahimi
5557*22dc650dSSadaf Ebrahimi
add_prefix_char(PCRE2_UCHAR chr,fast_forward_char_data * chars,BOOL last)5558*22dc650dSSadaf Ebrahimi static SLJIT_INLINE void add_prefix_char(PCRE2_UCHAR chr, fast_forward_char_data *chars, BOOL last)
5559*22dc650dSSadaf Ebrahimi {
5560*22dc650dSSadaf Ebrahimi sljit_u32 i, count = chars->count;
5561*22dc650dSSadaf Ebrahimi
5562*22dc650dSSadaf Ebrahimi if (count == 255)
5563*22dc650dSSadaf Ebrahimi return;
5564*22dc650dSSadaf Ebrahimi
5565*22dc650dSSadaf Ebrahimi if (count == 0)
5566*22dc650dSSadaf Ebrahimi {
5567*22dc650dSSadaf Ebrahimi chars->count = 1;
5568*22dc650dSSadaf Ebrahimi chars->chars[0] = chr;
5569*22dc650dSSadaf Ebrahimi
5570*22dc650dSSadaf Ebrahimi if (last)
5571*22dc650dSSadaf Ebrahimi chars->last_count = 1;
5572*22dc650dSSadaf Ebrahimi return;
5573*22dc650dSSadaf Ebrahimi }
5574*22dc650dSSadaf Ebrahimi
5575*22dc650dSSadaf Ebrahimi for (i = 0; i < count; i++)
5576*22dc650dSSadaf Ebrahimi if (chars->chars[i] == chr)
5577*22dc650dSSadaf Ebrahimi return;
5578*22dc650dSSadaf Ebrahimi
5579*22dc650dSSadaf Ebrahimi if (count >= MAX_DIFF_CHARS)
5580*22dc650dSSadaf Ebrahimi {
5581*22dc650dSSadaf Ebrahimi chars->count = 255;
5582*22dc650dSSadaf Ebrahimi return;
5583*22dc650dSSadaf Ebrahimi }
5584*22dc650dSSadaf Ebrahimi
5585*22dc650dSSadaf Ebrahimi chars->chars[count] = chr;
5586*22dc650dSSadaf Ebrahimi chars->count = count + 1;
5587*22dc650dSSadaf Ebrahimi
5588*22dc650dSSadaf Ebrahimi if (last)
5589*22dc650dSSadaf Ebrahimi chars->last_count++;
5590*22dc650dSSadaf Ebrahimi }
5591*22dc650dSSadaf Ebrahimi
scan_prefix(compiler_common * common,PCRE2_SPTR cc,fast_forward_char_data * chars,int max_chars,sljit_u32 * rec_count)5592*22dc650dSSadaf Ebrahimi static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars, int max_chars, sljit_u32 *rec_count)
5593*22dc650dSSadaf Ebrahimi {
5594*22dc650dSSadaf Ebrahimi /* Recursive function, which scans prefix literals. */
5595*22dc650dSSadaf Ebrahimi BOOL last, any, class, caseless;
5596*22dc650dSSadaf Ebrahimi int len, repeat, len_save, consumed = 0;
5597*22dc650dSSadaf Ebrahimi sljit_u32 chr; /* Any unicode character. */
5598*22dc650dSSadaf Ebrahimi sljit_u8 *bytes, *bytes_end, byte;
5599*22dc650dSSadaf Ebrahimi PCRE2_SPTR alternative, cc_save, oc;
5600*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5601*22dc650dSSadaf Ebrahimi PCRE2_UCHAR othercase[4];
5602*22dc650dSSadaf Ebrahimi #elif defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
5603*22dc650dSSadaf Ebrahimi PCRE2_UCHAR othercase[2];
5604*22dc650dSSadaf Ebrahimi #else
5605*22dc650dSSadaf Ebrahimi PCRE2_UCHAR othercase[1];
5606*22dc650dSSadaf Ebrahimi #endif
5607*22dc650dSSadaf Ebrahimi
5608*22dc650dSSadaf Ebrahimi repeat = 1;
5609*22dc650dSSadaf Ebrahimi while (TRUE)
5610*22dc650dSSadaf Ebrahimi {
5611*22dc650dSSadaf Ebrahimi if (*rec_count == 0)
5612*22dc650dSSadaf Ebrahimi return 0;
5613*22dc650dSSadaf Ebrahimi (*rec_count)--;
5614*22dc650dSSadaf Ebrahimi
5615*22dc650dSSadaf Ebrahimi last = TRUE;
5616*22dc650dSSadaf Ebrahimi any = FALSE;
5617*22dc650dSSadaf Ebrahimi class = FALSE;
5618*22dc650dSSadaf Ebrahimi caseless = FALSE;
5619*22dc650dSSadaf Ebrahimi
5620*22dc650dSSadaf Ebrahimi switch (*cc)
5621*22dc650dSSadaf Ebrahimi {
5622*22dc650dSSadaf Ebrahimi case OP_CHARI:
5623*22dc650dSSadaf Ebrahimi caseless = TRUE;
5624*22dc650dSSadaf Ebrahimi /* Fall through */
5625*22dc650dSSadaf Ebrahimi case OP_CHAR:
5626*22dc650dSSadaf Ebrahimi last = FALSE;
5627*22dc650dSSadaf Ebrahimi cc++;
5628*22dc650dSSadaf Ebrahimi break;
5629*22dc650dSSadaf Ebrahimi
5630*22dc650dSSadaf Ebrahimi case OP_SOD:
5631*22dc650dSSadaf Ebrahimi case OP_SOM:
5632*22dc650dSSadaf Ebrahimi case OP_SET_SOM:
5633*22dc650dSSadaf Ebrahimi case OP_NOT_WORD_BOUNDARY:
5634*22dc650dSSadaf Ebrahimi case OP_WORD_BOUNDARY:
5635*22dc650dSSadaf Ebrahimi case OP_EODN:
5636*22dc650dSSadaf Ebrahimi case OP_EOD:
5637*22dc650dSSadaf Ebrahimi case OP_CIRC:
5638*22dc650dSSadaf Ebrahimi case OP_CIRCM:
5639*22dc650dSSadaf Ebrahimi case OP_DOLL:
5640*22dc650dSSadaf Ebrahimi case OP_DOLLM:
5641*22dc650dSSadaf Ebrahimi case OP_NOT_UCP_WORD_BOUNDARY:
5642*22dc650dSSadaf Ebrahimi case OP_UCP_WORD_BOUNDARY:
5643*22dc650dSSadaf Ebrahimi /* Zero width assertions. */
5644*22dc650dSSadaf Ebrahimi cc++;
5645*22dc650dSSadaf Ebrahimi continue;
5646*22dc650dSSadaf Ebrahimi
5647*22dc650dSSadaf Ebrahimi case OP_ASSERT:
5648*22dc650dSSadaf Ebrahimi case OP_ASSERT_NOT:
5649*22dc650dSSadaf Ebrahimi case OP_ASSERTBACK:
5650*22dc650dSSadaf Ebrahimi case OP_ASSERTBACK_NOT:
5651*22dc650dSSadaf Ebrahimi case OP_ASSERT_NA:
5652*22dc650dSSadaf Ebrahimi case OP_ASSERTBACK_NA:
5653*22dc650dSSadaf Ebrahimi cc = bracketend(cc);
5654*22dc650dSSadaf Ebrahimi continue;
5655*22dc650dSSadaf Ebrahimi
5656*22dc650dSSadaf Ebrahimi case OP_PLUSI:
5657*22dc650dSSadaf Ebrahimi case OP_MINPLUSI:
5658*22dc650dSSadaf Ebrahimi case OP_POSPLUSI:
5659*22dc650dSSadaf Ebrahimi caseless = TRUE;
5660*22dc650dSSadaf Ebrahimi /* Fall through */
5661*22dc650dSSadaf Ebrahimi case OP_PLUS:
5662*22dc650dSSadaf Ebrahimi case OP_MINPLUS:
5663*22dc650dSSadaf Ebrahimi case OP_POSPLUS:
5664*22dc650dSSadaf Ebrahimi cc++;
5665*22dc650dSSadaf Ebrahimi break;
5666*22dc650dSSadaf Ebrahimi
5667*22dc650dSSadaf Ebrahimi case OP_EXACTI:
5668*22dc650dSSadaf Ebrahimi caseless = TRUE;
5669*22dc650dSSadaf Ebrahimi /* Fall through */
5670*22dc650dSSadaf Ebrahimi case OP_EXACT:
5671*22dc650dSSadaf Ebrahimi repeat = GET2(cc, 1);
5672*22dc650dSSadaf Ebrahimi last = FALSE;
5673*22dc650dSSadaf Ebrahimi cc += 1 + IMM2_SIZE;
5674*22dc650dSSadaf Ebrahimi break;
5675*22dc650dSSadaf Ebrahimi
5676*22dc650dSSadaf Ebrahimi case OP_QUERYI:
5677*22dc650dSSadaf Ebrahimi case OP_MINQUERYI:
5678*22dc650dSSadaf Ebrahimi case OP_POSQUERYI:
5679*22dc650dSSadaf Ebrahimi caseless = TRUE;
5680*22dc650dSSadaf Ebrahimi /* Fall through */
5681*22dc650dSSadaf Ebrahimi case OP_QUERY:
5682*22dc650dSSadaf Ebrahimi case OP_MINQUERY:
5683*22dc650dSSadaf Ebrahimi case OP_POSQUERY:
5684*22dc650dSSadaf Ebrahimi len = 1;
5685*22dc650dSSadaf Ebrahimi cc++;
5686*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
5687*22dc650dSSadaf Ebrahimi if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
5688*22dc650dSSadaf Ebrahimi #endif
5689*22dc650dSSadaf Ebrahimi max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
5690*22dc650dSSadaf Ebrahimi if (max_chars == 0)
5691*22dc650dSSadaf Ebrahimi return consumed;
5692*22dc650dSSadaf Ebrahimi last = FALSE;
5693*22dc650dSSadaf Ebrahimi break;
5694*22dc650dSSadaf Ebrahimi
5695*22dc650dSSadaf Ebrahimi case OP_KET:
5696*22dc650dSSadaf Ebrahimi cc += 1 + LINK_SIZE;
5697*22dc650dSSadaf Ebrahimi continue;
5698*22dc650dSSadaf Ebrahimi
5699*22dc650dSSadaf Ebrahimi case OP_ALT:
5700*22dc650dSSadaf Ebrahimi cc += GET(cc, 1);
5701*22dc650dSSadaf Ebrahimi continue;
5702*22dc650dSSadaf Ebrahimi
5703*22dc650dSSadaf Ebrahimi case OP_ONCE:
5704*22dc650dSSadaf Ebrahimi case OP_BRA:
5705*22dc650dSSadaf Ebrahimi case OP_BRAPOS:
5706*22dc650dSSadaf Ebrahimi case OP_CBRA:
5707*22dc650dSSadaf Ebrahimi case OP_CBRAPOS:
5708*22dc650dSSadaf Ebrahimi alternative = cc + GET(cc, 1);
5709*22dc650dSSadaf Ebrahimi while (*alternative == OP_ALT)
5710*22dc650dSSadaf Ebrahimi {
5711*22dc650dSSadaf Ebrahimi max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
5712*22dc650dSSadaf Ebrahimi if (max_chars == 0)
5713*22dc650dSSadaf Ebrahimi return consumed;
5714*22dc650dSSadaf Ebrahimi alternative += GET(alternative, 1);
5715*22dc650dSSadaf Ebrahimi }
5716*22dc650dSSadaf Ebrahimi
5717*22dc650dSSadaf Ebrahimi if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
5718*22dc650dSSadaf Ebrahimi cc += IMM2_SIZE;
5719*22dc650dSSadaf Ebrahimi cc += 1 + LINK_SIZE;
5720*22dc650dSSadaf Ebrahimi continue;
5721*22dc650dSSadaf Ebrahimi
5722*22dc650dSSadaf Ebrahimi case OP_CLASS:
5723*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5724*22dc650dSSadaf Ebrahimi if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
5725*22dc650dSSadaf Ebrahimi return consumed;
5726*22dc650dSSadaf Ebrahimi #endif
5727*22dc650dSSadaf Ebrahimi class = TRUE;
5728*22dc650dSSadaf Ebrahimi break;
5729*22dc650dSSadaf Ebrahimi
5730*22dc650dSSadaf Ebrahimi case OP_NCLASS:
5731*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5732*22dc650dSSadaf Ebrahimi if (common->utf) return consumed;
5733*22dc650dSSadaf Ebrahimi #endif
5734*22dc650dSSadaf Ebrahimi class = TRUE;
5735*22dc650dSSadaf Ebrahimi break;
5736*22dc650dSSadaf Ebrahimi
5737*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
5738*22dc650dSSadaf Ebrahimi case OP_XCLASS:
5739*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5740*22dc650dSSadaf Ebrahimi if (common->utf) return consumed;
5741*22dc650dSSadaf Ebrahimi #endif
5742*22dc650dSSadaf Ebrahimi any = TRUE;
5743*22dc650dSSadaf Ebrahimi cc += GET(cc, 1);
5744*22dc650dSSadaf Ebrahimi break;
5745*22dc650dSSadaf Ebrahimi #endif
5746*22dc650dSSadaf Ebrahimi
5747*22dc650dSSadaf Ebrahimi case OP_DIGIT:
5748*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5749*22dc650dSSadaf Ebrahimi if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
5750*22dc650dSSadaf Ebrahimi return consumed;
5751*22dc650dSSadaf Ebrahimi #endif
5752*22dc650dSSadaf Ebrahimi any = TRUE;
5753*22dc650dSSadaf Ebrahimi cc++;
5754*22dc650dSSadaf Ebrahimi break;
5755*22dc650dSSadaf Ebrahimi
5756*22dc650dSSadaf Ebrahimi case OP_WHITESPACE:
5757*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5758*22dc650dSSadaf Ebrahimi if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
5759*22dc650dSSadaf Ebrahimi return consumed;
5760*22dc650dSSadaf Ebrahimi #endif
5761*22dc650dSSadaf Ebrahimi any = TRUE;
5762*22dc650dSSadaf Ebrahimi cc++;
5763*22dc650dSSadaf Ebrahimi break;
5764*22dc650dSSadaf Ebrahimi
5765*22dc650dSSadaf Ebrahimi case OP_WORDCHAR:
5766*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5767*22dc650dSSadaf Ebrahimi if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
5768*22dc650dSSadaf Ebrahimi return consumed;
5769*22dc650dSSadaf Ebrahimi #endif
5770*22dc650dSSadaf Ebrahimi any = TRUE;
5771*22dc650dSSadaf Ebrahimi cc++;
5772*22dc650dSSadaf Ebrahimi break;
5773*22dc650dSSadaf Ebrahimi
5774*22dc650dSSadaf Ebrahimi case OP_NOT:
5775*22dc650dSSadaf Ebrahimi case OP_NOTI:
5776*22dc650dSSadaf Ebrahimi cc++;
5777*22dc650dSSadaf Ebrahimi /* Fall through. */
5778*22dc650dSSadaf Ebrahimi case OP_NOT_DIGIT:
5779*22dc650dSSadaf Ebrahimi case OP_NOT_WHITESPACE:
5780*22dc650dSSadaf Ebrahimi case OP_NOT_WORDCHAR:
5781*22dc650dSSadaf Ebrahimi case OP_ANY:
5782*22dc650dSSadaf Ebrahimi case OP_ALLANY:
5783*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5784*22dc650dSSadaf Ebrahimi if (common->utf) return consumed;
5785*22dc650dSSadaf Ebrahimi #endif
5786*22dc650dSSadaf Ebrahimi any = TRUE;
5787*22dc650dSSadaf Ebrahimi cc++;
5788*22dc650dSSadaf Ebrahimi break;
5789*22dc650dSSadaf Ebrahimi
5790*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
5791*22dc650dSSadaf Ebrahimi case OP_NOTPROP:
5792*22dc650dSSadaf Ebrahimi case OP_PROP:
5793*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH != 32
5794*22dc650dSSadaf Ebrahimi if (common->utf) return consumed;
5795*22dc650dSSadaf Ebrahimi #endif
5796*22dc650dSSadaf Ebrahimi any = TRUE;
5797*22dc650dSSadaf Ebrahimi cc += 1 + 2;
5798*22dc650dSSadaf Ebrahimi break;
5799*22dc650dSSadaf Ebrahimi #endif
5800*22dc650dSSadaf Ebrahimi
5801*22dc650dSSadaf Ebrahimi case OP_TYPEEXACT:
5802*22dc650dSSadaf Ebrahimi repeat = GET2(cc, 1);
5803*22dc650dSSadaf Ebrahimi cc += 1 + IMM2_SIZE;
5804*22dc650dSSadaf Ebrahimi continue;
5805*22dc650dSSadaf Ebrahimi
5806*22dc650dSSadaf Ebrahimi case OP_NOTEXACT:
5807*22dc650dSSadaf Ebrahimi case OP_NOTEXACTI:
5808*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5809*22dc650dSSadaf Ebrahimi if (common->utf) return consumed;
5810*22dc650dSSadaf Ebrahimi #endif
5811*22dc650dSSadaf Ebrahimi any = TRUE;
5812*22dc650dSSadaf Ebrahimi repeat = GET2(cc, 1);
5813*22dc650dSSadaf Ebrahimi cc += 1 + IMM2_SIZE + 1;
5814*22dc650dSSadaf Ebrahimi break;
5815*22dc650dSSadaf Ebrahimi
5816*22dc650dSSadaf Ebrahimi default:
5817*22dc650dSSadaf Ebrahimi return consumed;
5818*22dc650dSSadaf Ebrahimi }
5819*22dc650dSSadaf Ebrahimi
5820*22dc650dSSadaf Ebrahimi if (any)
5821*22dc650dSSadaf Ebrahimi {
5822*22dc650dSSadaf Ebrahimi do
5823*22dc650dSSadaf Ebrahimi {
5824*22dc650dSSadaf Ebrahimi chars->count = 255;
5825*22dc650dSSadaf Ebrahimi
5826*22dc650dSSadaf Ebrahimi consumed++;
5827*22dc650dSSadaf Ebrahimi if (--max_chars == 0)
5828*22dc650dSSadaf Ebrahimi return consumed;
5829*22dc650dSSadaf Ebrahimi chars++;
5830*22dc650dSSadaf Ebrahimi }
5831*22dc650dSSadaf Ebrahimi while (--repeat > 0);
5832*22dc650dSSadaf Ebrahimi
5833*22dc650dSSadaf Ebrahimi repeat = 1;
5834*22dc650dSSadaf Ebrahimi continue;
5835*22dc650dSSadaf Ebrahimi }
5836*22dc650dSSadaf Ebrahimi
5837*22dc650dSSadaf Ebrahimi if (class)
5838*22dc650dSSadaf Ebrahimi {
5839*22dc650dSSadaf Ebrahimi bytes = (sljit_u8*) (cc + 1);
5840*22dc650dSSadaf Ebrahimi cc += 1 + 32 / sizeof(PCRE2_UCHAR);
5841*22dc650dSSadaf Ebrahimi
5842*22dc650dSSadaf Ebrahimi switch (*cc)
5843*22dc650dSSadaf Ebrahimi {
5844*22dc650dSSadaf Ebrahimi case OP_CRSTAR:
5845*22dc650dSSadaf Ebrahimi case OP_CRMINSTAR:
5846*22dc650dSSadaf Ebrahimi case OP_CRPOSSTAR:
5847*22dc650dSSadaf Ebrahimi case OP_CRQUERY:
5848*22dc650dSSadaf Ebrahimi case OP_CRMINQUERY:
5849*22dc650dSSadaf Ebrahimi case OP_CRPOSQUERY:
5850*22dc650dSSadaf Ebrahimi max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
5851*22dc650dSSadaf Ebrahimi if (max_chars == 0)
5852*22dc650dSSadaf Ebrahimi return consumed;
5853*22dc650dSSadaf Ebrahimi break;
5854*22dc650dSSadaf Ebrahimi
5855*22dc650dSSadaf Ebrahimi default:
5856*22dc650dSSadaf Ebrahimi case OP_CRPLUS:
5857*22dc650dSSadaf Ebrahimi case OP_CRMINPLUS:
5858*22dc650dSSadaf Ebrahimi case OP_CRPOSPLUS:
5859*22dc650dSSadaf Ebrahimi break;
5860*22dc650dSSadaf Ebrahimi
5861*22dc650dSSadaf Ebrahimi case OP_CRRANGE:
5862*22dc650dSSadaf Ebrahimi case OP_CRMINRANGE:
5863*22dc650dSSadaf Ebrahimi case OP_CRPOSRANGE:
5864*22dc650dSSadaf Ebrahimi repeat = GET2(cc, 1);
5865*22dc650dSSadaf Ebrahimi if (repeat <= 0)
5866*22dc650dSSadaf Ebrahimi return consumed;
5867*22dc650dSSadaf Ebrahimi break;
5868*22dc650dSSadaf Ebrahimi }
5869*22dc650dSSadaf Ebrahimi
5870*22dc650dSSadaf Ebrahimi do
5871*22dc650dSSadaf Ebrahimi {
5872*22dc650dSSadaf Ebrahimi if (bytes[31] & 0x80)
5873*22dc650dSSadaf Ebrahimi chars->count = 255;
5874*22dc650dSSadaf Ebrahimi else if (chars->count != 255)
5875*22dc650dSSadaf Ebrahimi {
5876*22dc650dSSadaf Ebrahimi bytes_end = bytes + 32;
5877*22dc650dSSadaf Ebrahimi chr = 0;
5878*22dc650dSSadaf Ebrahimi do
5879*22dc650dSSadaf Ebrahimi {
5880*22dc650dSSadaf Ebrahimi byte = *bytes++;
5881*22dc650dSSadaf Ebrahimi SLJIT_ASSERT((chr & 0x7) == 0);
5882*22dc650dSSadaf Ebrahimi if (byte == 0)
5883*22dc650dSSadaf Ebrahimi chr += 8;
5884*22dc650dSSadaf Ebrahimi else
5885*22dc650dSSadaf Ebrahimi {
5886*22dc650dSSadaf Ebrahimi do
5887*22dc650dSSadaf Ebrahimi {
5888*22dc650dSSadaf Ebrahimi if ((byte & 0x1) != 0)
5889*22dc650dSSadaf Ebrahimi add_prefix_char(chr, chars, TRUE);
5890*22dc650dSSadaf Ebrahimi byte >>= 1;
5891*22dc650dSSadaf Ebrahimi chr++;
5892*22dc650dSSadaf Ebrahimi }
5893*22dc650dSSadaf Ebrahimi while (byte != 0);
5894*22dc650dSSadaf Ebrahimi chr = (chr + 7) & (sljit_u32)(~7);
5895*22dc650dSSadaf Ebrahimi }
5896*22dc650dSSadaf Ebrahimi }
5897*22dc650dSSadaf Ebrahimi while (chars->count != 255 && bytes < bytes_end);
5898*22dc650dSSadaf Ebrahimi bytes = bytes_end - 32;
5899*22dc650dSSadaf Ebrahimi }
5900*22dc650dSSadaf Ebrahimi
5901*22dc650dSSadaf Ebrahimi consumed++;
5902*22dc650dSSadaf Ebrahimi if (--max_chars == 0)
5903*22dc650dSSadaf Ebrahimi return consumed;
5904*22dc650dSSadaf Ebrahimi chars++;
5905*22dc650dSSadaf Ebrahimi }
5906*22dc650dSSadaf Ebrahimi while (--repeat > 0);
5907*22dc650dSSadaf Ebrahimi
5908*22dc650dSSadaf Ebrahimi switch (*cc)
5909*22dc650dSSadaf Ebrahimi {
5910*22dc650dSSadaf Ebrahimi case OP_CRSTAR:
5911*22dc650dSSadaf Ebrahimi case OP_CRMINSTAR:
5912*22dc650dSSadaf Ebrahimi case OP_CRPOSSTAR:
5913*22dc650dSSadaf Ebrahimi return consumed;
5914*22dc650dSSadaf Ebrahimi
5915*22dc650dSSadaf Ebrahimi case OP_CRQUERY:
5916*22dc650dSSadaf Ebrahimi case OP_CRMINQUERY:
5917*22dc650dSSadaf Ebrahimi case OP_CRPOSQUERY:
5918*22dc650dSSadaf Ebrahimi cc++;
5919*22dc650dSSadaf Ebrahimi break;
5920*22dc650dSSadaf Ebrahimi
5921*22dc650dSSadaf Ebrahimi case OP_CRRANGE:
5922*22dc650dSSadaf Ebrahimi case OP_CRMINRANGE:
5923*22dc650dSSadaf Ebrahimi case OP_CRPOSRANGE:
5924*22dc650dSSadaf Ebrahimi if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
5925*22dc650dSSadaf Ebrahimi return consumed;
5926*22dc650dSSadaf Ebrahimi cc += 1 + 2 * IMM2_SIZE;
5927*22dc650dSSadaf Ebrahimi break;
5928*22dc650dSSadaf Ebrahimi }
5929*22dc650dSSadaf Ebrahimi
5930*22dc650dSSadaf Ebrahimi repeat = 1;
5931*22dc650dSSadaf Ebrahimi continue;
5932*22dc650dSSadaf Ebrahimi }
5933*22dc650dSSadaf Ebrahimi
5934*22dc650dSSadaf Ebrahimi len = 1;
5935*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
5936*22dc650dSSadaf Ebrahimi if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
5937*22dc650dSSadaf Ebrahimi #endif
5938*22dc650dSSadaf Ebrahimi
5939*22dc650dSSadaf Ebrahimi if (caseless && char_has_othercase(common, cc))
5940*22dc650dSSadaf Ebrahimi {
5941*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
5942*22dc650dSSadaf Ebrahimi if (common->utf)
5943*22dc650dSSadaf Ebrahimi {
5944*22dc650dSSadaf Ebrahimi GETCHAR(chr, cc);
5945*22dc650dSSadaf Ebrahimi if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
5946*22dc650dSSadaf Ebrahimi return consumed;
5947*22dc650dSSadaf Ebrahimi }
5948*22dc650dSSadaf Ebrahimi else
5949*22dc650dSSadaf Ebrahimi #endif
5950*22dc650dSSadaf Ebrahimi {
5951*22dc650dSSadaf Ebrahimi chr = *cc;
5952*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
5953*22dc650dSSadaf Ebrahimi if (common->ucp && chr > 127)
5954*22dc650dSSadaf Ebrahimi {
5955*22dc650dSSadaf Ebrahimi chr = UCD_OTHERCASE(chr);
5956*22dc650dSSadaf Ebrahimi othercase[0] = (chr == (PCRE2_UCHAR)chr) ? chr : *cc;
5957*22dc650dSSadaf Ebrahimi }
5958*22dc650dSSadaf Ebrahimi else
5959*22dc650dSSadaf Ebrahimi #endif
5960*22dc650dSSadaf Ebrahimi othercase[0] = TABLE_GET(chr, common->fcc, chr);
5961*22dc650dSSadaf Ebrahimi }
5962*22dc650dSSadaf Ebrahimi }
5963*22dc650dSSadaf Ebrahimi else
5964*22dc650dSSadaf Ebrahimi {
5965*22dc650dSSadaf Ebrahimi caseless = FALSE;
5966*22dc650dSSadaf Ebrahimi othercase[0] = 0; /* Stops compiler warning - PH */
5967*22dc650dSSadaf Ebrahimi }
5968*22dc650dSSadaf Ebrahimi
5969*22dc650dSSadaf Ebrahimi len_save = len;
5970*22dc650dSSadaf Ebrahimi cc_save = cc;
5971*22dc650dSSadaf Ebrahimi while (TRUE)
5972*22dc650dSSadaf Ebrahimi {
5973*22dc650dSSadaf Ebrahimi oc = othercase;
5974*22dc650dSSadaf Ebrahimi do
5975*22dc650dSSadaf Ebrahimi {
5976*22dc650dSSadaf Ebrahimi len--;
5977*22dc650dSSadaf Ebrahimi consumed++;
5978*22dc650dSSadaf Ebrahimi
5979*22dc650dSSadaf Ebrahimi chr = *cc;
5980*22dc650dSSadaf Ebrahimi add_prefix_char(*cc, chars, len == 0);
5981*22dc650dSSadaf Ebrahimi
5982*22dc650dSSadaf Ebrahimi if (caseless)
5983*22dc650dSSadaf Ebrahimi add_prefix_char(*oc, chars, len == 0);
5984*22dc650dSSadaf Ebrahimi
5985*22dc650dSSadaf Ebrahimi if (--max_chars == 0)
5986*22dc650dSSadaf Ebrahimi return consumed;
5987*22dc650dSSadaf Ebrahimi chars++;
5988*22dc650dSSadaf Ebrahimi cc++;
5989*22dc650dSSadaf Ebrahimi oc++;
5990*22dc650dSSadaf Ebrahimi }
5991*22dc650dSSadaf Ebrahimi while (len > 0);
5992*22dc650dSSadaf Ebrahimi
5993*22dc650dSSadaf Ebrahimi if (--repeat == 0)
5994*22dc650dSSadaf Ebrahimi break;
5995*22dc650dSSadaf Ebrahimi
5996*22dc650dSSadaf Ebrahimi len = len_save;
5997*22dc650dSSadaf Ebrahimi cc = cc_save;
5998*22dc650dSSadaf Ebrahimi }
5999*22dc650dSSadaf Ebrahimi
6000*22dc650dSSadaf Ebrahimi repeat = 1;
6001*22dc650dSSadaf Ebrahimi if (last)
6002*22dc650dSSadaf Ebrahimi return consumed;
6003*22dc650dSSadaf Ebrahimi }
6004*22dc650dSSadaf Ebrahimi }
6005*22dc650dSSadaf Ebrahimi
6006*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
jumpto_if_not_utf_char_start(struct sljit_compiler * compiler,sljit_s32 reg,struct sljit_label * label)6007*22dc650dSSadaf Ebrahimi static void jumpto_if_not_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg, struct sljit_label *label)
6008*22dc650dSSadaf Ebrahimi {
6009*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8
6010*22dc650dSSadaf Ebrahimi OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0);
6011*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0x80, label);
6012*22dc650dSSadaf Ebrahimi #elif PCRE2_CODE_UNIT_WIDTH == 16
6013*22dc650dSSadaf Ebrahimi OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00);
6014*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00, label);
6015*22dc650dSSadaf Ebrahimi #else
6016*22dc650dSSadaf Ebrahimi #error "Unknown code width"
6017*22dc650dSSadaf Ebrahimi #endif
6018*22dc650dSSadaf Ebrahimi }
6019*22dc650dSSadaf Ebrahimi #endif
6020*22dc650dSSadaf Ebrahimi
6021*22dc650dSSadaf Ebrahimi #include "pcre2_jit_simd_inc.h"
6022*22dc650dSSadaf Ebrahimi
6023*22dc650dSSadaf Ebrahimi #ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
6024*22dc650dSSadaf Ebrahimi
check_fast_forward_char_pair_simd(compiler_common * common,fast_forward_char_data * chars,int max)6025*22dc650dSSadaf Ebrahimi static BOOL check_fast_forward_char_pair_simd(compiler_common *common, fast_forward_char_data *chars, int max)
6026*22dc650dSSadaf Ebrahimi {
6027*22dc650dSSadaf Ebrahimi sljit_s32 i, j, max_i = 0, max_j = 0;
6028*22dc650dSSadaf Ebrahimi sljit_u32 max_pri = 0;
6029*22dc650dSSadaf Ebrahimi sljit_s32 max_offset = max_fast_forward_char_pair_offset();
6030*22dc650dSSadaf Ebrahimi PCRE2_UCHAR a1, a2, a_pri, b1, b2, b_pri;
6031*22dc650dSSadaf Ebrahimi
6032*22dc650dSSadaf Ebrahimi for (i = max - 1; i >= 1; i--)
6033*22dc650dSSadaf Ebrahimi {
6034*22dc650dSSadaf Ebrahimi if (chars[i].last_count > 2)
6035*22dc650dSSadaf Ebrahimi {
6036*22dc650dSSadaf Ebrahimi a1 = chars[i].chars[0];
6037*22dc650dSSadaf Ebrahimi a2 = chars[i].chars[1];
6038*22dc650dSSadaf Ebrahimi a_pri = chars[i].last_count;
6039*22dc650dSSadaf Ebrahimi
6040*22dc650dSSadaf Ebrahimi j = i - max_offset;
6041*22dc650dSSadaf Ebrahimi if (j < 0)
6042*22dc650dSSadaf Ebrahimi j = 0;
6043*22dc650dSSadaf Ebrahimi
6044*22dc650dSSadaf Ebrahimi while (j < i)
6045*22dc650dSSadaf Ebrahimi {
6046*22dc650dSSadaf Ebrahimi b_pri = chars[j].last_count;
6047*22dc650dSSadaf Ebrahimi if (b_pri > 2 && (sljit_u32)a_pri + (sljit_u32)b_pri >= max_pri)
6048*22dc650dSSadaf Ebrahimi {
6049*22dc650dSSadaf Ebrahimi b1 = chars[j].chars[0];
6050*22dc650dSSadaf Ebrahimi b2 = chars[j].chars[1];
6051*22dc650dSSadaf Ebrahimi
6052*22dc650dSSadaf Ebrahimi if (a1 != b1 && a1 != b2 && a2 != b1 && a2 != b2)
6053*22dc650dSSadaf Ebrahimi {
6054*22dc650dSSadaf Ebrahimi max_pri = a_pri + b_pri;
6055*22dc650dSSadaf Ebrahimi max_i = i;
6056*22dc650dSSadaf Ebrahimi max_j = j;
6057*22dc650dSSadaf Ebrahimi }
6058*22dc650dSSadaf Ebrahimi }
6059*22dc650dSSadaf Ebrahimi j++;
6060*22dc650dSSadaf Ebrahimi }
6061*22dc650dSSadaf Ebrahimi }
6062*22dc650dSSadaf Ebrahimi }
6063*22dc650dSSadaf Ebrahimi
6064*22dc650dSSadaf Ebrahimi if (max_pri == 0)
6065*22dc650dSSadaf Ebrahimi return FALSE;
6066*22dc650dSSadaf Ebrahimi
6067*22dc650dSSadaf Ebrahimi fast_forward_char_pair_simd(common, max_i, chars[max_i].chars[0], chars[max_i].chars[1], max_j, chars[max_j].chars[0], chars[max_j].chars[1]);
6068*22dc650dSSadaf Ebrahimi return TRUE;
6069*22dc650dSSadaf Ebrahimi }
6070*22dc650dSSadaf Ebrahimi
6071*22dc650dSSadaf Ebrahimi #endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
6072*22dc650dSSadaf Ebrahimi
/* Emits code that moves STR_PTR forward until the code unit found 'offset'
units after the candidate start position equals char1 or char2 (pass the same
value twice to search for a single character).

Arguments:
  common   compiler state
  char1    first accepted code unit
  char2    second accepted code unit (may be equal to char1)
  offset   distance, in code units, between the candidate start position and
           the inspected code unit; must be 0 unless compiling for
           PCRE2_JIT_COMPLETE (asserted)

Generated code, in outline:
  - When common->match_end_ptr is set, STR_END is saved in TMP3 and replaced
    for the duration of the search; it is restored before the generated code
    falls through. NOTE(review): the SELECT looks like it keeps the smaller of
    STR_END and (saved match end + offset + 1) - confirm against sljit.
  - If the SIMD single character search is available it is used and nothing
    else is emitted besides undoing the 'offset' bias and restoring STR_END.
  - Otherwise a scalar loop is emitted. Running out of input jumps to
    common->failed_match in complete mode; in the other modes it leaves the
    loop with STR_PTR untouched.
  - In UTF mode (8 and 16 bit) with a non-zero offset, a hit is only accepted
    if the candidate start position is the first code unit of a character.

TMP1 (and TMP3 when the end is replaced) are clobbered by the emitted code. */

static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
struct sljit_label *scan_loop;
struct sljit_jump *first_hit;
struct sljit_jump *input_exhausted;
/* Bits in which the two accepted characters differ (zero if identical). */
PCRE2_UCHAR diff_bits = char1 ^ char2;
BOOL limit_end = (common->match_end_ptr != 0);
BOOL complete_mode = (common->mode == PCRE2_JIT_COMPLETE);

SLJIT_ASSERT(complete_mode || offset == 0);

if (limit_end)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);

/* From here on STR_PTR addresses the inspected unit, not the start. */
if (offset > 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

if (limit_end)
  {
  /* Remember the real end, then install the temporary one. */
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);

  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
  SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);
  }

#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD

if (JIT_HAS_FAST_FORWARD_CHAR_SIMD)
  {
  fast_forward_char_simd(common, char1, char2, offset);

  /* Undo the bias applied above. */
  if (offset > 0)
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

  if (limit_end)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

#endif

scan_loop = LABEL();

input_exhausted = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (complete_mode)
  add_jump(compiler, &common->failed_match, input_exhausted);

/* Fetch the next unit and step past it. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (diff_bits == 0)
  {
  /* Only one character is accepted. */
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1, scan_loop);
  }
else if (is_powerof2(diff_bits))
  {
  /* The characters differ in a single bit: force that bit on and do one
  comparison instead of two. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, diff_bits);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | diff_bits, scan_loop);
  }
else
  {
  first_hit = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char2, scan_loop);
  JUMPHERE(first_hit);
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
  {
  /* STR_PTR is offset + 1 units past the candidate start; reject the hit
  if that start lies inside a multi-unit character. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-(offset + 1)));
  jumpto_if_not_utf_char_start(compiler, TMP1, scan_loop);
  }
#endif

/* Step back from "after the inspected unit" to the candidate start. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset + 1));

/* Outside complete mode the exhausted-input jump lands after the step back,
because STR_PTR was not advanced on that path. */
if (!complete_mode)
  JUMPHERE(input_exhausted);

if (limit_end)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
6158*22dc650dSSadaf Ebrahimi
fast_forward_first_n_chars(compiler_common * common)6159*22dc650dSSadaf Ebrahimi static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
6160*22dc650dSSadaf Ebrahimi {
6161*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
6162*22dc650dSSadaf Ebrahimi struct sljit_label *start;
6163*22dc650dSSadaf Ebrahimi struct sljit_jump *match;
6164*22dc650dSSadaf Ebrahimi fast_forward_char_data chars[MAX_N_CHARS];
6165*22dc650dSSadaf Ebrahimi sljit_s32 offset;
6166*22dc650dSSadaf Ebrahimi PCRE2_UCHAR mask;
6167*22dc650dSSadaf Ebrahimi PCRE2_UCHAR *char_set, *char_set_end;
6168*22dc650dSSadaf Ebrahimi int i, max, from;
6169*22dc650dSSadaf Ebrahimi int range_right = -1, range_len;
6170*22dc650dSSadaf Ebrahimi sljit_u8 *update_table = NULL;
6171*22dc650dSSadaf Ebrahimi BOOL in_range;
6172*22dc650dSSadaf Ebrahimi sljit_u32 rec_count;
6173*22dc650dSSadaf Ebrahimi
6174*22dc650dSSadaf Ebrahimi for (i = 0; i < MAX_N_CHARS; i++)
6175*22dc650dSSadaf Ebrahimi {
6176*22dc650dSSadaf Ebrahimi chars[i].count = 0;
6177*22dc650dSSadaf Ebrahimi chars[i].last_count = 0;
6178*22dc650dSSadaf Ebrahimi }
6179*22dc650dSSadaf Ebrahimi
6180*22dc650dSSadaf Ebrahimi rec_count = 10000;
6181*22dc650dSSadaf Ebrahimi max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);
6182*22dc650dSSadaf Ebrahimi
6183*22dc650dSSadaf Ebrahimi if (max < 1)
6184*22dc650dSSadaf Ebrahimi return FALSE;
6185*22dc650dSSadaf Ebrahimi
6186*22dc650dSSadaf Ebrahimi /* Convert last_count to priority. */
6187*22dc650dSSadaf Ebrahimi for (i = 0; i < max; i++)
6188*22dc650dSSadaf Ebrahimi {
6189*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(chars[i].last_count <= chars[i].count);
6190*22dc650dSSadaf Ebrahimi
6191*22dc650dSSadaf Ebrahimi switch (chars[i].count)
6192*22dc650dSSadaf Ebrahimi {
6193*22dc650dSSadaf Ebrahimi case 0:
6194*22dc650dSSadaf Ebrahimi chars[i].count = 255;
6195*22dc650dSSadaf Ebrahimi chars[i].last_count = 0;
6196*22dc650dSSadaf Ebrahimi break;
6197*22dc650dSSadaf Ebrahimi
6198*22dc650dSSadaf Ebrahimi case 1:
6199*22dc650dSSadaf Ebrahimi chars[i].last_count = (chars[i].last_count == 1) ? 7 : 5;
6200*22dc650dSSadaf Ebrahimi /* Simplifies algorithms later. */
6201*22dc650dSSadaf Ebrahimi chars[i].chars[1] = chars[i].chars[0];
6202*22dc650dSSadaf Ebrahimi break;
6203*22dc650dSSadaf Ebrahimi
6204*22dc650dSSadaf Ebrahimi case 2:
6205*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(chars[i].chars[0] != chars[i].chars[1]);
6206*22dc650dSSadaf Ebrahimi
6207*22dc650dSSadaf Ebrahimi if (is_powerof2(chars[i].chars[0] ^ chars[i].chars[1]))
6208*22dc650dSSadaf Ebrahimi chars[i].last_count = (chars[i].last_count == 2) ? 6 : 4;
6209*22dc650dSSadaf Ebrahimi else
6210*22dc650dSSadaf Ebrahimi chars[i].last_count = (chars[i].last_count == 2) ? 3 : 2;
6211*22dc650dSSadaf Ebrahimi break;
6212*22dc650dSSadaf Ebrahimi
6213*22dc650dSSadaf Ebrahimi default:
6214*22dc650dSSadaf Ebrahimi chars[i].last_count = (chars[i].count == 255) ? 0 : 1;
6215*22dc650dSSadaf Ebrahimi break;
6216*22dc650dSSadaf Ebrahimi }
6217*22dc650dSSadaf Ebrahimi }
6218*22dc650dSSadaf Ebrahimi
6219*22dc650dSSadaf Ebrahimi #ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
6220*22dc650dSSadaf Ebrahimi if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && check_fast_forward_char_pair_simd(common, chars, max))
6221*22dc650dSSadaf Ebrahimi return TRUE;
6222*22dc650dSSadaf Ebrahimi #endif
6223*22dc650dSSadaf Ebrahimi
6224*22dc650dSSadaf Ebrahimi in_range = FALSE;
6225*22dc650dSSadaf Ebrahimi /* Prevent compiler "uninitialized" warning */
6226*22dc650dSSadaf Ebrahimi from = 0;
6227*22dc650dSSadaf Ebrahimi range_len = 4 /* minimum length */ - 1;
6228*22dc650dSSadaf Ebrahimi for (i = 0; i <= max; i++)
6229*22dc650dSSadaf Ebrahimi {
6230*22dc650dSSadaf Ebrahimi if (in_range && (i - from) > range_len && (chars[i - 1].count < 255))
6231*22dc650dSSadaf Ebrahimi {
6232*22dc650dSSadaf Ebrahimi range_len = i - from;
6233*22dc650dSSadaf Ebrahimi range_right = i - 1;
6234*22dc650dSSadaf Ebrahimi }
6235*22dc650dSSadaf Ebrahimi
6236*22dc650dSSadaf Ebrahimi if (i < max && chars[i].count < 255)
6237*22dc650dSSadaf Ebrahimi {
6238*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(chars[i].count > 0);
6239*22dc650dSSadaf Ebrahimi if (!in_range)
6240*22dc650dSSadaf Ebrahimi {
6241*22dc650dSSadaf Ebrahimi in_range = TRUE;
6242*22dc650dSSadaf Ebrahimi from = i;
6243*22dc650dSSadaf Ebrahimi }
6244*22dc650dSSadaf Ebrahimi }
6245*22dc650dSSadaf Ebrahimi else
6246*22dc650dSSadaf Ebrahimi in_range = FALSE;
6247*22dc650dSSadaf Ebrahimi }
6248*22dc650dSSadaf Ebrahimi
6249*22dc650dSSadaf Ebrahimi if (range_right >= 0)
6250*22dc650dSSadaf Ebrahimi {
6251*22dc650dSSadaf Ebrahimi update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
6252*22dc650dSSadaf Ebrahimi if (update_table == NULL)
6253*22dc650dSSadaf Ebrahimi return TRUE;
6254*22dc650dSSadaf Ebrahimi memset(update_table, IN_UCHARS(range_len), 256);
6255*22dc650dSSadaf Ebrahimi
6256*22dc650dSSadaf Ebrahimi for (i = 0; i < range_len; i++)
6257*22dc650dSSadaf Ebrahimi {
6258*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(chars[range_right - i].count > 0 && chars[range_right - i].count < 255);
6259*22dc650dSSadaf Ebrahimi
6260*22dc650dSSadaf Ebrahimi char_set = chars[range_right - i].chars;
6261*22dc650dSSadaf Ebrahimi char_set_end = char_set + chars[range_right - i].count;
6262*22dc650dSSadaf Ebrahimi do
6263*22dc650dSSadaf Ebrahimi {
6264*22dc650dSSadaf Ebrahimi if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
6265*22dc650dSSadaf Ebrahimi update_table[(*char_set) & 0xff] = IN_UCHARS(i);
6266*22dc650dSSadaf Ebrahimi char_set++;
6267*22dc650dSSadaf Ebrahimi }
6268*22dc650dSSadaf Ebrahimi while (char_set < char_set_end);
6269*22dc650dSSadaf Ebrahimi }
6270*22dc650dSSadaf Ebrahimi }
6271*22dc650dSSadaf Ebrahimi
6272*22dc650dSSadaf Ebrahimi offset = -1;
6273*22dc650dSSadaf Ebrahimi /* Scan forward. */
6274*22dc650dSSadaf Ebrahimi for (i = 0; i < max; i++)
6275*22dc650dSSadaf Ebrahimi {
6276*22dc650dSSadaf Ebrahimi if (range_right == i)
6277*22dc650dSSadaf Ebrahimi continue;
6278*22dc650dSSadaf Ebrahimi
6279*22dc650dSSadaf Ebrahimi if (offset == -1)
6280*22dc650dSSadaf Ebrahimi {
6281*22dc650dSSadaf Ebrahimi if (chars[i].last_count >= 2)
6282*22dc650dSSadaf Ebrahimi offset = i;
6283*22dc650dSSadaf Ebrahimi }
6284*22dc650dSSadaf Ebrahimi else if (chars[offset].last_count < chars[i].last_count)
6285*22dc650dSSadaf Ebrahimi offset = i;
6286*22dc650dSSadaf Ebrahimi }
6287*22dc650dSSadaf Ebrahimi
6288*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(offset == -1 || (chars[offset].count >= 1 && chars[offset].count <= 2));
6289*22dc650dSSadaf Ebrahimi
6290*22dc650dSSadaf Ebrahimi if (range_right < 0)
6291*22dc650dSSadaf Ebrahimi {
6292*22dc650dSSadaf Ebrahimi if (offset < 0)
6293*22dc650dSSadaf Ebrahimi return FALSE;
6294*22dc650dSSadaf Ebrahimi /* Works regardless the value is 1 or 2. */
6295*22dc650dSSadaf Ebrahimi fast_forward_first_char2(common, chars[offset].chars[0], chars[offset].chars[1], offset);
6296*22dc650dSSadaf Ebrahimi return TRUE;
6297*22dc650dSSadaf Ebrahimi }
6298*22dc650dSSadaf Ebrahimi
6299*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(range_right != offset);
6300*22dc650dSSadaf Ebrahimi
6301*22dc650dSSadaf Ebrahimi if (common->match_end_ptr != 0)
6302*22dc650dSSadaf Ebrahimi {
6303*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6304*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
6305*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6306*22dc650dSSadaf Ebrahimi add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
6307*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
6308*22dc650dSSadaf Ebrahimi SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);
6309*22dc650dSSadaf Ebrahimi }
6310*22dc650dSSadaf Ebrahimi else
6311*22dc650dSSadaf Ebrahimi {
6312*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6313*22dc650dSSadaf Ebrahimi add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
6314*22dc650dSSadaf Ebrahimi }
6315*22dc650dSSadaf Ebrahimi
6316*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(range_right >= 0);
6317*22dc650dSSadaf Ebrahimi
6318*22dc650dSSadaf Ebrahimi if (!HAS_VIRTUAL_REGISTERS)
6319*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
6320*22dc650dSSadaf Ebrahimi
6321*22dc650dSSadaf Ebrahimi start = LABEL();
6322*22dc650dSSadaf Ebrahimi add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6323*22dc650dSSadaf Ebrahimi
6324*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
6325*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
6326*22dc650dSSadaf Ebrahimi #else
6327*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
6328*22dc650dSSadaf Ebrahimi #endif
6329*22dc650dSSadaf Ebrahimi
6330*22dc650dSSadaf Ebrahimi if (!HAS_VIRTUAL_REGISTERS)
6331*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
6332*22dc650dSSadaf Ebrahimi else
6333*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
6334*22dc650dSSadaf Ebrahimi
6335*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6336*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
6337*22dc650dSSadaf Ebrahimi
6338*22dc650dSSadaf Ebrahimi if (offset >= 0)
6339*22dc650dSSadaf Ebrahimi {
6340*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
6341*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6342*22dc650dSSadaf Ebrahimi
6343*22dc650dSSadaf Ebrahimi if (chars[offset].count == 1)
6344*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0], start);
6345*22dc650dSSadaf Ebrahimi else
6346*22dc650dSSadaf Ebrahimi {
6347*22dc650dSSadaf Ebrahimi mask = chars[offset].chars[0] ^ chars[offset].chars[1];
6348*22dc650dSSadaf Ebrahimi if (is_powerof2(mask))
6349*22dc650dSSadaf Ebrahimi {
6350*22dc650dSSadaf Ebrahimi OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
6351*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0] | mask, start);
6352*22dc650dSSadaf Ebrahimi }
6353*22dc650dSSadaf Ebrahimi else
6354*22dc650dSSadaf Ebrahimi {
6355*22dc650dSSadaf Ebrahimi match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0]);
6356*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[1], start);
6357*22dc650dSSadaf Ebrahimi JUMPHERE(match);
6358*22dc650dSSadaf Ebrahimi }
6359*22dc650dSSadaf Ebrahimi }
6360*22dc650dSSadaf Ebrahimi }
6361*22dc650dSSadaf Ebrahimi
6362*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
6363*22dc650dSSadaf Ebrahimi if (common->utf && offset != 0)
6364*22dc650dSSadaf Ebrahimi {
6365*22dc650dSSadaf Ebrahimi if (offset < 0)
6366*22dc650dSSadaf Ebrahimi {
6367*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6368*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6369*22dc650dSSadaf Ebrahimi }
6370*22dc650dSSadaf Ebrahimi else
6371*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6372*22dc650dSSadaf Ebrahimi
6373*22dc650dSSadaf Ebrahimi jumpto_if_not_utf_char_start(compiler, TMP1, start);
6374*22dc650dSSadaf Ebrahimi
6375*22dc650dSSadaf Ebrahimi if (offset < 0)
6376*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6377*22dc650dSSadaf Ebrahimi }
6378*22dc650dSSadaf Ebrahimi #endif
6379*22dc650dSSadaf Ebrahimi
6380*22dc650dSSadaf Ebrahimi if (offset >= 0)
6381*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6382*22dc650dSSadaf Ebrahimi
6383*22dc650dSSadaf Ebrahimi if (common->match_end_ptr != 0)
6384*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6385*22dc650dSSadaf Ebrahimi else
6386*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6387*22dc650dSSadaf Ebrahimi return TRUE;
6388*22dc650dSSadaf Ebrahimi }
6389*22dc650dSSadaf Ebrahimi
fast_forward_first_char(compiler_common * common)6390*22dc650dSSadaf Ebrahimi static SLJIT_INLINE void fast_forward_first_char(compiler_common *common)
6391*22dc650dSSadaf Ebrahimi {
6392*22dc650dSSadaf Ebrahimi PCRE2_UCHAR first_char = (PCRE2_UCHAR)(common->re->first_codeunit);
6393*22dc650dSSadaf Ebrahimi PCRE2_UCHAR oc;
6394*22dc650dSSadaf Ebrahimi
6395*22dc650dSSadaf Ebrahimi oc = first_char;
6396*22dc650dSSadaf Ebrahimi if ((common->re->flags & PCRE2_FIRSTCASELESS) != 0)
6397*22dc650dSSadaf Ebrahimi {
6398*22dc650dSSadaf Ebrahimi oc = TABLE_GET(first_char, common->fcc, first_char);
6399*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE
6400*22dc650dSSadaf Ebrahimi if (first_char > 127 && (common->utf || common->ucp))
6401*22dc650dSSadaf Ebrahimi oc = UCD_OTHERCASE(first_char);
6402*22dc650dSSadaf Ebrahimi #endif
6403*22dc650dSSadaf Ebrahimi }
6404*22dc650dSSadaf Ebrahimi
6405*22dc650dSSadaf Ebrahimi fast_forward_first_char2(common, first_char, oc, 0);
6406*22dc650dSSadaf Ebrahimi }
6407*22dc650dSSadaf Ebrahimi
fast_forward_newline(compiler_common * common)6408*22dc650dSSadaf Ebrahimi static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
6409*22dc650dSSadaf Ebrahimi {
6410*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
6411*22dc650dSSadaf Ebrahimi struct sljit_label *loop;
6412*22dc650dSSadaf Ebrahimi struct sljit_jump *lastchar = NULL;
6413*22dc650dSSadaf Ebrahimi struct sljit_jump *firstchar;
6414*22dc650dSSadaf Ebrahimi struct sljit_jump *quit = NULL;
6415*22dc650dSSadaf Ebrahimi struct sljit_jump *foundcr = NULL;
6416*22dc650dSSadaf Ebrahimi struct sljit_jump *notfoundnl;
6417*22dc650dSSadaf Ebrahimi jump_list *newline = NULL;
6418*22dc650dSSadaf Ebrahimi
6419*22dc650dSSadaf Ebrahimi if (common->match_end_ptr != 0)
6420*22dc650dSSadaf Ebrahimi {
6421*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
6422*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6423*22dc650dSSadaf Ebrahimi }
6424*22dc650dSSadaf Ebrahimi
6425*22dc650dSSadaf Ebrahimi if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6426*22dc650dSSadaf Ebrahimi {
6427*22dc650dSSadaf Ebrahimi #ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
6428*22dc650dSSadaf Ebrahimi if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && common->mode == PCRE2_JIT_COMPLETE)
6429*22dc650dSSadaf Ebrahimi {
6430*22dc650dSSadaf Ebrahimi if (HAS_VIRTUAL_REGISTERS)
6431*22dc650dSSadaf Ebrahimi {
6432*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6433*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6434*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6435*22dc650dSSadaf Ebrahimi }
6436*22dc650dSSadaf Ebrahimi else
6437*22dc650dSSadaf Ebrahimi {
6438*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6439*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
6440*22dc650dSSadaf Ebrahimi }
6441*22dc650dSSadaf Ebrahimi firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6442*22dc650dSSadaf Ebrahimi
6443*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6444*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_Z, STR_PTR, 0, TMP1, 0);
6445*22dc650dSSadaf Ebrahimi OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_NOT_EQUAL);
6446*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6447*22dc650dSSadaf Ebrahimi OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
6448*22dc650dSSadaf Ebrahimi #endif
6449*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6450*22dc650dSSadaf Ebrahimi
6451*22dc650dSSadaf Ebrahimi fast_forward_char_pair_simd(common, 1, common->newline & 0xff, common->newline & 0xff, 0, (common->newline >> 8) & 0xff, (common->newline >> 8) & 0xff);
6452*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6453*22dc650dSSadaf Ebrahimi }
6454*22dc650dSSadaf Ebrahimi else
6455*22dc650dSSadaf Ebrahimi #endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
6456*22dc650dSSadaf Ebrahimi {
6457*22dc650dSSadaf Ebrahimi lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6458*22dc650dSSadaf Ebrahimi if (HAS_VIRTUAL_REGISTERS)
6459*22dc650dSSadaf Ebrahimi {
6460*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6461*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6462*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6463*22dc650dSSadaf Ebrahimi }
6464*22dc650dSSadaf Ebrahimi else
6465*22dc650dSSadaf Ebrahimi {
6466*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6467*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
6468*22dc650dSSadaf Ebrahimi }
6469*22dc650dSSadaf Ebrahimi firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6470*22dc650dSSadaf Ebrahimi
6471*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
6472*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, STR_PTR, 0, TMP1, 0);
6473*22dc650dSSadaf Ebrahimi OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
6474*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6475*22dc650dSSadaf Ebrahimi OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
6476*22dc650dSSadaf Ebrahimi #endif
6477*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
6478*22dc650dSSadaf Ebrahimi
6479*22dc650dSSadaf Ebrahimi loop = LABEL();
6480*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6481*22dc650dSSadaf Ebrahimi quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6482*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
6483*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6484*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
6485*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
6486*22dc650dSSadaf Ebrahimi
6487*22dc650dSSadaf Ebrahimi JUMPHERE(quit);
6488*22dc650dSSadaf Ebrahimi JUMPHERE(lastchar);
6489*22dc650dSSadaf Ebrahimi }
6490*22dc650dSSadaf Ebrahimi
6491*22dc650dSSadaf Ebrahimi JUMPHERE(firstchar);
6492*22dc650dSSadaf Ebrahimi
6493*22dc650dSSadaf Ebrahimi if (common->match_end_ptr != 0)
6494*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6495*22dc650dSSadaf Ebrahimi return;
6496*22dc650dSSadaf Ebrahimi }
6497*22dc650dSSadaf Ebrahimi
6498*22dc650dSSadaf Ebrahimi if (HAS_VIRTUAL_REGISTERS)
6499*22dc650dSSadaf Ebrahimi {
6500*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6501*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6502*22dc650dSSadaf Ebrahimi }
6503*22dc650dSSadaf Ebrahimi else
6504*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6505*22dc650dSSadaf Ebrahimi
6506*22dc650dSSadaf Ebrahimi /* Example: match /^/ to \r\n from offset 1. */
6507*22dc650dSSadaf Ebrahimi firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6508*22dc650dSSadaf Ebrahimi
6509*22dc650dSSadaf Ebrahimi if (common->nltype == NLTYPE_ANY)
6510*22dc650dSSadaf Ebrahimi move_back(common, NULL, FALSE);
6511*22dc650dSSadaf Ebrahimi else
6512*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6513*22dc650dSSadaf Ebrahimi
6514*22dc650dSSadaf Ebrahimi loop = LABEL();
6515*22dc650dSSadaf Ebrahimi common->ff_newline_shortcut = loop;
6516*22dc650dSSadaf Ebrahimi
6517*22dc650dSSadaf Ebrahimi #ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD
6518*22dc650dSSadaf Ebrahimi if (JIT_HAS_FAST_FORWARD_CHAR_SIMD && (common->nltype == NLTYPE_FIXED || common->nltype == NLTYPE_ANYCRLF))
6519*22dc650dSSadaf Ebrahimi {
6520*22dc650dSSadaf Ebrahimi if (common->nltype == NLTYPE_ANYCRLF)
6521*22dc650dSSadaf Ebrahimi {
6522*22dc650dSSadaf Ebrahimi fast_forward_char_simd(common, CHAR_CR, CHAR_LF, 0);
6523*22dc650dSSadaf Ebrahimi if (common->mode != PCRE2_JIT_COMPLETE)
6524*22dc650dSSadaf Ebrahimi lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6525*22dc650dSSadaf Ebrahimi
6526*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6527*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6528*22dc650dSSadaf Ebrahimi quit = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6529*22dc650dSSadaf Ebrahimi }
6530*22dc650dSSadaf Ebrahimi else
6531*22dc650dSSadaf Ebrahimi {
6532*22dc650dSSadaf Ebrahimi fast_forward_char_simd(common, common->newline, common->newline, 0);
6533*22dc650dSSadaf Ebrahimi
6534*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6535*22dc650dSSadaf Ebrahimi if (common->mode != PCRE2_JIT_COMPLETE)
6536*22dc650dSSadaf Ebrahimi {
6537*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
6538*22dc650dSSadaf Ebrahimi SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
6539*22dc650dSSadaf Ebrahimi }
6540*22dc650dSSadaf Ebrahimi }
6541*22dc650dSSadaf Ebrahimi }
6542*22dc650dSSadaf Ebrahimi else
6543*22dc650dSSadaf Ebrahimi #endif /* JIT_HAS_FAST_FORWARD_CHAR_SIMD */
6544*22dc650dSSadaf Ebrahimi {
6545*22dc650dSSadaf Ebrahimi read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
6546*22dc650dSSadaf Ebrahimi lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6547*22dc650dSSadaf Ebrahimi if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
6548*22dc650dSSadaf Ebrahimi foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6549*22dc650dSSadaf Ebrahimi check_newlinechar(common, common->nltype, &newline, FALSE);
6550*22dc650dSSadaf Ebrahimi set_jumps(newline, loop);
6551*22dc650dSSadaf Ebrahimi }
6552*22dc650dSSadaf Ebrahimi
6553*22dc650dSSadaf Ebrahimi if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
6554*22dc650dSSadaf Ebrahimi {
6555*22dc650dSSadaf Ebrahimi if (quit == NULL)
6556*22dc650dSSadaf Ebrahimi {
6557*22dc650dSSadaf Ebrahimi quit = JUMP(SLJIT_JUMP);
6558*22dc650dSSadaf Ebrahimi JUMPHERE(foundcr);
6559*22dc650dSSadaf Ebrahimi }
6560*22dc650dSSadaf Ebrahimi
6561*22dc650dSSadaf Ebrahimi notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6562*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6563*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NL);
6564*22dc650dSSadaf Ebrahimi OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
6565*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6566*22dc650dSSadaf Ebrahimi OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
6567*22dc650dSSadaf Ebrahimi #endif
6568*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6569*22dc650dSSadaf Ebrahimi JUMPHERE(notfoundnl);
6570*22dc650dSSadaf Ebrahimi JUMPHERE(quit);
6571*22dc650dSSadaf Ebrahimi }
6572*22dc650dSSadaf Ebrahimi
6573*22dc650dSSadaf Ebrahimi if (lastchar)
6574*22dc650dSSadaf Ebrahimi JUMPHERE(lastchar);
6575*22dc650dSSadaf Ebrahimi JUMPHERE(firstchar);
6576*22dc650dSSadaf Ebrahimi
6577*22dc650dSSadaf Ebrahimi if (common->match_end_ptr != 0)
6578*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6579*22dc650dSSadaf Ebrahimi }
6580*22dc650dSSadaf Ebrahimi
6581*22dc650dSSadaf Ebrahimi static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
6582*22dc650dSSadaf Ebrahimi
fast_forward_start_bits(compiler_common * common)6583*22dc650dSSadaf Ebrahimi static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common)
6584*22dc650dSSadaf Ebrahimi {
6585*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
6586*22dc650dSSadaf Ebrahimi const sljit_u8 *start_bits = common->re->start_bitmap;
6587*22dc650dSSadaf Ebrahimi struct sljit_label *start;
6588*22dc650dSSadaf Ebrahimi struct sljit_jump *partial_quit;
6589*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH != 8
6590*22dc650dSSadaf Ebrahimi struct sljit_jump *found = NULL;
6591*22dc650dSSadaf Ebrahimi #endif
6592*22dc650dSSadaf Ebrahimi jump_list *matches = NULL;
6593*22dc650dSSadaf Ebrahimi
6594*22dc650dSSadaf Ebrahimi if (common->match_end_ptr != 0)
6595*22dc650dSSadaf Ebrahimi {
6596*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6597*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
6598*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
6599*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
6600*22dc650dSSadaf Ebrahimi SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);
6601*22dc650dSSadaf Ebrahimi }
6602*22dc650dSSadaf Ebrahimi
6603*22dc650dSSadaf Ebrahimi start = LABEL();
6604*22dc650dSSadaf Ebrahimi
6605*22dc650dSSadaf Ebrahimi partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6606*22dc650dSSadaf Ebrahimi if (common->mode == PCRE2_JIT_COMPLETE)
6607*22dc650dSSadaf Ebrahimi add_jump(compiler, &common->failed_match, partial_quit);
6608*22dc650dSSadaf Ebrahimi
6609*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6610*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6611*22dc650dSSadaf Ebrahimi
6612*22dc650dSSadaf Ebrahimi if (!optimize_class(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &matches))
6613*22dc650dSSadaf Ebrahimi {
6614*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH != 8
6615*22dc650dSSadaf Ebrahimi if ((start_bits[31] & 0x80) != 0)
6616*22dc650dSSadaf Ebrahimi found = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255);
6617*22dc650dSSadaf Ebrahimi else
6618*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255, start);
6619*22dc650dSSadaf Ebrahimi #elif defined SUPPORT_UNICODE
6620*22dc650dSSadaf Ebrahimi if (common->utf && is_char7_bitset(start_bits, FALSE))
6621*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 127, start);
6622*22dc650dSSadaf Ebrahimi #endif
6623*22dc650dSSadaf Ebrahimi OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
6624*22dc650dSSadaf Ebrahimi OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
6625*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
6626*22dc650dSSadaf Ebrahimi if (!HAS_VIRTUAL_REGISTERS)
6627*22dc650dSSadaf Ebrahimi {
6628*22dc650dSSadaf Ebrahimi OP2(SLJIT_SHL, TMP3, 0, SLJIT_IMM, 1, TMP2, 0);
6629*22dc650dSSadaf Ebrahimi OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP3, 0);
6630*22dc650dSSadaf Ebrahimi }
6631*22dc650dSSadaf Ebrahimi else
6632*22dc650dSSadaf Ebrahimi {
6633*22dc650dSSadaf Ebrahimi OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6634*22dc650dSSadaf Ebrahimi OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
6635*22dc650dSSadaf Ebrahimi }
6636*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_ZERO, start);
6637*22dc650dSSadaf Ebrahimi }
6638*22dc650dSSadaf Ebrahimi else
6639*22dc650dSSadaf Ebrahimi set_jumps(matches, start);
6640*22dc650dSSadaf Ebrahimi
6641*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH != 8
6642*22dc650dSSadaf Ebrahimi if (found != NULL)
6643*22dc650dSSadaf Ebrahimi JUMPHERE(found);
6644*22dc650dSSadaf Ebrahimi #endif
6645*22dc650dSSadaf Ebrahimi
6646*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6647*22dc650dSSadaf Ebrahimi
6648*22dc650dSSadaf Ebrahimi if (common->mode != PCRE2_JIT_COMPLETE)
6649*22dc650dSSadaf Ebrahimi JUMPHERE(partial_quit);
6650*22dc650dSSadaf Ebrahimi
6651*22dc650dSSadaf Ebrahimi if (common->match_end_ptr != 0)
6652*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
6653*22dc650dSSadaf Ebrahimi }
6654*22dc650dSSadaf Ebrahimi
search_requested_char(compiler_common * common,PCRE2_UCHAR req_char,BOOL caseless,BOOL has_firstchar)6655*22dc650dSSadaf Ebrahimi static SLJIT_INLINE jump_list *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)
6656*22dc650dSSadaf Ebrahimi {
6657*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
6658*22dc650dSSadaf Ebrahimi struct sljit_label *loop;
6659*22dc650dSSadaf Ebrahimi struct sljit_jump *toolong;
6660*22dc650dSSadaf Ebrahimi struct sljit_jump *already_found;
6661*22dc650dSSadaf Ebrahimi struct sljit_jump *found;
6662*22dc650dSSadaf Ebrahimi struct sljit_jump *found_oc = NULL;
6663*22dc650dSSadaf Ebrahimi jump_list *not_found = NULL;
6664*22dc650dSSadaf Ebrahimi sljit_u32 oc, bit;
6665*22dc650dSSadaf Ebrahimi
6666*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(common->req_char_ptr != 0);
6667*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(REQ_CU_MAX) * 100);
6668*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
6669*22dc650dSSadaf Ebrahimi toolong = CMP(SLJIT_LESS, TMP2, 0, STR_END, 0);
6670*22dc650dSSadaf Ebrahimi already_found = CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0);
6671*22dc650dSSadaf Ebrahimi
6672*22dc650dSSadaf Ebrahimi if (has_firstchar)
6673*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6674*22dc650dSSadaf Ebrahimi else
6675*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
6676*22dc650dSSadaf Ebrahimi
6677*22dc650dSSadaf Ebrahimi oc = req_char;
6678*22dc650dSSadaf Ebrahimi if (caseless)
6679*22dc650dSSadaf Ebrahimi {
6680*22dc650dSSadaf Ebrahimi oc = TABLE_GET(req_char, common->fcc, req_char);
6681*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE
6682*22dc650dSSadaf Ebrahimi if (req_char > 127 && (common->utf || common->ucp))
6683*22dc650dSSadaf Ebrahimi oc = UCD_OTHERCASE(req_char);
6684*22dc650dSSadaf Ebrahimi #endif
6685*22dc650dSSadaf Ebrahimi }
6686*22dc650dSSadaf Ebrahimi
6687*22dc650dSSadaf Ebrahimi #ifdef JIT_HAS_FAST_REQUESTED_CHAR_SIMD
6688*22dc650dSSadaf Ebrahimi if (JIT_HAS_FAST_REQUESTED_CHAR_SIMD)
6689*22dc650dSSadaf Ebrahimi {
6690*22dc650dSSadaf Ebrahimi not_found = fast_requested_char_simd(common, req_char, oc);
6691*22dc650dSSadaf Ebrahimi }
6692*22dc650dSSadaf Ebrahimi else
6693*22dc650dSSadaf Ebrahimi #endif
6694*22dc650dSSadaf Ebrahimi {
6695*22dc650dSSadaf Ebrahimi loop = LABEL();
6696*22dc650dSSadaf Ebrahimi add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
6697*22dc650dSSadaf Ebrahimi
6698*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
6699*22dc650dSSadaf Ebrahimi
6700*22dc650dSSadaf Ebrahimi if (req_char == oc)
6701*22dc650dSSadaf Ebrahimi found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
6702*22dc650dSSadaf Ebrahimi else
6703*22dc650dSSadaf Ebrahimi {
6704*22dc650dSSadaf Ebrahimi bit = req_char ^ oc;
6705*22dc650dSSadaf Ebrahimi if (is_powerof2(bit))
6706*22dc650dSSadaf Ebrahimi {
6707*22dc650dSSadaf Ebrahimi OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
6708*22dc650dSSadaf Ebrahimi found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
6709*22dc650dSSadaf Ebrahimi }
6710*22dc650dSSadaf Ebrahimi else
6711*22dc650dSSadaf Ebrahimi {
6712*22dc650dSSadaf Ebrahimi found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
6713*22dc650dSSadaf Ebrahimi found_oc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
6714*22dc650dSSadaf Ebrahimi }
6715*22dc650dSSadaf Ebrahimi }
6716*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
6717*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_JUMP, loop);
6718*22dc650dSSadaf Ebrahimi
6719*22dc650dSSadaf Ebrahimi JUMPHERE(found);
6720*22dc650dSSadaf Ebrahimi if (found_oc)
6721*22dc650dSSadaf Ebrahimi JUMPHERE(found_oc);
6722*22dc650dSSadaf Ebrahimi }
6723*22dc650dSSadaf Ebrahimi
6724*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
6725*22dc650dSSadaf Ebrahimi
6726*22dc650dSSadaf Ebrahimi JUMPHERE(already_found);
6727*22dc650dSSadaf Ebrahimi JUMPHERE(toolong);
6728*22dc650dSSadaf Ebrahimi return not_found;
6729*22dc650dSSadaf Ebrahimi }
6730*22dc650dSSadaf Ebrahimi
/* Emits the fast-call helper that pops entries from the top of the
STACK_TOP stack and writes the saved words back into local storage.

Each entry is identified by the word just below STACK_TOP:
  > 0  offset from the local base; the next two stack words are stored at
       that offset and at offset + sizeof(sljit_sw), and three words are
       popped
  = 0  terminator; the helper returns through RETURN_ADDR
  < 0  negated offset from the local base; the next stack word is stored
       there and two words are popped */
static void do_revertframes(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_label *next_entry;
struct sljit_jump *not_positive;
struct sljit_jump *negative;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
GET_LOCAL_BASE(TMP1, 0, 0);

/* One iteration per stack entry; TMP1 holds the local base at this label. */
next_entry = LABEL();
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -SSIZE_OF(sw));
OP2U(SLJIT_SUB | SLJIT_SET_SIG_LESS_EQUAL | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0);
not_positive = JUMP(SLJIT_SIG_LESS_EQUAL);

/* Positive value: two words are written back. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (!HAS_VIRTUAL_REGISTERS)
  {
  /* TMP1 is borrowed as a scratch register here, so the local base is
  reloaded before looping. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
  GET_LOCAL_BASE(TMP1, 0, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP3, 0);
  }
else
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw));
  }
JUMPTO(SLJIT_JUMP, next_entry);

JUMPHERE(not_positive);
/* The flags of the comparison above are declared as still current here. The
value is known to be <= 0, so "not zero" means "negative". */
sljit_set_current_flags(compiler, SLJIT_CURRENT_FLAGS_SUB | SLJIT_CURRENT_FLAGS_COMPARE | SLJIT_SET_SIG_LESS_EQUAL | SLJIT_SET_Z);
negative = JUMP(SLJIT_NOT_ZERO /* SIG_LESS */);
/* Zero: end of reverting values. */
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Negative value: negate it to get the offset; one word is written back. */
JUMPHERE(negative);
OP2(SLJIT_SUB, TMP2, 0, SLJIT_IMM, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (!HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0);
  }
else
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
  }
JUMPTO(SLJIT_JUMP, next_entry);
}
6786*22dc650dSSadaf Ebrahimi
#ifdef SUPPORT_UNICODE
/* Bit masks built from ucp_* numbers: bit N of a mask stands for number N. */

/* Mask with the single bit 'bit' set. */
#define UCPCAT(bit) (1 << (bit))
/* Masks with two or three individual bits set. */
#define UCPCAT2(bit1, bit2) (UCPCAT(bit1) | UCPCAT(bit2))
#define UCPCAT3(bit1, bit2, bit3) (UCPCAT2(bit1, bit2) | UCPCAT(bit3))
/* Mask with every bit from 'start' to 'end' (both inclusive) set. */
#define UCPCAT_RANGE(start, end) ((1 << ((end) + 1)) - (1 << (start)))
/* Bits ucp_Ll through ucp_Lu. */
#define UCPCAT_L UCPCAT_RANGE(ucp_Ll, ucp_Lu)
/* Bits ucp_Nd through ucp_No. */
#define UCPCAT_N UCPCAT_RANGE(ucp_Nd, ucp_No)
/* Every bit from 0 up to and including ucp_Zs. */
#define UCPCAT_ALL UCPCAT_RANGE(0, ucp_Zs)
#endif
6796*22dc650dSSadaf Ebrahimi
check_wordboundary(compiler_common * common,BOOL ucp)6797*22dc650dSSadaf Ebrahimi static void check_wordboundary(compiler_common *common, BOOL ucp)
6798*22dc650dSSadaf Ebrahimi {
6799*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
6800*22dc650dSSadaf Ebrahimi struct sljit_jump *skipread;
6801*22dc650dSSadaf Ebrahimi jump_list *skipread_list = NULL;
6802*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
6803*22dc650dSSadaf Ebrahimi struct sljit_label *valid_utf;
6804*22dc650dSSadaf Ebrahimi jump_list *invalid_utf1 = NULL;
6805*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE */
6806*22dc650dSSadaf Ebrahimi jump_list *invalid_utf2 = NULL;
6807*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE
6808*22dc650dSSadaf Ebrahimi struct sljit_jump *jump;
6809*22dc650dSSadaf Ebrahimi #endif /* PCRE2_CODE_UNIT_WIDTH != 8 || SUPPORT_UNICODE */
6810*22dc650dSSadaf Ebrahimi
6811*22dc650dSSadaf Ebrahimi SLJIT_UNUSED_ARG(ucp);
6812*22dc650dSSadaf Ebrahimi SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
6813*22dc650dSSadaf Ebrahimi
6814*22dc650dSSadaf Ebrahimi sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6815*22dc650dSSadaf Ebrahimi /* Get type of the previous char, and put it to TMP3. */
6816*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6817*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6818*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
6819*22dc650dSSadaf Ebrahimi skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6820*22dc650dSSadaf Ebrahimi
6821*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
6822*22dc650dSSadaf Ebrahimi if (common->invalid_utf)
6823*22dc650dSSadaf Ebrahimi {
6824*22dc650dSSadaf Ebrahimi peek_char_back(common, READ_CHAR_MAX, &invalid_utf1);
6825*22dc650dSSadaf Ebrahimi
6826*22dc650dSSadaf Ebrahimi if (common->mode != PCRE2_JIT_COMPLETE)
6827*22dc650dSSadaf Ebrahimi {
6828*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
6829*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
6830*22dc650dSSadaf Ebrahimi move_back(common, NULL, TRUE);
6831*22dc650dSSadaf Ebrahimi check_start_used_ptr(common);
6832*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
6833*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
6834*22dc650dSSadaf Ebrahimi }
6835*22dc650dSSadaf Ebrahimi }
6836*22dc650dSSadaf Ebrahimi else
6837*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE */
6838*22dc650dSSadaf Ebrahimi {
6839*22dc650dSSadaf Ebrahimi if (common->mode == PCRE2_JIT_COMPLETE)
6840*22dc650dSSadaf Ebrahimi peek_char_back(common, READ_CHAR_MAX, NULL);
6841*22dc650dSSadaf Ebrahimi else
6842*22dc650dSSadaf Ebrahimi {
6843*22dc650dSSadaf Ebrahimi move_back(common, NULL, TRUE);
6844*22dc650dSSadaf Ebrahimi check_start_used_ptr(common);
6845*22dc650dSSadaf Ebrahimi read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR);
6846*22dc650dSSadaf Ebrahimi }
6847*22dc650dSSadaf Ebrahimi }
6848*22dc650dSSadaf Ebrahimi
6849*22dc650dSSadaf Ebrahimi /* Testing char type. */
6850*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
6851*22dc650dSSadaf Ebrahimi if (ucp)
6852*22dc650dSSadaf Ebrahimi {
6853*22dc650dSSadaf Ebrahimi add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
6854*22dc650dSSadaf Ebrahimi OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP1, 0);
6855*22dc650dSSadaf Ebrahimi OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N);
6856*22dc650dSSadaf Ebrahimi OP_FLAGS(SLJIT_MOV, TMP3, 0, SLJIT_NOT_ZERO);
6857*22dc650dSSadaf Ebrahimi }
6858*22dc650dSSadaf Ebrahimi else
6859*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE */
6860*22dc650dSSadaf Ebrahimi {
6861*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH != 8
6862*22dc650dSSadaf Ebrahimi jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
6863*22dc650dSSadaf Ebrahimi #elif defined SUPPORT_UNICODE
6864*22dc650dSSadaf Ebrahimi /* Here TMP3 has already been zeroed. */
6865*22dc650dSSadaf Ebrahimi jump = NULL;
6866*22dc650dSSadaf Ebrahimi if (common->utf)
6867*22dc650dSSadaf Ebrahimi jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
6868*22dc650dSSadaf Ebrahimi #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
6869*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
6870*22dc650dSSadaf Ebrahimi OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
6871*22dc650dSSadaf Ebrahimi OP2(SLJIT_AND, TMP3, 0, TMP1, 0, SLJIT_IMM, 1);
6872*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH != 8
6873*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
6874*22dc650dSSadaf Ebrahimi #elif defined SUPPORT_UNICODE
6875*22dc650dSSadaf Ebrahimi if (jump != NULL)
6876*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
6877*22dc650dSSadaf Ebrahimi #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
6878*22dc650dSSadaf Ebrahimi }
6879*22dc650dSSadaf Ebrahimi JUMPHERE(skipread);
6880*22dc650dSSadaf Ebrahimi
6881*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
6882*22dc650dSSadaf Ebrahimi check_str_end(common, &skipread_list);
6883*22dc650dSSadaf Ebrahimi peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &invalid_utf2);
6884*22dc650dSSadaf Ebrahimi
6885*22dc650dSSadaf Ebrahimi /* Testing char type. This is a code duplication. */
6886*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
6887*22dc650dSSadaf Ebrahimi
6888*22dc650dSSadaf Ebrahimi valid_utf = LABEL();
6889*22dc650dSSadaf Ebrahimi
6890*22dc650dSSadaf Ebrahimi if (ucp)
6891*22dc650dSSadaf Ebrahimi {
6892*22dc650dSSadaf Ebrahimi add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
6893*22dc650dSSadaf Ebrahimi OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP1, 0);
6894*22dc650dSSadaf Ebrahimi OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N);
6895*22dc650dSSadaf Ebrahimi OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
6896*22dc650dSSadaf Ebrahimi }
6897*22dc650dSSadaf Ebrahimi else
6898*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE */
6899*22dc650dSSadaf Ebrahimi {
6900*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH != 8
6901*22dc650dSSadaf Ebrahimi /* TMP2 may be destroyed by peek_char. */
6902*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
6903*22dc650dSSadaf Ebrahimi jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
6904*22dc650dSSadaf Ebrahimi #elif defined SUPPORT_UNICODE
6905*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
6906*22dc650dSSadaf Ebrahimi jump = NULL;
6907*22dc650dSSadaf Ebrahimi if (common->utf)
6908*22dc650dSSadaf Ebrahimi jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
6909*22dc650dSSadaf Ebrahimi #endif
6910*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
6911*22dc650dSSadaf Ebrahimi OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
6912*22dc650dSSadaf Ebrahimi OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
6913*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH != 8
6914*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
6915*22dc650dSSadaf Ebrahimi #elif defined SUPPORT_UNICODE
6916*22dc650dSSadaf Ebrahimi if (jump != NULL)
6917*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
6918*22dc650dSSadaf Ebrahimi #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
6919*22dc650dSSadaf Ebrahimi }
6920*22dc650dSSadaf Ebrahimi set_jumps(skipread_list, LABEL());
6921*22dc650dSSadaf Ebrahimi
6922*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6923*22dc650dSSadaf Ebrahimi OP2(SLJIT_XOR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, TMP3, 0);
6924*22dc650dSSadaf Ebrahimi OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
6925*22dc650dSSadaf Ebrahimi
6926*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
6927*22dc650dSSadaf Ebrahimi if (common->invalid_utf)
6928*22dc650dSSadaf Ebrahimi {
6929*22dc650dSSadaf Ebrahimi set_jumps(invalid_utf1, LABEL());
6930*22dc650dSSadaf Ebrahimi
6931*22dc650dSSadaf Ebrahimi peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, NULL);
6932*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR, valid_utf);
6933*22dc650dSSadaf Ebrahimi
6934*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6935*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1);
6936*22dc650dSSadaf Ebrahimi OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
6937*22dc650dSSadaf Ebrahimi
6938*22dc650dSSadaf Ebrahimi set_jumps(invalid_utf2, LABEL());
6939*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6940*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, TMP3, 0);
6941*22dc650dSSadaf Ebrahimi OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
6942*22dc650dSSadaf Ebrahimi }
6943*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE */
6944*22dc650dSSadaf Ebrahimi }
6945*22dc650dSSadaf Ebrahimi
optimize_class_ranges(compiler_common * common,const sljit_u8 * bits,BOOL nclass,BOOL invert,jump_list ** backtracks)6946*22dc650dSSadaf Ebrahimi static BOOL optimize_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
6947*22dc650dSSadaf Ebrahimi {
6948*22dc650dSSadaf Ebrahimi /* May destroy TMP1. */
6949*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
6950*22dc650dSSadaf Ebrahimi int ranges[MAX_CLASS_RANGE_SIZE];
6951*22dc650dSSadaf Ebrahimi sljit_u8 bit, cbit, all;
6952*22dc650dSSadaf Ebrahimi int i, byte, length = 0;
6953*22dc650dSSadaf Ebrahimi
6954*22dc650dSSadaf Ebrahimi bit = bits[0] & 0x1;
6955*22dc650dSSadaf Ebrahimi /* All bits will be zero or one (since bit is zero or one). */
6956*22dc650dSSadaf Ebrahimi all = (sljit_u8)-bit;
6957*22dc650dSSadaf Ebrahimi
6958*22dc650dSSadaf Ebrahimi for (i = 0; i < 256; )
6959*22dc650dSSadaf Ebrahimi {
6960*22dc650dSSadaf Ebrahimi byte = i >> 3;
6961*22dc650dSSadaf Ebrahimi if ((i & 0x7) == 0 && bits[byte] == all)
6962*22dc650dSSadaf Ebrahimi i += 8;
6963*22dc650dSSadaf Ebrahimi else
6964*22dc650dSSadaf Ebrahimi {
6965*22dc650dSSadaf Ebrahimi cbit = (bits[byte] >> (i & 0x7)) & 0x1;
6966*22dc650dSSadaf Ebrahimi if (cbit != bit)
6967*22dc650dSSadaf Ebrahimi {
6968*22dc650dSSadaf Ebrahimi if (length >= MAX_CLASS_RANGE_SIZE)
6969*22dc650dSSadaf Ebrahimi return FALSE;
6970*22dc650dSSadaf Ebrahimi ranges[length] = i;
6971*22dc650dSSadaf Ebrahimi length++;
6972*22dc650dSSadaf Ebrahimi bit = cbit;
6973*22dc650dSSadaf Ebrahimi all = (sljit_u8)-cbit; /* sign extend bit into byte */
6974*22dc650dSSadaf Ebrahimi }
6975*22dc650dSSadaf Ebrahimi i++;
6976*22dc650dSSadaf Ebrahimi }
6977*22dc650dSSadaf Ebrahimi }
6978*22dc650dSSadaf Ebrahimi
6979*22dc650dSSadaf Ebrahimi if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
6980*22dc650dSSadaf Ebrahimi {
6981*22dc650dSSadaf Ebrahimi if (length >= MAX_CLASS_RANGE_SIZE)
6982*22dc650dSSadaf Ebrahimi return FALSE;
6983*22dc650dSSadaf Ebrahimi ranges[length] = 256;
6984*22dc650dSSadaf Ebrahimi length++;
6985*22dc650dSSadaf Ebrahimi }
6986*22dc650dSSadaf Ebrahimi
6987*22dc650dSSadaf Ebrahimi if (length < 0 || length > 4)
6988*22dc650dSSadaf Ebrahimi return FALSE;
6989*22dc650dSSadaf Ebrahimi
6990*22dc650dSSadaf Ebrahimi bit = bits[0] & 0x1;
6991*22dc650dSSadaf Ebrahimi if (invert) bit ^= 0x1;
6992*22dc650dSSadaf Ebrahimi
6993*22dc650dSSadaf Ebrahimi /* No character is accepted. */
6994*22dc650dSSadaf Ebrahimi if (length == 0 && bit == 0)
6995*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6996*22dc650dSSadaf Ebrahimi
6997*22dc650dSSadaf Ebrahimi switch(length)
6998*22dc650dSSadaf Ebrahimi {
6999*22dc650dSSadaf Ebrahimi case 0:
7000*22dc650dSSadaf Ebrahimi /* When bit != 0, all characters are accepted. */
7001*22dc650dSSadaf Ebrahimi return TRUE;
7002*22dc650dSSadaf Ebrahimi
7003*22dc650dSSadaf Ebrahimi case 1:
7004*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
7005*22dc650dSSadaf Ebrahimi return TRUE;
7006*22dc650dSSadaf Ebrahimi
7007*22dc650dSSadaf Ebrahimi case 2:
7008*22dc650dSSadaf Ebrahimi if (ranges[0] + 1 != ranges[1])
7009*22dc650dSSadaf Ebrahimi {
7010*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
7011*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
7012*22dc650dSSadaf Ebrahimi }
7013*22dc650dSSadaf Ebrahimi else
7014*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
7015*22dc650dSSadaf Ebrahimi return TRUE;
7016*22dc650dSSadaf Ebrahimi
7017*22dc650dSSadaf Ebrahimi case 3:
7018*22dc650dSSadaf Ebrahimi if (bit != 0)
7019*22dc650dSSadaf Ebrahimi {
7020*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
7021*22dc650dSSadaf Ebrahimi if (ranges[0] + 1 != ranges[1])
7022*22dc650dSSadaf Ebrahimi {
7023*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
7024*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
7025*22dc650dSSadaf Ebrahimi }
7026*22dc650dSSadaf Ebrahimi else
7027*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
7028*22dc650dSSadaf Ebrahimi return TRUE;
7029*22dc650dSSadaf Ebrahimi }
7030*22dc650dSSadaf Ebrahimi
7031*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
7032*22dc650dSSadaf Ebrahimi if (ranges[1] + 1 != ranges[2])
7033*22dc650dSSadaf Ebrahimi {
7034*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
7035*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
7036*22dc650dSSadaf Ebrahimi }
7037*22dc650dSSadaf Ebrahimi else
7038*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
7039*22dc650dSSadaf Ebrahimi return TRUE;
7040*22dc650dSSadaf Ebrahimi
7041*22dc650dSSadaf Ebrahimi case 4:
7042*22dc650dSSadaf Ebrahimi if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
7043*22dc650dSSadaf Ebrahimi && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
7044*22dc650dSSadaf Ebrahimi && (ranges[1] & (ranges[2] - ranges[0])) == 0
7045*22dc650dSSadaf Ebrahimi && is_powerof2(ranges[2] - ranges[0]))
7046*22dc650dSSadaf Ebrahimi {
7047*22dc650dSSadaf Ebrahimi SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
7048*22dc650dSSadaf Ebrahimi OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
7049*22dc650dSSadaf Ebrahimi if (ranges[2] + 1 != ranges[3])
7050*22dc650dSSadaf Ebrahimi {
7051*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
7052*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
7053*22dc650dSSadaf Ebrahimi }
7054*22dc650dSSadaf Ebrahimi else
7055*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
7056*22dc650dSSadaf Ebrahimi return TRUE;
7057*22dc650dSSadaf Ebrahimi }
7058*22dc650dSSadaf Ebrahimi
7059*22dc650dSSadaf Ebrahimi if (bit != 0)
7060*22dc650dSSadaf Ebrahimi {
7061*22dc650dSSadaf Ebrahimi i = 0;
7062*22dc650dSSadaf Ebrahimi if (ranges[0] + 1 != ranges[1])
7063*22dc650dSSadaf Ebrahimi {
7064*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
7065*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
7066*22dc650dSSadaf Ebrahimi i = ranges[0];
7067*22dc650dSSadaf Ebrahimi }
7068*22dc650dSSadaf Ebrahimi else
7069*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
7070*22dc650dSSadaf Ebrahimi
7071*22dc650dSSadaf Ebrahimi if (ranges[2] + 1 != ranges[3])
7072*22dc650dSSadaf Ebrahimi {
7073*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
7074*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
7075*22dc650dSSadaf Ebrahimi }
7076*22dc650dSSadaf Ebrahimi else
7077*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
7078*22dc650dSSadaf Ebrahimi return TRUE;
7079*22dc650dSSadaf Ebrahimi }
7080*22dc650dSSadaf Ebrahimi
7081*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
7082*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
7083*22dc650dSSadaf Ebrahimi if (ranges[1] + 1 != ranges[2])
7084*22dc650dSSadaf Ebrahimi {
7085*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
7086*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
7087*22dc650dSSadaf Ebrahimi }
7088*22dc650dSSadaf Ebrahimi else
7089*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
7090*22dc650dSSadaf Ebrahimi return TRUE;
7091*22dc650dSSadaf Ebrahimi
7092*22dc650dSSadaf Ebrahimi default:
7093*22dc650dSSadaf Ebrahimi SLJIT_UNREACHABLE();
7094*22dc650dSSadaf Ebrahimi return FALSE;
7095*22dc650dSSadaf Ebrahimi }
7096*22dc650dSSadaf Ebrahimi }
7097*22dc650dSSadaf Ebrahimi
/* Tries to express the class bitmap in bits as a handful of individual
characters and emits a compare-and-select sequence that leaves TMP2 non-zero
when TMP1 equals one of them. The bitmap is complemented first when nclass is
set. Two characters that differ only in bit 0x20 share one list entry (marked
with 0x100) and are tested after bit 0x20 has been forced on in TMP1.

Returns FALSE, with nothing emitted, when the CPU has no conditional move,
when more than MAX_CLASS_CHARS_SIZE entries would be needed, or when the
list is empty. Otherwise a jump testing TMP2 is added to backtracks and TRUE
is returned. */
static BOOL optimize_class_chars(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* May destroy TMP1. */
DEFINE_COMPILER;
uint16_t char_list[MAX_CLASS_CHARS_SIZE];
uint8_t mask;
BOOL merged;
BOOL has_pairs = FALSE;
BOOL jump_on_hit;
int idx, pos, k, c, first;
int len = 0;

if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
  return FALSE;

/* Collect the characters, lowest value first. */
for (idx = 0; idx < 32; idx++)
  {
  mask = nclass ? (sljit_u8)~bits[idx] : bits[idx];

  for (pos = 0; mask != 0; pos++, mask >>= 1)
    {
    if ((mask & 0x1) == 0)
      continue;

    c = idx * 8 + pos;
    merged = FALSE;

    /* If the partner without bit 0x20 is already listed, turn that entry
    into a pair instead of adding a new one. */
    if ((c & 0x20) != 0)
      {
      for (k = 0; k < len; k++)
        if (char_list[k] == c - 0x20)
          {
          char_list[k] |= 0x120;
          merged = TRUE;
          break;
          }
      }

    if (merged)
      continue;

    if (len >= MAX_CLASS_CHARS_SIZE)
      return FALSE;

    char_list[len++] = (uint16_t) c;
    }
  }

if (len == 0) return FALSE;  /* Should never occur, but stops analyzers complaining. */

/* Character zero gets a flag-based test of its own; presumably because the
selects below copy TMP1 into TMP2 on a match, which would leave TMP2 zero. */
if (char_list[0] == 0)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_ZERO);
  first = 1;
  }
else
  {
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  first = 0;
  }

/* Plain entries are compared against the unmodified TMP1. */
for (k = first; k < len; k++)
  {
  if ((char_list[k] & 0x100) != 0)
    {
    has_pairs = TRUE;
    continue;
    }

  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, char_list[k]);
  SELECT(SLJIT_ZERO, TMP2, TMP1, 0, TMP2);
  }

/* Paired entries are compared after bit 0x20 has been set in TMP1. */
if (has_pairs)
  {
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x20);

  for (k = 0; k < len; k++)
    {
    if ((char_list[k] & 0x100) == 0)
      continue;

    OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, char_list[k] & 0xff);
    SELECT(SLJIT_ZERO, TMP2, TMP1, 0, TMP2);
    }
  }

/* invert swaps which outcome of the TMP2 test leads to backtracking. */
jump_on_hit = invert ? !nclass : (nclass != 0);
add_jump(compiler, backtracks, CMP(jump_on_hit ? SLJIT_NOT_EQUAL : SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
return TRUE;
}
7198*22dc650dSSadaf Ebrahimi
/* Emits an optimized test for the class bitmap in bits: the range based
form is tried first and the character list form is used as the fallback.
Returns TRUE when either of them emitted code. */
static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* May destroy TMP1. */
return optimize_class_ranges(common, bits, nclass, invert, backtracks)
  ? TRUE
  : optimize_class_chars(common, bits, nclass, invert, backtracks);
}
7206*22dc650dSSadaf Ebrahimi
/* Emits a fast-called routine that tests the character in TMP1 against
0x0a..0x0d and 0x85, and additionally against 0x2028 and 0x2029 when wide
characters are possible (16/32-bit code units, or UTF mode in the 8-bit
library). The outcome is accumulated in TMP2, and the last OR also sets the
zero flag from it. TMP1 is modified. */
static void check_anynewline(compiler_common *common)
{
DEFINE_COMPILER;
const int rebase = 0x0a;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Shift the value so that 0x0a becomes zero, then one comparison covers
the whole 0x0a..0x0d run. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, rebase);
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x0d - rebase);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - rebase);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
#endif
  {
  /* Record the 0x85 result, then fold 0x2028 onto 0x2029 by setting the
  lowest bit so that a single comparison tests both. */
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - rebase);
  }
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */

/* Merge the result of the last comparison and set the zero flag. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7233*22dc650dSSadaf Ebrahimi
/* Emits a fast-called routine that tests the character in TMP1 against the
horizontal white space characters 0x09, 0x20 and 0xa0, and additionally
against 0x1680, 0x180e, 0x2000..0x200a, 0x202f, 0x205f and 0x3000 when wide
characters are possible (16/32-bit code units, or UTF mode in the 8-bit
library). The outcome is accumulated in TMP2, and the last OR also sets the
zero flag from it. TMP1 is modified on the wide character path. */
static void check_hspace(compiler_common *common)
{
DEFINE_COMPILER;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Each comparison only sets flags; the following OP_FLAGS records it. */
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x09);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);

OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x20);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xa0);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
#endif
  {
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x1680);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

  /* Shift the value so that 0x2000 becomes zero; the remaining constants
  are expressed relative to that base. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);

  OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);

  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
  }
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */

/* Merge the result of the last comparison and set the zero flag. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7272*22dc650dSSadaf Ebrahimi
/* Emits a fast-called routine that tests the character in TMP1 against the
vertical white space characters 0x0a..0x0d and 0x85, and additionally
against 0x2028 and 0x2029 when wide characters are possible (16/32-bit code
units, or UTF mode in the 8-bit library). The outcome is accumulated in
TMP2, and the last OR also sets the zero flag from it. TMP1 is modified. */
static void check_vspace(compiler_common *common)
{
DEFINE_COMPILER;
const int first_vspace = 0x0a;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* With TMP1 made relative to 0x0a, the 0x0a..0x0d run needs only one
comparison. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, first_vspace);
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x0d - first_vspace);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - first_vspace);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
#endif
  {
  /* Keep the 0x85 result, then set the lowest bit of TMP1 so that 0x2028
  and 0x2029 are caught by the same comparison. */
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - first_vspace);
  }
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */

/* Merge the result of the last comparison and set the zero flag. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7300*22dc650dSSadaf Ebrahimi
/* Emits a fast-called routine that compares two runs of code units one unit
at a time. The return address is kept in LOCALS0. On entry STR_PTR is moved
back by TMP2; the loop then reads one unit through TMP1 and one through
STR_PTR, and stops at the first difference or when TMP2, decreased by
IN_UCHARS(1) per round, reaches zero. The return address is reloaded into
TMP1 for the fast return.

Where the target offers loads with base register update (post- or
pre-increment), those forms are used; otherwise plain loads followed by
explicit pointer increments are emitted. When virtual registers are in use,
STR_END and STACK_TOP serve as the two character registers, and their
values are parked in TMP3 and RETURN_ADDR for the duration of the loop. */
static void do_casefulcmp(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *mismatch;
struct sljit_label *loop;
int unit1_reg = TMP3;
int unit2_reg = RETURN_ADDR;
int update_form = 0;   /* 0: plain loads, 1: post-update, 2: pre-update */
BOOL borrowed;

if (HAS_VIRTUAL_REGISTERS)
  {
  unit1_reg = STR_END;
  unit2_reg = STACK_TOP;
  }
borrowed = (unit1_reg == STR_END);

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

if (borrowed)
  {
  OP1(SLJIT_MOV, TMP3, 0, unit1_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, unit2_reg, 0);
  }

/* SLJIT_MEM_SUPP only asks whether the addressing form is available. */
if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, unit1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  update_form = 1;
else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, unit1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  update_form = 2;

/* Pre-update loads advance the pointer before reading, so both pointers
start one unit early. */
if (update_form == 2)
  {
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }

loop = LABEL();

switch (update_form)
  {
  case 1:
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, unit1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, unit2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  case 2:
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, unit1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, unit2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  default:
  OP1(MOV_UCHAR, unit1_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, unit2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  break;
  }

mismatch = CMP(SLJIT_NOT_EQUAL, unit1_reg, 0, unit2_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, loop);

JUMPHERE(mismatch);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

/* Undo the one unit head start taken for the pre-update form. */
if (update_form == 2)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (borrowed)
  {
  OP1(SLJIT_MOV, unit1_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, unit2_reg, 0, RETURN_ADDR, 0);
  }

OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
}
7380*22dc650dSSadaf Ebrahimi
/* Emits the shared subroutine that compares two code unit sequences without
regard to case, by mapping the code units of both sides through the
common->lcc table before comparing them.

Register usage, as established by the instructions emitted below:
  TMP1    - used as the address of the first sequence; it is overwritten with
            the saved return address just before the fast return.
  STR_PTR - moved back by TMP2 on entry and then used as the address of the
            second sequence; it advances by one code unit per unit loaded.
  TMP2    - decreased by IN_UCHARS(1) after every matching unit. It reaches
            zero when all units matched; on a mismatch the decrement is
            skipped, so it is left non-zero.
            NOTE(review): presumably the length of the sequence in bytes and
            the value the caller tests afterwards - confirm at the call sites.

The return address is kept in LOCALS0 and the original value of STR_END
(used here as char1_reg) in LOCALS1 while the routine runs. */
do_caselesscmp(compiler_common * common)7381*22dc650dSSadaf Ebrahimi static void do_caselesscmp(compiler_common *common)
7382*22dc650dSSadaf Ebrahimi {
7383*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
7384*22dc650dSSadaf Ebrahimi struct sljit_jump *jump;
7385*22dc650dSSadaf Ebrahimi struct sljit_label *label;
7386*22dc650dSSadaf Ebrahimi int char1_reg = STR_END;
7387*22dc650dSSadaf Ebrahimi int char2_reg;
7388*22dc650dSSadaf Ebrahimi int lcc_table;
/* Addressing form used by the loop: 0 = plain load followed by an explicit
add, 1 = post-update load, 2 = pre-update load. */
7389*22dc650dSSadaf Ebrahimi int opt_type = 0;
7390*22dc650dSSadaf Ebrahimi
/* Choose the two scratch registers. In the HAS_VIRTUAL_REGISTERS case
STACK_TOP and STACK_LIMIT are borrowed; their values are parked in TMP3 and
RETURN_ADDR below and restored before returning. */
7391*22dc650dSSadaf Ebrahimi if (HAS_VIRTUAL_REGISTERS)
7392*22dc650dSSadaf Ebrahimi {
7393*22dc650dSSadaf Ebrahimi char2_reg = STACK_TOP;
7394*22dc650dSSadaf Ebrahimi lcc_table = STACK_LIMIT;
7395*22dc650dSSadaf Ebrahimi }
7396*22dc650dSSadaf Ebrahimi else
7397*22dc650dSSadaf Ebrahimi {
7398*22dc650dSSadaf Ebrahimi char2_reg = RETURN_ADDR;
7399*22dc650dSSadaf Ebrahimi lcc_table = TMP3;
7400*22dc650dSSadaf Ebrahimi }
7401*22dc650dSSadaf Ebrahimi
/* Query (SLJIT_MEM_SUPP) whether a post-update or, failing that, a pre-update
load of one code unit is available; only the result of the call is used here. */
7402*22dc650dSSadaf Ebrahimi if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
7403*22dc650dSSadaf Ebrahimi opt_type = 1;
7404*22dc650dSSadaf Ebrahimi else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
7405*22dc650dSSadaf Ebrahimi opt_type = 2;
7406*22dc650dSSadaf Ebrahimi
/* Routine entry: store the return address in LOCALS0 and move STR_PTR back by
TMP2. */
7407*22dc650dSSadaf Ebrahimi sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7408*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
7409*22dc650dSSadaf Ebrahimi
/* Save char1_reg (STR_END); it is reloaded from LOCALS1 at the end. */
7410*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, char1_reg, 0);
7411*22dc650dSSadaf Ebrahimi
/* Save the borrowed STACK_TOP / STACK_LIMIT values (see the register choice
above). */
7412*22dc650dSSadaf Ebrahimi if (char2_reg == STACK_TOP)
7413*22dc650dSSadaf Ebrahimi {
7414*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP3, 0, char2_reg, 0);
7415*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, RETURN_ADDR, 0, lcc_table, 0);
7416*22dc650dSSadaf Ebrahimi }
7417*22dc650dSSadaf Ebrahimi
/* Base address of the case mapping table used by the lookups in the loop. */
7418*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc);
7419*22dc650dSSadaf Ebrahimi
/* Loop head: load one code unit from each sequence into char1_reg and
char2_reg. */
7420*22dc650dSSadaf Ebrahimi if (opt_type == 1)
7421*22dc650dSSadaf Ebrahimi {
7422*22dc650dSSadaf Ebrahimi label = LABEL();
7423*22dc650dSSadaf Ebrahimi sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
7424*22dc650dSSadaf Ebrahimi sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
7425*22dc650dSSadaf Ebrahimi }
7426*22dc650dSSadaf Ebrahimi else if (opt_type == 2)
7427*22dc650dSSadaf Ebrahimi {
/* Both pointers start one code unit early for the pre-update form
(presumably because the update is applied before the load); STR_PTR is
corrected again after the loop. */
7428*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
7429*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7430*22dc650dSSadaf Ebrahimi
7431*22dc650dSSadaf Ebrahimi label = LABEL();
7432*22dc650dSSadaf Ebrahimi sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
7433*22dc650dSSadaf Ebrahimi sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
7434*22dc650dSSadaf Ebrahimi }
7435*22dc650dSSadaf Ebrahimi else
7436*22dc650dSSadaf Ebrahimi {
/* Plain loads; TMP1 is advanced here, STR_PTR after the table lookups. */
7437*22dc650dSSadaf Ebrahimi label = LABEL();
7438*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
7439*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
7440*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
7441*22dc650dSSadaf Ebrahimi }
7442*22dc650dSSadaf Ebrahimi
/* Map each code unit through the lcc table. When code units are wider than
8 bits, values above 255 jump over the lookup and are compared unchanged. */
7443*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH != 8
7444*22dc650dSSadaf Ebrahimi jump = CMP(SLJIT_GREATER, char1_reg, 0, SLJIT_IMM, 255);
7445*22dc650dSSadaf Ebrahimi #endif
7446*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_U8, char1_reg, 0, SLJIT_MEM2(lcc_table, char1_reg), 0);
7447*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH != 8
7448*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
7449*22dc650dSSadaf Ebrahimi jump = CMP(SLJIT_GREATER, char2_reg, 0, SLJIT_IMM, 255);
7450*22dc650dSSadaf Ebrahimi #endif
7451*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_U8, char2_reg, 0, SLJIT_MEM2(lcc_table, char2_reg), 0);
7452*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH != 8
7453*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
7454*22dc650dSSadaf Ebrahimi #endif
7455*22dc650dSSadaf Ebrahimi
7456*22dc650dSSadaf Ebrahimi if (opt_type == 0)
7457*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7458*22dc650dSSadaf Ebrahimi
/* Leave the loop on the first difference (TMP2 is then not decremented);
otherwise count down TMP2 and repeat until it becomes zero. */
7459*22dc650dSSadaf Ebrahimi jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
7460*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
7461*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_NOT_ZERO, label);
7462*22dc650dSSadaf Ebrahimi
/* Common exit: fetch the return address into TMP1. */
7463*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
7464*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7465*22dc650dSSadaf Ebrahimi
/* Undo the one unit pre-loop decrement of STR_PTR made for the pre-update
form. TMP1 needs no correction: it now holds the return address. */
7466*22dc650dSSadaf Ebrahimi if (opt_type == 2)
7467*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7468*22dc650dSSadaf Ebrahimi
/* Restore the borrowed STACK_TOP / STACK_LIMIT values. */
7469*22dc650dSSadaf Ebrahimi if (char2_reg == STACK_TOP)
7470*22dc650dSSadaf Ebrahimi {
7471*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, char2_reg, 0, TMP3, 0);
7472*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, lcc_table, 0, RETURN_ADDR, 0);
7473*22dc650dSSadaf Ebrahimi }
7474*22dc650dSSadaf Ebrahimi
/* Restore char1_reg (STR_END) and return through the address in TMP1. */
7475*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
7476*22dc650dSSadaf Ebrahimi OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
7477*22dc650dSSadaf Ebrahimi }
7478*22dc650dSSadaf Ebrahimi
/* Emits code that compares one pattern character starting at cc (all of its
code units when common->utf is set and the first unit has extra length)
against subject data read at negative offsets from STR_PTR.

  caseless   - when TRUE and the character has an other case, the differing
               bit reported by char_get_othercase_bit() is OR-ed into both the
               subject data and the expected constant for the code unit that
               carries it; all other code units are compared exactly.
  cc         - first code unit of the pattern character.
  context    - state carried across consecutive calls: the remaining length
               (reduced by IN_UCHARS(1) per code unit), the register that
               receives the next load (sourcereg, -1 before the first load)
               and, on the unaligned read path, the code units and case masks
               collected so far (c, oc, ucharptr).
  backtracks - receives the jumps taken when a comparison fails.

Returns cc advanced past the code units that were processed. */
byte_sequence_compare(compiler_common * common,BOOL caseless,PCRE2_SPTR cc,compare_context * context,jump_list ** backtracks)7479*22dc650dSSadaf Ebrahimi static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc,
7480*22dc650dSSadaf Ebrahimi compare_context *context, jump_list **backtracks)
7481*22dc650dSSadaf Ebrahimi {
7482*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
7483*22dc650dSSadaf Ebrahimi unsigned int othercasebit = 0;
7484*22dc650dSSadaf Ebrahimi PCRE2_SPTR othercasechar = NULL;
7485*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
7486*22dc650dSSadaf Ebrahimi int utflength;
7487*22dc650dSSadaf Ebrahimi #endif
7488*22dc650dSSadaf Ebrahimi
7489*22dc650dSSadaf Ebrahimi if (caseless && char_has_othercase(common, cc))
7490*22dc650dSSadaf Ebrahimi {
7491*22dc650dSSadaf Ebrahimi othercasebit = char_get_othercase_bit(common, cc);
7492*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(othercasebit);
7493*22dc650dSSadaf Ebrahimi /* Extracting bit difference info. */
/* The value is split into the position of the code unit that carries the
differing bit (othercasechar, relative to cc) and the bit mask itself. */
7494*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8
7495*22dc650dSSadaf Ebrahimi othercasechar = cc + (othercasebit >> 8);
7496*22dc650dSSadaf Ebrahimi othercasebit &= 0xff;
7497*22dc650dSSadaf Ebrahimi #elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
7498*22dc650dSSadaf Ebrahimi /* Note that this code only handles characters in the BMP. If there
7499*22dc650dSSadaf Ebrahimi ever are characters outside the BMP whose othercase differs in only one
7500*22dc650dSSadaf Ebrahimi bit from itself (there currently are none), this code will need to be
7501*22dc650dSSadaf Ebrahimi revised for PCRE2_CODE_UNIT_WIDTH == 32. */
7502*22dc650dSSadaf Ebrahimi othercasechar = cc + (othercasebit >> 9);
/* Bit 0x100 selects the upper byte of the code unit for the mask. */
7503*22dc650dSSadaf Ebrahimi if ((othercasebit & 0x100) != 0)
7504*22dc650dSSadaf Ebrahimi othercasebit = (othercasebit & 0xff) << 8;
7505*22dc650dSSadaf Ebrahimi else
7506*22dc650dSSadaf Ebrahimi othercasebit &= 0xff;
7507*22dc650dSSadaf Ebrahimi #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
7508*22dc650dSSadaf Ebrahimi }
7509*22dc650dSSadaf Ebrahimi
/* First call for this context: load the first group of subject data into TMP1
(as wide as the remaining length and the unaligned read support allow) and make
TMP2 the target of the next load. */
7510*22dc650dSSadaf Ebrahimi if (context->sourcereg == -1)
7511*22dc650dSSadaf Ebrahimi {
7512*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8
7513*22dc650dSSadaf Ebrahimi #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
7514*22dc650dSSadaf Ebrahimi if (context->length >= 4)
7515*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7516*22dc650dSSadaf Ebrahimi else if (context->length >= 2)
7517*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7518*22dc650dSSadaf Ebrahimi else
7519*22dc650dSSadaf Ebrahimi #endif
7520*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7521*22dc650dSSadaf Ebrahimi #elif PCRE2_CODE_UNIT_WIDTH == 16
7522*22dc650dSSadaf Ebrahimi #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
7523*22dc650dSSadaf Ebrahimi if (context->length >= 4)
7524*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7525*22dc650dSSadaf Ebrahimi else
7526*22dc650dSSadaf Ebrahimi #endif
7527*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7528*22dc650dSSadaf Ebrahimi #elif PCRE2_CODE_UNIT_WIDTH == 32
7529*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7530*22dc650dSSadaf Ebrahimi #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
7531*22dc650dSSadaf Ebrahimi context->sourcereg = TMP2;
7532*22dc650dSSadaf Ebrahimi }
7533*22dc650dSSadaf Ebrahimi
/* With Unicode support the body below runs once per code unit of the
character; without it the body runs exactly once. */
7534*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
7535*22dc650dSSadaf Ebrahimi utflength = 1;
7536*22dc650dSSadaf Ebrahimi if (common->utf && HAS_EXTRALEN(*cc))
7537*22dc650dSSadaf Ebrahimi utflength += GET_EXTRALEN(*cc);
7538*22dc650dSSadaf Ebrahimi
7539*22dc650dSSadaf Ebrahimi do
7540*22dc650dSSadaf Ebrahimi {
7541*22dc650dSSadaf Ebrahimi #endif
7542*22dc650dSSadaf Ebrahimi
7543*22dc650dSSadaf Ebrahimi context->length -= IN_UCHARS(1);
7544*22dc650dSSadaf Ebrahimi #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
7545*22dc650dSSadaf Ebrahimi
7546*22dc650dSSadaf Ebrahimi /* Unaligned read is supported. */
/* Collect the expected code unit and its case mask into the pending group; the
comparison is emitted only when the group is flushed below. */
7547*22dc650dSSadaf Ebrahimi if (othercasebit != 0 && othercasechar == cc)
7548*22dc650dSSadaf Ebrahimi {
7549*22dc650dSSadaf Ebrahimi context->c.asuchars[context->ucharptr] = *cc | othercasebit;
7550*22dc650dSSadaf Ebrahimi context->oc.asuchars[context->ucharptr] = othercasebit;
7551*22dc650dSSadaf Ebrahimi }
7552*22dc650dSSadaf Ebrahimi else
7553*22dc650dSSadaf Ebrahimi {
7554*22dc650dSSadaf Ebrahimi context->c.asuchars[context->ucharptr] = *cc;
7555*22dc650dSSadaf Ebrahimi context->oc.asuchars[context->ucharptr] = 0;
7556*22dc650dSSadaf Ebrahimi }
7557*22dc650dSSadaf Ebrahimi context->ucharptr++;
7558*22dc650dSSadaf Ebrahimi
/* Flush when the group is full, when no data remains, or (8 bit only) when two
units are collected and a single byte remains, which matches the widths chosen
by the loads (32 bit, then 16 bit, then 8 bit). */
7559*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8
7560*22dc650dSSadaf Ebrahimi if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
7561*22dc650dSSadaf Ebrahimi #else
7562*22dc650dSSadaf Ebrahimi if (context->ucharptr >= 2 || context->length == 0)
7563*22dc650dSSadaf Ebrahimi #endif
7564*22dc650dSSadaf Ebrahimi {
/* Load the next group (if any data remains) into the register that is not
holding the current group, then switch sourcereg so that it names the register
with the data to be compared now. */
7565*22dc650dSSadaf Ebrahimi if (context->length >= 4)
7566*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
7567*22dc650dSSadaf Ebrahimi else if (context->length >= 2)
7568*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
7569*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8
7570*22dc650dSSadaf Ebrahimi else if (context->length >= 1)
7571*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
7572*22dc650dSSadaf Ebrahimi #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7573*22dc650dSSadaf Ebrahimi context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
7574*22dc650dSSadaf Ebrahimi
/* The case labels are counts of code units: a 4 byte group, a 2 byte group
and, in 8 bit mode, a single byte. Each case ORs in the case mask when it is
non-zero and backtracks if the result differs from the expected constant. */
7575*22dc650dSSadaf Ebrahimi switch(context->ucharptr)
7576*22dc650dSSadaf Ebrahimi {
7577*22dc650dSSadaf Ebrahimi case 4 / sizeof(PCRE2_UCHAR):
7578*22dc650dSSadaf Ebrahimi if (context->oc.asint != 0)
7579*22dc650dSSadaf Ebrahimi OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
7580*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
7581*22dc650dSSadaf Ebrahimi break;
7582*22dc650dSSadaf Ebrahimi
7583*22dc650dSSadaf Ebrahimi case 2 / sizeof(PCRE2_UCHAR):
7584*22dc650dSSadaf Ebrahimi if (context->oc.asushort != 0)
7585*22dc650dSSadaf Ebrahimi OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
7586*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
7587*22dc650dSSadaf Ebrahimi break;
7588*22dc650dSSadaf Ebrahimi
7589*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8
7590*22dc650dSSadaf Ebrahimi case 1:
7591*22dc650dSSadaf Ebrahimi if (context->oc.asbyte != 0)
7592*22dc650dSSadaf Ebrahimi OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
7593*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
7594*22dc650dSSadaf Ebrahimi break;
7595*22dc650dSSadaf Ebrahimi #endif
7596*22dc650dSSadaf Ebrahimi
7597*22dc650dSSadaf Ebrahimi default:
7598*22dc650dSSadaf Ebrahimi SLJIT_UNREACHABLE();
7599*22dc650dSSadaf Ebrahimi break;
7600*22dc650dSSadaf Ebrahimi }
/* Start collecting a new group. */
7601*22dc650dSSadaf Ebrahimi context->ucharptr = 0;
7602*22dc650dSSadaf Ebrahimi }
7603*22dc650dSSadaf Ebrahimi
7604*22dc650dSSadaf Ebrahimi #else
7605*22dc650dSSadaf Ebrahimi
7606*22dc650dSSadaf Ebrahimi /* Unaligned read is unsupported or in 32 bit mode. */
/* One code unit at a time: load the next unit (if any remains) into the free
register, switch to the register holding the current unit, and compare it. */
7607*22dc650dSSadaf Ebrahimi if (context->length >= 1)
7608*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
7609*22dc650dSSadaf Ebrahimi
7610*22dc650dSSadaf Ebrahimi context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
7611*22dc650dSSadaf Ebrahimi
7612*22dc650dSSadaf Ebrahimi if (othercasebit != 0 && othercasechar == cc)
7613*22dc650dSSadaf Ebrahimi {
7614*22dc650dSSadaf Ebrahimi OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
7615*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
7616*22dc650dSSadaf Ebrahimi }
7617*22dc650dSSadaf Ebrahimi else
7618*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
7619*22dc650dSSadaf Ebrahimi
7620*22dc650dSSadaf Ebrahimi #endif
7621*22dc650dSSadaf Ebrahimi
7622*22dc650dSSadaf Ebrahimi cc++;
7623*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
7624*22dc650dSSadaf Ebrahimi utflength--;
7625*22dc650dSSadaf Ebrahimi }
7626*22dc650dSSadaf Ebrahimi while (utflength > 0);
7627*22dc650dSSadaf Ebrahimi #endif
7628*22dc650dSSadaf Ebrahimi
7629*22dc650dSSadaf Ebrahimi return cc;
7630*22dc650dSSadaf Ebrahimi }
7631*22dc650dSSadaf Ebrahimi
7632*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
7633*22dc650dSSadaf Ebrahimi
/* Changes the offset applied to TMP1 from the current `charoffset` to `value`:
emits an ADD of the difference when the new offset is smaller, a SUB when it
is larger and nothing when it is unchanged, then stores `value` in the
`charoffset` variable of the function that expands the macro.
NOTE(review): presumably TMP1 holds (character - charoffset) at the use sites;
confirm there. The expansion consists of two statements (an if and an
assignment) and is not wrapped in do { } while (0), so it must not be used as
the unbraced body of an if, else or loop. `value` is evaluated several times. */
7634*22dc650dSSadaf Ebrahimi #define SET_CHAR_OFFSET(value) \
7635*22dc650dSSadaf Ebrahimi if ((value) != charoffset) \
7636*22dc650dSSadaf Ebrahimi { \
7637*22dc650dSSadaf Ebrahimi if ((value) < charoffset) \
7638*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
7639*22dc650dSSadaf Ebrahimi else \
7640*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
7641*22dc650dSSadaf Ebrahimi } \
7642*22dc650dSSadaf Ebrahimi charoffset = (value);
7643*22dc650dSSadaf Ebrahimi
/* Forward declaration: compile_xclass_matchingpath() below calls this function
(with OP_ALLANY) before its definition appears. */
7644*22dc650dSSadaf Ebrahimi static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr);
7645*22dc650dSSadaf Ebrahimi
/* Bit flags kept in the unicode_status variable of
compile_xclass_matchingpath().
  XCLASS_SAVE_CHAR  - the character value in TMP1 must be preserved (it is
                      copied to RETURN_ADDR) because later tests need it.
  XCLASS_CHAR_SAVED - that copy to RETURN_ADDR has already been emitted.
  XCLASS_HAS_*      - the class contains at least one property item of the
                      named kind (type, script, script extension, boolean
                      property, bidi class).
  XCLASS_NEEDS_UCD  - mask of the XCLASS_HAS_* flags; when any of them is set
                      the code that locates the character's ucd_record is
                      emitted.
  XCLASS_SCRIPT_EXTENSION_NOTPROP - a negated (XCL_NOTPROP) script extension
                      item is present.
NOTE(review): the two XCLASS_SCRIPT_EXTENSION_RESTORE_* flags are used outside
the code visible here; presumably they record which value must be restored
after the script extension test - confirm at the use sites. */
7646*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
7647*22dc650dSSadaf Ebrahimi #define XCLASS_SAVE_CHAR 0x001
7648*22dc650dSSadaf Ebrahimi #define XCLASS_CHAR_SAVED 0x002
7649*22dc650dSSadaf Ebrahimi #define XCLASS_HAS_TYPE 0x004
7650*22dc650dSSadaf Ebrahimi #define XCLASS_HAS_SCRIPT 0x008
7651*22dc650dSSadaf Ebrahimi #define XCLASS_HAS_SCRIPT_EXTENSION 0x010
7652*22dc650dSSadaf Ebrahimi #define XCLASS_HAS_BOOL 0x020
7653*22dc650dSSadaf Ebrahimi #define XCLASS_HAS_BIDICL 0x040
7654*22dc650dSSadaf Ebrahimi #define XCLASS_NEEDS_UCD (XCLASS_HAS_TYPE | XCLASS_HAS_SCRIPT | XCLASS_HAS_SCRIPT_EXTENSION | XCLASS_HAS_BOOL | XCLASS_HAS_BIDICL)
7655*22dc650dSSadaf Ebrahimi #define XCLASS_SCRIPT_EXTENSION_NOTPROP 0x080
7656*22dc650dSSadaf Ebrahimi #define XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR 0x100
7657*22dc650dSSadaf Ebrahimi #define XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0 0x200
7658*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE */
7659*22dc650dSSadaf Ebrahimi
compile_xclass_matchingpath(compiler_common * common,PCRE2_SPTR cc,jump_list ** backtracks)7660*22dc650dSSadaf Ebrahimi static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
7661*22dc650dSSadaf Ebrahimi {
7662*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
7663*22dc650dSSadaf Ebrahimi jump_list *found = NULL;
7664*22dc650dSSadaf Ebrahimi jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
7665*22dc650dSSadaf Ebrahimi sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
7666*22dc650dSSadaf Ebrahimi struct sljit_jump *jump = NULL;
7667*22dc650dSSadaf Ebrahimi PCRE2_SPTR ccbegin;
7668*22dc650dSSadaf Ebrahimi int compares, invertcmp, numberofcmps;
7669*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
7670*22dc650dSSadaf Ebrahimi BOOL utf = common->utf;
7671*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */
7672*22dc650dSSadaf Ebrahimi
7673*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
7674*22dc650dSSadaf Ebrahimi sljit_u32 unicode_status = 0;
7675*22dc650dSSadaf Ebrahimi sljit_u32 category_list = 0;
7676*22dc650dSSadaf Ebrahimi sljit_u32 items;
7677*22dc650dSSadaf Ebrahimi int typereg = TMP1;
7678*22dc650dSSadaf Ebrahimi const sljit_u32 *other_cases;
7679*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE */
7680*22dc650dSSadaf Ebrahimi
7681*22dc650dSSadaf Ebrahimi /* Scanning the necessary info. */
7682*22dc650dSSadaf Ebrahimi cc++;
7683*22dc650dSSadaf Ebrahimi ccbegin = cc;
7684*22dc650dSSadaf Ebrahimi compares = 0;
7685*22dc650dSSadaf Ebrahimi
7686*22dc650dSSadaf Ebrahimi if (cc[-1] & XCL_MAP)
7687*22dc650dSSadaf Ebrahimi {
7688*22dc650dSSadaf Ebrahimi min = 0;
7689*22dc650dSSadaf Ebrahimi cc += 32 / sizeof(PCRE2_UCHAR);
7690*22dc650dSSadaf Ebrahimi }
7691*22dc650dSSadaf Ebrahimi
7692*22dc650dSSadaf Ebrahimi while (*cc != XCL_END)
7693*22dc650dSSadaf Ebrahimi {
7694*22dc650dSSadaf Ebrahimi compares++;
7695*22dc650dSSadaf Ebrahimi
7696*22dc650dSSadaf Ebrahimi if (*cc == XCL_SINGLE)
7697*22dc650dSSadaf Ebrahimi {
7698*22dc650dSSadaf Ebrahimi cc ++;
7699*22dc650dSSadaf Ebrahimi GETCHARINCTEST(c, cc);
7700*22dc650dSSadaf Ebrahimi if (c > max) max = c;
7701*22dc650dSSadaf Ebrahimi if (c < min) min = c;
7702*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
7703*22dc650dSSadaf Ebrahimi unicode_status |= XCLASS_SAVE_CHAR;
7704*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE */
7705*22dc650dSSadaf Ebrahimi }
7706*22dc650dSSadaf Ebrahimi else if (*cc == XCL_RANGE)
7707*22dc650dSSadaf Ebrahimi {
7708*22dc650dSSadaf Ebrahimi cc ++;
7709*22dc650dSSadaf Ebrahimi GETCHARINCTEST(c, cc);
7710*22dc650dSSadaf Ebrahimi if (c < min) min = c;
7711*22dc650dSSadaf Ebrahimi GETCHARINCTEST(c, cc);
7712*22dc650dSSadaf Ebrahimi if (c > max) max = c;
7713*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
7714*22dc650dSSadaf Ebrahimi unicode_status |= XCLASS_SAVE_CHAR;
7715*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE */
7716*22dc650dSSadaf Ebrahimi }
7717*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
7718*22dc650dSSadaf Ebrahimi else
7719*22dc650dSSadaf Ebrahimi {
7720*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7721*22dc650dSSadaf Ebrahimi cc++;
7722*22dc650dSSadaf Ebrahimi
7723*22dc650dSSadaf Ebrahimi if (*cc == PT_CLIST && cc[-1] == XCL_PROP)
7724*22dc650dSSadaf Ebrahimi {
7725*22dc650dSSadaf Ebrahimi other_cases = PRIV(ucd_caseless_sets) + cc[1];
7726*22dc650dSSadaf Ebrahimi while (*other_cases != NOTACHAR)
7727*22dc650dSSadaf Ebrahimi {
7728*22dc650dSSadaf Ebrahimi if (*other_cases > max) max = *other_cases;
7729*22dc650dSSadaf Ebrahimi if (*other_cases < min) min = *other_cases;
7730*22dc650dSSadaf Ebrahimi other_cases++;
7731*22dc650dSSadaf Ebrahimi }
7732*22dc650dSSadaf Ebrahimi }
7733*22dc650dSSadaf Ebrahimi else
7734*22dc650dSSadaf Ebrahimi {
7735*22dc650dSSadaf Ebrahimi max = READ_CHAR_MAX;
7736*22dc650dSSadaf Ebrahimi min = 0;
7737*22dc650dSSadaf Ebrahimi }
7738*22dc650dSSadaf Ebrahimi
7739*22dc650dSSadaf Ebrahimi items = 0;
7740*22dc650dSSadaf Ebrahimi
7741*22dc650dSSadaf Ebrahimi switch(*cc)
7742*22dc650dSSadaf Ebrahimi {
7743*22dc650dSSadaf Ebrahimi case PT_ANY:
7744*22dc650dSSadaf Ebrahimi /* Any either accepts everything or ignored. */
7745*22dc650dSSadaf Ebrahimi if (cc[-1] == XCL_PROP)
7746*22dc650dSSadaf Ebrahimi items = UCPCAT_ALL;
7747*22dc650dSSadaf Ebrahimi else
7748*22dc650dSSadaf Ebrahimi compares--;
7749*22dc650dSSadaf Ebrahimi break;
7750*22dc650dSSadaf Ebrahimi
7751*22dc650dSSadaf Ebrahimi case PT_LAMP:
7752*22dc650dSSadaf Ebrahimi items = UCPCAT3(ucp_Lu, ucp_Ll, ucp_Lt);
7753*22dc650dSSadaf Ebrahimi break;
7754*22dc650dSSadaf Ebrahimi
7755*22dc650dSSadaf Ebrahimi case PT_GC:
7756*22dc650dSSadaf Ebrahimi items = UCPCAT_RANGE(PRIV(ucp_typerange)[(int)cc[1] * 2], PRIV(ucp_typerange)[(int)cc[1] * 2 + 1]);
7757*22dc650dSSadaf Ebrahimi break;
7758*22dc650dSSadaf Ebrahimi
7759*22dc650dSSadaf Ebrahimi case PT_PC:
7760*22dc650dSSadaf Ebrahimi items = UCPCAT(cc[1]);
7761*22dc650dSSadaf Ebrahimi break;
7762*22dc650dSSadaf Ebrahimi
7763*22dc650dSSadaf Ebrahimi case PT_WORD:
7764*22dc650dSSadaf Ebrahimi items = UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N;
7765*22dc650dSSadaf Ebrahimi break;
7766*22dc650dSSadaf Ebrahimi
7767*22dc650dSSadaf Ebrahimi case PT_ALNUM:
7768*22dc650dSSadaf Ebrahimi items = UCPCAT_L | UCPCAT_N;
7769*22dc650dSSadaf Ebrahimi break;
7770*22dc650dSSadaf Ebrahimi
7771*22dc650dSSadaf Ebrahimi case PT_SCX:
7772*22dc650dSSadaf Ebrahimi unicode_status |= XCLASS_HAS_SCRIPT_EXTENSION;
7773*22dc650dSSadaf Ebrahimi if (cc[-1] == XCL_NOTPROP)
7774*22dc650dSSadaf Ebrahimi {
7775*22dc650dSSadaf Ebrahimi unicode_status |= XCLASS_SCRIPT_EXTENSION_NOTPROP;
7776*22dc650dSSadaf Ebrahimi break;
7777*22dc650dSSadaf Ebrahimi }
7778*22dc650dSSadaf Ebrahimi compares++;
7779*22dc650dSSadaf Ebrahimi /* Fall through */
7780*22dc650dSSadaf Ebrahimi
7781*22dc650dSSadaf Ebrahimi case PT_SC:
7782*22dc650dSSadaf Ebrahimi unicode_status |= XCLASS_HAS_SCRIPT;
7783*22dc650dSSadaf Ebrahimi break;
7784*22dc650dSSadaf Ebrahimi
7785*22dc650dSSadaf Ebrahimi case PT_SPACE:
7786*22dc650dSSadaf Ebrahimi case PT_PXSPACE:
7787*22dc650dSSadaf Ebrahimi case PT_PXGRAPH:
7788*22dc650dSSadaf Ebrahimi case PT_PXPRINT:
7789*22dc650dSSadaf Ebrahimi case PT_PXPUNCT:
7790*22dc650dSSadaf Ebrahimi unicode_status |= XCLASS_SAVE_CHAR | XCLASS_HAS_TYPE;
7791*22dc650dSSadaf Ebrahimi break;
7792*22dc650dSSadaf Ebrahimi
7793*22dc650dSSadaf Ebrahimi case PT_CLIST:
7794*22dc650dSSadaf Ebrahimi case PT_UCNC:
7795*22dc650dSSadaf Ebrahimi case PT_PXXDIGIT:
7796*22dc650dSSadaf Ebrahimi unicode_status |= XCLASS_SAVE_CHAR;
7797*22dc650dSSadaf Ebrahimi break;
7798*22dc650dSSadaf Ebrahimi
7799*22dc650dSSadaf Ebrahimi case PT_BOOL:
7800*22dc650dSSadaf Ebrahimi unicode_status |= XCLASS_HAS_BOOL;
7801*22dc650dSSadaf Ebrahimi break;
7802*22dc650dSSadaf Ebrahimi
7803*22dc650dSSadaf Ebrahimi case PT_BIDICL:
7804*22dc650dSSadaf Ebrahimi unicode_status |= XCLASS_HAS_BIDICL;
7805*22dc650dSSadaf Ebrahimi break;
7806*22dc650dSSadaf Ebrahimi
7807*22dc650dSSadaf Ebrahimi default:
7808*22dc650dSSadaf Ebrahimi SLJIT_UNREACHABLE();
7809*22dc650dSSadaf Ebrahimi break;
7810*22dc650dSSadaf Ebrahimi }
7811*22dc650dSSadaf Ebrahimi
7812*22dc650dSSadaf Ebrahimi if (items > 0)
7813*22dc650dSSadaf Ebrahimi {
7814*22dc650dSSadaf Ebrahimi if (cc[-1] == XCL_NOTPROP)
7815*22dc650dSSadaf Ebrahimi items ^= UCPCAT_ALL;
7816*22dc650dSSadaf Ebrahimi category_list |= items;
7817*22dc650dSSadaf Ebrahimi unicode_status |= XCLASS_HAS_TYPE;
7818*22dc650dSSadaf Ebrahimi compares--;
7819*22dc650dSSadaf Ebrahimi }
7820*22dc650dSSadaf Ebrahimi
7821*22dc650dSSadaf Ebrahimi cc += 2;
7822*22dc650dSSadaf Ebrahimi }
7823*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE */
7824*22dc650dSSadaf Ebrahimi }
7825*22dc650dSSadaf Ebrahimi
7826*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
7827*22dc650dSSadaf Ebrahimi if (category_list == UCPCAT_ALL)
7828*22dc650dSSadaf Ebrahimi {
7829*22dc650dSSadaf Ebrahimi /* All characters are accepted, same as dotall. */
7830*22dc650dSSadaf Ebrahimi compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
7831*22dc650dSSadaf Ebrahimi if (list == backtracks)
7832*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7833*22dc650dSSadaf Ebrahimi return;
7834*22dc650dSSadaf Ebrahimi }
7835*22dc650dSSadaf Ebrahimi
7836*22dc650dSSadaf Ebrahimi if (compares == 0 && category_list == 0)
7837*22dc650dSSadaf Ebrahimi {
7838*22dc650dSSadaf Ebrahimi /* No characters are accepted, same as (*F) or dotall. */
7839*22dc650dSSadaf Ebrahimi compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
7840*22dc650dSSadaf Ebrahimi if (list != backtracks)
7841*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7842*22dc650dSSadaf Ebrahimi return;
7843*22dc650dSSadaf Ebrahimi }
7844*22dc650dSSadaf Ebrahimi #else /* !SUPPORT_UNICODE */
7845*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(compares > 0);
7846*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE */
7847*22dc650dSSadaf Ebrahimi
7848*22dc650dSSadaf Ebrahimi /* We are not necessary in utf mode even in 8 bit mode. */
7849*22dc650dSSadaf Ebrahimi cc = ccbegin;
7850*22dc650dSSadaf Ebrahimi if ((cc[-1] & XCL_NOT) != 0)
7851*22dc650dSSadaf Ebrahimi read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR);
7852*22dc650dSSadaf Ebrahimi else
7853*22dc650dSSadaf Ebrahimi {
7854*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
7855*22dc650dSSadaf Ebrahimi read_char(common, min, max, (unicode_status & XCLASS_NEEDS_UCD) ? backtracks : NULL, 0);
7856*22dc650dSSadaf Ebrahimi #else /* !SUPPORT_UNICODE */
7857*22dc650dSSadaf Ebrahimi read_char(common, min, max, NULL, 0);
7858*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE */
7859*22dc650dSSadaf Ebrahimi }
7860*22dc650dSSadaf Ebrahimi
7861*22dc650dSSadaf Ebrahimi if ((cc[-1] & XCL_HASPROP) == 0)
7862*22dc650dSSadaf Ebrahimi {
7863*22dc650dSSadaf Ebrahimi if ((cc[-1] & XCL_MAP) != 0)
7864*22dc650dSSadaf Ebrahimi {
7865*22dc650dSSadaf Ebrahimi jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7866*22dc650dSSadaf Ebrahimi if (!optimize_class(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
7867*22dc650dSSadaf Ebrahimi {
7868*22dc650dSSadaf Ebrahimi OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
7869*22dc650dSSadaf Ebrahimi OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
7870*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
7871*22dc650dSSadaf Ebrahimi OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
7872*22dc650dSSadaf Ebrahimi OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
7873*22dc650dSSadaf Ebrahimi add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
7874*22dc650dSSadaf Ebrahimi }
7875*22dc650dSSadaf Ebrahimi
7876*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7877*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
7878*22dc650dSSadaf Ebrahimi
7879*22dc650dSSadaf Ebrahimi cc += 32 / sizeof(PCRE2_UCHAR);
7880*22dc650dSSadaf Ebrahimi }
7881*22dc650dSSadaf Ebrahimi else
7882*22dc650dSSadaf Ebrahimi {
7883*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
7884*22dc650dSSadaf Ebrahimi add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
7885*22dc650dSSadaf Ebrahimi }
7886*22dc650dSSadaf Ebrahimi }
7887*22dc650dSSadaf Ebrahimi else if ((cc[-1] & XCL_MAP) != 0)
7888*22dc650dSSadaf Ebrahimi {
7889*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
7890*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
7891*22dc650dSSadaf Ebrahimi unicode_status |= XCLASS_CHAR_SAVED;
7892*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE */
7893*22dc650dSSadaf Ebrahimi if (!optimize_class(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
7894*22dc650dSSadaf Ebrahimi {
7895*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8
7896*22dc650dSSadaf Ebrahimi jump = NULL;
7897*22dc650dSSadaf Ebrahimi if (common->utf)
7898*22dc650dSSadaf Ebrahimi #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7899*22dc650dSSadaf Ebrahimi jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7900*22dc650dSSadaf Ebrahimi
7901*22dc650dSSadaf Ebrahimi OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
7902*22dc650dSSadaf Ebrahimi OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
7903*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
7904*22dc650dSSadaf Ebrahimi OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
7905*22dc650dSSadaf Ebrahimi OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
7906*22dc650dSSadaf Ebrahimi add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
7907*22dc650dSSadaf Ebrahimi
7908*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8
7909*22dc650dSSadaf Ebrahimi if (common->utf)
7910*22dc650dSSadaf Ebrahimi #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7911*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
7912*22dc650dSSadaf Ebrahimi }
7913*22dc650dSSadaf Ebrahimi
7914*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7915*22dc650dSSadaf Ebrahimi cc += 32 / sizeof(PCRE2_UCHAR);
7916*22dc650dSSadaf Ebrahimi }
7917*22dc650dSSadaf Ebrahimi
7918*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
7919*22dc650dSSadaf Ebrahimi if (unicode_status & XCLASS_NEEDS_UCD)
7920*22dc650dSSadaf Ebrahimi {
7921*22dc650dSSadaf Ebrahimi if ((unicode_status & (XCLASS_SAVE_CHAR | XCLASS_CHAR_SAVED)) == XCLASS_SAVE_CHAR)
7922*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
7923*22dc650dSSadaf Ebrahimi
7924*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 32
7925*22dc650dSSadaf Ebrahimi if (!common->utf)
7926*22dc650dSSadaf Ebrahimi {
7927*22dc650dSSadaf Ebrahimi jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
7928*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
7929*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
7930*22dc650dSSadaf Ebrahimi }
7931*22dc650dSSadaf Ebrahimi #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
7932*22dc650dSSadaf Ebrahimi
7933*22dc650dSSadaf Ebrahimi OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
7934*22dc650dSSadaf Ebrahimi OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
7935*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
7936*22dc650dSSadaf Ebrahimi OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
7937*22dc650dSSadaf Ebrahimi OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
7938*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
7939*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
7940*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
7941*22dc650dSSadaf Ebrahimi OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
7942*22dc650dSSadaf Ebrahimi OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
7943*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
7944*22dc650dSSadaf Ebrahimi
7945*22dc650dSSadaf Ebrahimi ccbegin = cc;
7946*22dc650dSSadaf Ebrahimi
7947*22dc650dSSadaf Ebrahimi if (category_list != 0)
7948*22dc650dSSadaf Ebrahimi compares++;
7949*22dc650dSSadaf Ebrahimi
7950*22dc650dSSadaf Ebrahimi if (unicode_status & XCLASS_HAS_BIDICL)
7951*22dc650dSSadaf Ebrahimi {
7952*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
7953*22dc650dSSadaf Ebrahimi OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BIDICLASS_SHIFT);
7954*22dc650dSSadaf Ebrahimi
7955*22dc650dSSadaf Ebrahimi while (*cc != XCL_END)
7956*22dc650dSSadaf Ebrahimi {
7957*22dc650dSSadaf Ebrahimi if (*cc == XCL_SINGLE)
7958*22dc650dSSadaf Ebrahimi {
7959*22dc650dSSadaf Ebrahimi cc ++;
7960*22dc650dSSadaf Ebrahimi GETCHARINCTEST(c, cc);
7961*22dc650dSSadaf Ebrahimi }
7962*22dc650dSSadaf Ebrahimi else if (*cc == XCL_RANGE)
7963*22dc650dSSadaf Ebrahimi {
7964*22dc650dSSadaf Ebrahimi cc ++;
7965*22dc650dSSadaf Ebrahimi GETCHARINCTEST(c, cc);
7966*22dc650dSSadaf Ebrahimi GETCHARINCTEST(c, cc);
7967*22dc650dSSadaf Ebrahimi }
7968*22dc650dSSadaf Ebrahimi else
7969*22dc650dSSadaf Ebrahimi {
7970*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7971*22dc650dSSadaf Ebrahimi cc++;
7972*22dc650dSSadaf Ebrahimi if (*cc == PT_BIDICL)
7973*22dc650dSSadaf Ebrahimi {
7974*22dc650dSSadaf Ebrahimi compares--;
7975*22dc650dSSadaf Ebrahimi invertcmp = (compares == 0 && list != backtracks);
7976*22dc650dSSadaf Ebrahimi if (cc[-1] == XCL_NOTPROP)
7977*22dc650dSSadaf Ebrahimi invertcmp ^= 0x1;
7978*22dc650dSSadaf Ebrahimi jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
7979*22dc650dSSadaf Ebrahimi add_jump(compiler, compares > 0 ? list : backtracks, jump);
7980*22dc650dSSadaf Ebrahimi }
7981*22dc650dSSadaf Ebrahimi cc += 2;
7982*22dc650dSSadaf Ebrahimi }
7983*22dc650dSSadaf Ebrahimi }
7984*22dc650dSSadaf Ebrahimi
7985*22dc650dSSadaf Ebrahimi cc = ccbegin;
7986*22dc650dSSadaf Ebrahimi }
7987*22dc650dSSadaf Ebrahimi
7988*22dc650dSSadaf Ebrahimi if (unicode_status & XCLASS_HAS_BOOL)
7989*22dc650dSSadaf Ebrahimi {
7990*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, bprops));
7991*22dc650dSSadaf Ebrahimi OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BPROPS_MASK);
7992*22dc650dSSadaf Ebrahimi OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
7993*22dc650dSSadaf Ebrahimi
7994*22dc650dSSadaf Ebrahimi while (*cc != XCL_END)
7995*22dc650dSSadaf Ebrahimi {
7996*22dc650dSSadaf Ebrahimi if (*cc == XCL_SINGLE)
7997*22dc650dSSadaf Ebrahimi {
7998*22dc650dSSadaf Ebrahimi cc ++;
7999*22dc650dSSadaf Ebrahimi GETCHARINCTEST(c, cc);
8000*22dc650dSSadaf Ebrahimi }
8001*22dc650dSSadaf Ebrahimi else if (*cc == XCL_RANGE)
8002*22dc650dSSadaf Ebrahimi {
8003*22dc650dSSadaf Ebrahimi cc ++;
8004*22dc650dSSadaf Ebrahimi GETCHARINCTEST(c, cc);
8005*22dc650dSSadaf Ebrahimi GETCHARINCTEST(c, cc);
8006*22dc650dSSadaf Ebrahimi }
8007*22dc650dSSadaf Ebrahimi else
8008*22dc650dSSadaf Ebrahimi {
8009*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
8010*22dc650dSSadaf Ebrahimi cc++;
8011*22dc650dSSadaf Ebrahimi if (*cc == PT_BOOL)
8012*22dc650dSSadaf Ebrahimi {
8013*22dc650dSSadaf Ebrahimi compares--;
8014*22dc650dSSadaf Ebrahimi invertcmp = (compares == 0 && list != backtracks);
8015*22dc650dSSadaf Ebrahimi if (cc[-1] == XCL_NOTPROP)
8016*22dc650dSSadaf Ebrahimi invertcmp ^= 0x1;
8017*22dc650dSSadaf Ebrahimi
8018*22dc650dSSadaf Ebrahimi OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_boolprop_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)(1u << (cc[1] & 0x1f)));
8019*22dc650dSSadaf Ebrahimi add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
8020*22dc650dSSadaf Ebrahimi }
8021*22dc650dSSadaf Ebrahimi cc += 2;
8022*22dc650dSSadaf Ebrahimi }
8023*22dc650dSSadaf Ebrahimi }
8024*22dc650dSSadaf Ebrahimi
8025*22dc650dSSadaf Ebrahimi cc = ccbegin;
8026*22dc650dSSadaf Ebrahimi }
8027*22dc650dSSadaf Ebrahimi
8028*22dc650dSSadaf Ebrahimi if (unicode_status & XCLASS_HAS_SCRIPT)
8029*22dc650dSSadaf Ebrahimi {
8030*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
8031*22dc650dSSadaf Ebrahimi
8032*22dc650dSSadaf Ebrahimi while (*cc != XCL_END)
8033*22dc650dSSadaf Ebrahimi {
8034*22dc650dSSadaf Ebrahimi if (*cc == XCL_SINGLE)
8035*22dc650dSSadaf Ebrahimi {
8036*22dc650dSSadaf Ebrahimi cc ++;
8037*22dc650dSSadaf Ebrahimi GETCHARINCTEST(c, cc);
8038*22dc650dSSadaf Ebrahimi }
8039*22dc650dSSadaf Ebrahimi else if (*cc == XCL_RANGE)
8040*22dc650dSSadaf Ebrahimi {
8041*22dc650dSSadaf Ebrahimi cc ++;
8042*22dc650dSSadaf Ebrahimi GETCHARINCTEST(c, cc);
8043*22dc650dSSadaf Ebrahimi GETCHARINCTEST(c, cc);
8044*22dc650dSSadaf Ebrahimi }
8045*22dc650dSSadaf Ebrahimi else
8046*22dc650dSSadaf Ebrahimi {
8047*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
8048*22dc650dSSadaf Ebrahimi cc++;
8049*22dc650dSSadaf Ebrahimi switch (*cc)
8050*22dc650dSSadaf Ebrahimi {
8051*22dc650dSSadaf Ebrahimi case PT_SCX:
8052*22dc650dSSadaf Ebrahimi if (cc[-1] == XCL_NOTPROP)
8053*22dc650dSSadaf Ebrahimi break;
8054*22dc650dSSadaf Ebrahimi /* Fall through */
8055*22dc650dSSadaf Ebrahimi
8056*22dc650dSSadaf Ebrahimi case PT_SC:
8057*22dc650dSSadaf Ebrahimi compares--;
8058*22dc650dSSadaf Ebrahimi invertcmp = (compares == 0 && list != backtracks);
8059*22dc650dSSadaf Ebrahimi if (cc[-1] == XCL_NOTPROP)
8060*22dc650dSSadaf Ebrahimi invertcmp ^= 0x1;
8061*22dc650dSSadaf Ebrahimi
8062*22dc650dSSadaf Ebrahimi add_jump(compiler, compares > 0 ? list : backtracks, CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]));
8063*22dc650dSSadaf Ebrahimi }
8064*22dc650dSSadaf Ebrahimi cc += 2;
8065*22dc650dSSadaf Ebrahimi }
8066*22dc650dSSadaf Ebrahimi }
8067*22dc650dSSadaf Ebrahimi
8068*22dc650dSSadaf Ebrahimi cc = ccbegin;
8069*22dc650dSSadaf Ebrahimi }
8070*22dc650dSSadaf Ebrahimi
8071*22dc650dSSadaf Ebrahimi if (unicode_status & XCLASS_HAS_SCRIPT_EXTENSION)
8072*22dc650dSSadaf Ebrahimi {
8073*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
8074*22dc650dSSadaf Ebrahimi OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_SCRIPTX_MASK);
8075*22dc650dSSadaf Ebrahimi OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
8076*22dc650dSSadaf Ebrahimi
8077*22dc650dSSadaf Ebrahimi if (unicode_status & XCLASS_SCRIPT_EXTENSION_NOTPROP)
8078*22dc650dSSadaf Ebrahimi {
8079*22dc650dSSadaf Ebrahimi if (unicode_status & XCLASS_HAS_TYPE)
8080*22dc650dSSadaf Ebrahimi {
8081*22dc650dSSadaf Ebrahimi if (unicode_status & XCLASS_SAVE_CHAR)
8082*22dc650dSSadaf Ebrahimi {
8083*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP2, 0);
8084*22dc650dSSadaf Ebrahimi unicode_status |= XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0;
8085*22dc650dSSadaf Ebrahimi }
8086*22dc650dSSadaf Ebrahimi else
8087*22dc650dSSadaf Ebrahimi {
8088*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0);
8089*22dc650dSSadaf Ebrahimi unicode_status |= XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR;
8090*22dc650dSSadaf Ebrahimi }
8091*22dc650dSSadaf Ebrahimi }
8092*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
8093*22dc650dSSadaf Ebrahimi }
8094*22dc650dSSadaf Ebrahimi
8095*22dc650dSSadaf Ebrahimi while (*cc != XCL_END)
8096*22dc650dSSadaf Ebrahimi {
8097*22dc650dSSadaf Ebrahimi if (*cc == XCL_SINGLE)
8098*22dc650dSSadaf Ebrahimi {
8099*22dc650dSSadaf Ebrahimi cc ++;
8100*22dc650dSSadaf Ebrahimi GETCHARINCTEST(c, cc);
8101*22dc650dSSadaf Ebrahimi }
8102*22dc650dSSadaf Ebrahimi else if (*cc == XCL_RANGE)
8103*22dc650dSSadaf Ebrahimi {
8104*22dc650dSSadaf Ebrahimi cc ++;
8105*22dc650dSSadaf Ebrahimi GETCHARINCTEST(c, cc);
8106*22dc650dSSadaf Ebrahimi GETCHARINCTEST(c, cc);
8107*22dc650dSSadaf Ebrahimi }
8108*22dc650dSSadaf Ebrahimi else
8109*22dc650dSSadaf Ebrahimi {
8110*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
8111*22dc650dSSadaf Ebrahimi cc++;
8112*22dc650dSSadaf Ebrahimi if (*cc == PT_SCX)
8113*22dc650dSSadaf Ebrahimi {
8114*22dc650dSSadaf Ebrahimi compares--;
8115*22dc650dSSadaf Ebrahimi invertcmp = (compares == 0 && list != backtracks);
8116*22dc650dSSadaf Ebrahimi
8117*22dc650dSSadaf Ebrahimi jump = NULL;
8118*22dc650dSSadaf Ebrahimi if (cc[-1] == XCL_NOTPROP)
8119*22dc650dSSadaf Ebrahimi {
8120*22dc650dSSadaf Ebrahimi jump = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, (int)cc[1]);
8121*22dc650dSSadaf Ebrahimi if (invertcmp)
8122*22dc650dSSadaf Ebrahimi {
8123*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, jump);
8124*22dc650dSSadaf Ebrahimi jump = NULL;
8125*22dc650dSSadaf Ebrahimi }
8126*22dc650dSSadaf Ebrahimi invertcmp ^= 0x1;
8127*22dc650dSSadaf Ebrahimi }
8128*22dc650dSSadaf Ebrahimi
8129*22dc650dSSadaf Ebrahimi OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_script_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)(1u << (cc[1] & 0x1f)));
8130*22dc650dSSadaf Ebrahimi add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
8131*22dc650dSSadaf Ebrahimi
8132*22dc650dSSadaf Ebrahimi if (jump != NULL)
8133*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
8134*22dc650dSSadaf Ebrahimi }
8135*22dc650dSSadaf Ebrahimi cc += 2;
8136*22dc650dSSadaf Ebrahimi }
8137*22dc650dSSadaf Ebrahimi }
8138*22dc650dSSadaf Ebrahimi
8139*22dc650dSSadaf Ebrahimi if (unicode_status & XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0)
8140*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
8141*22dc650dSSadaf Ebrahimi else if (unicode_status & XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR)
8142*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0);
8143*22dc650dSSadaf Ebrahimi cc = ccbegin;
8144*22dc650dSSadaf Ebrahimi }
8145*22dc650dSSadaf Ebrahimi
8146*22dc650dSSadaf Ebrahimi if (unicode_status & XCLASS_SAVE_CHAR)
8147*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
8148*22dc650dSSadaf Ebrahimi
8149*22dc650dSSadaf Ebrahimi if (unicode_status & XCLASS_HAS_TYPE)
8150*22dc650dSSadaf Ebrahimi {
8151*22dc650dSSadaf Ebrahimi if (unicode_status & XCLASS_SAVE_CHAR)
8152*22dc650dSSadaf Ebrahimi typereg = RETURN_ADDR;
8153*22dc650dSSadaf Ebrahimi
8154*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
8155*22dc650dSSadaf Ebrahimi OP2(SLJIT_SHL, typereg, 0, SLJIT_IMM, 1, TMP2, 0);
8156*22dc650dSSadaf Ebrahimi
8157*22dc650dSSadaf Ebrahimi if (category_list > 0)
8158*22dc650dSSadaf Ebrahimi {
8159*22dc650dSSadaf Ebrahimi compares--;
8160*22dc650dSSadaf Ebrahimi invertcmp = (compares == 0 && list != backtracks);
8161*22dc650dSSadaf Ebrahimi OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, category_list);
8162*22dc650dSSadaf Ebrahimi add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
8163*22dc650dSSadaf Ebrahimi }
8164*22dc650dSSadaf Ebrahimi }
8165*22dc650dSSadaf Ebrahimi }
8166*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE */
8167*22dc650dSSadaf Ebrahimi
8168*22dc650dSSadaf Ebrahimi /* Generating code. */
8169*22dc650dSSadaf Ebrahimi charoffset = 0;
8170*22dc650dSSadaf Ebrahimi numberofcmps = 0;
8171*22dc650dSSadaf Ebrahimi
8172*22dc650dSSadaf Ebrahimi while (*cc != XCL_END)
8173*22dc650dSSadaf Ebrahimi {
8174*22dc650dSSadaf Ebrahimi compares--;
8175*22dc650dSSadaf Ebrahimi invertcmp = (compares == 0 && list != backtracks);
8176*22dc650dSSadaf Ebrahimi jump = NULL;
8177*22dc650dSSadaf Ebrahimi
8178*22dc650dSSadaf Ebrahimi if (*cc == XCL_SINGLE)
8179*22dc650dSSadaf Ebrahimi {
8180*22dc650dSSadaf Ebrahimi cc ++;
8181*22dc650dSSadaf Ebrahimi GETCHARINCTEST(c, cc);
8182*22dc650dSSadaf Ebrahimi
8183*22dc650dSSadaf Ebrahimi if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
8184*22dc650dSSadaf Ebrahimi {
8185*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8186*22dc650dSSadaf Ebrahimi OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8187*22dc650dSSadaf Ebrahimi numberofcmps++;
8188*22dc650dSSadaf Ebrahimi }
8189*22dc650dSSadaf Ebrahimi else if (numberofcmps > 0)
8190*22dc650dSSadaf Ebrahimi {
8191*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8192*22dc650dSSadaf Ebrahimi OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
8193*22dc650dSSadaf Ebrahimi jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8194*22dc650dSSadaf Ebrahimi numberofcmps = 0;
8195*22dc650dSSadaf Ebrahimi }
8196*22dc650dSSadaf Ebrahimi else
8197*22dc650dSSadaf Ebrahimi {
8198*22dc650dSSadaf Ebrahimi jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8199*22dc650dSSadaf Ebrahimi numberofcmps = 0;
8200*22dc650dSSadaf Ebrahimi }
8201*22dc650dSSadaf Ebrahimi }
8202*22dc650dSSadaf Ebrahimi else if (*cc == XCL_RANGE)
8203*22dc650dSSadaf Ebrahimi {
8204*22dc650dSSadaf Ebrahimi cc ++;
8205*22dc650dSSadaf Ebrahimi GETCHARINCTEST(c, cc);
8206*22dc650dSSadaf Ebrahimi SET_CHAR_OFFSET(c);
8207*22dc650dSSadaf Ebrahimi GETCHARINCTEST(c, cc);
8208*22dc650dSSadaf Ebrahimi
8209*22dc650dSSadaf Ebrahimi if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
8210*22dc650dSSadaf Ebrahimi {
8211*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8212*22dc650dSSadaf Ebrahimi OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8213*22dc650dSSadaf Ebrahimi numberofcmps++;
8214*22dc650dSSadaf Ebrahimi }
8215*22dc650dSSadaf Ebrahimi else if (numberofcmps > 0)
8216*22dc650dSSadaf Ebrahimi {
8217*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8218*22dc650dSSadaf Ebrahimi OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
8219*22dc650dSSadaf Ebrahimi jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8220*22dc650dSSadaf Ebrahimi numberofcmps = 0;
8221*22dc650dSSadaf Ebrahimi }
8222*22dc650dSSadaf Ebrahimi else
8223*22dc650dSSadaf Ebrahimi {
8224*22dc650dSSadaf Ebrahimi jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8225*22dc650dSSadaf Ebrahimi numberofcmps = 0;
8226*22dc650dSSadaf Ebrahimi }
8227*22dc650dSSadaf Ebrahimi }
8228*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
8229*22dc650dSSadaf Ebrahimi else
8230*22dc650dSSadaf Ebrahimi {
8231*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
8232*22dc650dSSadaf Ebrahimi if (*cc == XCL_NOTPROP)
8233*22dc650dSSadaf Ebrahimi invertcmp ^= 0x1;
8234*22dc650dSSadaf Ebrahimi cc++;
8235*22dc650dSSadaf Ebrahimi switch(*cc)
8236*22dc650dSSadaf Ebrahimi {
8237*22dc650dSSadaf Ebrahimi case PT_ANY:
8238*22dc650dSSadaf Ebrahimi case PT_LAMP:
8239*22dc650dSSadaf Ebrahimi case PT_GC:
8240*22dc650dSSadaf Ebrahimi case PT_PC:
8241*22dc650dSSadaf Ebrahimi case PT_SC:
8242*22dc650dSSadaf Ebrahimi case PT_SCX:
8243*22dc650dSSadaf Ebrahimi case PT_BOOL:
8244*22dc650dSSadaf Ebrahimi case PT_BIDICL:
8245*22dc650dSSadaf Ebrahimi case PT_WORD:
8246*22dc650dSSadaf Ebrahimi case PT_ALNUM:
8247*22dc650dSSadaf Ebrahimi compares++;
8248*22dc650dSSadaf Ebrahimi /* Already handled. */
8249*22dc650dSSadaf Ebrahimi break;
8250*22dc650dSSadaf Ebrahimi
8251*22dc650dSSadaf Ebrahimi case PT_SPACE:
8252*22dc650dSSadaf Ebrahimi case PT_PXSPACE:
8253*22dc650dSSadaf Ebrahimi SET_CHAR_OFFSET(9);
8254*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
8255*22dc650dSSadaf Ebrahimi OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8256*22dc650dSSadaf Ebrahimi
8257*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
8258*22dc650dSSadaf Ebrahimi OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8259*22dc650dSSadaf Ebrahimi
8260*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
8261*22dc650dSSadaf Ebrahimi OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8262*22dc650dSSadaf Ebrahimi
8263*22dc650dSSadaf Ebrahimi OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Zl, ucp_Zs));
8264*22dc650dSSadaf Ebrahimi OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_ZERO);
8265*22dc650dSSadaf Ebrahimi jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8266*22dc650dSSadaf Ebrahimi break;
8267*22dc650dSSadaf Ebrahimi
8268*22dc650dSSadaf Ebrahimi case PT_CLIST:
8269*22dc650dSSadaf Ebrahimi other_cases = PRIV(ucd_caseless_sets) + cc[1];
8270*22dc650dSSadaf Ebrahimi
8271*22dc650dSSadaf Ebrahimi /* At least three characters are required.
8272*22dc650dSSadaf Ebrahimi Otherwise this case would be handled by the normal code path. */
8273*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
8274*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
8275*22dc650dSSadaf Ebrahimi
8276*22dc650dSSadaf Ebrahimi /* Optimizing character pairs, if their difference is power of 2. */
8277*22dc650dSSadaf Ebrahimi if (is_powerof2(other_cases[1] ^ other_cases[0]))
8278*22dc650dSSadaf Ebrahimi {
8279*22dc650dSSadaf Ebrahimi if (charoffset == 0)
8280*22dc650dSSadaf Ebrahimi OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
8281*22dc650dSSadaf Ebrahimi else
8282*22dc650dSSadaf Ebrahimi {
8283*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
8284*22dc650dSSadaf Ebrahimi OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
8285*22dc650dSSadaf Ebrahimi }
8286*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[1]);
8287*22dc650dSSadaf Ebrahimi OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8288*22dc650dSSadaf Ebrahimi other_cases += 2;
8289*22dc650dSSadaf Ebrahimi }
8290*22dc650dSSadaf Ebrahimi else if (is_powerof2(other_cases[2] ^ other_cases[1]))
8291*22dc650dSSadaf Ebrahimi {
8292*22dc650dSSadaf Ebrahimi if (charoffset == 0)
8293*22dc650dSSadaf Ebrahimi OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
8294*22dc650dSSadaf Ebrahimi else
8295*22dc650dSSadaf Ebrahimi {
8296*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
8297*22dc650dSSadaf Ebrahimi OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
8298*22dc650dSSadaf Ebrahimi }
8299*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[2]);
8300*22dc650dSSadaf Ebrahimi OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8301*22dc650dSSadaf Ebrahimi
8302*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
8303*22dc650dSSadaf Ebrahimi OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
8304*22dc650dSSadaf Ebrahimi
8305*22dc650dSSadaf Ebrahimi other_cases += 3;
8306*22dc650dSSadaf Ebrahimi }
8307*22dc650dSSadaf Ebrahimi else
8308*22dc650dSSadaf Ebrahimi {
8309*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
8310*22dc650dSSadaf Ebrahimi OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8311*22dc650dSSadaf Ebrahimi }
8312*22dc650dSSadaf Ebrahimi
8313*22dc650dSSadaf Ebrahimi while (*other_cases != NOTACHAR)
8314*22dc650dSSadaf Ebrahimi {
8315*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
8316*22dc650dSSadaf Ebrahimi OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
8317*22dc650dSSadaf Ebrahimi }
8318*22dc650dSSadaf Ebrahimi jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8319*22dc650dSSadaf Ebrahimi break;
8320*22dc650dSSadaf Ebrahimi
8321*22dc650dSSadaf Ebrahimi case PT_UCNC:
8322*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
8323*22dc650dSSadaf Ebrahimi OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8324*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
8325*22dc650dSSadaf Ebrahimi OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8326*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
8327*22dc650dSSadaf Ebrahimi OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8328*22dc650dSSadaf Ebrahimi
8329*22dc650dSSadaf Ebrahimi SET_CHAR_OFFSET(0xa0);
8330*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
8331*22dc650dSSadaf Ebrahimi OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8332*22dc650dSSadaf Ebrahimi SET_CHAR_OFFSET(0);
8333*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
8334*22dc650dSSadaf Ebrahimi OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
8335*22dc650dSSadaf Ebrahimi jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8336*22dc650dSSadaf Ebrahimi break;
8337*22dc650dSSadaf Ebrahimi
8338*22dc650dSSadaf Ebrahimi case PT_PXGRAPH:
8339*22dc650dSSadaf Ebrahimi OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Cc, ucp_Cs) | UCPCAT_RANGE(ucp_Zl, ucp_Zs));
8340*22dc650dSSadaf Ebrahimi OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
8341*22dc650dSSadaf Ebrahimi
8342*22dc650dSSadaf Ebrahimi OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT(ucp_Cf));
8343*22dc650dSSadaf Ebrahimi jump = JUMP(SLJIT_ZERO);
8344*22dc650dSSadaf Ebrahimi
8345*22dc650dSSadaf Ebrahimi c = charoffset;
8346*22dc650dSSadaf Ebrahimi /* In case of ucp_Cf, we overwrite the result. */
8347*22dc650dSSadaf Ebrahimi SET_CHAR_OFFSET(0x2066);
8348*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
8349*22dc650dSSadaf Ebrahimi OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8350*22dc650dSSadaf Ebrahimi
8351*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
8352*22dc650dSSadaf Ebrahimi OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8353*22dc650dSSadaf Ebrahimi
8354*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
8355*22dc650dSSadaf Ebrahimi OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8356*22dc650dSSadaf Ebrahimi
8357*22dc650dSSadaf Ebrahimi /* Restore charoffset. */
8358*22dc650dSSadaf Ebrahimi SET_CHAR_OFFSET(c);
8359*22dc650dSSadaf Ebrahimi
8360*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
8361*22dc650dSSadaf Ebrahimi jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
8362*22dc650dSSadaf Ebrahimi break;
8363*22dc650dSSadaf Ebrahimi
8364*22dc650dSSadaf Ebrahimi case PT_PXPRINT:
8365*22dc650dSSadaf Ebrahimi OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Cc, ucp_Cs) | UCPCAT2(ucp_Zl, ucp_Zp));
8366*22dc650dSSadaf Ebrahimi OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
8367*22dc650dSSadaf Ebrahimi
8368*22dc650dSSadaf Ebrahimi OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT(ucp_Cf));
8369*22dc650dSSadaf Ebrahimi jump = JUMP(SLJIT_ZERO);
8370*22dc650dSSadaf Ebrahimi
8371*22dc650dSSadaf Ebrahimi c = charoffset;
8372*22dc650dSSadaf Ebrahimi /* In case of ucp_Cf, we overwrite the result. */
8373*22dc650dSSadaf Ebrahimi SET_CHAR_OFFSET(0x2066);
8374*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
8375*22dc650dSSadaf Ebrahimi OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8376*22dc650dSSadaf Ebrahimi
8377*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
8378*22dc650dSSadaf Ebrahimi OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8379*22dc650dSSadaf Ebrahimi
8380*22dc650dSSadaf Ebrahimi /* Restore charoffset. */
8381*22dc650dSSadaf Ebrahimi SET_CHAR_OFFSET(c);
8382*22dc650dSSadaf Ebrahimi
8383*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
8384*22dc650dSSadaf Ebrahimi jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
8385*22dc650dSSadaf Ebrahimi break;
8386*22dc650dSSadaf Ebrahimi
8387*22dc650dSSadaf Ebrahimi case PT_PXPUNCT:
8388*22dc650dSSadaf Ebrahimi OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Sc, ucp_So));
8389*22dc650dSSadaf Ebrahimi OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
8390*22dc650dSSadaf Ebrahimi
8391*22dc650dSSadaf Ebrahimi SET_CHAR_OFFSET(0);
8392*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x7f);
8393*22dc650dSSadaf Ebrahimi OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);
8394*22dc650dSSadaf Ebrahimi
8395*22dc650dSSadaf Ebrahimi OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Pc, ucp_Ps));
8396*22dc650dSSadaf Ebrahimi OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_ZERO);
8397*22dc650dSSadaf Ebrahimi jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8398*22dc650dSSadaf Ebrahimi break;
8399*22dc650dSSadaf Ebrahimi
8400*22dc650dSSadaf Ebrahimi case PT_PXXDIGIT:
8401*22dc650dSSadaf Ebrahimi SET_CHAR_OFFSET(CHAR_A);
8402*22dc650dSSadaf Ebrahimi OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, ~0x20);
8403*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP2, 0, SLJIT_IMM, CHAR_F - CHAR_A);
8404*22dc650dSSadaf Ebrahimi OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8405*22dc650dSSadaf Ebrahimi
8406*22dc650dSSadaf Ebrahimi SET_CHAR_OFFSET(CHAR_0);
8407*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_9 - CHAR_0);
8408*22dc650dSSadaf Ebrahimi OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8409*22dc650dSSadaf Ebrahimi
8410*22dc650dSSadaf Ebrahimi SET_CHAR_OFFSET(0xff10);
8411*22dc650dSSadaf Ebrahimi jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 0xff46 - 0xff10);
8412*22dc650dSSadaf Ebrahimi
8413*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff19 - 0xff10);
8414*22dc650dSSadaf Ebrahimi OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8415*22dc650dSSadaf Ebrahimi
8416*22dc650dSSadaf Ebrahimi SET_CHAR_OFFSET(0xff21);
8417*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff26 - 0xff21);
8418*22dc650dSSadaf Ebrahimi OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8419*22dc650dSSadaf Ebrahimi
8420*22dc650dSSadaf Ebrahimi SET_CHAR_OFFSET(0xff41);
8421*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff46 - 0xff41);
8422*22dc650dSSadaf Ebrahimi OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8423*22dc650dSSadaf Ebrahimi
8424*22dc650dSSadaf Ebrahimi SET_CHAR_OFFSET(0xff10);
8425*22dc650dSSadaf Ebrahimi
8426*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
8427*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0);
8428*22dc650dSSadaf Ebrahimi jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8429*22dc650dSSadaf Ebrahimi break;
8430*22dc650dSSadaf Ebrahimi
8431*22dc650dSSadaf Ebrahimi default:
8432*22dc650dSSadaf Ebrahimi SLJIT_UNREACHABLE();
8433*22dc650dSSadaf Ebrahimi break;
8434*22dc650dSSadaf Ebrahimi }
8435*22dc650dSSadaf Ebrahimi cc += 2;
8436*22dc650dSSadaf Ebrahimi }
8437*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE */
8438*22dc650dSSadaf Ebrahimi
8439*22dc650dSSadaf Ebrahimi if (jump != NULL)
8440*22dc650dSSadaf Ebrahimi add_jump(compiler, compares > 0 ? list : backtracks, jump);
8441*22dc650dSSadaf Ebrahimi }
8442*22dc650dSSadaf Ebrahimi
8443*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(compares == 0);
8444*22dc650dSSadaf Ebrahimi if (found != NULL)
8445*22dc650dSSadaf Ebrahimi set_jumps(found, LABEL());
8446*22dc650dSSadaf Ebrahimi }
8447*22dc650dSSadaf Ebrahimi
8448*22dc650dSSadaf Ebrahimi #undef SET_TYPE_OFFSET
8449*22dc650dSSadaf Ebrahimi #undef SET_CHAR_OFFSET
8450*22dc650dSSadaf Ebrahimi
8451*22dc650dSSadaf Ebrahimi #endif
8452*22dc650dSSadaf Ebrahimi
compile_simple_assertion_matchingpath(compiler_common * common,PCRE2_UCHAR type,PCRE2_SPTR cc,jump_list ** backtracks)8453*22dc650dSSadaf Ebrahimi static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks)
8454*22dc650dSSadaf Ebrahimi {
8455*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
8456*22dc650dSSadaf Ebrahimi struct sljit_jump *jump[4];
8457*22dc650dSSadaf Ebrahimi
8458*22dc650dSSadaf Ebrahimi switch(type)
8459*22dc650dSSadaf Ebrahimi {
8460*22dc650dSSadaf Ebrahimi case OP_SOD:
8461*22dc650dSSadaf Ebrahimi if (HAS_VIRTUAL_REGISTERS)
8462*22dc650dSSadaf Ebrahimi {
8463*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8464*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
8465*22dc650dSSadaf Ebrahimi }
8466*22dc650dSSadaf Ebrahimi else
8467*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
8468*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
8469*22dc650dSSadaf Ebrahimi return cc;
8470*22dc650dSSadaf Ebrahimi
8471*22dc650dSSadaf Ebrahimi case OP_SOM:
8472*22dc650dSSadaf Ebrahimi if (HAS_VIRTUAL_REGISTERS)
8473*22dc650dSSadaf Ebrahimi {
8474*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8475*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
8476*22dc650dSSadaf Ebrahimi }
8477*22dc650dSSadaf Ebrahimi else
8478*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
8479*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
8480*22dc650dSSadaf Ebrahimi return cc;
8481*22dc650dSSadaf Ebrahimi
8482*22dc650dSSadaf Ebrahimi case OP_NOT_WORD_BOUNDARY:
8483*22dc650dSSadaf Ebrahimi case OP_WORD_BOUNDARY:
8484*22dc650dSSadaf Ebrahimi case OP_NOT_UCP_WORD_BOUNDARY:
8485*22dc650dSSadaf Ebrahimi case OP_UCP_WORD_BOUNDARY:
8486*22dc650dSSadaf Ebrahimi add_jump(compiler, (type == OP_NOT_WORD_BOUNDARY || type == OP_WORD_BOUNDARY) ? &common->wordboundary : &common->ucp_wordboundary, JUMP(SLJIT_FAST_CALL));
8487*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
8488*22dc650dSSadaf Ebrahimi if (common->invalid_utf)
8489*22dc650dSSadaf Ebrahimi {
8490*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP((type == OP_NOT_WORD_BOUNDARY || type == OP_NOT_UCP_WORD_BOUNDARY) ? SLJIT_NOT_EQUAL : SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0));
8491*22dc650dSSadaf Ebrahimi return cc;
8492*22dc650dSSadaf Ebrahimi }
8493*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE */
8494*22dc650dSSadaf Ebrahimi sljit_set_current_flags(compiler, SLJIT_SET_Z);
8495*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, JUMP((type == OP_NOT_WORD_BOUNDARY || type == OP_NOT_UCP_WORD_BOUNDARY) ? SLJIT_NOT_ZERO : SLJIT_ZERO));
8496*22dc650dSSadaf Ebrahimi return cc;
8497*22dc650dSSadaf Ebrahimi
8498*22dc650dSSadaf Ebrahimi case OP_EODN:
8499*22dc650dSSadaf Ebrahimi /* Requires rather complex checks. */
8500*22dc650dSSadaf Ebrahimi jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
8501*22dc650dSSadaf Ebrahimi if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8502*22dc650dSSadaf Ebrahimi {
8503*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8504*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8505*22dc650dSSadaf Ebrahimi if (common->mode == PCRE2_JIT_COMPLETE)
8506*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
8507*22dc650dSSadaf Ebrahimi else
8508*22dc650dSSadaf Ebrahimi {
8509*22dc650dSSadaf Ebrahimi jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
8510*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, STR_END, 0);
8511*22dc650dSSadaf Ebrahimi OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
8512*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
8513*22dc650dSSadaf Ebrahimi OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
8514*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
8515*22dc650dSSadaf Ebrahimi check_partial(common, TRUE);
8516*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
8517*22dc650dSSadaf Ebrahimi JUMPHERE(jump[1]);
8518*22dc650dSSadaf Ebrahimi }
8519*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
8520*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8521*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
8522*22dc650dSSadaf Ebrahimi }
8523*22dc650dSSadaf Ebrahimi else if (common->nltype == NLTYPE_FIXED)
8524*22dc650dSSadaf Ebrahimi {
8525*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8526*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8527*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
8528*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
8529*22dc650dSSadaf Ebrahimi }
8530*22dc650dSSadaf Ebrahimi else
8531*22dc650dSSadaf Ebrahimi {
8532*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8533*22dc650dSSadaf Ebrahimi jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
8534*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8535*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, TMP2, 0, STR_END, 0);
8536*22dc650dSSadaf Ebrahimi jump[2] = JUMP(SLJIT_GREATER);
8537*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
8538*22dc650dSSadaf Ebrahimi /* Equal. */
8539*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
8540*22dc650dSSadaf Ebrahimi jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
8541*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
8542*22dc650dSSadaf Ebrahimi
8543*22dc650dSSadaf Ebrahimi JUMPHERE(jump[1]);
8544*22dc650dSSadaf Ebrahimi if (common->nltype == NLTYPE_ANYCRLF)
8545*22dc650dSSadaf Ebrahimi {
8546*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8547*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
8548*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
8549*22dc650dSSadaf Ebrahimi }
8550*22dc650dSSadaf Ebrahimi else
8551*22dc650dSSadaf Ebrahimi {
8552*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
8553*22dc650dSSadaf Ebrahimi read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
8554*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
8555*22dc650dSSadaf Ebrahimi add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
8556*22dc650dSSadaf Ebrahimi sljit_set_current_flags(compiler, SLJIT_SET_Z);
8557*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
8558*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
8559*22dc650dSSadaf Ebrahimi }
8560*22dc650dSSadaf Ebrahimi JUMPHERE(jump[2]);
8561*22dc650dSSadaf Ebrahimi JUMPHERE(jump[3]);
8562*22dc650dSSadaf Ebrahimi }
8563*22dc650dSSadaf Ebrahimi JUMPHERE(jump[0]);
8564*22dc650dSSadaf Ebrahimi if (common->mode != PCRE2_JIT_COMPLETE)
8565*22dc650dSSadaf Ebrahimi check_partial(common, TRUE);
8566*22dc650dSSadaf Ebrahimi return cc;
8567*22dc650dSSadaf Ebrahimi
8568*22dc650dSSadaf Ebrahimi case OP_EOD:
8569*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
8570*22dc650dSSadaf Ebrahimi if (common->mode != PCRE2_JIT_COMPLETE)
8571*22dc650dSSadaf Ebrahimi check_partial(common, TRUE);
8572*22dc650dSSadaf Ebrahimi return cc;
8573*22dc650dSSadaf Ebrahimi
8574*22dc650dSSadaf Ebrahimi case OP_DOLL:
8575*22dc650dSSadaf Ebrahimi if (HAS_VIRTUAL_REGISTERS)
8576*22dc650dSSadaf Ebrahimi {
8577*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
8578*22dc650dSSadaf Ebrahimi OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
8579*22dc650dSSadaf Ebrahimi }
8580*22dc650dSSadaf Ebrahimi else
8581*22dc650dSSadaf Ebrahimi OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
8582*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8583*22dc650dSSadaf Ebrahimi
8584*22dc650dSSadaf Ebrahimi if (!common->endonly)
8585*22dc650dSSadaf Ebrahimi compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
8586*22dc650dSSadaf Ebrahimi else
8587*22dc650dSSadaf Ebrahimi {
8588*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
8589*22dc650dSSadaf Ebrahimi check_partial(common, FALSE);
8590*22dc650dSSadaf Ebrahimi }
8591*22dc650dSSadaf Ebrahimi return cc;
8592*22dc650dSSadaf Ebrahimi
8593*22dc650dSSadaf Ebrahimi case OP_DOLLM:
8594*22dc650dSSadaf Ebrahimi jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
8595*22dc650dSSadaf Ebrahimi if (HAS_VIRTUAL_REGISTERS)
8596*22dc650dSSadaf Ebrahimi {
8597*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
8598*22dc650dSSadaf Ebrahimi OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
8599*22dc650dSSadaf Ebrahimi }
8600*22dc650dSSadaf Ebrahimi else
8601*22dc650dSSadaf Ebrahimi OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
8602*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8603*22dc650dSSadaf Ebrahimi check_partial(common, FALSE);
8604*22dc650dSSadaf Ebrahimi jump[0] = JUMP(SLJIT_JUMP);
8605*22dc650dSSadaf Ebrahimi JUMPHERE(jump[1]);
8606*22dc650dSSadaf Ebrahimi
8607*22dc650dSSadaf Ebrahimi if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8608*22dc650dSSadaf Ebrahimi {
8609*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8610*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8611*22dc650dSSadaf Ebrahimi if (common->mode == PCRE2_JIT_COMPLETE)
8612*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
8613*22dc650dSSadaf Ebrahimi else
8614*22dc650dSSadaf Ebrahimi {
8615*22dc650dSSadaf Ebrahimi jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
8616*22dc650dSSadaf Ebrahimi /* STR_PTR = STR_END - IN_UCHARS(1) */
8617*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8618*22dc650dSSadaf Ebrahimi check_partial(common, TRUE);
8619*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
8620*22dc650dSSadaf Ebrahimi JUMPHERE(jump[1]);
8621*22dc650dSSadaf Ebrahimi }
8622*22dc650dSSadaf Ebrahimi
8623*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
8624*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8625*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
8626*22dc650dSSadaf Ebrahimi }
8627*22dc650dSSadaf Ebrahimi else
8628*22dc650dSSadaf Ebrahimi {
8629*22dc650dSSadaf Ebrahimi peek_char(common, common->nlmax, TMP3, 0, NULL);
8630*22dc650dSSadaf Ebrahimi check_newlinechar(common, common->nltype, backtracks, FALSE);
8631*22dc650dSSadaf Ebrahimi }
8632*22dc650dSSadaf Ebrahimi JUMPHERE(jump[0]);
8633*22dc650dSSadaf Ebrahimi return cc;
8634*22dc650dSSadaf Ebrahimi
8635*22dc650dSSadaf Ebrahimi case OP_CIRC:
8636*22dc650dSSadaf Ebrahimi if (HAS_VIRTUAL_REGISTERS)
8637*22dc650dSSadaf Ebrahimi {
8638*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
8639*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
8640*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
8641*22dc650dSSadaf Ebrahimi OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
8642*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8643*22dc650dSSadaf Ebrahimi }
8644*22dc650dSSadaf Ebrahimi else
8645*22dc650dSSadaf Ebrahimi {
8646*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
8647*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
8648*22dc650dSSadaf Ebrahimi OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
8649*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8650*22dc650dSSadaf Ebrahimi }
8651*22dc650dSSadaf Ebrahimi return cc;
8652*22dc650dSSadaf Ebrahimi
8653*22dc650dSSadaf Ebrahimi case OP_CIRCM:
8654*22dc650dSSadaf Ebrahimi /* TMP2 might be used by peek_char_back. */
8655*22dc650dSSadaf Ebrahimi if (HAS_VIRTUAL_REGISTERS)
8656*22dc650dSSadaf Ebrahimi {
8657*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8658*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
8659*22dc650dSSadaf Ebrahimi jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
8660*22dc650dSSadaf Ebrahimi OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
8661*22dc650dSSadaf Ebrahimi }
8662*22dc650dSSadaf Ebrahimi else
8663*22dc650dSSadaf Ebrahimi {
8664*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
8665*22dc650dSSadaf Ebrahimi jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
8666*22dc650dSSadaf Ebrahimi OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
8667*22dc650dSSadaf Ebrahimi }
8668*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8669*22dc650dSSadaf Ebrahimi jump[0] = JUMP(SLJIT_JUMP);
8670*22dc650dSSadaf Ebrahimi JUMPHERE(jump[1]);
8671*22dc650dSSadaf Ebrahimi
8672*22dc650dSSadaf Ebrahimi if (!common->alt_circumflex)
8673*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
8674*22dc650dSSadaf Ebrahimi
8675*22dc650dSSadaf Ebrahimi if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8676*22dc650dSSadaf Ebrahimi {
8677*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8678*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, TMP2, 0));
8679*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
8680*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
8681*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8682*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
8683*22dc650dSSadaf Ebrahimi }
8684*22dc650dSSadaf Ebrahimi else
8685*22dc650dSSadaf Ebrahimi {
8686*22dc650dSSadaf Ebrahimi peek_char_back(common, common->nlmax, backtracks);
8687*22dc650dSSadaf Ebrahimi check_newlinechar(common, common->nltype, backtracks, FALSE);
8688*22dc650dSSadaf Ebrahimi }
8689*22dc650dSSadaf Ebrahimi JUMPHERE(jump[0]);
8690*22dc650dSSadaf Ebrahimi return cc;
8691*22dc650dSSadaf Ebrahimi }
8692*22dc650dSSadaf Ebrahimi SLJIT_UNREACHABLE();
8693*22dc650dSSadaf Ebrahimi return cc;
8694*22dc650dSSadaf Ebrahimi }
8695*22dc650dSSadaf Ebrahimi
8696*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
8697*22dc650dSSadaf Ebrahimi
8698*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH != 32
8699*22dc650dSSadaf Ebrahimi
8700*22dc650dSSadaf Ebrahimi /* The code in this function copies the logic of the interpreter function that
8701*22dc650dSSadaf Ebrahimi is defined in the pcre2_extuni.c source. If that code is updated, this
8702*22dc650dSSadaf Ebrahimi function, and those below it, must be kept in step (note by PH, June 2024). */
8703*22dc650dSSadaf Ebrahimi
do_extuni_utf(jit_arguments * args,PCRE2_SPTR cc)8704*22dc650dSSadaf Ebrahimi static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(jit_arguments *args, PCRE2_SPTR cc)
8705*22dc650dSSadaf Ebrahimi {
8706*22dc650dSSadaf Ebrahimi PCRE2_SPTR start_subject = args->begin;
8707*22dc650dSSadaf Ebrahimi PCRE2_SPTR end_subject = args->end;
8708*22dc650dSSadaf Ebrahimi int lgb, rgb, ricount;
8709*22dc650dSSadaf Ebrahimi PCRE2_SPTR prevcc, endcc, bptr;
8710*22dc650dSSadaf Ebrahimi BOOL first = TRUE;
8711*22dc650dSSadaf Ebrahimi BOOL was_ep_ZWJ = FALSE;
8712*22dc650dSSadaf Ebrahimi uint32_t c;
8713*22dc650dSSadaf Ebrahimi
8714*22dc650dSSadaf Ebrahimi prevcc = cc;
8715*22dc650dSSadaf Ebrahimi endcc = NULL;
8716*22dc650dSSadaf Ebrahimi do
8717*22dc650dSSadaf Ebrahimi {
8718*22dc650dSSadaf Ebrahimi GETCHARINC(c, cc);
8719*22dc650dSSadaf Ebrahimi rgb = UCD_GRAPHBREAK(c);
8720*22dc650dSSadaf Ebrahimi
8721*22dc650dSSadaf Ebrahimi if (first)
8722*22dc650dSSadaf Ebrahimi {
8723*22dc650dSSadaf Ebrahimi lgb = rgb;
8724*22dc650dSSadaf Ebrahimi endcc = cc;
8725*22dc650dSSadaf Ebrahimi first = FALSE;
8726*22dc650dSSadaf Ebrahimi continue;
8727*22dc650dSSadaf Ebrahimi }
8728*22dc650dSSadaf Ebrahimi
8729*22dc650dSSadaf Ebrahimi if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8730*22dc650dSSadaf Ebrahimi break;
8731*22dc650dSSadaf Ebrahimi
8732*22dc650dSSadaf Ebrahimi /* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was
8733*22dc650dSSadaf Ebrahimi preceded by Extended Pictographic. */
8734*22dc650dSSadaf Ebrahimi
8735*22dc650dSSadaf Ebrahimi if (lgb == ucp_gbZWJ && rgb == ucp_gbExtended_Pictographic && !was_ep_ZWJ)
8736*22dc650dSSadaf Ebrahimi break;
8737*22dc650dSSadaf Ebrahimi
8738*22dc650dSSadaf Ebrahimi /* Not breaking between Regional Indicators is allowed only if there
8739*22dc650dSSadaf Ebrahimi are an even number of preceding RIs. */
8740*22dc650dSSadaf Ebrahimi
8741*22dc650dSSadaf Ebrahimi if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
8742*22dc650dSSadaf Ebrahimi {
8743*22dc650dSSadaf Ebrahimi ricount = 0;
8744*22dc650dSSadaf Ebrahimi bptr = prevcc;
8745*22dc650dSSadaf Ebrahimi
8746*22dc650dSSadaf Ebrahimi /* bptr is pointing to the left-hand character */
8747*22dc650dSSadaf Ebrahimi while (bptr > start_subject)
8748*22dc650dSSadaf Ebrahimi {
8749*22dc650dSSadaf Ebrahimi bptr--;
8750*22dc650dSSadaf Ebrahimi BACKCHAR(bptr);
8751*22dc650dSSadaf Ebrahimi GETCHAR(c, bptr);
8752*22dc650dSSadaf Ebrahimi
8753*22dc650dSSadaf Ebrahimi if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator)
8754*22dc650dSSadaf Ebrahimi break;
8755*22dc650dSSadaf Ebrahimi
8756*22dc650dSSadaf Ebrahimi ricount++;
8757*22dc650dSSadaf Ebrahimi }
8758*22dc650dSSadaf Ebrahimi
8759*22dc650dSSadaf Ebrahimi if ((ricount & 1) != 0) break; /* Grapheme break required */
8760*22dc650dSSadaf Ebrahimi }
8761*22dc650dSSadaf Ebrahimi
8762*22dc650dSSadaf Ebrahimi /* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in
8763*22dc650dSSadaf Ebrahimi between; see next statement). */
8764*22dc650dSSadaf Ebrahimi
8765*22dc650dSSadaf Ebrahimi was_ep_ZWJ = (lgb == ucp_gbExtended_Pictographic && rgb == ucp_gbZWJ);
8766*22dc650dSSadaf Ebrahimi
8767*22dc650dSSadaf Ebrahimi /* If Extend follows Extended_Pictographic, do not update lgb; this allows
8768*22dc650dSSadaf Ebrahimi any number of them before a following ZWJ. */
8769*22dc650dSSadaf Ebrahimi
8770*22dc650dSSadaf Ebrahimi if (rgb != ucp_gbExtend || lgb != ucp_gbExtended_Pictographic)
8771*22dc650dSSadaf Ebrahimi lgb = rgb;
8772*22dc650dSSadaf Ebrahimi
8773*22dc650dSSadaf Ebrahimi prevcc = endcc;
8774*22dc650dSSadaf Ebrahimi endcc = cc;
8775*22dc650dSSadaf Ebrahimi }
8776*22dc650dSSadaf Ebrahimi while (cc < end_subject);
8777*22dc650dSSadaf Ebrahimi
8778*22dc650dSSadaf Ebrahimi return endcc;
8779*22dc650dSSadaf Ebrahimi }
8780*22dc650dSSadaf Ebrahimi
8781*22dc650dSSadaf Ebrahimi #endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
8782*22dc650dSSadaf Ebrahimi
8783*22dc650dSSadaf Ebrahimi /* The code in this function copies the logic of the interpreter function that
8784*22dc650dSSadaf Ebrahimi is defined in the pcre2_extuni.c source. If that code is updated, this
8785*22dc650dSSadaf Ebrahimi function, and the one below it, must be kept in step (note by PH, June 2024). */
8786*22dc650dSSadaf Ebrahimi
do_extuni_utf_invalid(jit_arguments * args,PCRE2_SPTR cc)8787*22dc650dSSadaf Ebrahimi static PCRE2_SPTR SLJIT_FUNC do_extuni_utf_invalid(jit_arguments *args, PCRE2_SPTR cc)
8788*22dc650dSSadaf Ebrahimi {
8789*22dc650dSSadaf Ebrahimi PCRE2_SPTR start_subject = args->begin;
8790*22dc650dSSadaf Ebrahimi PCRE2_SPTR end_subject = args->end;
8791*22dc650dSSadaf Ebrahimi int lgb, rgb, ricount;
8792*22dc650dSSadaf Ebrahimi PCRE2_SPTR prevcc, endcc, bptr;
8793*22dc650dSSadaf Ebrahimi BOOL first = TRUE;
8794*22dc650dSSadaf Ebrahimi BOOL was_ep_ZWJ = FALSE;
8795*22dc650dSSadaf Ebrahimi uint32_t c;
8796*22dc650dSSadaf Ebrahimi
8797*22dc650dSSadaf Ebrahimi prevcc = cc;
8798*22dc650dSSadaf Ebrahimi endcc = NULL;
8799*22dc650dSSadaf Ebrahimi do
8800*22dc650dSSadaf Ebrahimi {
8801*22dc650dSSadaf Ebrahimi GETCHARINC_INVALID(c, cc, end_subject, break);
8802*22dc650dSSadaf Ebrahimi rgb = UCD_GRAPHBREAK(c);
8803*22dc650dSSadaf Ebrahimi
8804*22dc650dSSadaf Ebrahimi if (first)
8805*22dc650dSSadaf Ebrahimi {
8806*22dc650dSSadaf Ebrahimi lgb = rgb;
8807*22dc650dSSadaf Ebrahimi endcc = cc;
8808*22dc650dSSadaf Ebrahimi first = FALSE;
8809*22dc650dSSadaf Ebrahimi continue;
8810*22dc650dSSadaf Ebrahimi }
8811*22dc650dSSadaf Ebrahimi
8812*22dc650dSSadaf Ebrahimi if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8813*22dc650dSSadaf Ebrahimi break;
8814*22dc650dSSadaf Ebrahimi
8815*22dc650dSSadaf Ebrahimi /* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was
8816*22dc650dSSadaf Ebrahimi preceded by Extended Pictographic. */
8817*22dc650dSSadaf Ebrahimi
8818*22dc650dSSadaf Ebrahimi if (lgb == ucp_gbZWJ && rgb == ucp_gbExtended_Pictographic && !was_ep_ZWJ)
8819*22dc650dSSadaf Ebrahimi break;
8820*22dc650dSSadaf Ebrahimi
8821*22dc650dSSadaf Ebrahimi /* Not breaking between Regional Indicators is allowed only if there
8822*22dc650dSSadaf Ebrahimi are an even number of preceding RIs. */
8823*22dc650dSSadaf Ebrahimi
8824*22dc650dSSadaf Ebrahimi if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
8825*22dc650dSSadaf Ebrahimi {
8826*22dc650dSSadaf Ebrahimi ricount = 0;
8827*22dc650dSSadaf Ebrahimi bptr = prevcc;
8828*22dc650dSSadaf Ebrahimi
8829*22dc650dSSadaf Ebrahimi /* bptr is pointing to the left-hand character */
8830*22dc650dSSadaf Ebrahimi while (bptr > start_subject)
8831*22dc650dSSadaf Ebrahimi {
8832*22dc650dSSadaf Ebrahimi GETCHARBACK_INVALID(c, bptr, start_subject, break);
8833*22dc650dSSadaf Ebrahimi
8834*22dc650dSSadaf Ebrahimi if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator)
8835*22dc650dSSadaf Ebrahimi break;
8836*22dc650dSSadaf Ebrahimi
8837*22dc650dSSadaf Ebrahimi ricount++;
8838*22dc650dSSadaf Ebrahimi }
8839*22dc650dSSadaf Ebrahimi
8840*22dc650dSSadaf Ebrahimi if ((ricount & 1) != 0)
8841*22dc650dSSadaf Ebrahimi break; /* Grapheme break required */
8842*22dc650dSSadaf Ebrahimi }
8843*22dc650dSSadaf Ebrahimi
8844*22dc650dSSadaf Ebrahimi /* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in
8845*22dc650dSSadaf Ebrahimi between; see next statement). */
8846*22dc650dSSadaf Ebrahimi
8847*22dc650dSSadaf Ebrahimi was_ep_ZWJ = (lgb == ucp_gbExtended_Pictographic && rgb == ucp_gbZWJ);
8848*22dc650dSSadaf Ebrahimi
8849*22dc650dSSadaf Ebrahimi /* If Extend follows Extended_Pictographic, do not update lgb; this allows
8850*22dc650dSSadaf Ebrahimi any number of them before a following ZWJ. */
8851*22dc650dSSadaf Ebrahimi
8852*22dc650dSSadaf Ebrahimi if (rgb != ucp_gbExtend || lgb != ucp_gbExtended_Pictographic)
8853*22dc650dSSadaf Ebrahimi lgb = rgb;
8854*22dc650dSSadaf Ebrahimi
8855*22dc650dSSadaf Ebrahimi prevcc = endcc;
8856*22dc650dSSadaf Ebrahimi endcc = cc;
8857*22dc650dSSadaf Ebrahimi }
8858*22dc650dSSadaf Ebrahimi while (cc < end_subject);
8859*22dc650dSSadaf Ebrahimi
8860*22dc650dSSadaf Ebrahimi return endcc;
8861*22dc650dSSadaf Ebrahimi }
8862*22dc650dSSadaf Ebrahimi
8863*22dc650dSSadaf Ebrahimi /* The code in this function copies the logic of the interpreter function that
8864*22dc650dSSadaf Ebrahimi is defined in the pcre2_extuni.c source. If that code is updated, this
8865*22dc650dSSadaf Ebrahimi function must be kept in step (note by PH, June 2024). */
8866*22dc650dSSadaf Ebrahimi
do_extuni_no_utf(jit_arguments * args,PCRE2_SPTR cc)8867*22dc650dSSadaf Ebrahimi static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(jit_arguments *args, PCRE2_SPTR cc)
8868*22dc650dSSadaf Ebrahimi {
8869*22dc650dSSadaf Ebrahimi PCRE2_SPTR start_subject = args->begin;
8870*22dc650dSSadaf Ebrahimi PCRE2_SPTR end_subject = args->end;
8871*22dc650dSSadaf Ebrahimi int lgb, rgb, ricount;
8872*22dc650dSSadaf Ebrahimi PCRE2_SPTR bptr;
8873*22dc650dSSadaf Ebrahimi uint32_t c;
8874*22dc650dSSadaf Ebrahimi BOOL was_ep_ZWJ = FALSE;
8875*22dc650dSSadaf Ebrahimi
8876*22dc650dSSadaf Ebrahimi /* Patch by PH */
8877*22dc650dSSadaf Ebrahimi /* GETCHARINC(c, cc); */
8878*22dc650dSSadaf Ebrahimi c = *cc++;
8879*22dc650dSSadaf Ebrahimi
8880*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 32
8881*22dc650dSSadaf Ebrahimi if (c >= 0x110000)
8882*22dc650dSSadaf Ebrahimi return cc;
8883*22dc650dSSadaf Ebrahimi #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8884*22dc650dSSadaf Ebrahimi lgb = UCD_GRAPHBREAK(c);
8885*22dc650dSSadaf Ebrahimi
8886*22dc650dSSadaf Ebrahimi while (cc < end_subject)
8887*22dc650dSSadaf Ebrahimi {
8888*22dc650dSSadaf Ebrahimi c = *cc;
8889*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 32
8890*22dc650dSSadaf Ebrahimi if (c >= 0x110000)
8891*22dc650dSSadaf Ebrahimi break;
8892*22dc650dSSadaf Ebrahimi #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8893*22dc650dSSadaf Ebrahimi rgb = UCD_GRAPHBREAK(c);
8894*22dc650dSSadaf Ebrahimi
8895*22dc650dSSadaf Ebrahimi if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8896*22dc650dSSadaf Ebrahimi break;
8897*22dc650dSSadaf Ebrahimi
8898*22dc650dSSadaf Ebrahimi /* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was
8899*22dc650dSSadaf Ebrahimi preceded by Extended Pictographic. */
8900*22dc650dSSadaf Ebrahimi
8901*22dc650dSSadaf Ebrahimi if (lgb == ucp_gbZWJ && rgb == ucp_gbExtended_Pictographic && !was_ep_ZWJ)
8902*22dc650dSSadaf Ebrahimi break;
8903*22dc650dSSadaf Ebrahimi
8904*22dc650dSSadaf Ebrahimi /* Not breaking between Regional Indicators is allowed only if there
8905*22dc650dSSadaf Ebrahimi are an even number of preceding RIs. */
8906*22dc650dSSadaf Ebrahimi
8907*22dc650dSSadaf Ebrahimi if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
8908*22dc650dSSadaf Ebrahimi {
8909*22dc650dSSadaf Ebrahimi ricount = 0;
8910*22dc650dSSadaf Ebrahimi bptr = cc - 1;
8911*22dc650dSSadaf Ebrahimi
8912*22dc650dSSadaf Ebrahimi /* bptr is pointing to the left-hand character */
8913*22dc650dSSadaf Ebrahimi while (bptr > start_subject)
8914*22dc650dSSadaf Ebrahimi {
8915*22dc650dSSadaf Ebrahimi bptr--;
8916*22dc650dSSadaf Ebrahimi c = *bptr;
8917*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 32
8918*22dc650dSSadaf Ebrahimi if (c >= 0x110000)
8919*22dc650dSSadaf Ebrahimi break;
8920*22dc650dSSadaf Ebrahimi #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8921*22dc650dSSadaf Ebrahimi
8922*22dc650dSSadaf Ebrahimi if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) break;
8923*22dc650dSSadaf Ebrahimi
8924*22dc650dSSadaf Ebrahimi ricount++;
8925*22dc650dSSadaf Ebrahimi }
8926*22dc650dSSadaf Ebrahimi
8927*22dc650dSSadaf Ebrahimi if ((ricount & 1) != 0)
8928*22dc650dSSadaf Ebrahimi break; /* Grapheme break required */
8929*22dc650dSSadaf Ebrahimi }
8930*22dc650dSSadaf Ebrahimi
8931*22dc650dSSadaf Ebrahimi /* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in
8932*22dc650dSSadaf Ebrahimi between; see next statement). */
8933*22dc650dSSadaf Ebrahimi
8934*22dc650dSSadaf Ebrahimi was_ep_ZWJ = (lgb == ucp_gbExtended_Pictographic && rgb == ucp_gbZWJ);
8935*22dc650dSSadaf Ebrahimi
8936*22dc650dSSadaf Ebrahimi /* If Extend follows Extended_Pictographic, do not update lgb; this allows
8937*22dc650dSSadaf Ebrahimi any number of them before a following ZWJ. */
8938*22dc650dSSadaf Ebrahimi
8939*22dc650dSSadaf Ebrahimi if (rgb != ucp_gbExtend || lgb != ucp_gbExtended_Pictographic)
8940*22dc650dSSadaf Ebrahimi lgb = rgb;
8941*22dc650dSSadaf Ebrahimi
8942*22dc650dSSadaf Ebrahimi cc++;
8943*22dc650dSSadaf Ebrahimi }
8944*22dc650dSSadaf Ebrahimi
8945*22dc650dSSadaf Ebrahimi return cc;
8946*22dc650dSSadaf Ebrahimi }
8947*22dc650dSSadaf Ebrahimi
8948*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE */
8949*22dc650dSSadaf Ebrahimi
compile_char1_matchingpath(compiler_common * common,PCRE2_UCHAR type,PCRE2_SPTR cc,jump_list ** backtracks,BOOL check_str_ptr)8950*22dc650dSSadaf Ebrahimi static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr)
8951*22dc650dSSadaf Ebrahimi {
8952*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
8953*22dc650dSSadaf Ebrahimi int length;
8954*22dc650dSSadaf Ebrahimi unsigned int c, oc, bit;
8955*22dc650dSSadaf Ebrahimi compare_context context;
8956*22dc650dSSadaf Ebrahimi struct sljit_jump *jump[3];
8957*22dc650dSSadaf Ebrahimi jump_list *end_list;
8958*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
8959*22dc650dSSadaf Ebrahimi PCRE2_UCHAR propdata[5];
8960*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE */
8961*22dc650dSSadaf Ebrahimi
8962*22dc650dSSadaf Ebrahimi switch(type)
8963*22dc650dSSadaf Ebrahimi {
8964*22dc650dSSadaf Ebrahimi case OP_NOT_DIGIT:
8965*22dc650dSSadaf Ebrahimi case OP_DIGIT:
8966*22dc650dSSadaf Ebrahimi /* Digits are usually 0-9, so it is worth to optimize them. */
8967*22dc650dSSadaf Ebrahimi if (check_str_ptr)
8968*22dc650dSSadaf Ebrahimi detect_partial_match(common, backtracks);
8969*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8970*22dc650dSSadaf Ebrahimi if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_digit, FALSE))
8971*22dc650dSSadaf Ebrahimi read_char7_type(common, backtracks, type == OP_NOT_DIGIT);
8972*22dc650dSSadaf Ebrahimi else
8973*22dc650dSSadaf Ebrahimi #endif
8974*22dc650dSSadaf Ebrahimi read_char8_type(common, backtracks, type == OP_NOT_DIGIT);
8975*22dc650dSSadaf Ebrahimi /* Flip the starting bit in the negative case. */
8976*22dc650dSSadaf Ebrahimi OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_digit);
8977*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
8978*22dc650dSSadaf Ebrahimi return cc;
8979*22dc650dSSadaf Ebrahimi
8980*22dc650dSSadaf Ebrahimi case OP_NOT_WHITESPACE:
8981*22dc650dSSadaf Ebrahimi case OP_WHITESPACE:
8982*22dc650dSSadaf Ebrahimi if (check_str_ptr)
8983*22dc650dSSadaf Ebrahimi detect_partial_match(common, backtracks);
8984*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8985*22dc650dSSadaf Ebrahimi if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_space, FALSE))
8986*22dc650dSSadaf Ebrahimi read_char7_type(common, backtracks, type == OP_NOT_WHITESPACE);
8987*22dc650dSSadaf Ebrahimi else
8988*22dc650dSSadaf Ebrahimi #endif
8989*22dc650dSSadaf Ebrahimi read_char8_type(common, backtracks, type == OP_NOT_WHITESPACE);
8990*22dc650dSSadaf Ebrahimi OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_space);
8991*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
8992*22dc650dSSadaf Ebrahimi return cc;
8993*22dc650dSSadaf Ebrahimi
8994*22dc650dSSadaf Ebrahimi case OP_NOT_WORDCHAR:
8995*22dc650dSSadaf Ebrahimi case OP_WORDCHAR:
8996*22dc650dSSadaf Ebrahimi if (check_str_ptr)
8997*22dc650dSSadaf Ebrahimi detect_partial_match(common, backtracks);
8998*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8999*22dc650dSSadaf Ebrahimi if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_word, FALSE))
9000*22dc650dSSadaf Ebrahimi read_char7_type(common, backtracks, type == OP_NOT_WORDCHAR);
9001*22dc650dSSadaf Ebrahimi else
9002*22dc650dSSadaf Ebrahimi #endif
9003*22dc650dSSadaf Ebrahimi read_char8_type(common, backtracks, type == OP_NOT_WORDCHAR);
9004*22dc650dSSadaf Ebrahimi OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_word);
9005*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
9006*22dc650dSSadaf Ebrahimi return cc;
9007*22dc650dSSadaf Ebrahimi
9008*22dc650dSSadaf Ebrahimi case OP_ANY:
9009*22dc650dSSadaf Ebrahimi if (check_str_ptr)
9010*22dc650dSSadaf Ebrahimi detect_partial_match(common, backtracks);
9011*22dc650dSSadaf Ebrahimi read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
9012*22dc650dSSadaf Ebrahimi if (common->nltype == NLTYPE_FIXED && common->newline > 255)
9013*22dc650dSSadaf Ebrahimi {
9014*22dc650dSSadaf Ebrahimi jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
9015*22dc650dSSadaf Ebrahimi end_list = NULL;
9016*22dc650dSSadaf Ebrahimi if (common->mode != PCRE2_JIT_PARTIAL_HARD)
9017*22dc650dSSadaf Ebrahimi add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
9018*22dc650dSSadaf Ebrahimi else
9019*22dc650dSSadaf Ebrahimi check_str_end(common, &end_list);
9020*22dc650dSSadaf Ebrahimi
9021*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
9022*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
9023*22dc650dSSadaf Ebrahimi set_jumps(end_list, LABEL());
9024*22dc650dSSadaf Ebrahimi JUMPHERE(jump[0]);
9025*22dc650dSSadaf Ebrahimi }
9026*22dc650dSSadaf Ebrahimi else
9027*22dc650dSSadaf Ebrahimi check_newlinechar(common, common->nltype, backtracks, TRUE);
9028*22dc650dSSadaf Ebrahimi return cc;
9029*22dc650dSSadaf Ebrahimi
9030*22dc650dSSadaf Ebrahimi case OP_ALLANY:
9031*22dc650dSSadaf Ebrahimi if (check_str_ptr)
9032*22dc650dSSadaf Ebrahimi detect_partial_match(common, backtracks);
9033*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
9034*22dc650dSSadaf Ebrahimi if (common->utf && common->invalid_utf)
9035*22dc650dSSadaf Ebrahimi {
9036*22dc650dSSadaf Ebrahimi read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR);
9037*22dc650dSSadaf Ebrahimi return cc;
9038*22dc650dSSadaf Ebrahimi }
9039*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE */
9040*22dc650dSSadaf Ebrahimi
9041*22dc650dSSadaf Ebrahimi skip_valid_char(common);
9042*22dc650dSSadaf Ebrahimi return cc;
9043*22dc650dSSadaf Ebrahimi
9044*22dc650dSSadaf Ebrahimi case OP_ANYBYTE:
9045*22dc650dSSadaf Ebrahimi if (check_str_ptr)
9046*22dc650dSSadaf Ebrahimi detect_partial_match(common, backtracks);
9047*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9048*22dc650dSSadaf Ebrahimi return cc;
9049*22dc650dSSadaf Ebrahimi
9050*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
9051*22dc650dSSadaf Ebrahimi case OP_NOTPROP:
9052*22dc650dSSadaf Ebrahimi case OP_PROP:
9053*22dc650dSSadaf Ebrahimi propdata[0] = XCL_HASPROP;
9054*22dc650dSSadaf Ebrahimi propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
9055*22dc650dSSadaf Ebrahimi propdata[2] = cc[0];
9056*22dc650dSSadaf Ebrahimi propdata[3] = cc[1];
9057*22dc650dSSadaf Ebrahimi propdata[4] = XCL_END;
9058*22dc650dSSadaf Ebrahimi if (check_str_ptr)
9059*22dc650dSSadaf Ebrahimi detect_partial_match(common, backtracks);
9060*22dc650dSSadaf Ebrahimi compile_xclass_matchingpath(common, propdata, backtracks);
9061*22dc650dSSadaf Ebrahimi return cc + 2;
9062*22dc650dSSadaf Ebrahimi #endif
9063*22dc650dSSadaf Ebrahimi
9064*22dc650dSSadaf Ebrahimi case OP_ANYNL:
9065*22dc650dSSadaf Ebrahimi if (check_str_ptr)
9066*22dc650dSSadaf Ebrahimi detect_partial_match(common, backtracks);
9067*22dc650dSSadaf Ebrahimi read_char(common, common->bsr_nlmin, common->bsr_nlmax, NULL, 0);
9068*22dc650dSSadaf Ebrahimi jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
9069*22dc650dSSadaf Ebrahimi /* We don't need to handle soft partial matching case. */
9070*22dc650dSSadaf Ebrahimi end_list = NULL;
9071*22dc650dSSadaf Ebrahimi if (common->mode != PCRE2_JIT_PARTIAL_HARD)
9072*22dc650dSSadaf Ebrahimi add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
9073*22dc650dSSadaf Ebrahimi else
9074*22dc650dSSadaf Ebrahimi check_str_end(common, &end_list);
9075*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
9076*22dc650dSSadaf Ebrahimi jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
9077*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9078*22dc650dSSadaf Ebrahimi jump[2] = JUMP(SLJIT_JUMP);
9079*22dc650dSSadaf Ebrahimi JUMPHERE(jump[0]);
9080*22dc650dSSadaf Ebrahimi check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
9081*22dc650dSSadaf Ebrahimi set_jumps(end_list, LABEL());
9082*22dc650dSSadaf Ebrahimi JUMPHERE(jump[1]);
9083*22dc650dSSadaf Ebrahimi JUMPHERE(jump[2]);
9084*22dc650dSSadaf Ebrahimi return cc;
9085*22dc650dSSadaf Ebrahimi
9086*22dc650dSSadaf Ebrahimi case OP_NOT_HSPACE:
9087*22dc650dSSadaf Ebrahimi case OP_HSPACE:
9088*22dc650dSSadaf Ebrahimi if (check_str_ptr)
9089*22dc650dSSadaf Ebrahimi detect_partial_match(common, backtracks);
9090*22dc650dSSadaf Ebrahimi
9091*22dc650dSSadaf Ebrahimi if (type == OP_NOT_HSPACE)
9092*22dc650dSSadaf Ebrahimi read_char(common, 0x9, 0x3000, backtracks, READ_CHAR_UPDATE_STR_PTR);
9093*22dc650dSSadaf Ebrahimi else
9094*22dc650dSSadaf Ebrahimi read_char(common, 0x9, 0x3000, NULL, 0);
9095*22dc650dSSadaf Ebrahimi
9096*22dc650dSSadaf Ebrahimi add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
9097*22dc650dSSadaf Ebrahimi sljit_set_current_flags(compiler, SLJIT_SET_Z);
9098*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
9099*22dc650dSSadaf Ebrahimi return cc;
9100*22dc650dSSadaf Ebrahimi
9101*22dc650dSSadaf Ebrahimi case OP_NOT_VSPACE:
9102*22dc650dSSadaf Ebrahimi case OP_VSPACE:
9103*22dc650dSSadaf Ebrahimi if (check_str_ptr)
9104*22dc650dSSadaf Ebrahimi detect_partial_match(common, backtracks);
9105*22dc650dSSadaf Ebrahimi
9106*22dc650dSSadaf Ebrahimi if (type == OP_NOT_VSPACE)
9107*22dc650dSSadaf Ebrahimi read_char(common, 0xa, 0x2029, backtracks, READ_CHAR_UPDATE_STR_PTR);
9108*22dc650dSSadaf Ebrahimi else
9109*22dc650dSSadaf Ebrahimi read_char(common, 0xa, 0x2029, NULL, 0);
9110*22dc650dSSadaf Ebrahimi
9111*22dc650dSSadaf Ebrahimi add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
9112*22dc650dSSadaf Ebrahimi sljit_set_current_flags(compiler, SLJIT_SET_Z);
9113*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
9114*22dc650dSSadaf Ebrahimi return cc;
9115*22dc650dSSadaf Ebrahimi
9116*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
9117*22dc650dSSadaf Ebrahimi case OP_EXTUNI:
9118*22dc650dSSadaf Ebrahimi if (check_str_ptr)
9119*22dc650dSSadaf Ebrahimi detect_partial_match(common, backtracks);
9120*22dc650dSSadaf Ebrahimi
9121*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
9122*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
9123*22dc650dSSadaf Ebrahimi
9124*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH != 32
9125*22dc650dSSadaf Ebrahimi sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
9126*22dc650dSSadaf Ebrahimi common->utf ? (common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_utf)) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
9127*22dc650dSSadaf Ebrahimi if (common->invalid_utf)
9128*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
9129*22dc650dSSadaf Ebrahimi #else
9130*22dc650dSSadaf Ebrahimi sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
9131*22dc650dSSadaf Ebrahimi common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
9132*22dc650dSSadaf Ebrahimi if (common->invalid_utf)
9133*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
9134*22dc650dSSadaf Ebrahimi #endif
9135*22dc650dSSadaf Ebrahimi
9136*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
9137*22dc650dSSadaf Ebrahimi
9138*22dc650dSSadaf Ebrahimi if (common->mode == PCRE2_JIT_PARTIAL_HARD)
9139*22dc650dSSadaf Ebrahimi {
9140*22dc650dSSadaf Ebrahimi jump[0] = CMP(SLJIT_LESS, SLJIT_RETURN_REG, 0, STR_END, 0);
9141*22dc650dSSadaf Ebrahimi /* Since we successfully read a char above, partial matching must occur. */
9142*22dc650dSSadaf Ebrahimi check_partial(common, TRUE);
9143*22dc650dSSadaf Ebrahimi JUMPHERE(jump[0]);
9144*22dc650dSSadaf Ebrahimi }
9145*22dc650dSSadaf Ebrahimi return cc;
9146*22dc650dSSadaf Ebrahimi #endif
9147*22dc650dSSadaf Ebrahimi
9148*22dc650dSSadaf Ebrahimi case OP_CHAR:
9149*22dc650dSSadaf Ebrahimi case OP_CHARI:
9150*22dc650dSSadaf Ebrahimi length = 1;
9151*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
9152*22dc650dSSadaf Ebrahimi if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
9153*22dc650dSSadaf Ebrahimi #endif
9154*22dc650dSSadaf Ebrahimi
9155*22dc650dSSadaf Ebrahimi if (check_str_ptr && common->mode != PCRE2_JIT_COMPLETE)
9156*22dc650dSSadaf Ebrahimi detect_partial_match(common, backtracks);
9157*22dc650dSSadaf Ebrahimi
9158*22dc650dSSadaf Ebrahimi if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)
9159*22dc650dSSadaf Ebrahimi {
9160*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
9161*22dc650dSSadaf Ebrahimi if (length > 1 || (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE))
9162*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
9163*22dc650dSSadaf Ebrahimi
9164*22dc650dSSadaf Ebrahimi context.length = IN_UCHARS(length);
9165*22dc650dSSadaf Ebrahimi context.sourcereg = -1;
9166*22dc650dSSadaf Ebrahimi #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
9167*22dc650dSSadaf Ebrahimi context.ucharptr = 0;
9168*22dc650dSSadaf Ebrahimi #endif
9169*22dc650dSSadaf Ebrahimi return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
9170*22dc650dSSadaf Ebrahimi }
9171*22dc650dSSadaf Ebrahimi
9172*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
9173*22dc650dSSadaf Ebrahimi if (common->utf)
9174*22dc650dSSadaf Ebrahimi {
9175*22dc650dSSadaf Ebrahimi GETCHAR(c, cc);
9176*22dc650dSSadaf Ebrahimi }
9177*22dc650dSSadaf Ebrahimi else
9178*22dc650dSSadaf Ebrahimi #endif
9179*22dc650dSSadaf Ebrahimi c = *cc;
9180*22dc650dSSadaf Ebrahimi
9181*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(type == OP_CHARI && char_has_othercase(common, cc));
9182*22dc650dSSadaf Ebrahimi
9183*22dc650dSSadaf Ebrahimi if (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE)
9184*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
9185*22dc650dSSadaf Ebrahimi
9186*22dc650dSSadaf Ebrahimi oc = char_othercase(common, c);
9187*22dc650dSSadaf Ebrahimi read_char(common, c < oc ? c : oc, c > oc ? c : oc, NULL, 0);
9188*22dc650dSSadaf Ebrahimi
9189*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(!is_powerof2(c ^ oc));
9190*22dc650dSSadaf Ebrahimi
9191*22dc650dSSadaf Ebrahimi if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
9192*22dc650dSSadaf Ebrahimi {
9193*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, oc);
9194*22dc650dSSadaf Ebrahimi SELECT(SLJIT_EQUAL, TMP1, SLJIT_IMM, c, TMP1);
9195*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
9196*22dc650dSSadaf Ebrahimi }
9197*22dc650dSSadaf Ebrahimi else
9198*22dc650dSSadaf Ebrahimi {
9199*22dc650dSSadaf Ebrahimi jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
9200*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
9201*22dc650dSSadaf Ebrahimi JUMPHERE(jump[0]);
9202*22dc650dSSadaf Ebrahimi }
9203*22dc650dSSadaf Ebrahimi return cc + length;
9204*22dc650dSSadaf Ebrahimi
9205*22dc650dSSadaf Ebrahimi case OP_NOT:
9206*22dc650dSSadaf Ebrahimi case OP_NOTI:
9207*22dc650dSSadaf Ebrahimi if (check_str_ptr)
9208*22dc650dSSadaf Ebrahimi detect_partial_match(common, backtracks);
9209*22dc650dSSadaf Ebrahimi
9210*22dc650dSSadaf Ebrahimi length = 1;
9211*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
9212*22dc650dSSadaf Ebrahimi if (common->utf)
9213*22dc650dSSadaf Ebrahimi {
9214*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8
9215*22dc650dSSadaf Ebrahimi c = *cc;
9216*22dc650dSSadaf Ebrahimi if (c < 128 && !common->invalid_utf)
9217*22dc650dSSadaf Ebrahimi {
9218*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
9219*22dc650dSSadaf Ebrahimi if (type == OP_NOT || !char_has_othercase(common, cc))
9220*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
9221*22dc650dSSadaf Ebrahimi else
9222*22dc650dSSadaf Ebrahimi {
9223*22dc650dSSadaf Ebrahimi /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
9224*22dc650dSSadaf Ebrahimi OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
9225*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
9226*22dc650dSSadaf Ebrahimi }
9227*22dc650dSSadaf Ebrahimi /* Skip the variable-length character. */
9228*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9229*22dc650dSSadaf Ebrahimi jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
9230*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
9231*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
9232*22dc650dSSadaf Ebrahimi JUMPHERE(jump[0]);
9233*22dc650dSSadaf Ebrahimi return cc + 1;
9234*22dc650dSSadaf Ebrahimi }
9235*22dc650dSSadaf Ebrahimi else
9236*22dc650dSSadaf Ebrahimi #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
9237*22dc650dSSadaf Ebrahimi {
9238*22dc650dSSadaf Ebrahimi GETCHARLEN(c, cc, length);
9239*22dc650dSSadaf Ebrahimi }
9240*22dc650dSSadaf Ebrahimi }
9241*22dc650dSSadaf Ebrahimi else
9242*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE */
9243*22dc650dSSadaf Ebrahimi c = *cc;
9244*22dc650dSSadaf Ebrahimi
9245*22dc650dSSadaf Ebrahimi if (type == OP_NOT || !char_has_othercase(common, cc))
9246*22dc650dSSadaf Ebrahimi {
9247*22dc650dSSadaf Ebrahimi read_char(common, c, c, backtracks, READ_CHAR_UPDATE_STR_PTR);
9248*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
9249*22dc650dSSadaf Ebrahimi }
9250*22dc650dSSadaf Ebrahimi else
9251*22dc650dSSadaf Ebrahimi {
9252*22dc650dSSadaf Ebrahimi oc = char_othercase(common, c);
9253*22dc650dSSadaf Ebrahimi read_char(common, c < oc ? c : oc, c > oc ? c : oc, backtracks, READ_CHAR_UPDATE_STR_PTR);
9254*22dc650dSSadaf Ebrahimi bit = c ^ oc;
9255*22dc650dSSadaf Ebrahimi if (is_powerof2(bit))
9256*22dc650dSSadaf Ebrahimi {
9257*22dc650dSSadaf Ebrahimi OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
9258*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
9259*22dc650dSSadaf Ebrahimi }
9260*22dc650dSSadaf Ebrahimi else
9261*22dc650dSSadaf Ebrahimi {
9262*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
9263*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
9264*22dc650dSSadaf Ebrahimi }
9265*22dc650dSSadaf Ebrahimi }
9266*22dc650dSSadaf Ebrahimi return cc + length;
9267*22dc650dSSadaf Ebrahimi
9268*22dc650dSSadaf Ebrahimi case OP_CLASS:
9269*22dc650dSSadaf Ebrahimi case OP_NCLASS:
9270*22dc650dSSadaf Ebrahimi if (check_str_ptr)
9271*22dc650dSSadaf Ebrahimi detect_partial_match(common, backtracks);
9272*22dc650dSSadaf Ebrahimi
9273*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
9274*22dc650dSSadaf Ebrahimi bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
9275*22dc650dSSadaf Ebrahimi if (type == OP_NCLASS)
9276*22dc650dSSadaf Ebrahimi read_char(common, 0, bit, backtracks, READ_CHAR_UPDATE_STR_PTR);
9277*22dc650dSSadaf Ebrahimi else
9278*22dc650dSSadaf Ebrahimi read_char(common, 0, bit, NULL, 0);
9279*22dc650dSSadaf Ebrahimi #else
9280*22dc650dSSadaf Ebrahimi if (type == OP_NCLASS)
9281*22dc650dSSadaf Ebrahimi read_char(common, 0, 255, backtracks, READ_CHAR_UPDATE_STR_PTR);
9282*22dc650dSSadaf Ebrahimi else
9283*22dc650dSSadaf Ebrahimi read_char(common, 0, 255, NULL, 0);
9284*22dc650dSSadaf Ebrahimi #endif
9285*22dc650dSSadaf Ebrahimi
9286*22dc650dSSadaf Ebrahimi if (optimize_class(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
9287*22dc650dSSadaf Ebrahimi return cc + 32 / sizeof(PCRE2_UCHAR);
9288*22dc650dSSadaf Ebrahimi
9289*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
9290*22dc650dSSadaf Ebrahimi jump[0] = NULL;
9291*22dc650dSSadaf Ebrahimi if (common->utf)
9292*22dc650dSSadaf Ebrahimi {
9293*22dc650dSSadaf Ebrahimi jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
9294*22dc650dSSadaf Ebrahimi if (type == OP_CLASS)
9295*22dc650dSSadaf Ebrahimi {
9296*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, jump[0]);
9297*22dc650dSSadaf Ebrahimi jump[0] = NULL;
9298*22dc650dSSadaf Ebrahimi }
9299*22dc650dSSadaf Ebrahimi }
9300*22dc650dSSadaf Ebrahimi #elif PCRE2_CODE_UNIT_WIDTH != 8
9301*22dc650dSSadaf Ebrahimi jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
9302*22dc650dSSadaf Ebrahimi if (type == OP_CLASS)
9303*22dc650dSSadaf Ebrahimi {
9304*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, jump[0]);
9305*22dc650dSSadaf Ebrahimi jump[0] = NULL;
9306*22dc650dSSadaf Ebrahimi }
9307*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
9308*22dc650dSSadaf Ebrahimi
9309*22dc650dSSadaf Ebrahimi OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
9310*22dc650dSSadaf Ebrahimi OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
9311*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
9312*22dc650dSSadaf Ebrahimi OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
9313*22dc650dSSadaf Ebrahimi OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
9314*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
9315*22dc650dSSadaf Ebrahimi
9316*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
9317*22dc650dSSadaf Ebrahimi if (jump[0] != NULL)
9318*22dc650dSSadaf Ebrahimi JUMPHERE(jump[0]);
9319*22dc650dSSadaf Ebrahimi #endif
9320*22dc650dSSadaf Ebrahimi return cc + 32 / sizeof(PCRE2_UCHAR);
9321*22dc650dSSadaf Ebrahimi
9322*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
9323*22dc650dSSadaf Ebrahimi case OP_XCLASS:
9324*22dc650dSSadaf Ebrahimi if (check_str_ptr)
9325*22dc650dSSadaf Ebrahimi detect_partial_match(common, backtracks);
9326*22dc650dSSadaf Ebrahimi compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
9327*22dc650dSSadaf Ebrahimi return cc + GET(cc, 0) - 1;
9328*22dc650dSSadaf Ebrahimi #endif
9329*22dc650dSSadaf Ebrahimi }
9330*22dc650dSSadaf Ebrahimi SLJIT_UNREACHABLE();
9331*22dc650dSSadaf Ebrahimi return cc;
9332*22dc650dSSadaf Ebrahimi }
9333*22dc650dSSadaf Ebrahimi
compile_charn_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,jump_list ** backtracks)9334*22dc650dSSadaf Ebrahimi static SLJIT_INLINE PCRE2_SPTR compile_charn_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, jump_list **backtracks)
9335*22dc650dSSadaf Ebrahimi {
9336*22dc650dSSadaf Ebrahimi /* This function consumes at least one input character. */
9337*22dc650dSSadaf Ebrahimi /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
9338*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
9339*22dc650dSSadaf Ebrahimi PCRE2_SPTR ccbegin = cc;
9340*22dc650dSSadaf Ebrahimi compare_context context;
9341*22dc650dSSadaf Ebrahimi int size;
9342*22dc650dSSadaf Ebrahimi
9343*22dc650dSSadaf Ebrahimi context.length = 0;
9344*22dc650dSSadaf Ebrahimi do
9345*22dc650dSSadaf Ebrahimi {
9346*22dc650dSSadaf Ebrahimi if (cc >= ccend)
9347*22dc650dSSadaf Ebrahimi break;
9348*22dc650dSSadaf Ebrahimi
9349*22dc650dSSadaf Ebrahimi if (*cc == OP_CHAR)
9350*22dc650dSSadaf Ebrahimi {
9351*22dc650dSSadaf Ebrahimi size = 1;
9352*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
9353*22dc650dSSadaf Ebrahimi if (common->utf && HAS_EXTRALEN(cc[1]))
9354*22dc650dSSadaf Ebrahimi size += GET_EXTRALEN(cc[1]);
9355*22dc650dSSadaf Ebrahimi #endif
9356*22dc650dSSadaf Ebrahimi }
9357*22dc650dSSadaf Ebrahimi else if (*cc == OP_CHARI)
9358*22dc650dSSadaf Ebrahimi {
9359*22dc650dSSadaf Ebrahimi size = 1;
9360*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
9361*22dc650dSSadaf Ebrahimi if (common->utf)
9362*22dc650dSSadaf Ebrahimi {
9363*22dc650dSSadaf Ebrahimi if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
9364*22dc650dSSadaf Ebrahimi size = 0;
9365*22dc650dSSadaf Ebrahimi else if (HAS_EXTRALEN(cc[1]))
9366*22dc650dSSadaf Ebrahimi size += GET_EXTRALEN(cc[1]);
9367*22dc650dSSadaf Ebrahimi }
9368*22dc650dSSadaf Ebrahimi else
9369*22dc650dSSadaf Ebrahimi #endif
9370*22dc650dSSadaf Ebrahimi if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
9371*22dc650dSSadaf Ebrahimi size = 0;
9372*22dc650dSSadaf Ebrahimi }
9373*22dc650dSSadaf Ebrahimi else
9374*22dc650dSSadaf Ebrahimi size = 0;
9375*22dc650dSSadaf Ebrahimi
9376*22dc650dSSadaf Ebrahimi cc += 1 + size;
9377*22dc650dSSadaf Ebrahimi context.length += IN_UCHARS(size);
9378*22dc650dSSadaf Ebrahimi }
9379*22dc650dSSadaf Ebrahimi while (size > 0 && context.length <= 128);
9380*22dc650dSSadaf Ebrahimi
9381*22dc650dSSadaf Ebrahimi cc = ccbegin;
9382*22dc650dSSadaf Ebrahimi if (context.length > 0)
9383*22dc650dSSadaf Ebrahimi {
9384*22dc650dSSadaf Ebrahimi /* We have a fixed-length byte sequence. */
9385*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
9386*22dc650dSSadaf Ebrahimi add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
9387*22dc650dSSadaf Ebrahimi
9388*22dc650dSSadaf Ebrahimi context.sourcereg = -1;
9389*22dc650dSSadaf Ebrahimi #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
9390*22dc650dSSadaf Ebrahimi context.ucharptr = 0;
9391*22dc650dSSadaf Ebrahimi #endif
9392*22dc650dSSadaf Ebrahimi do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
9393*22dc650dSSadaf Ebrahimi return cc;
9394*22dc650dSSadaf Ebrahimi }
9395*22dc650dSSadaf Ebrahimi
9396*22dc650dSSadaf Ebrahimi /* A non-fixed length character will be checked if length == 0. */
9397*22dc650dSSadaf Ebrahimi return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
9398*22dc650dSSadaf Ebrahimi }
9399*22dc650dSSadaf Ebrahimi
9400*22dc650dSSadaf Ebrahimi /* Forward definitions. */
9401*22dc650dSSadaf Ebrahimi static void compile_matchingpath(compiler_common *, PCRE2_SPTR, PCRE2_SPTR, backtrack_common *);
9402*22dc650dSSadaf Ebrahimi static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
9403*22dc650dSSadaf Ebrahimi
/* Backtrack record helpers.

PUSH_BACKTRACK and PUSH_BACKTRACK_NOVALUE allocate a zero-filled record of
'size' bytes from the sljit compiler's memory, link it in front of
parent->top and store 'ccstart' in its cc field. Both macros are statements,
not expressions, and they depend on three names being in scope at the point
of use:

  compiler   the sljit compiler the memory is taken from
  parent     the record whose 'top' list receives the new record
  backtrack  the variable that is assigned the new record

If the compiler reports an error after the allocation, the enclosing function
returns immediately: PUSH_BACKTRACK returns 'error', PUSH_BACKTRACK_NOVALUE
is for functions returning void.

Note that 'size' is evaluated twice, so it must not have side effects. */

#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)

#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)

/* Views the current 'backtrack' variable as a pointer to the given record
type. 'type' must be a type name that can be followed directly by '*'. */
#define BACKTRACK_AS(type) ((type *)backtrack)
9431*22dc650dSSadaf Ebrahimi
/* Emits the code which selects the capture group used by a duplicate-name
reference (OP_DNREF / OP_DNREFI).

The name table entries belonging to the reference are visited in order. For
each entry TMP2 is pointed at the group's OVECTOR pair, and the search stops
at the first group whose start value differs from OVECTOR(1). The last entry
is not tested that way: TMP2 simply stays on it, and, when 'backtracks' is
given and common->unset_backref is off, a backtrack is taken if its start
value equals OVECTOR(1).

Arguments:
  common      the compiler state
  cc          points at the OP_DNREF / OP_DNREFI opcode
  backtracks  jump list for the failure jump; may be NULL

On exit of the generated code TMP2 holds the address of the selected OVECTOR
pair; TMP1 is overwritten.
*/
static void compile_dnref_search(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
int remaining = GET2(cc, 1 + IMM2_SIZE);
PCRE2_SPTR entry = common->name_table + GET2(cc, 1) * common->name_entry_size;
unsigned int ovector_index;
jump_list *found = NULL;

SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);

/* TMP1 = OVECTOR(1), the value every candidate's start is compared with. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

/* All entries except the last one: leave the search as soon as the start
value of a group differs from TMP1. */
for (remaining--; remaining > 0; remaining--)
  {
  ovector_index = GET2(entry, 0) << 1;
  GET_LOCAL_BASE(TMP2, 0, OVECTOR(ovector_index));
  add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovector_index), TMP1, 0));
  entry += common->name_entry_size;
  }

/* The last entry is the fallback choice. */
ovector_index = GET2(entry, 0) << 1;
GET_LOCAL_BASE(TMP2, 0, OVECTOR(ovector_index));
if (backtracks != NULL && !common->unset_backref)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovector_index), TMP1, 0));

set_jumps(found, LABEL());
}
9461*22dc650dSSadaf Ebrahimi
/* Emits the code which matches a back reference against the subject.

For OP_REF / OP_REFI the group number is read from the pattern. For any other
opcode the generated code expects TMP2 to hold the address of the OVECTOR
pair to use (start at offset 0, end at offset sizeof(sljit_sw)).

Arguments:
  common      the compiler state
  cc          points at the reference opcode
  backtracks  jump list that collects the failure jumps
  withchecks  when TRUE, code for the unset group check (numbered references
              only, and only if common->unset_backref is off) and for the
              empty reference case is generated
  emptyfail   when TRUE (together with withchecks) an empty reference
              backtracks; otherwise an empty reference matches

OP_REFI in UTF mode compares character by character with Unicode case
information; every other case calls the shared casefulcmp / caselesscmp
routine on the whole referenced text.
*/
static void compile_ref_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
{
DEFINE_COMPILER;
BOOL by_number = (*cc == OP_REF || *cc == OP_REFI);
int ovec = 0;
jump_list **compare_calls;
struct sljit_jump *finished = NULL;
struct sljit_jump *subject_end;
struct sljit_jump *whole_match;
#if defined SUPPORT_UNICODE
struct sljit_label *next_char;
struct sljit_label *next_case;
jump_list *mismatch = NULL;
int ref_ptr = COUNT_MATCH;
int ref_end = ARGUMENTS;
int ref_char = STACK_LIMIT;
#endif /* SUPPORT_UNICODE */

/* TMP1 = start of the referenced text. */
if (!by_number)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
else
  {
  ovec = GET2(cc, 1) << 1;
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovec));
  /* OVECTOR(1) contains the "string begin - 1" constant. */
  if (withchecks && !common->unset_backref)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
  }

#if defined SUPPORT_UNICODE
if (common->utf && *cc == OP_REFI)
  {
  SLJIT_ASSERT(common->iref_ptr != 0);

  /* TMP2 = end of the referenced text. */
  if (by_number)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovec + 1));
  else
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));

  if (withchecks && emptyfail)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0));

  /* Three registers are borrowed for the loop below. Their current values
  are parked in the iref_ptr slots and reloaded on every exit path. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr, ref_ptr, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw), ref_end, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2, ref_char, 0);

  OP1(SLJIT_MOV, ref_ptr, 0, TMP1, 0);
  OP1(SLJIT_MOV, ref_end, 0, TMP2, 0);

  /* One iteration per character of the referenced text. */
  next_char = LABEL();
  finished = CMP(SLJIT_GREATER_EQUAL, ref_ptr, 0, ref_end, 0);
  subject_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

  /* Read original character. It must be a valid UTF character. STR_PTR is
  temporarily redirected to the referenced text, because read_char() reads
  through STR_PTR; the subject position waits in TMP3 meanwhile. */
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
  OP1(SLJIT_MOV, STR_PTR, 0, ref_ptr, 0);

  read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR | READ_CHAR_VALID_UTF);

  OP1(SLJIT_MOV, ref_ptr, 0, STR_PTR, 0);
  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  OP1(SLJIT_MOV, ref_char, 0, TMP1, 0);

  /* Read second character, this time from the subject. */
  read_char(common, 0, READ_CHAR_MAX, &mismatch, READ_CHAR_UPDATE_STR_PTR);

  /* Identical characters: continue with the next pair. */
  CMPTO(SLJIT_EQUAL, TMP1, 0, ref_char, 0, next_char);

  /* Keep the subject character in TMP3 and look up its UCD record. The
  record index is expected in TMP2 after the getucd call. */
  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);

  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));

  /* TMP2 = index * 4 + index * 8, i.e. index * 12 (presumably the size of a
  ucd_record), then turned into the address of the record. */
  OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);

  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records));

  /* The subject character plus its other_case field must equal the
  referenced character, otherwise the caseset field decides. */
  OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, other_case));
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, caseset));
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, ref_char, 0, next_char);

  /* A zero caseset value means there is nothing more to try. Otherwise it is
  scaled by four and used as a byte offset into ucd_caseless_sets. */
  add_jump(compiler, &mismatch, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_caseless_sets));

  /* Walk the 32-bit set entries: an entry equal to the referenced character
  is a match, a smaller entry continues the walk, a larger one ends it. */
  next_case = LABEL();
  OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP2), 0);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(uint32_t));
  OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, TMP1, 0, ref_char, 0);
  JUMPTO(SLJIT_EQUAL, next_char);
  JUMPTO(SLJIT_LESS, next_case);

  /* Exit 1: the characters differ. Without partial matching, running out of
  subject lands here as well. */
  set_jumps(mismatch, LABEL());
  if (common->mode == PCRE2_JIT_COMPLETE)
    JUMPHERE(subject_end);

  OP1(SLJIT_MOV, ref_ptr, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
  OP1(SLJIT_MOV, ref_end, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
  OP1(SLJIT_MOV, ref_char, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

  /* Exit 2: the subject ended inside the reference while partial matching is
  enabled. */
  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    JUMPHERE(subject_end);
    OP1(SLJIT_MOV, ref_ptr, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
    OP1(SLJIT_MOV, ref_end, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
    OP1(SLJIT_MOV, ref_char, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);

    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    }

  /* Exit 3: the whole referenced text has been consumed. */
  JUMPHERE(finished);
  OP1(SLJIT_MOV, ref_ptr, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
  OP1(SLJIT_MOV, ref_end, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
  OP1(SLJIT_MOV, ref_char, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
  return;
  }
#endif /* SUPPORT_UNICODE */

/* Every remaining case compares the text as one block. */
compare_calls = (*cc == OP_REF) ? &common->casefulcmp : &common->caselesscmp;

/* TMP2 = end - start of the referenced text; the zero flag marks an empty
reference. */
if (by_number)
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovec + 1), TMP1, 0);
else
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);

if (withchecks)
  finished = JUMP(SLJIT_ZERO);

/* Step over the candidate text first, then check that it fits. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
subject_end = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, backtracks, subject_end);

/* Backtrack when TMP2 is non-zero after the comparison call. */
add_jump(compiler, compare_calls, JUMP(SLJIT_FAST_CALL));
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));

if (common->mode != PCRE2_JIT_COMPLETE)
  {
  whole_match = JUMP(SLJIT_JUMP);
  JUMPHERE(subject_end);

  /* STR_PTR went past STR_END. Shorten the length by the overshoot:
  TMP2 = TMP2 - (STR_PTR - STR_END). */
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);

  /* With nothing left to compare, go directly to the partial check. */
  subject_end = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
  OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
  add_jump(compiler, compare_calls, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  JUMPHERE(subject_end);
  check_partial(common, FALSE);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  JUMPHERE(whole_match);
  }

/* Resolve the empty reference jump, if one was generated. */
if (finished == NULL)
  return;

if (emptyfail)
  add_jump(compiler, backtracks, finished);
else
  JUMPHERE(finished);
}
9626*22dc650dSSadaf Ebrahimi
compile_ref_iterator_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9627*22dc650dSSadaf Ebrahimi static SLJIT_INLINE PCRE2_SPTR compile_ref_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9628*22dc650dSSadaf Ebrahimi {
9629*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
9630*22dc650dSSadaf Ebrahimi BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
9631*22dc650dSSadaf Ebrahimi backtrack_common *backtrack;
9632*22dc650dSSadaf Ebrahimi PCRE2_UCHAR type;
9633*22dc650dSSadaf Ebrahimi int offset = 0;
9634*22dc650dSSadaf Ebrahimi struct sljit_label *label;
9635*22dc650dSSadaf Ebrahimi struct sljit_jump *zerolength;
9636*22dc650dSSadaf Ebrahimi struct sljit_jump *jump = NULL;
9637*22dc650dSSadaf Ebrahimi PCRE2_SPTR ccbegin = cc;
9638*22dc650dSSadaf Ebrahimi int min = 0, max = 0;
9639*22dc650dSSadaf Ebrahimi BOOL minimize;
9640*22dc650dSSadaf Ebrahimi
9641*22dc650dSSadaf Ebrahimi PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);
9642*22dc650dSSadaf Ebrahimi
9643*22dc650dSSadaf Ebrahimi if (ref)
9644*22dc650dSSadaf Ebrahimi offset = GET2(cc, 1) << 1;
9645*22dc650dSSadaf Ebrahimi else
9646*22dc650dSSadaf Ebrahimi cc += IMM2_SIZE;
9647*22dc650dSSadaf Ebrahimi type = cc[1 + IMM2_SIZE];
9648*22dc650dSSadaf Ebrahimi
9649*22dc650dSSadaf Ebrahimi SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
9650*22dc650dSSadaf Ebrahimi minimize = (type & 0x1) != 0;
9651*22dc650dSSadaf Ebrahimi switch(type)
9652*22dc650dSSadaf Ebrahimi {
9653*22dc650dSSadaf Ebrahimi case OP_CRSTAR:
9654*22dc650dSSadaf Ebrahimi case OP_CRMINSTAR:
9655*22dc650dSSadaf Ebrahimi min = 0;
9656*22dc650dSSadaf Ebrahimi max = 0;
9657*22dc650dSSadaf Ebrahimi cc += 1 + IMM2_SIZE + 1;
9658*22dc650dSSadaf Ebrahimi break;
9659*22dc650dSSadaf Ebrahimi case OP_CRPLUS:
9660*22dc650dSSadaf Ebrahimi case OP_CRMINPLUS:
9661*22dc650dSSadaf Ebrahimi min = 1;
9662*22dc650dSSadaf Ebrahimi max = 0;
9663*22dc650dSSadaf Ebrahimi cc += 1 + IMM2_SIZE + 1;
9664*22dc650dSSadaf Ebrahimi break;
9665*22dc650dSSadaf Ebrahimi case OP_CRQUERY:
9666*22dc650dSSadaf Ebrahimi case OP_CRMINQUERY:
9667*22dc650dSSadaf Ebrahimi min = 0;
9668*22dc650dSSadaf Ebrahimi max = 1;
9669*22dc650dSSadaf Ebrahimi cc += 1 + IMM2_SIZE + 1;
9670*22dc650dSSadaf Ebrahimi break;
9671*22dc650dSSadaf Ebrahimi case OP_CRRANGE:
9672*22dc650dSSadaf Ebrahimi case OP_CRMINRANGE:
9673*22dc650dSSadaf Ebrahimi min = GET2(cc, 1 + IMM2_SIZE + 1);
9674*22dc650dSSadaf Ebrahimi max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
9675*22dc650dSSadaf Ebrahimi cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
9676*22dc650dSSadaf Ebrahimi break;
9677*22dc650dSSadaf Ebrahimi default:
9678*22dc650dSSadaf Ebrahimi SLJIT_UNREACHABLE();
9679*22dc650dSSadaf Ebrahimi break;
9680*22dc650dSSadaf Ebrahimi }
9681*22dc650dSSadaf Ebrahimi
9682*22dc650dSSadaf Ebrahimi if (!minimize)
9683*22dc650dSSadaf Ebrahimi {
9684*22dc650dSSadaf Ebrahimi if (min == 0)
9685*22dc650dSSadaf Ebrahimi {
9686*22dc650dSSadaf Ebrahimi allocate_stack(common, 2);
9687*22dc650dSSadaf Ebrahimi if (ref)
9688*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9689*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9690*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
9691*22dc650dSSadaf Ebrahimi /* Temporary release of STR_PTR. */
9692*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9693*22dc650dSSadaf Ebrahimi /* Handles both invalid and empty cases. Since the minimum repeat,
9694*22dc650dSSadaf Ebrahimi is zero the invalid case is basically the same as an empty case. */
9695*22dc650dSSadaf Ebrahimi if (ref)
9696*22dc650dSSadaf Ebrahimi zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9697*22dc650dSSadaf Ebrahimi else
9698*22dc650dSSadaf Ebrahimi {
9699*22dc650dSSadaf Ebrahimi compile_dnref_search(common, ccbegin, NULL);
9700*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9701*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
9702*22dc650dSSadaf Ebrahimi zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9703*22dc650dSSadaf Ebrahimi }
9704*22dc650dSSadaf Ebrahimi /* Restore if not zero length. */
9705*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9706*22dc650dSSadaf Ebrahimi }
9707*22dc650dSSadaf Ebrahimi else
9708*22dc650dSSadaf Ebrahimi {
9709*22dc650dSSadaf Ebrahimi allocate_stack(common, 1);
9710*22dc650dSSadaf Ebrahimi if (ref)
9711*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9712*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9713*22dc650dSSadaf Ebrahimi
9714*22dc650dSSadaf Ebrahimi if (ref)
9715*22dc650dSSadaf Ebrahimi {
9716*22dc650dSSadaf Ebrahimi if (!common->unset_backref)
9717*22dc650dSSadaf Ebrahimi add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9718*22dc650dSSadaf Ebrahimi zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9719*22dc650dSSadaf Ebrahimi }
9720*22dc650dSSadaf Ebrahimi else
9721*22dc650dSSadaf Ebrahimi {
9722*22dc650dSSadaf Ebrahimi compile_dnref_search(common, ccbegin, &backtrack->own_backtracks);
9723*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9724*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
9725*22dc650dSSadaf Ebrahimi zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9726*22dc650dSSadaf Ebrahimi }
9727*22dc650dSSadaf Ebrahimi }
9728*22dc650dSSadaf Ebrahimi
9729*22dc650dSSadaf Ebrahimi if (min > 1 || max > 1)
9730*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
9731*22dc650dSSadaf Ebrahimi
9732*22dc650dSSadaf Ebrahimi label = LABEL();
9733*22dc650dSSadaf Ebrahimi if (!ref)
9734*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
9735*22dc650dSSadaf Ebrahimi compile_ref_matchingpath(common, ccbegin, &backtrack->own_backtracks, FALSE, FALSE);
9736*22dc650dSSadaf Ebrahimi
9737*22dc650dSSadaf Ebrahimi if (min > 1 || max > 1)
9738*22dc650dSSadaf Ebrahimi {
9739*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
9740*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9741*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
9742*22dc650dSSadaf Ebrahimi if (min > 1)
9743*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
9744*22dc650dSSadaf Ebrahimi if (max > 1)
9745*22dc650dSSadaf Ebrahimi {
9746*22dc650dSSadaf Ebrahimi jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
9747*22dc650dSSadaf Ebrahimi allocate_stack(common, 1);
9748*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9749*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_JUMP, label);
9750*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
9751*22dc650dSSadaf Ebrahimi }
9752*22dc650dSSadaf Ebrahimi }
9753*22dc650dSSadaf Ebrahimi
9754*22dc650dSSadaf Ebrahimi if (max == 0)
9755*22dc650dSSadaf Ebrahimi {
9756*22dc650dSSadaf Ebrahimi /* Includes min > 1 case as well. */
9757*22dc650dSSadaf Ebrahimi allocate_stack(common, 1);
9758*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9759*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_JUMP, label);
9760*22dc650dSSadaf Ebrahimi }
9761*22dc650dSSadaf Ebrahimi
9762*22dc650dSSadaf Ebrahimi JUMPHERE(zerolength);
9763*22dc650dSSadaf Ebrahimi BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
9764*22dc650dSSadaf Ebrahimi
9765*22dc650dSSadaf Ebrahimi count_match(common);
9766*22dc650dSSadaf Ebrahimi return cc;
9767*22dc650dSSadaf Ebrahimi }
9768*22dc650dSSadaf Ebrahimi
9769*22dc650dSSadaf Ebrahimi allocate_stack(common, ref ? 2 : 3);
9770*22dc650dSSadaf Ebrahimi if (ref)
9771*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9772*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9773*22dc650dSSadaf Ebrahimi if (type != OP_CRMINSTAR)
9774*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
9775*22dc650dSSadaf Ebrahimi
9776*22dc650dSSadaf Ebrahimi if (min == 0)
9777*22dc650dSSadaf Ebrahimi {
9778*22dc650dSSadaf Ebrahimi /* Handles both invalid and empty cases. Since the minimum repeat,
9779*22dc650dSSadaf Ebrahimi is zero the invalid case is basically the same as an empty case. */
9780*22dc650dSSadaf Ebrahimi if (ref)
9781*22dc650dSSadaf Ebrahimi zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9782*22dc650dSSadaf Ebrahimi else
9783*22dc650dSSadaf Ebrahimi {
9784*22dc650dSSadaf Ebrahimi compile_dnref_search(common, ccbegin, NULL);
9785*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9786*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
9787*22dc650dSSadaf Ebrahimi zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9788*22dc650dSSadaf Ebrahimi }
9789*22dc650dSSadaf Ebrahimi /* Length is non-zero, we can match real repeats. */
9790*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9791*22dc650dSSadaf Ebrahimi jump = JUMP(SLJIT_JUMP);
9792*22dc650dSSadaf Ebrahimi }
9793*22dc650dSSadaf Ebrahimi else
9794*22dc650dSSadaf Ebrahimi {
9795*22dc650dSSadaf Ebrahimi if (ref)
9796*22dc650dSSadaf Ebrahimi {
9797*22dc650dSSadaf Ebrahimi if (!common->unset_backref)
9798*22dc650dSSadaf Ebrahimi add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9799*22dc650dSSadaf Ebrahimi zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9800*22dc650dSSadaf Ebrahimi }
9801*22dc650dSSadaf Ebrahimi else
9802*22dc650dSSadaf Ebrahimi {
9803*22dc650dSSadaf Ebrahimi compile_dnref_search(common, ccbegin, &backtrack->own_backtracks);
9804*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9805*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
9806*22dc650dSSadaf Ebrahimi zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9807*22dc650dSSadaf Ebrahimi }
9808*22dc650dSSadaf Ebrahimi }
9809*22dc650dSSadaf Ebrahimi
9810*22dc650dSSadaf Ebrahimi BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
9811*22dc650dSSadaf Ebrahimi if (max > 0)
9812*22dc650dSSadaf Ebrahimi add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
9813*22dc650dSSadaf Ebrahimi
9814*22dc650dSSadaf Ebrahimi if (!ref)
9815*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
9816*22dc650dSSadaf Ebrahimi compile_ref_matchingpath(common, ccbegin, &backtrack->own_backtracks, TRUE, TRUE);
9817*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9818*22dc650dSSadaf Ebrahimi
9819*22dc650dSSadaf Ebrahimi if (min > 1)
9820*22dc650dSSadaf Ebrahimi {
9821*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9822*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9823*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9824*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
9825*22dc650dSSadaf Ebrahimi }
9826*22dc650dSSadaf Ebrahimi else if (max > 0)
9827*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
9828*22dc650dSSadaf Ebrahimi
9829*22dc650dSSadaf Ebrahimi if (jump != NULL)
9830*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
9831*22dc650dSSadaf Ebrahimi JUMPHERE(zerolength);
9832*22dc650dSSadaf Ebrahimi
9833*22dc650dSSadaf Ebrahimi count_match(common);
9834*22dc650dSSadaf Ebrahimi return cc;
9835*22dc650dSSadaf Ebrahimi }
9836*22dc650dSSadaf Ebrahimi
compile_recurse_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9837*22dc650dSSadaf Ebrahimi static SLJIT_INLINE PCRE2_SPTR compile_recurse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9838*22dc650dSSadaf Ebrahimi {
9839*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
9840*22dc650dSSadaf Ebrahimi backtrack_common *backtrack;
9841*22dc650dSSadaf Ebrahimi recurse_entry *entry = common->entries;
9842*22dc650dSSadaf Ebrahimi recurse_entry *prev = NULL;
9843*22dc650dSSadaf Ebrahimi sljit_sw start = GET(cc, 1);
9844*22dc650dSSadaf Ebrahimi PCRE2_SPTR start_cc;
9845*22dc650dSSadaf Ebrahimi BOOL needs_control_head;
9846*22dc650dSSadaf Ebrahimi
9847*22dc650dSSadaf Ebrahimi PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
9848*22dc650dSSadaf Ebrahimi
9849*22dc650dSSadaf Ebrahimi /* Inlining simple patterns. */
9850*22dc650dSSadaf Ebrahimi if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
9851*22dc650dSSadaf Ebrahimi {
9852*22dc650dSSadaf Ebrahimi start_cc = common->start + start;
9853*22dc650dSSadaf Ebrahimi compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
9854*22dc650dSSadaf Ebrahimi BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
9855*22dc650dSSadaf Ebrahimi return cc + 1 + LINK_SIZE;
9856*22dc650dSSadaf Ebrahimi }
9857*22dc650dSSadaf Ebrahimi
9858*22dc650dSSadaf Ebrahimi while (entry != NULL)
9859*22dc650dSSadaf Ebrahimi {
9860*22dc650dSSadaf Ebrahimi if (entry->start == start)
9861*22dc650dSSadaf Ebrahimi break;
9862*22dc650dSSadaf Ebrahimi prev = entry;
9863*22dc650dSSadaf Ebrahimi entry = entry->next;
9864*22dc650dSSadaf Ebrahimi }
9865*22dc650dSSadaf Ebrahimi
9866*22dc650dSSadaf Ebrahimi if (entry == NULL)
9867*22dc650dSSadaf Ebrahimi {
9868*22dc650dSSadaf Ebrahimi entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
9869*22dc650dSSadaf Ebrahimi if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9870*22dc650dSSadaf Ebrahimi return NULL;
9871*22dc650dSSadaf Ebrahimi entry->next = NULL;
9872*22dc650dSSadaf Ebrahimi entry->entry_label = NULL;
9873*22dc650dSSadaf Ebrahimi entry->backtrack_label = NULL;
9874*22dc650dSSadaf Ebrahimi entry->entry_calls = NULL;
9875*22dc650dSSadaf Ebrahimi entry->backtrack_calls = NULL;
9876*22dc650dSSadaf Ebrahimi entry->start = start;
9877*22dc650dSSadaf Ebrahimi
9878*22dc650dSSadaf Ebrahimi if (prev != NULL)
9879*22dc650dSSadaf Ebrahimi prev->next = entry;
9880*22dc650dSSadaf Ebrahimi else
9881*22dc650dSSadaf Ebrahimi common->entries = entry;
9882*22dc650dSSadaf Ebrahimi }
9883*22dc650dSSadaf Ebrahimi
9884*22dc650dSSadaf Ebrahimi BACKTRACK_AS(recurse_backtrack)->entry = entry;
9885*22dc650dSSadaf Ebrahimi
9886*22dc650dSSadaf Ebrahimi if (entry->entry_label == NULL)
9887*22dc650dSSadaf Ebrahimi add_jump(compiler, &entry->entry_calls, JUMP(SLJIT_FAST_CALL));
9888*22dc650dSSadaf Ebrahimi else
9889*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_FAST_CALL, entry->entry_label);
9890*22dc650dSSadaf Ebrahimi /* Leave if the match is failed. */
9891*22dc650dSSadaf Ebrahimi add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
9892*22dc650dSSadaf Ebrahimi BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL();
9893*22dc650dSSadaf Ebrahimi return cc + 1 + LINK_SIZE;
9894*22dc650dSSadaf Ebrahimi }
9895*22dc650dSSadaf Ebrahimi
do_callout_jit(struct jit_arguments * arguments,pcre2_callout_block * callout_block,PCRE2_SPTR * jit_ovector)9896*22dc650dSSadaf Ebrahimi static sljit_s32 SLJIT_FUNC do_callout_jit(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
9897*22dc650dSSadaf Ebrahimi {
9898*22dc650dSSadaf Ebrahimi PCRE2_SPTR begin;
9899*22dc650dSSadaf Ebrahimi PCRE2_SIZE *ovector;
9900*22dc650dSSadaf Ebrahimi sljit_u32 oveccount, capture_top;
9901*22dc650dSSadaf Ebrahimi
9902*22dc650dSSadaf Ebrahimi if (arguments->callout == NULL)
9903*22dc650dSSadaf Ebrahimi return 0;
9904*22dc650dSSadaf Ebrahimi
9905*22dc650dSSadaf Ebrahimi SLJIT_COMPILE_ASSERT(sizeof (PCRE2_SIZE) <= sizeof (sljit_sw), pcre2_size_must_be_lower_than_sljit_sw_size);
9906*22dc650dSSadaf Ebrahimi
9907*22dc650dSSadaf Ebrahimi begin = arguments->begin;
9908*22dc650dSSadaf Ebrahimi ovector = (PCRE2_SIZE*)(callout_block + 1);
9909*22dc650dSSadaf Ebrahimi oveccount = callout_block->capture_top;
9910*22dc650dSSadaf Ebrahimi
9911*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(oveccount >= 1);
9912*22dc650dSSadaf Ebrahimi
9913*22dc650dSSadaf Ebrahimi callout_block->version = 2;
9914*22dc650dSSadaf Ebrahimi callout_block->callout_flags = 0;
9915*22dc650dSSadaf Ebrahimi
9916*22dc650dSSadaf Ebrahimi /* Offsets in subject. */
9917*22dc650dSSadaf Ebrahimi callout_block->subject_length = arguments->end - arguments->begin;
9918*22dc650dSSadaf Ebrahimi callout_block->start_match = jit_ovector[0] - begin;
9919*22dc650dSSadaf Ebrahimi callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - begin;
9920*22dc650dSSadaf Ebrahimi callout_block->subject = begin;
9921*22dc650dSSadaf Ebrahimi
9922*22dc650dSSadaf Ebrahimi /* Convert and copy the JIT offset vector to the ovector array. */
9923*22dc650dSSadaf Ebrahimi callout_block->capture_top = 1;
9924*22dc650dSSadaf Ebrahimi callout_block->offset_vector = ovector;
9925*22dc650dSSadaf Ebrahimi
9926*22dc650dSSadaf Ebrahimi ovector[0] = PCRE2_UNSET;
9927*22dc650dSSadaf Ebrahimi ovector[1] = PCRE2_UNSET;
9928*22dc650dSSadaf Ebrahimi ovector += 2;
9929*22dc650dSSadaf Ebrahimi jit_ovector += 2;
9930*22dc650dSSadaf Ebrahimi capture_top = 1;
9931*22dc650dSSadaf Ebrahimi
9932*22dc650dSSadaf Ebrahimi /* Convert pointers to sizes. */
9933*22dc650dSSadaf Ebrahimi while (--oveccount != 0)
9934*22dc650dSSadaf Ebrahimi {
9935*22dc650dSSadaf Ebrahimi capture_top++;
9936*22dc650dSSadaf Ebrahimi
9937*22dc650dSSadaf Ebrahimi ovector[0] = (PCRE2_SIZE)(jit_ovector[0] - begin);
9938*22dc650dSSadaf Ebrahimi ovector[1] = (PCRE2_SIZE)(jit_ovector[1] - begin);
9939*22dc650dSSadaf Ebrahimi
9940*22dc650dSSadaf Ebrahimi if (ovector[0] != PCRE2_UNSET)
9941*22dc650dSSadaf Ebrahimi callout_block->capture_top = capture_top;
9942*22dc650dSSadaf Ebrahimi
9943*22dc650dSSadaf Ebrahimi ovector += 2;
9944*22dc650dSSadaf Ebrahimi jit_ovector += 2;
9945*22dc650dSSadaf Ebrahimi }
9946*22dc650dSSadaf Ebrahimi
9947*22dc650dSSadaf Ebrahimi return (arguments->callout)(callout_block, arguments->callout_data);
9948*22dc650dSSadaf Ebrahimi }
9949*22dc650dSSadaf Ebrahimi
9950*22dc650dSSadaf Ebrahimi #define CALLOUT_ARG_OFFSET(arg) \
9951*22dc650dSSadaf Ebrahimi SLJIT_OFFSETOF(pcre2_callout_block, arg)
9952*22dc650dSSadaf Ebrahimi
compile_callout_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9953*22dc650dSSadaf Ebrahimi static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9954*22dc650dSSadaf Ebrahimi {
9955*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
9956*22dc650dSSadaf Ebrahimi backtrack_common *backtrack;
9957*22dc650dSSadaf Ebrahimi sljit_s32 mov_opcode;
9958*22dc650dSSadaf Ebrahimi unsigned int callout_length = (*cc == OP_CALLOUT)
9959*22dc650dSSadaf Ebrahimi ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
9960*22dc650dSSadaf Ebrahimi sljit_sw value1;
9961*22dc650dSSadaf Ebrahimi sljit_sw value2;
9962*22dc650dSSadaf Ebrahimi sljit_sw value3;
9963*22dc650dSSadaf Ebrahimi sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * SSIZE_OF(sw);
9964*22dc650dSSadaf Ebrahimi
9965*22dc650dSSadaf Ebrahimi PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
9966*22dc650dSSadaf Ebrahimi
9967*22dc650dSSadaf Ebrahimi callout_arg_size = (sizeof(pcre2_callout_block) + callout_arg_size + sizeof(sljit_sw) - 1) / sizeof(sljit_sw);
9968*22dc650dSSadaf Ebrahimi
9969*22dc650dSSadaf Ebrahimi allocate_stack(common, callout_arg_size);
9970*22dc650dSSadaf Ebrahimi
9971*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(common->capture_last_ptr != 0);
9972*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
9973*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9974*22dc650dSSadaf Ebrahimi value1 = (*cc == OP_CALLOUT) ? cc[1 + 2 * LINK_SIZE] : 0;
9975*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1);
9976*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
9977*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_top), SLJIT_IMM, common->re->top_bracket + 1);
9978*22dc650dSSadaf Ebrahimi
9979*22dc650dSSadaf Ebrahimi /* These pointer sized fields temporarly stores internal variables. */
9980*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
9981*22dc650dSSadaf Ebrahimi
9982*22dc650dSSadaf Ebrahimi if (common->mark_ptr != 0)
9983*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
9984*22dc650dSSadaf Ebrahimi mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
9985*22dc650dSSadaf Ebrahimi OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1));
9986*22dc650dSSadaf Ebrahimi OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE));
9987*22dc650dSSadaf Ebrahimi
9988*22dc650dSSadaf Ebrahimi if (*cc == OP_CALLOUT)
9989*22dc650dSSadaf Ebrahimi {
9990*22dc650dSSadaf Ebrahimi value1 = 0;
9991*22dc650dSSadaf Ebrahimi value2 = 0;
9992*22dc650dSSadaf Ebrahimi value3 = 0;
9993*22dc650dSSadaf Ebrahimi }
9994*22dc650dSSadaf Ebrahimi else
9995*22dc650dSSadaf Ebrahimi {
9996*22dc650dSSadaf Ebrahimi value1 = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1);
9997*22dc650dSSadaf Ebrahimi value2 = (callout_length - (1 + 4*LINK_SIZE + 2));
9998*22dc650dSSadaf Ebrahimi value3 = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE));
9999*22dc650dSSadaf Ebrahimi }
10000*22dc650dSSadaf Ebrahimi
10001*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1);
10002*22dc650dSSadaf Ebrahimi OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2);
10003*22dc650dSSadaf Ebrahimi OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, value3);
10004*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
10005*22dc650dSSadaf Ebrahimi
10006*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
10007*22dc650dSSadaf Ebrahimi
10008*22dc650dSSadaf Ebrahimi /* Needed to save important temporary registers. */
10009*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
10010*22dc650dSSadaf Ebrahimi /* SLJIT_R0 = arguments */
10011*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
10012*22dc650dSSadaf Ebrahimi GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
10013*22dc650dSSadaf Ebrahimi sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(32, W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_callout_jit));
10014*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
10015*22dc650dSSadaf Ebrahimi free_stack(common, callout_arg_size);
10016*22dc650dSSadaf Ebrahimi
10017*22dc650dSSadaf Ebrahimi /* Check return value. */
10018*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
10019*22dc650dSSadaf Ebrahimi add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_SIG_GREATER));
10020*22dc650dSSadaf Ebrahimi if (common->abort_label == NULL)
10021*22dc650dSSadaf Ebrahimi add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL) /* SIG_LESS */);
10022*22dc650dSSadaf Ebrahimi else
10023*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_NOT_EQUAL /* SIG_LESS */, common->abort_label);
10024*22dc650dSSadaf Ebrahimi return cc + callout_length;
10025*22dc650dSSadaf Ebrahimi }
10026*22dc650dSSadaf Ebrahimi
10027*22dc650dSSadaf Ebrahimi #undef CALLOUT_ARG_SIZE
10028*22dc650dSSadaf Ebrahimi #undef CALLOUT_ARG_OFFSET
10029*22dc650dSSadaf Ebrahimi
/* Emits the code that moves STR_PTR backwards for OP_REVERSE (a fixed
number of characters) and OP_VREVERSE (between lmin and lmax characters).

Moving back by lmin characters is mandatory: when the start of the subject
(the begin field of jit_arguments) is in the way, a jump is added to the
failure list. That list belongs to the parent for OP_REVERSE and to the
newly pushed backtrack for OP_VREVERSE.

For OP_VREVERSE the position after the mandatory step is stored in STACK(3),
then STR_PTR is moved back by at most lmax - lmin further characters without
passing the start of the subject, and the result is stored in STACK(2).
These stack slots are not allocated here; presumably the caller reserves
them.

Arguments:
  common    shared state of the code generator
  cc        points to OP_REVERSE or OP_VREVERSE
  parent    the enclosing backtrack; its top must be NULL

Returns:    the position just after the opcode and its operands */

static PCRE2_SPTR compile_reverse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack = NULL;
jump_list **reverse_failed;
unsigned int lmin, lmax;
#ifdef SUPPORT_UNICODE
struct sljit_jump *jump;
struct sljit_label *label;
#endif

SLJIT_ASSERT(parent->top == NULL);

if (*cc == OP_REVERSE)
  {
  reverse_failed = &parent->own_backtracks;
  lmin = GET2(cc, 1);
  lmax = lmin;
  cc += 1 + IMM2_SIZE;

  SLJIT_ASSERT(lmin > 0);
  }
else
  {
  SLJIT_ASSERT(*cc == OP_VREVERSE);
  PUSH_BACKTRACK(sizeof(vreverse_backtrack), cc, NULL);

  reverse_failed = &backtrack->own_backtracks;
  lmin = GET2(cc, 1);
  lmax = GET2(cc, 1 + IMM2_SIZE);
  cc += 1 + 2 * IMM2_SIZE;

  SLJIT_ASSERT(lmin < lmax);
  }

/* TMP2 = start of the subject. */
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
  }
else
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));

#ifdef SUPPORT_UNICODE
if (common->utf)
  {
  /* In UTF mode the position is moved back one character at a time,
  TMP3 counts the remaining characters. */
  if (lmin > 0)
    {
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, lmin);
    label = LABEL();
    add_jump(compiler, reverse_failed, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0));
    move_back(common, reverse_failed, FALSE);
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, label);
    }

  if (lmin < lmax)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);

    OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, lmax - lmin);
    label = LABEL();
    /* Reaching the start of the subject ends the loop here, it is
    not a failure. */
    jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
    move_back(common, reverse_failed, FALSE);
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, label);

    JUMPHERE(jump);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
    }
  }
else
#endif
  {
  /* Without UTF the distance is known at compile time. */
  if (lmin > 0)
    {
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(lmin));
    add_jump(compiler, reverse_failed, CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0));
    }

  if (lmin < lmax)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);

    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(lmax - lmin));
    /* Presumably selects TMP2 when STR_PTR is below it, which clamps
    STR_PTR to the start of the subject. */
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, STR_PTR, 0, TMP2, 0);
    SELECT(SLJIT_LESS, STR_PTR, TMP2, 0, STR_PTR);

    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
    }
  }

check_start_used_ptr(common);

/* Only true for OP_VREVERSE (lmax equals lmin for OP_REVERSE), so
backtrack is not NULL here. */
if (lmin < lmax)
  BACKTRACK_AS(vreverse_backtrack)->matchingpath = LABEL();

return cc;
}
10129*22dc650dSSadaf Ebrahimi
assert_needs_str_ptr_saving(PCRE2_SPTR cc)10130*22dc650dSSadaf Ebrahimi static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc)
10131*22dc650dSSadaf Ebrahimi {
10132*22dc650dSSadaf Ebrahimi while (TRUE)
10133*22dc650dSSadaf Ebrahimi {
10134*22dc650dSSadaf Ebrahimi switch (*cc)
10135*22dc650dSSadaf Ebrahimi {
10136*22dc650dSSadaf Ebrahimi case OP_CALLOUT_STR:
10137*22dc650dSSadaf Ebrahimi cc += GET(cc, 1 + 2*LINK_SIZE);
10138*22dc650dSSadaf Ebrahimi break;
10139*22dc650dSSadaf Ebrahimi
10140*22dc650dSSadaf Ebrahimi case OP_NOT_WORD_BOUNDARY:
10141*22dc650dSSadaf Ebrahimi case OP_WORD_BOUNDARY:
10142*22dc650dSSadaf Ebrahimi case OP_CIRC:
10143*22dc650dSSadaf Ebrahimi case OP_CIRCM:
10144*22dc650dSSadaf Ebrahimi case OP_DOLL:
10145*22dc650dSSadaf Ebrahimi case OP_DOLLM:
10146*22dc650dSSadaf Ebrahimi case OP_CALLOUT:
10147*22dc650dSSadaf Ebrahimi case OP_ALT:
10148*22dc650dSSadaf Ebrahimi case OP_NOT_UCP_WORD_BOUNDARY:
10149*22dc650dSSadaf Ebrahimi case OP_UCP_WORD_BOUNDARY:
10150*22dc650dSSadaf Ebrahimi cc += PRIV(OP_lengths)[*cc];
10151*22dc650dSSadaf Ebrahimi break;
10152*22dc650dSSadaf Ebrahimi
10153*22dc650dSSadaf Ebrahimi case OP_KET:
10154*22dc650dSSadaf Ebrahimi return FALSE;
10155*22dc650dSSadaf Ebrahimi
10156*22dc650dSSadaf Ebrahimi default:
10157*22dc650dSSadaf Ebrahimi return TRUE;
10158*22dc650dSSadaf Ebrahimi }
10159*22dc650dSSadaf Ebrahimi }
10160*22dc650dSSadaf Ebrahimi }
10161*22dc650dSSadaf Ebrahimi
/* Emits the matching path of an assertion (OP_ASSERT, OP_ASSERT_NOT,
OP_ASSERTBACK or OP_ASSERTBACK_NOT), which may be preceded by OP_BRAZERO
or OP_BRAMINZERO.

Every alternative of the assertion is compiled in turn, followed by its own
backtracking path. The accept / quit / then-trap related fields of common
are saved on entry and restored on every exit, including the error exits.

Arguments:
  common       the shared compiler state
  cc           points to the assertion opcode, or to the OP_BRAZERO /
                 OP_BRAMINZERO in front of it
  backtrack    the backtrack record of the assertion; framesize and
                 private_data_ptr are always set here, matchingpath is
                 set for OP_BRAZERO (for OP_BRAMINZERO it is only read,
                 so it must have been set by the caller)
  conditional  when TRUE the failure jumps are collected in
                 backtrack->condfailed instead of
                 backtrack->common.own_backtracks; it must be FALSE when
                 a BRAZERO / BRAMINZERO prefix is present (asserted)

Returns:       pointer to the opcode following the closing bracket of the
                 assertion, or NULL when the sljit compiler reports an error
*/
static PCRE2_SPTR compile_assert_matchingpath(compiler_common *common, PCRE2_SPTR cc, assert_backtrack *backtrack, BOOL conditional)
{
DEFINE_COMPILER;
int framesize;
int extrasize;
BOOL local_quit_available = FALSE;
BOOL needs_control_head;
/* Number of additional stack slots reserved when a lookbehind contains
a variable length reverse (see find_vreverse() below). This is a count
(0 or 3), not a flag, so it must be an int rather than a BOOL. */
int end_block_size = 0;
BOOL has_vreverse;
int private_data_ptr;
backtrack_common altbacktrack;
PCRE2_SPTR ccbegin;
PCRE2_UCHAR opcode;
PCRE2_UCHAR bra = OP_BRA;
jump_list *tmp = NULL;
jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.own_backtracks;
jump_list **found;
/* Saving previous accept variables. */
BOOL save_local_quit_available = common->local_quit_available;
BOOL save_in_positive_assertion = common->in_positive_assertion;
then_trap_backtrack *save_then_trap = common->then_trap;
struct sljit_label *save_quit_label = common->quit_label;
struct sljit_label *save_accept_label = common->accept_label;
jump_list *save_quit = common->quit;
jump_list *save_positive_assertion_quit = common->positive_assertion_quit;
jump_list *save_accept = common->accept;
struct sljit_jump *jump;
struct sljit_jump *brajump = NULL;

/* Assert captures then. */
common->then_trap = NULL;

if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  SLJIT_ASSERT(!conditional);
  bra = *cc;
  cc++;
  }

private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
backtrack->framesize = framesize;
backtrack->private_data_ptr = private_data_ptr;
opcode = *cc;
SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* A matching alternative of a positive assertion jumps to the local "tmp"
list (resolved at "Assert is successful" below), while a matching
alternative of a negative assertion is a failure and goes to target. */
found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
ccbegin = cc;
cc += GET(cc, 1);

if (bra == OP_BRAMINZERO)
  {
  /* This is a braminzero backtrack path. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if ((opcode == OP_ASSERTBACK || opcode == OP_ASSERTBACK_NOT) && find_vreverse(ccbegin))
  end_block_size = 3;

if (framesize < 0)
  {
  /* No frame is needed. The STR_PTR slot is omitted when the assertion
  body contains nothing which requires it. */
  extrasize = 1;
  if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
    extrasize = 0;

  extrasize += end_block_size;

  if (needs_control_head)
    extrasize++;

  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  if (extrasize > 0)
    allocate_stack(common, extrasize);

  if (needs_control_head)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);

  if (extrasize > 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    SLJIT_ASSERT(extrasize == end_block_size + 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP1, 0);
    }
  }
else
  {
  /* A frame is needed: STR_PTR, the previous private data value and
  optionally the control head are stored before the frame. */
  extrasize = (needs_control_head ? 3 : 2) + end_block_size;

  OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);
  allocate_stack(common, framesize + extrasize);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 2), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    }
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP1, 0);

  init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize);
  }

if (end_block_size > 0)
  {
  /* Save STR_END in STACK(1) and use the current position as the end of
  the subject while the lookbehind is matched. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_END, 0);
  OP1(SLJIT_MOV, STR_END, 0, STR_PTR, 0);
  }

memset(&altbacktrack, 0, sizeof(backtrack_common));
if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT))
  {
  /* Control verbs cannot escape from these asserts. */
  local_quit_available = TRUE;
  common->local_quit_available = TRUE;
  common->quit_label = NULL;
  common->quit = NULL;
  }

common->in_positive_assertion = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK);
common->positive_assertion_quit = NULL;

/* One iteration for each alternative. */
while (1)
  {
  common->accept_label = NULL;
  common->accept = NULL;
  altbacktrack.top = NULL;
  altbacktrack.own_backtracks = NULL;

  /* Second and later alternatives restart from the STR_PTR stored in
  STACK(0) above. */
  if (*ccbegin == OP_ALT && extrasize > 0)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  altbacktrack.cc = ccbegin;
  ccbegin += 1 + LINK_SIZE;

  has_vreverse = (*ccbegin == OP_VREVERSE);
  if (*ccbegin == OP_REVERSE || has_vreverse)
    ccbegin = compile_reverse_matchingpath(common, ccbegin, &altbacktrack);

  compile_matchingpath(common, ccbegin, cc, &altbacktrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Restore the saved state before reporting the error. */
    if (local_quit_available)
      {
      common->local_quit_available = save_local_quit_available;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->in_positive_assertion = save_in_positive_assertion;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assertion_quit = save_positive_assertion_quit;
    common->accept = save_accept;
    return NULL;
    }

  if (has_vreverse)
    {
    /* STR_END holds the starting position of the lookbehind (set above),
    so the alternative is backtracked when it stops before that point. */
    SLJIT_ASSERT(altbacktrack.top != NULL);
    add_jump(compiler, &altbacktrack.top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
    }

  common->accept_label = LABEL();
  if (common->accept != NULL)
    set_jumps(common->accept, common->accept_label);

  /* Reset stack. */
  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    else if (extrasize > 0)
      free_stack(common, extrasize);

    if (end_block_size > 0)
      OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));

    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
    }
  else
    {
    if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));

      if (end_block_size > 0)
        OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 2));

      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
      }
    else
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

      if (end_block_size > 0)
        OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize + 1));

      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
      }
    }

  if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (conditional)
      {
      if (extrasize > 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-end_block_size - (needs_control_head ? 2 : 1)));
      }
    else if (bra == OP_BRAZERO)
      {
      if (framesize < 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
      else
        {
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
        }
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (framesize >= 0)
      {
      /* For OP_BRA and OP_BRAMINZERO. */
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
      }
    }
  add_jump(compiler, found, JUMP(SLJIT_JUMP));

  compile_backtrackingpath(common, altbacktrack.top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Restore the saved state before reporting the error. */
    if (local_quit_available)
      {
      common->local_quit_available = save_local_quit_available;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->in_positive_assertion = save_in_positive_assertion;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assertion_quit = save_positive_assertion_quit;
    common->accept = save_accept;
    return NULL;
    }
  set_jumps(altbacktrack.own_backtracks, LABEL());

  if (*cc != OP_ALT)
    break;

  ccbegin = cc;
  cc += GET(cc, 1);
  }

if (local_quit_available)
  {
  SLJIT_ASSERT(common->positive_assertion_quit == NULL);
  /* Makes the check less complicated below. */
  common->positive_assertion_quit = common->quit;
  }

/* None of them matched. */
if (common->positive_assertion_quit != NULL)
  {
  jump = JUMP(SLJIT_JUMP);
  set_jumps(common->positive_assertion_quit, LABEL());
  SLJIT_ASSERT(framesize != no_stack);
  if (framesize < 0)
    OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
  else
    {
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (extrasize + 1) * sizeof(sljit_sw));
    }
  JUMPHERE(jump);
  }

if (end_block_size > 0)
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(1));

if (needs_control_head)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1));

if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
  {
  /* Assert is failed. */
  if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  if (framesize < 0)
    {
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      if (extrasize >= 2)
        free_stack(common, extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  /* For OP_BRAZERO the failure is not propagated: the jump is bound to
  backtrack->matchingpath below instead of being added to target. */
  jump = JUMP(SLJIT_JUMP);
  if (bra != OP_BRAZERO)
    add_jump(compiler, target, jump);

  /* Assert is successful. */
  set_jumps(tmp, LABEL());
  if (framesize < 0)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));

    /* Keep the STR_PTR on the top of the stack. */
    if (bra == OP_BRAZERO)
      {
      /* This allocation is always successful. */
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      if (extrasize >= 2)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    else if (bra == OP_BRAMINZERO)
      {
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    }
  else
    {
    if (bra == OP_BRA)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
      }
    else
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + end_block_size + 2) * sizeof(sljit_sw));

      if (extrasize == 2 + end_block_size)
        {
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
        if (bra == OP_BRAMINZERO)
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
        }
      else
        {
        SLJIT_ASSERT(extrasize == 3 + end_block_size);
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
        }
      }
    }

  if (bra == OP_BRAZERO)
    {
    backtrack->matchingpath = LABEL();
    SET_LABEL(jump, backtrack->matchingpath);
    }
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    if (framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
      }
    set_jumps(backtrack->common.own_backtracks, LABEL());
    }
  }
else
  {
  /* AssertNot is successful. */
  if (framesize < 0)
    {
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

    if (bra != OP_BRA)
      {
      if (extrasize >= 2)
        free_stack(common, extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra != OP_BRA)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }

  if (bra == OP_BRAZERO)
    backtrack->matchingpath = LABEL();
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    }

  if (bra != OP_BRA)
    {
    SLJIT_ASSERT(found == &backtrack->common.own_backtracks);
    set_jumps(backtrack->common.own_backtracks, LABEL());
    backtrack->common.own_backtracks = NULL;
    }
  }

/* Restore the state saved on entry. */
if (local_quit_available)
  {
  common->local_quit_available = save_local_quit_available;
  common->quit_label = save_quit_label;
  common->quit = save_quit;
  }
common->in_positive_assertion = save_in_positive_assertion;
common->then_trap = save_then_trap;
common->accept_label = save_accept_label;
common->positive_assertion_quit = save_positive_assertion_quit;
common->accept = save_accept;
return cc + 1 + LINK_SIZE;
}
10632*22dc650dSSadaf Ebrahimi
match_once_common(compiler_common * common,PCRE2_UCHAR ket,int framesize,int private_data_ptr,BOOL has_alternatives,BOOL needs_control_head)10633*22dc650dSSadaf Ebrahimi static SLJIT_INLINE void match_once_common(compiler_common *common, PCRE2_UCHAR ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
10634*22dc650dSSadaf Ebrahimi {
10635*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
10636*22dc650dSSadaf Ebrahimi int stacksize;
10637*22dc650dSSadaf Ebrahimi
10638*22dc650dSSadaf Ebrahimi if (framesize < 0)
10639*22dc650dSSadaf Ebrahimi {
10640*22dc650dSSadaf Ebrahimi if (framesize == no_frame)
10641*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10642*22dc650dSSadaf Ebrahimi else
10643*22dc650dSSadaf Ebrahimi {
10644*22dc650dSSadaf Ebrahimi stacksize = needs_control_head ? 1 : 0;
10645*22dc650dSSadaf Ebrahimi if (ket != OP_KET || has_alternatives)
10646*22dc650dSSadaf Ebrahimi stacksize++;
10647*22dc650dSSadaf Ebrahimi
10648*22dc650dSSadaf Ebrahimi if (stacksize > 0)
10649*22dc650dSSadaf Ebrahimi free_stack(common, stacksize);
10650*22dc650dSSadaf Ebrahimi }
10651*22dc650dSSadaf Ebrahimi
10652*22dc650dSSadaf Ebrahimi if (needs_control_head)
10653*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? STACK(-2) : STACK(-1));
10654*22dc650dSSadaf Ebrahimi
10655*22dc650dSSadaf Ebrahimi /* TMP2 which is set here used by OP_KETRMAX below. */
10656*22dc650dSSadaf Ebrahimi if (ket == OP_KETRMAX)
10657*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10658*22dc650dSSadaf Ebrahimi else if (ket == OP_KETRMIN)
10659*22dc650dSSadaf Ebrahimi {
10660*22dc650dSSadaf Ebrahimi /* Move the STR_PTR to the private_data_ptr. */
10661*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
10662*22dc650dSSadaf Ebrahimi }
10663*22dc650dSSadaf Ebrahimi }
10664*22dc650dSSadaf Ebrahimi else
10665*22dc650dSSadaf Ebrahimi {
10666*22dc650dSSadaf Ebrahimi stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
10667*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
10668*22dc650dSSadaf Ebrahimi if (needs_control_head)
10669*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10670*22dc650dSSadaf Ebrahimi
10671*22dc650dSSadaf Ebrahimi if (ket == OP_KETRMAX)
10672*22dc650dSSadaf Ebrahimi {
10673*22dc650dSSadaf Ebrahimi /* TMP2 which is set here used by OP_KETRMAX below. */
10674*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10675*22dc650dSSadaf Ebrahimi }
10676*22dc650dSSadaf Ebrahimi }
10677*22dc650dSSadaf Ebrahimi if (needs_control_head)
10678*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
10679*22dc650dSSadaf Ebrahimi }
10680*22dc650dSSadaf Ebrahimi
match_capture_common(compiler_common * common,int stacksize,int offset,int private_data_ptr)10681*22dc650dSSadaf Ebrahimi static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
10682*22dc650dSSadaf Ebrahimi {
10683*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
10684*22dc650dSSadaf Ebrahimi
10685*22dc650dSSadaf Ebrahimi if (common->capture_last_ptr != 0)
10686*22dc650dSSadaf Ebrahimi {
10687*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
10688*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
10689*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10690*22dc650dSSadaf Ebrahimi stacksize++;
10691*22dc650dSSadaf Ebrahimi }
10692*22dc650dSSadaf Ebrahimi if (common->optimized_cbracket[offset >> 1] == 0)
10693*22dc650dSSadaf Ebrahimi {
10694*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
10695*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
10696*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10697*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10698*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
10699*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10700*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10701*22dc650dSSadaf Ebrahimi stacksize += 2;
10702*22dc650dSSadaf Ebrahimi }
10703*22dc650dSSadaf Ebrahimi return stacksize;
10704*22dc650dSSadaf Ebrahimi }
10705*22dc650dSSadaf Ebrahimi
do_script_run(PCRE2_SPTR ptr,PCRE2_SPTR endptr)10706*22dc650dSSadaf Ebrahimi static PCRE2_SPTR SLJIT_FUNC do_script_run(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
10707*22dc650dSSadaf Ebrahimi {
10708*22dc650dSSadaf Ebrahimi if (PRIV(script_run)(ptr, endptr, FALSE))
10709*22dc650dSSadaf Ebrahimi return endptr;
10710*22dc650dSSadaf Ebrahimi return NULL;
10711*22dc650dSSadaf Ebrahimi }
10712*22dc650dSSadaf Ebrahimi
10713*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
10714*22dc650dSSadaf Ebrahimi
do_script_run_utf(PCRE2_SPTR ptr,PCRE2_SPTR endptr)10715*22dc650dSSadaf Ebrahimi static PCRE2_SPTR SLJIT_FUNC do_script_run_utf(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
10716*22dc650dSSadaf Ebrahimi {
10717*22dc650dSSadaf Ebrahimi if (PRIV(script_run)(ptr, endptr, TRUE))
10718*22dc650dSSadaf Ebrahimi return endptr;
10719*22dc650dSSadaf Ebrahimi return NULL;
10720*22dc650dSSadaf Ebrahimi }
10721*22dc650dSSadaf Ebrahimi
10722*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE */
10723*22dc650dSSadaf Ebrahimi
/* Emits a call to the script run checker and a backtrack on failure.

Arguments:
  common            compiler state
  private_data_ptr  local slot whose value is passed as the first argument
                    (presumably the subject position saved when the group
                    was entered)
  parent            backtrack record that receives the failure jump

The value returned by the checker is copied into STR_PTR. When it is zero,
the generated code jumps to parent->top->simple_backtracks if parent->top
exists, otherwise to parent->own_backtracks. */
static void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent)
{
DEFINE_COMPILER;
struct sljit_jump *rejected;
sljit_sw checker = SLJIT_FUNC_ADDR(do_script_run);

#ifdef SUPPORT_UNICODE
if (common->utf)
  checker = SLJIT_FUNC_ADDR(do_script_run_utf);
#endif

/* The two word arguments are handed over in TMP1 and STR_PTR, so these
must be the first two argument registers. */
SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, checker);

/* A zero return value means that the script run check has failed. */
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
rejected = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
add_jump(compiler, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, rejected);
}
10741*22dc650dSSadaf Ebrahimi
10742*22dc650dSSadaf Ebrahimi /*
10743*22dc650dSSadaf Ebrahimi Handling bracketed expressions is probably the most complex part.
10744*22dc650dSSadaf Ebrahimi
10745*22dc650dSSadaf Ebrahimi Stack layout naming characters:
10746*22dc650dSSadaf Ebrahimi S - Push the current STR_PTR
10747*22dc650dSSadaf Ebrahimi 0 - Push a 0 (NULL)
10748*22dc650dSSadaf Ebrahimi A - Push the current STR_PTR. Needed for restoring the STR_PTR
10749*22dc650dSSadaf Ebrahimi before the next alternative. Not pushed if there are no alternatives.
10750*22dc650dSSadaf Ebrahimi M - Any values pushed by the current alternative. Can be empty, or anything.
10751*22dc650dSSadaf Ebrahimi C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
10752*22dc650dSSadaf Ebrahimi L - Push the previous local (pointed by localptr) to the stack
() - optional values stored on the stack
()* - optional, can be stored multiple times
10755*22dc650dSSadaf Ebrahimi
10756*22dc650dSSadaf Ebrahimi The following list shows the regular expression templates, their PCRE byte codes
10757*22dc650dSSadaf Ebrahimi and stack layout supported by pcre-sljit.
10758*22dc650dSSadaf Ebrahimi
10759*22dc650dSSadaf Ebrahimi (?:) OP_BRA | OP_KET A M
10760*22dc650dSSadaf Ebrahimi () OP_CBRA | OP_KET C M
10761*22dc650dSSadaf Ebrahimi (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
10762*22dc650dSSadaf Ebrahimi OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
10763*22dc650dSSadaf Ebrahimi (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
10764*22dc650dSSadaf Ebrahimi OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
10765*22dc650dSSadaf Ebrahimi ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
10766*22dc650dSSadaf Ebrahimi OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
10767*22dc650dSSadaf Ebrahimi ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
10768*22dc650dSSadaf Ebrahimi OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
10769*22dc650dSSadaf Ebrahimi (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
10770*22dc650dSSadaf Ebrahimi (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
10771*22dc650dSSadaf Ebrahimi ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
10772*22dc650dSSadaf Ebrahimi ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
10773*22dc650dSSadaf Ebrahimi (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
10774*22dc650dSSadaf Ebrahimi OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
10775*22dc650dSSadaf Ebrahimi (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
10776*22dc650dSSadaf Ebrahimi OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
10777*22dc650dSSadaf Ebrahimi ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
10778*22dc650dSSadaf Ebrahimi OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
10779*22dc650dSSadaf Ebrahimi ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
10780*22dc650dSSadaf Ebrahimi OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
10781*22dc650dSSadaf Ebrahimi
10782*22dc650dSSadaf Ebrahimi
10783*22dc650dSSadaf Ebrahimi Stack layout naming characters:
10784*22dc650dSSadaf Ebrahimi A - Push the alternative index (starting from 0) on the stack.
Not pushed if there are no alternatives.
10786*22dc650dSSadaf Ebrahimi M - Any values pushed by the current alternative. Can be empty, or anything.
10787*22dc650dSSadaf Ebrahimi
10788*22dc650dSSadaf Ebrahimi The next list shows the possible content of a bracket:
10789*22dc650dSSadaf Ebrahimi (|) OP_*BRA | OP_ALT ... M A
10790*22dc650dSSadaf Ebrahimi (?()|) OP_*COND | OP_ALT M A
10791*22dc650dSSadaf Ebrahimi (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
10792*22dc650dSSadaf Ebrahimi Or nothing, if trace is unnecessary
10793*22dc650dSSadaf Ebrahimi */
10794*22dc650dSSadaf Ebrahimi
compile_bracket_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)10795*22dc650dSSadaf Ebrahimi static PCRE2_SPTR compile_bracket_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
10796*22dc650dSSadaf Ebrahimi {
10797*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
10798*22dc650dSSadaf Ebrahimi backtrack_common *backtrack;
10799*22dc650dSSadaf Ebrahimi PCRE2_UCHAR opcode;
10800*22dc650dSSadaf Ebrahimi int private_data_ptr = 0;
10801*22dc650dSSadaf Ebrahimi int offset = 0;
10802*22dc650dSSadaf Ebrahimi int i, stacksize;
10803*22dc650dSSadaf Ebrahimi int repeat_ptr = 0, repeat_length = 0;
10804*22dc650dSSadaf Ebrahimi int repeat_type = 0, repeat_count = 0;
10805*22dc650dSSadaf Ebrahimi PCRE2_SPTR ccbegin;
10806*22dc650dSSadaf Ebrahimi PCRE2_SPTR matchingpath;
10807*22dc650dSSadaf Ebrahimi PCRE2_SPTR slot;
10808*22dc650dSSadaf Ebrahimi PCRE2_UCHAR bra = OP_BRA;
10809*22dc650dSSadaf Ebrahimi PCRE2_UCHAR ket;
10810*22dc650dSSadaf Ebrahimi assert_backtrack *assert;
10811*22dc650dSSadaf Ebrahimi BOOL has_alternatives;
10812*22dc650dSSadaf Ebrahimi BOOL needs_control_head = FALSE;
10813*22dc650dSSadaf Ebrahimi BOOL has_vreverse = FALSE;
10814*22dc650dSSadaf Ebrahimi struct sljit_jump *jump;
10815*22dc650dSSadaf Ebrahimi struct sljit_jump *skip;
10816*22dc650dSSadaf Ebrahimi struct sljit_label *rmax_label = NULL;
10817*22dc650dSSadaf Ebrahimi struct sljit_jump *braminzero = NULL;
10818*22dc650dSSadaf Ebrahimi
10819*22dc650dSSadaf Ebrahimi PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
10820*22dc650dSSadaf Ebrahimi
10821*22dc650dSSadaf Ebrahimi if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
10822*22dc650dSSadaf Ebrahimi {
10823*22dc650dSSadaf Ebrahimi bra = *cc;
10824*22dc650dSSadaf Ebrahimi cc++;
10825*22dc650dSSadaf Ebrahimi opcode = *cc;
10826*22dc650dSSadaf Ebrahimi }
10827*22dc650dSSadaf Ebrahimi
10828*22dc650dSSadaf Ebrahimi opcode = *cc;
10829*22dc650dSSadaf Ebrahimi ccbegin = cc;
10830*22dc650dSSadaf Ebrahimi matchingpath = bracketend(cc) - 1 - LINK_SIZE;
10831*22dc650dSSadaf Ebrahimi ket = *matchingpath;
10832*22dc650dSSadaf Ebrahimi if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
10833*22dc650dSSadaf Ebrahimi {
10834*22dc650dSSadaf Ebrahimi repeat_ptr = PRIVATE_DATA(matchingpath);
10835*22dc650dSSadaf Ebrahimi repeat_length = PRIVATE_DATA(matchingpath + 1);
10836*22dc650dSSadaf Ebrahimi repeat_type = PRIVATE_DATA(matchingpath + 2);
10837*22dc650dSSadaf Ebrahimi repeat_count = PRIVATE_DATA(matchingpath + 3);
10838*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
10839*22dc650dSSadaf Ebrahimi if (repeat_type == OP_UPTO)
10840*22dc650dSSadaf Ebrahimi ket = OP_KETRMAX;
10841*22dc650dSSadaf Ebrahimi if (repeat_type == OP_MINUPTO)
10842*22dc650dSSadaf Ebrahimi ket = OP_KETRMIN;
10843*22dc650dSSadaf Ebrahimi }
10844*22dc650dSSadaf Ebrahimi
10845*22dc650dSSadaf Ebrahimi matchingpath = ccbegin + 1 + LINK_SIZE;
10846*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
10847*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
10848*22dc650dSSadaf Ebrahimi cc += GET(cc, 1);
10849*22dc650dSSadaf Ebrahimi
10850*22dc650dSSadaf Ebrahimi has_alternatives = *cc == OP_ALT;
10851*22dc650dSSadaf Ebrahimi if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
10852*22dc650dSSadaf Ebrahimi {
10853*22dc650dSSadaf Ebrahimi SLJIT_COMPILE_ASSERT(OP_DNRREF == OP_RREF + 1 && OP_FALSE == OP_RREF + 2 && OP_TRUE == OP_RREF + 3,
10854*22dc650dSSadaf Ebrahimi compile_time_checks_must_be_grouped_together);
10855*22dc650dSSadaf Ebrahimi has_alternatives = ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL) ? FALSE : TRUE;
10856*22dc650dSSadaf Ebrahimi }
10857*22dc650dSSadaf Ebrahimi
10858*22dc650dSSadaf Ebrahimi if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
10859*22dc650dSSadaf Ebrahimi opcode = OP_SCOND;
10860*22dc650dSSadaf Ebrahimi
10861*22dc650dSSadaf Ebrahimi if (opcode == OP_CBRA || opcode == OP_SCBRA)
10862*22dc650dSSadaf Ebrahimi {
10863*22dc650dSSadaf Ebrahimi /* Capturing brackets has a pre-allocated space. */
10864*22dc650dSSadaf Ebrahimi offset = GET2(ccbegin, 1 + LINK_SIZE);
10865*22dc650dSSadaf Ebrahimi if (common->optimized_cbracket[offset] == 0)
10866*22dc650dSSadaf Ebrahimi {
10867*22dc650dSSadaf Ebrahimi private_data_ptr = OVECTOR_PRIV(offset);
10868*22dc650dSSadaf Ebrahimi offset <<= 1;
10869*22dc650dSSadaf Ebrahimi }
10870*22dc650dSSadaf Ebrahimi else
10871*22dc650dSSadaf Ebrahimi {
10872*22dc650dSSadaf Ebrahimi offset <<= 1;
10873*22dc650dSSadaf Ebrahimi private_data_ptr = OVECTOR(offset);
10874*22dc650dSSadaf Ebrahimi }
10875*22dc650dSSadaf Ebrahimi BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
10876*22dc650dSSadaf Ebrahimi matchingpath += IMM2_SIZE;
10877*22dc650dSSadaf Ebrahimi }
10878*22dc650dSSadaf Ebrahimi else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_ONCE || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
10879*22dc650dSSadaf Ebrahimi {
10880*22dc650dSSadaf Ebrahimi /* Other brackets simply allocate the next entry. */
10881*22dc650dSSadaf Ebrahimi private_data_ptr = PRIVATE_DATA(ccbegin);
10882*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(private_data_ptr != 0);
10883*22dc650dSSadaf Ebrahimi BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
10884*22dc650dSSadaf Ebrahimi if (opcode == OP_ONCE)
10885*22dc650dSSadaf Ebrahimi BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
10886*22dc650dSSadaf Ebrahimi }
10887*22dc650dSSadaf Ebrahimi
10888*22dc650dSSadaf Ebrahimi /* Instructions before the first alternative. */
10889*22dc650dSSadaf Ebrahimi stacksize = 0;
10890*22dc650dSSadaf Ebrahimi if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
10891*22dc650dSSadaf Ebrahimi stacksize++;
10892*22dc650dSSadaf Ebrahimi if (bra == OP_BRAZERO)
10893*22dc650dSSadaf Ebrahimi stacksize++;
10894*22dc650dSSadaf Ebrahimi
10895*22dc650dSSadaf Ebrahimi if (stacksize > 0)
10896*22dc650dSSadaf Ebrahimi allocate_stack(common, stacksize);
10897*22dc650dSSadaf Ebrahimi
10898*22dc650dSSadaf Ebrahimi stacksize = 0;
10899*22dc650dSSadaf Ebrahimi if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
10900*22dc650dSSadaf Ebrahimi {
10901*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10902*22dc650dSSadaf Ebrahimi stacksize++;
10903*22dc650dSSadaf Ebrahimi }
10904*22dc650dSSadaf Ebrahimi
10905*22dc650dSSadaf Ebrahimi if (bra == OP_BRAZERO)
10906*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10907*22dc650dSSadaf Ebrahimi
10908*22dc650dSSadaf Ebrahimi if (bra == OP_BRAMINZERO)
10909*22dc650dSSadaf Ebrahimi {
10910*22dc650dSSadaf Ebrahimi /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
10911*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10912*22dc650dSSadaf Ebrahimi if (ket != OP_KETRMIN)
10913*22dc650dSSadaf Ebrahimi {
10914*22dc650dSSadaf Ebrahimi free_stack(common, 1);
10915*22dc650dSSadaf Ebrahimi braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10916*22dc650dSSadaf Ebrahimi }
10917*22dc650dSSadaf Ebrahimi else if (opcode == OP_ONCE || opcode >= OP_SBRA)
10918*22dc650dSSadaf Ebrahimi {
10919*22dc650dSSadaf Ebrahimi jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10920*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10921*22dc650dSSadaf Ebrahimi /* Nothing stored during the first run. */
10922*22dc650dSSadaf Ebrahimi skip = JUMP(SLJIT_JUMP);
10923*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
10924*22dc650dSSadaf Ebrahimi /* Checking zero-length iteration. */
10925*22dc650dSSadaf Ebrahimi if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
10926*22dc650dSSadaf Ebrahimi {
10927*22dc650dSSadaf Ebrahimi /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
10928*22dc650dSSadaf Ebrahimi braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10929*22dc650dSSadaf Ebrahimi }
10930*22dc650dSSadaf Ebrahimi else
10931*22dc650dSSadaf Ebrahimi {
10932*22dc650dSSadaf Ebrahimi /* Except when the whole stack frame must be saved. */
10933*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10934*22dc650dSSadaf Ebrahimi braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
10935*22dc650dSSadaf Ebrahimi }
10936*22dc650dSSadaf Ebrahimi JUMPHERE(skip);
10937*22dc650dSSadaf Ebrahimi }
10938*22dc650dSSadaf Ebrahimi else
10939*22dc650dSSadaf Ebrahimi {
10940*22dc650dSSadaf Ebrahimi jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10941*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10942*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
10943*22dc650dSSadaf Ebrahimi }
10944*22dc650dSSadaf Ebrahimi }
10945*22dc650dSSadaf Ebrahimi
10946*22dc650dSSadaf Ebrahimi if (repeat_type != 0)
10947*22dc650dSSadaf Ebrahimi {
10948*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
10949*22dc650dSSadaf Ebrahimi if (repeat_type == OP_EXACT)
10950*22dc650dSSadaf Ebrahimi rmax_label = LABEL();
10951*22dc650dSSadaf Ebrahimi }
10952*22dc650dSSadaf Ebrahimi
10953*22dc650dSSadaf Ebrahimi if (ket == OP_KETRMIN)
10954*22dc650dSSadaf Ebrahimi BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
10955*22dc650dSSadaf Ebrahimi
10956*22dc650dSSadaf Ebrahimi if (ket == OP_KETRMAX)
10957*22dc650dSSadaf Ebrahimi {
10958*22dc650dSSadaf Ebrahimi rmax_label = LABEL();
10959*22dc650dSSadaf Ebrahimi if (has_alternatives && opcode >= OP_BRA && opcode < OP_SBRA && repeat_type == 0)
10960*22dc650dSSadaf Ebrahimi BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
10961*22dc650dSSadaf Ebrahimi }
10962*22dc650dSSadaf Ebrahimi
10963*22dc650dSSadaf Ebrahimi /* Handling capturing brackets and alternatives. */
10964*22dc650dSSadaf Ebrahimi if (opcode == OP_ONCE)
10965*22dc650dSSadaf Ebrahimi {
10966*22dc650dSSadaf Ebrahimi stacksize = 0;
10967*22dc650dSSadaf Ebrahimi if (needs_control_head)
10968*22dc650dSSadaf Ebrahimi {
10969*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10970*22dc650dSSadaf Ebrahimi stacksize++;
10971*22dc650dSSadaf Ebrahimi }
10972*22dc650dSSadaf Ebrahimi
10973*22dc650dSSadaf Ebrahimi if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
10974*22dc650dSSadaf Ebrahimi {
10975*22dc650dSSadaf Ebrahimi /* Neither capturing brackets nor recursions are found in the block. */
10976*22dc650dSSadaf Ebrahimi if (ket == OP_KETRMIN)
10977*22dc650dSSadaf Ebrahimi {
10978*22dc650dSSadaf Ebrahimi stacksize += 2;
10979*22dc650dSSadaf Ebrahimi if (!needs_control_head)
10980*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10981*22dc650dSSadaf Ebrahimi }
10982*22dc650dSSadaf Ebrahimi else
10983*22dc650dSSadaf Ebrahimi {
10984*22dc650dSSadaf Ebrahimi if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
10985*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
10986*22dc650dSSadaf Ebrahimi if (ket == OP_KETRMAX || has_alternatives)
10987*22dc650dSSadaf Ebrahimi stacksize++;
10988*22dc650dSSadaf Ebrahimi }
10989*22dc650dSSadaf Ebrahimi
10990*22dc650dSSadaf Ebrahimi if (stacksize > 0)
10991*22dc650dSSadaf Ebrahimi allocate_stack(common, stacksize);
10992*22dc650dSSadaf Ebrahimi
10993*22dc650dSSadaf Ebrahimi stacksize = 0;
10994*22dc650dSSadaf Ebrahimi if (needs_control_head)
10995*22dc650dSSadaf Ebrahimi {
10996*22dc650dSSadaf Ebrahimi stacksize++;
10997*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10998*22dc650dSSadaf Ebrahimi }
10999*22dc650dSSadaf Ebrahimi
11000*22dc650dSSadaf Ebrahimi if (ket == OP_KETRMIN)
11001*22dc650dSSadaf Ebrahimi {
11002*22dc650dSSadaf Ebrahimi if (needs_control_head)
11003*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11004*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
11005*22dc650dSSadaf Ebrahimi if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
11006*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
11007*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
11008*22dc650dSSadaf Ebrahimi }
11009*22dc650dSSadaf Ebrahimi else if (ket == OP_KETRMAX || has_alternatives)
11010*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
11011*22dc650dSSadaf Ebrahimi }
11012*22dc650dSSadaf Ebrahimi else
11013*22dc650dSSadaf Ebrahimi {
11014*22dc650dSSadaf Ebrahimi if (ket != OP_KET || has_alternatives)
11015*22dc650dSSadaf Ebrahimi stacksize++;
11016*22dc650dSSadaf Ebrahimi
11017*22dc650dSSadaf Ebrahimi stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
11018*22dc650dSSadaf Ebrahimi allocate_stack(common, stacksize);
11019*22dc650dSSadaf Ebrahimi
11020*22dc650dSSadaf Ebrahimi if (needs_control_head)
11021*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
11022*22dc650dSSadaf Ebrahimi
11023*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11024*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
11025*22dc650dSSadaf Ebrahimi
11026*22dc650dSSadaf Ebrahimi stacksize = needs_control_head ? 1 : 0;
11027*22dc650dSSadaf Ebrahimi if (ket != OP_KET || has_alternatives)
11028*22dc650dSSadaf Ebrahimi {
11029*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
11030*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
11031*22dc650dSSadaf Ebrahimi stacksize++;
11032*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
11033*22dc650dSSadaf Ebrahimi }
11034*22dc650dSSadaf Ebrahimi else
11035*22dc650dSSadaf Ebrahimi {
11036*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
11037*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
11038*22dc650dSSadaf Ebrahimi }
11039*22dc650dSSadaf Ebrahimi init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1);
11040*22dc650dSSadaf Ebrahimi }
11041*22dc650dSSadaf Ebrahimi }
11042*22dc650dSSadaf Ebrahimi else if (opcode == OP_CBRA || opcode == OP_SCBRA)
11043*22dc650dSSadaf Ebrahimi {
11044*22dc650dSSadaf Ebrahimi /* Saving the previous values. */
11045*22dc650dSSadaf Ebrahimi if (common->optimized_cbracket[offset >> 1] != 0)
11046*22dc650dSSadaf Ebrahimi {
11047*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
11048*22dc650dSSadaf Ebrahimi allocate_stack(common, 2);
11049*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11050*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
11051*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
11052*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
11053*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
11054*22dc650dSSadaf Ebrahimi }
11055*22dc650dSSadaf Ebrahimi else
11056*22dc650dSSadaf Ebrahimi {
11057*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11058*22dc650dSSadaf Ebrahimi allocate_stack(common, 1);
11059*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
11060*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
11061*22dc650dSSadaf Ebrahimi }
11062*22dc650dSSadaf Ebrahimi }
11063*22dc650dSSadaf Ebrahimi else if (opcode == OP_ASSERTBACK_NA && PRIVATE_DATA(ccbegin + 1))
11064*22dc650dSSadaf Ebrahimi {
11065*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11066*22dc650dSSadaf Ebrahimi allocate_stack(common, 4);
11067*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
11068*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
11069*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), STR_END, 0);
11070*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
11071*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
11072*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_END, 0, STR_PTR, 0);
11073*22dc650dSSadaf Ebrahimi
11074*22dc650dSSadaf Ebrahimi has_vreverse = (*matchingpath == OP_VREVERSE);
11075*22dc650dSSadaf Ebrahimi if (*matchingpath == OP_REVERSE || has_vreverse)
11076*22dc650dSSadaf Ebrahimi matchingpath = compile_reverse_matchingpath(common, matchingpath, backtrack);
11077*22dc650dSSadaf Ebrahimi }
11078*22dc650dSSadaf Ebrahimi else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
11079*22dc650dSSadaf Ebrahimi {
11080*22dc650dSSadaf Ebrahimi /* Saving the previous value. */
11081*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11082*22dc650dSSadaf Ebrahimi allocate_stack(common, 1);
11083*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
11084*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
11085*22dc650dSSadaf Ebrahimi
11086*22dc650dSSadaf Ebrahimi if (*matchingpath == OP_REVERSE)
11087*22dc650dSSadaf Ebrahimi matchingpath = compile_reverse_matchingpath(common, matchingpath, backtrack);
11088*22dc650dSSadaf Ebrahimi }
11089*22dc650dSSadaf Ebrahimi else if (has_alternatives)
11090*22dc650dSSadaf Ebrahimi {
11091*22dc650dSSadaf Ebrahimi /* Pushing the starting string pointer. */
11092*22dc650dSSadaf Ebrahimi allocate_stack(common, 1);
11093*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11094*22dc650dSSadaf Ebrahimi }
11095*22dc650dSSadaf Ebrahimi
11096*22dc650dSSadaf Ebrahimi /* Generating code for the first alternative. */
11097*22dc650dSSadaf Ebrahimi if (opcode == OP_COND || opcode == OP_SCOND)
11098*22dc650dSSadaf Ebrahimi {
11099*22dc650dSSadaf Ebrahimi if (*matchingpath == OP_CREF)
11100*22dc650dSSadaf Ebrahimi {
11101*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(has_alternatives);
11102*22dc650dSSadaf Ebrahimi add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
11103*22dc650dSSadaf Ebrahimi CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
11104*22dc650dSSadaf Ebrahimi matchingpath += 1 + IMM2_SIZE;
11105*22dc650dSSadaf Ebrahimi }
11106*22dc650dSSadaf Ebrahimi else if (*matchingpath == OP_DNCREF)
11107*22dc650dSSadaf Ebrahimi {
11108*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(has_alternatives);
11109*22dc650dSSadaf Ebrahimi
11110*22dc650dSSadaf Ebrahimi i = GET2(matchingpath, 1 + IMM2_SIZE);
11111*22dc650dSSadaf Ebrahimi slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
11112*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
11113*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
11114*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
11115*22dc650dSSadaf Ebrahimi slot += common->name_entry_size;
11116*22dc650dSSadaf Ebrahimi i--;
11117*22dc650dSSadaf Ebrahimi while (i-- > 0)
11118*22dc650dSSadaf Ebrahimi {
11119*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
11120*22dc650dSSadaf Ebrahimi OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
11121*22dc650dSSadaf Ebrahimi slot += common->name_entry_size;
11122*22dc650dSSadaf Ebrahimi }
11123*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
11124*22dc650dSSadaf Ebrahimi add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
11125*22dc650dSSadaf Ebrahimi matchingpath += 1 + 2 * IMM2_SIZE;
11126*22dc650dSSadaf Ebrahimi }
11127*22dc650dSSadaf Ebrahimi else if ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL)
11128*22dc650dSSadaf Ebrahimi {
11129*22dc650dSSadaf Ebrahimi /* Never has other case. */
11130*22dc650dSSadaf Ebrahimi BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
11131*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(!has_alternatives);
11132*22dc650dSSadaf Ebrahimi
11133*22dc650dSSadaf Ebrahimi if (*matchingpath == OP_TRUE)
11134*22dc650dSSadaf Ebrahimi {
11135*22dc650dSSadaf Ebrahimi stacksize = 1;
11136*22dc650dSSadaf Ebrahimi matchingpath++;
11137*22dc650dSSadaf Ebrahimi }
11138*22dc650dSSadaf Ebrahimi else if (*matchingpath == OP_FALSE || *matchingpath == OP_FAIL)
11139*22dc650dSSadaf Ebrahimi stacksize = 0;
11140*22dc650dSSadaf Ebrahimi else if (*matchingpath == OP_RREF)
11141*22dc650dSSadaf Ebrahimi {
11142*22dc650dSSadaf Ebrahimi stacksize = GET2(matchingpath, 1);
11143*22dc650dSSadaf Ebrahimi if (common->currententry == NULL)
11144*22dc650dSSadaf Ebrahimi stacksize = 0;
11145*22dc650dSSadaf Ebrahimi else if (stacksize == RREF_ANY)
11146*22dc650dSSadaf Ebrahimi stacksize = 1;
11147*22dc650dSSadaf Ebrahimi else if (common->currententry->start == 0)
11148*22dc650dSSadaf Ebrahimi stacksize = stacksize == 0;
11149*22dc650dSSadaf Ebrahimi else
11150*22dc650dSSadaf Ebrahimi stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
11151*22dc650dSSadaf Ebrahimi
11152*22dc650dSSadaf Ebrahimi if (stacksize != 0)
11153*22dc650dSSadaf Ebrahimi matchingpath += 1 + IMM2_SIZE;
11154*22dc650dSSadaf Ebrahimi }
11155*22dc650dSSadaf Ebrahimi else
11156*22dc650dSSadaf Ebrahimi {
11157*22dc650dSSadaf Ebrahimi if (common->currententry == NULL || common->currententry->start == 0)
11158*22dc650dSSadaf Ebrahimi stacksize = 0;
11159*22dc650dSSadaf Ebrahimi else
11160*22dc650dSSadaf Ebrahimi {
11161*22dc650dSSadaf Ebrahimi stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
11162*22dc650dSSadaf Ebrahimi slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
11163*22dc650dSSadaf Ebrahimi i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
11164*22dc650dSSadaf Ebrahimi while (stacksize > 0)
11165*22dc650dSSadaf Ebrahimi {
11166*22dc650dSSadaf Ebrahimi if ((int)GET2(slot, 0) == i)
11167*22dc650dSSadaf Ebrahimi break;
11168*22dc650dSSadaf Ebrahimi slot += common->name_entry_size;
11169*22dc650dSSadaf Ebrahimi stacksize--;
11170*22dc650dSSadaf Ebrahimi }
11171*22dc650dSSadaf Ebrahimi }
11172*22dc650dSSadaf Ebrahimi
11173*22dc650dSSadaf Ebrahimi if (stacksize != 0)
11174*22dc650dSSadaf Ebrahimi matchingpath += 1 + 2 * IMM2_SIZE;
11175*22dc650dSSadaf Ebrahimi }
11176*22dc650dSSadaf Ebrahimi
11177*22dc650dSSadaf Ebrahimi /* The stacksize == 0 is a common "else" case. */
11178*22dc650dSSadaf Ebrahimi if (stacksize == 0)
11179*22dc650dSSadaf Ebrahimi {
11180*22dc650dSSadaf Ebrahimi if (*cc == OP_ALT)
11181*22dc650dSSadaf Ebrahimi {
11182*22dc650dSSadaf Ebrahimi matchingpath = cc + 1 + LINK_SIZE;
11183*22dc650dSSadaf Ebrahimi cc += GET(cc, 1);
11184*22dc650dSSadaf Ebrahimi }
11185*22dc650dSSadaf Ebrahimi else
11186*22dc650dSSadaf Ebrahimi matchingpath = cc;
11187*22dc650dSSadaf Ebrahimi }
11188*22dc650dSSadaf Ebrahimi }
11189*22dc650dSSadaf Ebrahimi else
11190*22dc650dSSadaf Ebrahimi {
11191*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
11192*22dc650dSSadaf Ebrahimi /* Similar code as PUSH_BACKTRACK macro. */
11193*22dc650dSSadaf Ebrahimi assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
11194*22dc650dSSadaf Ebrahimi if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11195*22dc650dSSadaf Ebrahimi return NULL;
11196*22dc650dSSadaf Ebrahimi memset(assert, 0, sizeof(assert_backtrack));
11197*22dc650dSSadaf Ebrahimi assert->common.cc = matchingpath;
11198*22dc650dSSadaf Ebrahimi BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
11199*22dc650dSSadaf Ebrahimi matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
11200*22dc650dSSadaf Ebrahimi }
11201*22dc650dSSadaf Ebrahimi }
11202*22dc650dSSadaf Ebrahimi
11203*22dc650dSSadaf Ebrahimi compile_matchingpath(common, matchingpath, cc, backtrack);
11204*22dc650dSSadaf Ebrahimi if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11205*22dc650dSSadaf Ebrahimi return NULL;
11206*22dc650dSSadaf Ebrahimi
11207*22dc650dSSadaf Ebrahimi switch (opcode)
11208*22dc650dSSadaf Ebrahimi {
11209*22dc650dSSadaf Ebrahimi case OP_ASSERTBACK_NA:
11210*22dc650dSSadaf Ebrahimi if (has_vreverse)
11211*22dc650dSSadaf Ebrahimi {
11212*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(backtrack->top != NULL && PRIVATE_DATA(ccbegin + 1));
11213*22dc650dSSadaf Ebrahimi add_jump(compiler, &backtrack->top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
11214*22dc650dSSadaf Ebrahimi }
11215*22dc650dSSadaf Ebrahimi
11216*22dc650dSSadaf Ebrahimi if (PRIVATE_DATA(ccbegin + 1))
11217*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
11218*22dc650dSSadaf Ebrahimi break;
11219*22dc650dSSadaf Ebrahimi case OP_ASSERT_NA:
11220*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11221*22dc650dSSadaf Ebrahimi break;
11222*22dc650dSSadaf Ebrahimi case OP_ONCE:
11223*22dc650dSSadaf Ebrahimi match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
11224*22dc650dSSadaf Ebrahimi break;
11225*22dc650dSSadaf Ebrahimi case OP_SCRIPT_RUN:
11226*22dc650dSSadaf Ebrahimi match_script_run_common(common, private_data_ptr, backtrack);
11227*22dc650dSSadaf Ebrahimi break;
11228*22dc650dSSadaf Ebrahimi }
11229*22dc650dSSadaf Ebrahimi
11230*22dc650dSSadaf Ebrahimi stacksize = 0;
11231*22dc650dSSadaf Ebrahimi if (repeat_type == OP_MINUPTO)
11232*22dc650dSSadaf Ebrahimi {
11233*22dc650dSSadaf Ebrahimi /* We need to preserve the counter. TMP2 will be used below. */
11234*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
11235*22dc650dSSadaf Ebrahimi stacksize++;
11236*22dc650dSSadaf Ebrahimi }
11237*22dc650dSSadaf Ebrahimi if (ket != OP_KET || bra != OP_BRA)
11238*22dc650dSSadaf Ebrahimi stacksize++;
11239*22dc650dSSadaf Ebrahimi if (offset != 0)
11240*22dc650dSSadaf Ebrahimi {
11241*22dc650dSSadaf Ebrahimi if (common->capture_last_ptr != 0)
11242*22dc650dSSadaf Ebrahimi stacksize++;
11243*22dc650dSSadaf Ebrahimi if (common->optimized_cbracket[offset >> 1] == 0)
11244*22dc650dSSadaf Ebrahimi stacksize += 2;
11245*22dc650dSSadaf Ebrahimi }
11246*22dc650dSSadaf Ebrahimi if (has_alternatives && opcode != OP_ONCE)
11247*22dc650dSSadaf Ebrahimi stacksize++;
11248*22dc650dSSadaf Ebrahimi
11249*22dc650dSSadaf Ebrahimi if (stacksize > 0)
11250*22dc650dSSadaf Ebrahimi allocate_stack(common, stacksize);
11251*22dc650dSSadaf Ebrahimi
11252*22dc650dSSadaf Ebrahimi stacksize = 0;
11253*22dc650dSSadaf Ebrahimi if (repeat_type == OP_MINUPTO)
11254*22dc650dSSadaf Ebrahimi {
11255*22dc650dSSadaf Ebrahimi /* TMP2 was set above. */
11256*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
11257*22dc650dSSadaf Ebrahimi stacksize++;
11258*22dc650dSSadaf Ebrahimi }
11259*22dc650dSSadaf Ebrahimi
11260*22dc650dSSadaf Ebrahimi if (ket != OP_KET || bra != OP_BRA)
11261*22dc650dSSadaf Ebrahimi {
11262*22dc650dSSadaf Ebrahimi if (ket != OP_KET)
11263*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
11264*22dc650dSSadaf Ebrahimi else
11265*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
11266*22dc650dSSadaf Ebrahimi stacksize++;
11267*22dc650dSSadaf Ebrahimi }
11268*22dc650dSSadaf Ebrahimi
11269*22dc650dSSadaf Ebrahimi if (offset != 0)
11270*22dc650dSSadaf Ebrahimi stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
11271*22dc650dSSadaf Ebrahimi
11272*22dc650dSSadaf Ebrahimi /* Skip and count the other alternatives. */
11273*22dc650dSSadaf Ebrahimi i = 1;
11274*22dc650dSSadaf Ebrahimi while (*cc == OP_ALT)
11275*22dc650dSSadaf Ebrahimi {
11276*22dc650dSSadaf Ebrahimi cc += GET(cc, 1);
11277*22dc650dSSadaf Ebrahimi i++;
11278*22dc650dSSadaf Ebrahimi }
11279*22dc650dSSadaf Ebrahimi
11280*22dc650dSSadaf Ebrahimi if (has_alternatives)
11281*22dc650dSSadaf Ebrahimi {
11282*22dc650dSSadaf Ebrahimi if (opcode != OP_ONCE)
11283*22dc650dSSadaf Ebrahimi {
11284*22dc650dSSadaf Ebrahimi if (i <= 3)
11285*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
11286*22dc650dSSadaf Ebrahimi else
11287*22dc650dSSadaf Ebrahimi BACKTRACK_AS(bracket_backtrack)->u.matching_mov_addr = sljit_emit_mov_addr(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
11288*22dc650dSSadaf Ebrahimi }
11289*22dc650dSSadaf Ebrahimi if (ket != OP_KETRMAX)
11290*22dc650dSSadaf Ebrahimi BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
11291*22dc650dSSadaf Ebrahimi }
11292*22dc650dSSadaf Ebrahimi
11293*22dc650dSSadaf Ebrahimi /* Must be after the matchingpath label. */
11294*22dc650dSSadaf Ebrahimi if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
11295*22dc650dSSadaf Ebrahimi {
11296*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
11297*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
11298*22dc650dSSadaf Ebrahimi }
11299*22dc650dSSadaf Ebrahimi
11300*22dc650dSSadaf Ebrahimi if (ket == OP_KETRMAX)
11301*22dc650dSSadaf Ebrahimi {
11302*22dc650dSSadaf Ebrahimi if (repeat_type != 0)
11303*22dc650dSSadaf Ebrahimi {
11304*22dc650dSSadaf Ebrahimi if (has_alternatives)
11305*22dc650dSSadaf Ebrahimi BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
11306*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
11307*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_NOT_ZERO, rmax_label);
11308*22dc650dSSadaf Ebrahimi /* Drop STR_PTR for greedy plus quantifier. */
11309*22dc650dSSadaf Ebrahimi if (opcode != OP_ONCE)
11310*22dc650dSSadaf Ebrahimi free_stack(common, 1);
11311*22dc650dSSadaf Ebrahimi }
11312*22dc650dSSadaf Ebrahimi else if (opcode < OP_BRA || opcode >= OP_SBRA)
11313*22dc650dSSadaf Ebrahimi {
11314*22dc650dSSadaf Ebrahimi if (has_alternatives)
11315*22dc650dSSadaf Ebrahimi BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
11316*22dc650dSSadaf Ebrahimi
11317*22dc650dSSadaf Ebrahimi /* Checking zero-length iteration. */
11318*22dc650dSSadaf Ebrahimi if (opcode != OP_ONCE)
11319*22dc650dSSadaf Ebrahimi {
11320*22dc650dSSadaf Ebrahimi /* This case includes opcodes such as OP_SCRIPT_RUN. */
11321*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
11322*22dc650dSSadaf Ebrahimi /* Drop STR_PTR for greedy plus quantifier. */
11323*22dc650dSSadaf Ebrahimi if (bra != OP_BRAZERO)
11324*22dc650dSSadaf Ebrahimi free_stack(common, 1);
11325*22dc650dSSadaf Ebrahimi }
11326*22dc650dSSadaf Ebrahimi else
11327*22dc650dSSadaf Ebrahimi /* TMP2 must contain the starting STR_PTR. */
11328*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
11329*22dc650dSSadaf Ebrahimi }
11330*22dc650dSSadaf Ebrahimi else
11331*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_JUMP, rmax_label);
11332*22dc650dSSadaf Ebrahimi BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
11333*22dc650dSSadaf Ebrahimi }
11334*22dc650dSSadaf Ebrahimi
11335*22dc650dSSadaf Ebrahimi if (repeat_type == OP_EXACT)
11336*22dc650dSSadaf Ebrahimi {
11337*22dc650dSSadaf Ebrahimi count_match(common);
11338*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
11339*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_NOT_ZERO, rmax_label);
11340*22dc650dSSadaf Ebrahimi }
11341*22dc650dSSadaf Ebrahimi else if (repeat_type == OP_UPTO)
11342*22dc650dSSadaf Ebrahimi {
11343*22dc650dSSadaf Ebrahimi /* We need to preserve the counter. */
11344*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
11345*22dc650dSSadaf Ebrahimi allocate_stack(common, 1);
11346*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
11347*22dc650dSSadaf Ebrahimi }
11348*22dc650dSSadaf Ebrahimi
11349*22dc650dSSadaf Ebrahimi if (bra == OP_BRAZERO)
11350*22dc650dSSadaf Ebrahimi BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
11351*22dc650dSSadaf Ebrahimi
11352*22dc650dSSadaf Ebrahimi if (bra == OP_BRAMINZERO)
11353*22dc650dSSadaf Ebrahimi {
11354*22dc650dSSadaf Ebrahimi /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
11355*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
11356*22dc650dSSadaf Ebrahimi if (braminzero != NULL)
11357*22dc650dSSadaf Ebrahimi {
11358*22dc650dSSadaf Ebrahimi JUMPHERE(braminzero);
11359*22dc650dSSadaf Ebrahimi /* We need to release the end pointer to perform the
11360*22dc650dSSadaf Ebrahimi backtrack for the zero-length iteration. When
11361*22dc650dSSadaf Ebrahimi framesize is < 0, OP_ONCE will do the release itself. */
11362*22dc650dSSadaf Ebrahimi if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
11363*22dc650dSSadaf Ebrahimi {
11364*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11365*22dc650dSSadaf Ebrahimi add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
11366*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
11367*22dc650dSSadaf Ebrahimi }
11368*22dc650dSSadaf Ebrahimi else if (ket == OP_KETRMIN && opcode != OP_ONCE)
11369*22dc650dSSadaf Ebrahimi free_stack(common, 1);
11370*22dc650dSSadaf Ebrahimi }
11371*22dc650dSSadaf Ebrahimi /* Continue to the normal backtrack. */
11372*22dc650dSSadaf Ebrahimi }
11373*22dc650dSSadaf Ebrahimi
11374*22dc650dSSadaf Ebrahimi if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO || (has_alternatives && repeat_type != OP_EXACT))
11375*22dc650dSSadaf Ebrahimi count_match(common);
11376*22dc650dSSadaf Ebrahimi
11377*22dc650dSSadaf Ebrahimi cc += 1 + LINK_SIZE;
11378*22dc650dSSadaf Ebrahimi
11379*22dc650dSSadaf Ebrahimi if (opcode == OP_ONCE)
11380*22dc650dSSadaf Ebrahimi {
11381*22dc650dSSadaf Ebrahimi int data;
11382*22dc650dSSadaf Ebrahimi int framesize = BACKTRACK_AS(bracket_backtrack)->u.framesize;
11383*22dc650dSSadaf Ebrahimi
11384*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(SHRT_MIN <= framesize && framesize < SHRT_MAX/2);
11385*22dc650dSSadaf Ebrahimi /* We temporarily encode the needs_control_head in the lowest bit.
11386*22dc650dSSadaf Ebrahimi The real value should be short enough for this operation to work
11387*22dc650dSSadaf Ebrahimi without triggering Undefined Behaviour. */
11388*22dc650dSSadaf Ebrahimi data = (int)((short)((unsigned short)framesize << 1) | (needs_control_head ? 1 : 0));
11389*22dc650dSSadaf Ebrahimi BACKTRACK_AS(bracket_backtrack)->u.framesize = data;
11390*22dc650dSSadaf Ebrahimi }
11391*22dc650dSSadaf Ebrahimi return cc + repeat_length;
11392*22dc650dSSadaf Ebrahimi }
11393*22dc650dSSadaf Ebrahimi
/* Emits the matching-path code for a bracket whose opcode is OP_BRAPOS,
OP_SBRAPOS, OP_CBRAPOS or OP_SCBRAPOS (optionally preceded by OP_BRAPOSZERO)
and whose list of alternatives is closed by OP_KETRPOS.

Arguments:
  common   the JIT compiler state
  cc       points at the bracket opcode, or at a leading OP_BRAPOSZERO
  parent   not referenced by name in this body; presumably consumed by the
           PUSH_BACKTRACK macro (TODO confirm against the macro definition)

Returns:   the position just after the closing OP_KETRPOS
           (cc + 1 + LINK_SIZE), or NULL when sljit reports a compiler error
           after an alternative (or its backtracking path) has been compiled

Shape of the generated code: a "loop" label is placed before the first
alternative. Whenever an alternative matches, the saved position / capture
data is updated and control jumps back to "loop". Whenever an alternative
fails, STR_PTR is restored and the next alternative is tried. When zero is
FALSE a flag slot on the stack (initialised to 1, cleared to 0 after every
successful iteration) is tested at the end, and a non-zero flag adds a jump
to backtrack->own_backtracks. */
compile_bracketpos_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)11394*22dc650dSSadaf Ebrahimi static PCRE2_SPTR compile_bracketpos_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11395*22dc650dSSadaf Ebrahimi {
11396*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
11397*22dc650dSSadaf Ebrahimi backtrack_common *backtrack;
11398*22dc650dSSadaf Ebrahimi PCRE2_UCHAR opcode;
11399*22dc650dSSadaf Ebrahimi int private_data_ptr;
11400*22dc650dSSadaf Ebrahimi int cbraprivptr = 0;
11401*22dc650dSSadaf Ebrahimi BOOL needs_control_head;
11402*22dc650dSSadaf Ebrahimi int framesize;
11403*22dc650dSSadaf Ebrahimi int stacksize;
11404*22dc650dSSadaf Ebrahimi int offset = 0;
11405*22dc650dSSadaf Ebrahimi BOOL zero = FALSE;
11406*22dc650dSSadaf Ebrahimi PCRE2_SPTR ccbegin = NULL;
11407*22dc650dSSadaf Ebrahimi int stack; /* Also contains the offset of control head. */
11408*22dc650dSSadaf Ebrahimi struct sljit_label *loop = NULL;
11409*22dc650dSSadaf Ebrahimi struct jump_list *emptymatch = NULL;
11410*22dc650dSSadaf Ebrahimi
/* NOTE(review): "backtrack" is never assigned explicitly in this function;
presumably PUSH_BACKTRACK allocates it and sets it - confirm. */
11411*22dc650dSSadaf Ebrahimi PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
/* A leading OP_BRAPOSZERO is skipped and remembered in "zero". */
11412*22dc650dSSadaf Ebrahimi if (*cc == OP_BRAPOSZERO)
11413*22dc650dSSadaf Ebrahimi {
11414*22dc650dSSadaf Ebrahimi zero = TRUE;
11415*22dc650dSSadaf Ebrahimi cc++;
11416*22dc650dSSadaf Ebrahimi }
11417*22dc650dSSadaf Ebrahimi
11418*22dc650dSSadaf Ebrahimi opcode = *cc;
11419*22dc650dSSadaf Ebrahimi private_data_ptr = PRIVATE_DATA(cc);
11420*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(private_data_ptr != 0);
11421*22dc650dSSadaf Ebrahimi BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
/* Locate the first alternative's body (ccbegin). For the capturing forms
also read the bracket number, which follows the link field. */
11422*22dc650dSSadaf Ebrahimi switch(opcode)
11423*22dc650dSSadaf Ebrahimi {
11424*22dc650dSSadaf Ebrahimi case OP_BRAPOS:
11425*22dc650dSSadaf Ebrahimi case OP_SBRAPOS:
11426*22dc650dSSadaf Ebrahimi ccbegin = cc + 1 + LINK_SIZE;
11427*22dc650dSSadaf Ebrahimi break;
11428*22dc650dSSadaf Ebrahimi
11429*22dc650dSSadaf Ebrahimi case OP_CBRAPOS:
11430*22dc650dSSadaf Ebrahimi case OP_SCBRAPOS:
11431*22dc650dSSadaf Ebrahimi offset = GET2(cc, 1 + LINK_SIZE);
11432*22dc650dSSadaf Ebrahimi /* This case cannot be optimized in the same way as
11433*22dc650dSSadaf Ebrahimi normal capturing brackets. */
11434*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
11435*22dc650dSSadaf Ebrahimi cbraprivptr = OVECTOR_PRIV(offset);
/* From here on "offset" is the doubled bracket number: it is used as
OVECTOR(offset) / OVECTOR(offset + 1), and offset >> 1 recovers the
bracket number. */
11436*22dc650dSSadaf Ebrahimi offset <<= 1;
11437*22dc650dSSadaf Ebrahimi ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
11438*22dc650dSSadaf Ebrahimi break;
11439*22dc650dSSadaf Ebrahimi
11440*22dc650dSSadaf Ebrahimi default:
11441*22dc650dSSadaf Ebrahimi SLJIT_UNREACHABLE();
11442*22dc650dSSadaf Ebrahimi break;
11443*22dc650dSSadaf Ebrahimi }
11444*22dc650dSSadaf Ebrahimi
11445*22dc650dSSadaf Ebrahimi framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
11446*22dc650dSSadaf Ebrahimi BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
/* Negative framesize: init_frame is not called. The stack slots are, in
order: either the two OVECTOR values (plus the capture_last value when
capture_last_ptr is in use) or the current STR_PTR; then the control head
when needed; then the flag slot when zero is FALSE. The flag is therefore
always at STACK(stacksize - 1). */
11447*22dc650dSSadaf Ebrahimi if (framesize < 0)
11448*22dc650dSSadaf Ebrahimi {
11449*22dc650dSSadaf Ebrahimi if (offset != 0)
11450*22dc650dSSadaf Ebrahimi {
11451*22dc650dSSadaf Ebrahimi stacksize = 2;
11452*22dc650dSSadaf Ebrahimi if (common->capture_last_ptr != 0)
11453*22dc650dSSadaf Ebrahimi stacksize++;
11454*22dc650dSSadaf Ebrahimi }
11455*22dc650dSSadaf Ebrahimi else
11456*22dc650dSSadaf Ebrahimi stacksize = 1;
11457*22dc650dSSadaf Ebrahimi
11458*22dc650dSSadaf Ebrahimi if (needs_control_head)
11459*22dc650dSSadaf Ebrahimi stacksize++;
11460*22dc650dSSadaf Ebrahimi if (!zero)
11461*22dc650dSSadaf Ebrahimi stacksize++;
11462*22dc650dSSadaf Ebrahimi
11463*22dc650dSSadaf Ebrahimi BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
11464*22dc650dSSadaf Ebrahimi allocate_stack(common, stacksize);
/* With no_frame the current STACK_TOP is remembered in the private data
slot; it is reloaded from there after each alternative matches. */
11465*22dc650dSSadaf Ebrahimi if (framesize == no_frame)
11466*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
11467*22dc650dSSadaf Ebrahimi
11468*22dc650dSSadaf Ebrahimi stack = 0;
11469*22dc650dSSadaf Ebrahimi if (offset != 0)
11470*22dc650dSSadaf Ebrahimi {
/* Save the current capture pair (and capture_last, if used). The loads
into TMP1/TMP2 are interleaved with the stores, so their order matters. */
11471*22dc650dSSadaf Ebrahimi stack = 2;
11472*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
11473*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
11474*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
11475*22dc650dSSadaf Ebrahimi if (common->capture_last_ptr != 0)
11476*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
11477*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
11478*22dc650dSSadaf Ebrahimi if (needs_control_head)
11479*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11480*22dc650dSSadaf Ebrahimi if (common->capture_last_ptr != 0)
11481*22dc650dSSadaf Ebrahimi {
11482*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
11483*22dc650dSSadaf Ebrahimi stack = 3;
11484*22dc650dSSadaf Ebrahimi }
11485*22dc650dSSadaf Ebrahimi }
11486*22dc650dSSadaf Ebrahimi else
11487*22dc650dSSadaf Ebrahimi {
11488*22dc650dSSadaf Ebrahimi if (needs_control_head)
11489*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11490*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11491*22dc650dSSadaf Ebrahimi stack = 1;
11492*22dc650dSSadaf Ebrahimi }
11493*22dc650dSSadaf Ebrahimi
/* "stack" is bumped temporarily so that the flag lands after the control
head slot, then stepped back so that it ends up indexing the control head
slot itself (it is used that way inside the loop below). */
11494*22dc650dSSadaf Ebrahimi if (needs_control_head)
11495*22dc650dSSadaf Ebrahimi stack++;
11496*22dc650dSSadaf Ebrahimi if (!zero)
11497*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
11498*22dc650dSSadaf Ebrahimi if (needs_control_head)
11499*22dc650dSSadaf Ebrahimi {
11500*22dc650dSSadaf Ebrahimi stack--;
11501*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
11502*22dc650dSSadaf Ebrahimi }
11503*22dc650dSSadaf Ebrahimi }
11504*22dc650dSSadaf Ebrahimi else
11505*22dc650dSSadaf Ebrahimi {
/* Non-negative framesize: a frame is built with init_frame. The stack
slots are, in order: the flag when zero is FALSE; the control head when
needed; the current STR_PTR for the non-capturing forms; the previous
value of the private data slot; then the frame. */
11506*22dc650dSSadaf Ebrahimi stacksize = framesize + 1;
11507*22dc650dSSadaf Ebrahimi if (!zero)
11508*22dc650dSSadaf Ebrahimi stacksize++;
11509*22dc650dSSadaf Ebrahimi if (needs_control_head)
11510*22dc650dSSadaf Ebrahimi stacksize++;
11511*22dc650dSSadaf Ebrahimi if (offset == 0)
11512*22dc650dSSadaf Ebrahimi stacksize++;
11513*22dc650dSSadaf Ebrahimi BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
11514*22dc650dSSadaf Ebrahimi
11515*22dc650dSSadaf Ebrahimi allocate_stack(common, stacksize);
/* TMP1 keeps the old private data value; the slot itself is then set to
STACK_TOP + stacksize * sizeof(sljit_sw). The loop below recovers
STACK_TOP by subtracting the same amount. */
11516*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11517*22dc650dSSadaf Ebrahimi if (needs_control_head)
11518*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11519*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
11520*22dc650dSSadaf Ebrahimi
11521*22dc650dSSadaf Ebrahimi stack = 0;
11522*22dc650dSSadaf Ebrahimi if (!zero)
11523*22dc650dSSadaf Ebrahimi {
11524*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
11525*22dc650dSSadaf Ebrahimi stack = 1;
11526*22dc650dSSadaf Ebrahimi }
11527*22dc650dSSadaf Ebrahimi if (needs_control_head)
11528*22dc650dSSadaf Ebrahimi {
11529*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
11530*22dc650dSSadaf Ebrahimi stack++;
11531*22dc650dSSadaf Ebrahimi }
11532*22dc650dSSadaf Ebrahimi if (offset == 0)
11533*22dc650dSSadaf Ebrahimi {
11534*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
11535*22dc650dSSadaf Ebrahimi stack++;
11536*22dc650dSSadaf Ebrahimi }
11537*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
11538*22dc650dSSadaf Ebrahimi init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize);
/* Step "stack" back over the saved private data slot and, for the
non-capturing forms, the STR_PTR slot, so that it indexes the control
head slot when one was stored. */
11539*22dc650dSSadaf Ebrahimi stack -= 1 + (offset == 0);
11540*22dc650dSSadaf Ebrahimi }
11541*22dc650dSSadaf Ebrahimi
/* For capturing forms the private capture slot holds the start of the
current iteration. */
11542*22dc650dSSadaf Ebrahimi if (offset != 0)
11543*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
11544*22dc650dSSadaf Ebrahimi
/* One pass of this C loop per alternative. On entry cc points at the
bracket opcode, so the condition holds; the loop is left through the
"break" near its end once cc has reached OP_KETRPOS. */
11545*22dc650dSSadaf Ebrahimi loop = LABEL();
11546*22dc650dSSadaf Ebrahimi while (*cc != OP_KETRPOS)
11547*22dc650dSSadaf Ebrahimi {
11548*22dc650dSSadaf Ebrahimi backtrack->top = NULL;
11549*22dc650dSSadaf Ebrahimi backtrack->own_backtracks = NULL;
/* Advance cc to the end of the current alternative and compile the
alternative's body [ccbegin, cc). */
11550*22dc650dSSadaf Ebrahimi cc += GET(cc, 1);
11551*22dc650dSSadaf Ebrahimi
11552*22dc650dSSadaf Ebrahimi compile_matchingpath(common, ccbegin, cc, backtrack);
11553*22dc650dSSadaf Ebrahimi if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11554*22dc650dSSadaf Ebrahimi return NULL;
11555*22dc650dSSadaf Ebrahimi
/* Code emitted from here up to JUMPTO(loop) runs when the alternative has
matched. */
11556*22dc650dSSadaf Ebrahimi if (framesize < 0)
11557*22dc650dSSadaf Ebrahimi {
11558*22dc650dSSadaf Ebrahimi if (framesize == no_frame)
11559*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11560*22dc650dSSadaf Ebrahimi
11561*22dc650dSSadaf Ebrahimi if (offset != 0)
11562*22dc650dSSadaf Ebrahimi {
/* Record the capture: TMP1 = start of this iteration, OVECTOR(offset + 1)
= current position. The private slot is advanced to the current position
for the next iteration. */
11563*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11564*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
11565*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
11566*22dc650dSSadaf Ebrahimi if (common->capture_last_ptr != 0)
11567*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
11568*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
11569*22dc650dSSadaf Ebrahimi }
11570*22dc650dSSadaf Ebrahimi else
11571*22dc650dSSadaf Ebrahimi {
/* OP_SBRAPOS needs the previous position in TMP1 for the empty-match
comparison below; the saved position is then replaced by STR_PTR. */
11572*22dc650dSSadaf Ebrahimi if (opcode == OP_SBRAPOS)
11573*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
11574*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11575*22dc650dSSadaf Ebrahimi }
11576*22dc650dSSadaf Ebrahimi
11577*22dc650dSSadaf Ebrahimi /* Even if the match is empty, we need to reset the control head. */
11578*22dc650dSSadaf Ebrahimi if (needs_control_head)
11579*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
11580*22dc650dSSadaf Ebrahimi
/* For the S* opcodes an iteration that did not move STR_PTR leaves the
loop through the emptymatch list instead of jumping back to "loop". */
11581*22dc650dSSadaf Ebrahimi if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
11582*22dc650dSSadaf Ebrahimi add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
11583*22dc650dSSadaf Ebrahimi
/* At least one iteration has matched: clear the flag slot. */
11584*22dc650dSSadaf Ebrahimi if (!zero)
11585*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
11586*22dc650dSSadaf Ebrahimi }
11587*22dc650dSSadaf Ebrahimi else
11588*22dc650dSSadaf Ebrahimi {
/* Same steps for the framed layout; STACK_TOP is first recomputed from
the private data slot (slot value - stacksize * sizeof(sljit_sw)). */
11589*22dc650dSSadaf Ebrahimi if (offset != 0)
11590*22dc650dSSadaf Ebrahimi {
11591*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
11592*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11593*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
11594*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
11595*22dc650dSSadaf Ebrahimi if (common->capture_last_ptr != 0)
11596*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
11597*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
11598*22dc650dSSadaf Ebrahimi }
11599*22dc650dSSadaf Ebrahimi else
11600*22dc650dSSadaf Ebrahimi {
/* The saved STR_PTR is addressed relative to the private data value held
in TMP2, at STACK(-framesize - 2). */
11601*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11602*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
11603*22dc650dSSadaf Ebrahimi if (opcode == OP_SBRAPOS)
11604*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
11605*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
11606*22dc650dSSadaf Ebrahimi }
11607*22dc650dSSadaf Ebrahimi
11608*22dc650dSSadaf Ebrahimi /* Even if the match is empty, we need to reset the control head. */
11609*22dc650dSSadaf Ebrahimi if (needs_control_head)
11610*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
11611*22dc650dSSadaf Ebrahimi
11612*22dc650dSSadaf Ebrahimi if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
11613*22dc650dSSadaf Ebrahimi add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
11614*22dc650dSSadaf Ebrahimi
11615*22dc650dSSadaf Ebrahimi if (!zero)
11616*22dc650dSSadaf Ebrahimi {
/* NOTE(review): this is the framesize >= 0 branch and framesize is not
modified in between, so the "framesize < 0" arm below cannot be taken;
only the STACK(0) store is ever emitted here. */
11617*22dc650dSSadaf Ebrahimi if (framesize < 0)
11618*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
11619*22dc650dSSadaf Ebrahimi else
11620*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
11621*22dc650dSSadaf Ebrahimi }
11622*22dc650dSSadaf Ebrahimi }
11623*22dc650dSSadaf Ebrahimi
/* Successful iteration: try the whole group again. */
11624*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_JUMP, loop);
11625*22dc650dSSadaf Ebrahimi flush_stubs(common);
11626*22dc650dSSadaf Ebrahimi
/* Emit the backtracking code of this alternative; its own_backtracks
jumps are bound to the label that follows, i.e. the code below runs when
the alternative failed. */
11627*22dc650dSSadaf Ebrahimi compile_backtrackingpath(common, backtrack->top);
11628*22dc650dSSadaf Ebrahimi if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11629*22dc650dSSadaf Ebrahimi return NULL;
11630*22dc650dSSadaf Ebrahimi set_jumps(backtrack->own_backtracks, LABEL());
11631*22dc650dSSadaf Ebrahimi
/* Restore STR_PTR to the position saved at the start of this iteration. */
11632*22dc650dSSadaf Ebrahimi if (framesize < 0)
11633*22dc650dSSadaf Ebrahimi {
11634*22dc650dSSadaf Ebrahimi if (offset != 0)
11635*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11636*22dc650dSSadaf Ebrahimi else
11637*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
11638*22dc650dSSadaf Ebrahimi }
11639*22dc650dSSadaf Ebrahimi else
11640*22dc650dSSadaf Ebrahimi {
11641*22dc650dSSadaf Ebrahimi if (offset != 0)
11642*22dc650dSSadaf Ebrahimi {
/* TMP2 is loaded only after the last alternative because it is needed by
the flag test emitted after the loop. */
11643*22dc650dSSadaf Ebrahimi /* Last alternative. */
11644*22dc650dSSadaf Ebrahimi if (*cc == OP_KETRPOS)
11645*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11646*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11647*22dc650dSSadaf Ebrahimi }
11648*22dc650dSSadaf Ebrahimi else
11649*22dc650dSSadaf Ebrahimi {
11650*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11651*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
11652*22dc650dSSadaf Ebrahimi }
11653*22dc650dSSadaf Ebrahimi }
11654*22dc650dSSadaf Ebrahimi
/* Stop after the last alternative; otherwise the next alternative's body
starts after the OP_ALT opcode and its link. */
11655*22dc650dSSadaf Ebrahimi if (*cc == OP_KETRPOS)
11656*22dc650dSSadaf Ebrahimi break;
11657*22dc650dSSadaf Ebrahimi ccbegin = cc + 1 + LINK_SIZE;
11658*22dc650dSSadaf Ebrahimi }
11659*22dc650dSSadaf Ebrahimi
11660*22dc650dSSadaf Ebrahimi /* We don't have to restore the control head in case of a failed match. */
11661*22dc650dSSadaf Ebrahimi
/* When zero is FALSE, a flag that is still non-zero means no iteration
ever matched: add a jump to own_backtracks in that case. */
11662*22dc650dSSadaf Ebrahimi backtrack->own_backtracks = NULL;
11663*22dc650dSSadaf Ebrahimi if (!zero)
11664*22dc650dSSadaf Ebrahimi {
11665*22dc650dSSadaf Ebrahimi if (framesize < 0)
11666*22dc650dSSadaf Ebrahimi add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
11667*22dc650dSSadaf Ebrahimi else /* TMP2 is set to [private_data_ptr] above. */
11668*22dc650dSSadaf Ebrahimi add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
11669*22dc650dSSadaf Ebrahimi }
11670*22dc650dSSadaf Ebrahimi
/* The empty-iteration exits collected in emptymatch also land here. */
11671*22dc650dSSadaf Ebrahimi /* None of them matched. */
11672*22dc650dSSadaf Ebrahimi set_jumps(emptymatch, LABEL());
11673*22dc650dSSadaf Ebrahimi count_match(common);
11674*22dc650dSSadaf Ebrahimi return cc + 1 + LINK_SIZE;
11675*22dc650dSSadaf Ebrahimi }
11676*22dc650dSSadaf Ebrahimi
get_iterator_parameters(compiler_common * common,PCRE2_SPTR cc,PCRE2_UCHAR * opcode,PCRE2_UCHAR * type,sljit_u32 * max,sljit_u32 * exact,PCRE2_SPTR * end)11677*22dc650dSSadaf Ebrahimi static SLJIT_INLINE PCRE2_SPTR get_iterator_parameters(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *opcode, PCRE2_UCHAR *type, sljit_u32 *max, sljit_u32 *exact, PCRE2_SPTR *end)
11678*22dc650dSSadaf Ebrahimi {
11679*22dc650dSSadaf Ebrahimi int class_len;
11680*22dc650dSSadaf Ebrahimi
11681*22dc650dSSadaf Ebrahimi *opcode = *cc;
11682*22dc650dSSadaf Ebrahimi *exact = 0;
11683*22dc650dSSadaf Ebrahimi
11684*22dc650dSSadaf Ebrahimi if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
11685*22dc650dSSadaf Ebrahimi {
11686*22dc650dSSadaf Ebrahimi cc++;
11687*22dc650dSSadaf Ebrahimi *type = OP_CHAR;
11688*22dc650dSSadaf Ebrahimi }
11689*22dc650dSSadaf Ebrahimi else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
11690*22dc650dSSadaf Ebrahimi {
11691*22dc650dSSadaf Ebrahimi cc++;
11692*22dc650dSSadaf Ebrahimi *type = OP_CHARI;
11693*22dc650dSSadaf Ebrahimi *opcode -= OP_STARI - OP_STAR;
11694*22dc650dSSadaf Ebrahimi }
11695*22dc650dSSadaf Ebrahimi else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
11696*22dc650dSSadaf Ebrahimi {
11697*22dc650dSSadaf Ebrahimi cc++;
11698*22dc650dSSadaf Ebrahimi *type = OP_NOT;
11699*22dc650dSSadaf Ebrahimi *opcode -= OP_NOTSTAR - OP_STAR;
11700*22dc650dSSadaf Ebrahimi }
11701*22dc650dSSadaf Ebrahimi else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
11702*22dc650dSSadaf Ebrahimi {
11703*22dc650dSSadaf Ebrahimi cc++;
11704*22dc650dSSadaf Ebrahimi *type = OP_NOTI;
11705*22dc650dSSadaf Ebrahimi *opcode -= OP_NOTSTARI - OP_STAR;
11706*22dc650dSSadaf Ebrahimi }
11707*22dc650dSSadaf Ebrahimi else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
11708*22dc650dSSadaf Ebrahimi {
11709*22dc650dSSadaf Ebrahimi cc++;
11710*22dc650dSSadaf Ebrahimi *opcode -= OP_TYPESTAR - OP_STAR;
11711*22dc650dSSadaf Ebrahimi *type = OP_END;
11712*22dc650dSSadaf Ebrahimi }
11713*22dc650dSSadaf Ebrahimi else
11714*22dc650dSSadaf Ebrahimi {
11715*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
11716*22dc650dSSadaf Ebrahimi *type = *opcode;
11717*22dc650dSSadaf Ebrahimi cc++;
11718*22dc650dSSadaf Ebrahimi class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(PCRE2_UCHAR))) : GET(cc, 0);
11719*22dc650dSSadaf Ebrahimi *opcode = cc[class_len - 1];
11720*22dc650dSSadaf Ebrahimi
11721*22dc650dSSadaf Ebrahimi if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
11722*22dc650dSSadaf Ebrahimi {
11723*22dc650dSSadaf Ebrahimi *opcode -= OP_CRSTAR - OP_STAR;
11724*22dc650dSSadaf Ebrahimi *end = cc + class_len;
11725*22dc650dSSadaf Ebrahimi
11726*22dc650dSSadaf Ebrahimi if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
11727*22dc650dSSadaf Ebrahimi {
11728*22dc650dSSadaf Ebrahimi *exact = 1;
11729*22dc650dSSadaf Ebrahimi *opcode -= OP_PLUS - OP_STAR;
11730*22dc650dSSadaf Ebrahimi }
11731*22dc650dSSadaf Ebrahimi }
11732*22dc650dSSadaf Ebrahimi else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
11733*22dc650dSSadaf Ebrahimi {
11734*22dc650dSSadaf Ebrahimi *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
11735*22dc650dSSadaf Ebrahimi *end = cc + class_len;
11736*22dc650dSSadaf Ebrahimi
11737*22dc650dSSadaf Ebrahimi if (*opcode == OP_POSPLUS)
11738*22dc650dSSadaf Ebrahimi {
11739*22dc650dSSadaf Ebrahimi *exact = 1;
11740*22dc650dSSadaf Ebrahimi *opcode = OP_POSSTAR;
11741*22dc650dSSadaf Ebrahimi }
11742*22dc650dSSadaf Ebrahimi }
11743*22dc650dSSadaf Ebrahimi else
11744*22dc650dSSadaf Ebrahimi {
11745*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
11746*22dc650dSSadaf Ebrahimi *max = GET2(cc, (class_len + IMM2_SIZE));
11747*22dc650dSSadaf Ebrahimi *exact = GET2(cc, class_len);
11748*22dc650dSSadaf Ebrahimi
11749*22dc650dSSadaf Ebrahimi if (*max == 0)
11750*22dc650dSSadaf Ebrahimi {
11751*22dc650dSSadaf Ebrahimi if (*opcode == OP_CRPOSRANGE)
11752*22dc650dSSadaf Ebrahimi *opcode = OP_POSSTAR;
11753*22dc650dSSadaf Ebrahimi else
11754*22dc650dSSadaf Ebrahimi *opcode -= OP_CRRANGE - OP_STAR;
11755*22dc650dSSadaf Ebrahimi }
11756*22dc650dSSadaf Ebrahimi else
11757*22dc650dSSadaf Ebrahimi {
11758*22dc650dSSadaf Ebrahimi *max -= *exact;
11759*22dc650dSSadaf Ebrahimi if (*max == 0)
11760*22dc650dSSadaf Ebrahimi *opcode = OP_EXACT;
11761*22dc650dSSadaf Ebrahimi else if (*max == 1)
11762*22dc650dSSadaf Ebrahimi {
11763*22dc650dSSadaf Ebrahimi if (*opcode == OP_CRPOSRANGE)
11764*22dc650dSSadaf Ebrahimi *opcode = OP_POSQUERY;
11765*22dc650dSSadaf Ebrahimi else
11766*22dc650dSSadaf Ebrahimi *opcode -= OP_CRRANGE - OP_QUERY;
11767*22dc650dSSadaf Ebrahimi }
11768*22dc650dSSadaf Ebrahimi else
11769*22dc650dSSadaf Ebrahimi {
11770*22dc650dSSadaf Ebrahimi if (*opcode == OP_CRPOSRANGE)
11771*22dc650dSSadaf Ebrahimi *opcode = OP_POSUPTO;
11772*22dc650dSSadaf Ebrahimi else
11773*22dc650dSSadaf Ebrahimi *opcode -= OP_CRRANGE - OP_UPTO;
11774*22dc650dSSadaf Ebrahimi }
11775*22dc650dSSadaf Ebrahimi }
11776*22dc650dSSadaf Ebrahimi *end = cc + class_len + 2 * IMM2_SIZE;
11777*22dc650dSSadaf Ebrahimi }
11778*22dc650dSSadaf Ebrahimi return cc;
11779*22dc650dSSadaf Ebrahimi }
11780*22dc650dSSadaf Ebrahimi
11781*22dc650dSSadaf Ebrahimi switch(*opcode)
11782*22dc650dSSadaf Ebrahimi {
11783*22dc650dSSadaf Ebrahimi case OP_EXACT:
11784*22dc650dSSadaf Ebrahimi *exact = GET2(cc, 0);
11785*22dc650dSSadaf Ebrahimi cc += IMM2_SIZE;
11786*22dc650dSSadaf Ebrahimi break;
11787*22dc650dSSadaf Ebrahimi
11788*22dc650dSSadaf Ebrahimi case OP_PLUS:
11789*22dc650dSSadaf Ebrahimi case OP_MINPLUS:
11790*22dc650dSSadaf Ebrahimi *exact = 1;
11791*22dc650dSSadaf Ebrahimi *opcode -= OP_PLUS - OP_STAR;
11792*22dc650dSSadaf Ebrahimi break;
11793*22dc650dSSadaf Ebrahimi
11794*22dc650dSSadaf Ebrahimi case OP_POSPLUS:
11795*22dc650dSSadaf Ebrahimi *exact = 1;
11796*22dc650dSSadaf Ebrahimi *opcode = OP_POSSTAR;
11797*22dc650dSSadaf Ebrahimi break;
11798*22dc650dSSadaf Ebrahimi
11799*22dc650dSSadaf Ebrahimi case OP_UPTO:
11800*22dc650dSSadaf Ebrahimi case OP_MINUPTO:
11801*22dc650dSSadaf Ebrahimi case OP_POSUPTO:
11802*22dc650dSSadaf Ebrahimi *max = GET2(cc, 0);
11803*22dc650dSSadaf Ebrahimi cc += IMM2_SIZE;
11804*22dc650dSSadaf Ebrahimi break;
11805*22dc650dSSadaf Ebrahimi }
11806*22dc650dSSadaf Ebrahimi
11807*22dc650dSSadaf Ebrahimi if (*type == OP_END)
11808*22dc650dSSadaf Ebrahimi {
11809*22dc650dSSadaf Ebrahimi *type = *cc;
11810*22dc650dSSadaf Ebrahimi *end = next_opcode(common, cc);
11811*22dc650dSSadaf Ebrahimi cc++;
11812*22dc650dSSadaf Ebrahimi return cc;
11813*22dc650dSSadaf Ebrahimi }
11814*22dc650dSSadaf Ebrahimi
11815*22dc650dSSadaf Ebrahimi *end = cc + 1;
11816*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
11817*22dc650dSSadaf Ebrahimi if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
11818*22dc650dSSadaf Ebrahimi #endif
11819*22dc650dSSadaf Ebrahimi return cc;
11820*22dc650dSSadaf Ebrahimi }
11821*22dc650dSSadaf Ebrahimi
/* Emits the matching-path code for a single-item iterator (a repeated
   character, character type or character class, as decoded by
   get_iterator_parameters()).

   Visible flow:
     1. a char_iterator_backtrack record is pushed for this opcode;
     2. the early-fail information packed into PRIVATE_DATA(cc + 1) is unpacked;
     3. get_iterator_parameters() normalises the iterator into
        opcode / type / max / exact / end;
     4. the mandatory `exact` repetitions are emitted;
     5. a switch on the normalised opcode emits the optional (greedy, lazy or
        possessive) part.

   Arguments:
     common   compiler state
     cc       points at the iterator opcode
     parent   parent backtrack record; not referenced by name in this body,
              presumably consumed by the PUSH_BACKTRACK macro (TODO confirm)

   Returns:   `end` as produced by get_iterator_parameters(), advanced by two
              code units when the following OP_CHAR / OP_CHARI has been
              absorbed by the "charpos" optimisation below. */
compile_iterator_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)11822*22dc650dSSadaf Ebrahimi static PCRE2_SPTR compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11823*22dc650dSSadaf Ebrahimi {
/* NOTE(review): `compiler` is used below but not declared explicitly here;
   presumably DEFINE_COMPILER introduces it - confirm against the macro. */
11824*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
11825*22dc650dSSadaf Ebrahimi backtrack_common *backtrack;
11826*22dc650dSSadaf Ebrahimi PCRE2_UCHAR opcode;
11827*22dc650dSSadaf Ebrahimi PCRE2_UCHAR type;
/* NOTE(review): `exact` has no initialiser here; it is expected to be written
   by get_iterator_parameters() before first use - confirm (the start of that
   function is outside this view). */
11828*22dc650dSSadaf Ebrahimi sljit_u32 max = 0, exact;
11829*22dc650dSSadaf Ebrahimi sljit_s32 early_fail_ptr = PRIVATE_DATA(cc + 1);
11830*22dc650dSSadaf Ebrahimi sljit_s32 early_fail_type;
11831*22dc650dSSadaf Ebrahimi BOOL charpos_enabled;
11832*22dc650dSSadaf Ebrahimi PCRE2_UCHAR charpos_char;
11833*22dc650dSSadaf Ebrahimi unsigned int charpos_othercasebit;
11834*22dc650dSSadaf Ebrahimi PCRE2_SPTR end;
11835*22dc650dSSadaf Ebrahimi jump_list *no_match = NULL;
11836*22dc650dSSadaf Ebrahimi jump_list *no_char1_match = NULL;
11837*22dc650dSSadaf Ebrahimi struct sljit_jump *jump = NULL;
11838*22dc650dSSadaf Ebrahimi struct sljit_label *label;
/* The iterator keeps up to two words of state. Without a private data slot
   (private_data_ptr == 0) they are addressed as STACK(0) / STACK(1) relative
   to STACK_TOP; otherwise they are addressed relative to SLJIT_SP at
   private_data_ptr and private_data_ptr + SSIZE_OF(sw). */
11839*22dc650dSSadaf Ebrahimi int private_data_ptr = PRIVATE_DATA(cc);
11840*22dc650dSSadaf Ebrahimi int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
11841*22dc650dSSadaf Ebrahimi int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
11842*22dc650dSSadaf Ebrahimi int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + SSIZE_OF(sw);
11843*22dc650dSSadaf Ebrahimi int tmp_base, tmp_offset;
11844*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11845*22dc650dSSadaf Ebrahimi BOOL use_tmp;
11846*22dc650dSSadaf Ebrahimi #endif
11847*22dc650dSSadaf Ebrahimi
/* NOTE(review): `backtrack` is never assigned explicitly in this function;
   presumably PUSH_BACKTRACK sets it (and links it to `parent`) - confirm. */
11848*22dc650dSSadaf Ebrahimi PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL);
11849*22dc650dSSadaf Ebrahimi
/* The value read from PRIVATE_DATA(cc + 1) packs the early-fail type in its
   low three bits and the early-fail slot offset in the remaining bits. */
11850*22dc650dSSadaf Ebrahimi early_fail_type = (early_fail_ptr & 0x7);
11851*22dc650dSSadaf Ebrahimi early_fail_ptr >>= 3;
11852*22dc650dSSadaf Ebrahimi
11853*22dc650dSSadaf Ebrahimi /* During recursion, these optimizations are disabled. */
11854*22dc650dSSadaf Ebrahimi if (common->early_fail_start_ptr == 0 && common->fast_forward_bc_ptr == NULL)
11855*22dc650dSSadaf Ebrahimi {
11856*22dc650dSSadaf Ebrahimi early_fail_ptr = 0;
11857*22dc650dSSadaf Ebrahimi early_fail_type = type_skip;
11858*22dc650dSSadaf Ebrahimi }
11859*22dc650dSSadaf Ebrahimi
11860*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || early_fail_ptr == 0
11861*22dc650dSSadaf Ebrahimi || (early_fail_ptr >= common->early_fail_start_ptr && early_fail_ptr <= common->early_fail_end_ptr));
11862*22dc650dSSadaf Ebrahimi
/* type_fail: take the backtrack path at once when STR_PTR is less than or
   equal to the position stored in the early-fail slot. */
11863*22dc650dSSadaf Ebrahimi if (early_fail_type == type_fail)
11864*22dc650dSSadaf Ebrahimi add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr));
11865*22dc650dSSadaf Ebrahimi
/* From here on `cc` is whatever get_iterator_parameters() returned, not the
   iterator opcode itself; `end` is the address of the code that follows the
   iterator. */
11866*22dc650dSSadaf Ebrahimi cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
11867*22dc650dSSadaf Ebrahimi
/* tmp_base / tmp_offset name the scratch location used for loop counters and
   saved positions: register TMP3 normally, the POSSESSIVE0 slot on SLJIT_SP
   for OP_EXTUNI.
   NOTE(review): presumably the OP_EXTUNI matcher does not preserve TMP3 -
   confirm. */
11868*22dc650dSSadaf Ebrahimi if (type != OP_EXTUNI)
11869*22dc650dSSadaf Ebrahimi {
11870*22dc650dSSadaf Ebrahimi tmp_base = TMP3;
11871*22dc650dSSadaf Ebrahimi tmp_offset = 0;
11872*22dc650dSSadaf Ebrahimi }
11873*22dc650dSSadaf Ebrahimi else
11874*22dc650dSSadaf Ebrahimi {
11875*22dc650dSSadaf Ebrahimi tmp_base = SLJIT_MEM1(SLJIT_SP);
11876*22dc650dSSadaf Ebrahimi tmp_offset = POSSESSIVE0;
11877*22dc650dSSadaf Ebrahimi }
11878*22dc650dSSadaf Ebrahimi
11879*22dc650dSSadaf Ebrahimi /* Handle fixed part first. */
11880*22dc650dSSadaf Ebrahimi if (exact > 1)
11881*22dc650dSSadaf Ebrahimi {
11882*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(early_fail_ptr == 0);
11883*22dc650dSSadaf Ebrahimi
/* Complete-mode, non-UTF, fixed-width items: test once that `exact` code
   units remain before STR_END, then run a counted loop.
   NOTE(review): the final BOOL argument of compile_char1_matchingpath()
   presumably selects whether that helper performs its own end-of-subject
   check (FALSE here, TRUE in the general loop below) - confirm. */
11884*22dc650dSSadaf Ebrahimi if (common->mode == PCRE2_JIT_COMPLETE
11885*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
11886*22dc650dSSadaf Ebrahimi && !common->utf
11887*22dc650dSSadaf Ebrahimi #endif
11888*22dc650dSSadaf Ebrahimi && type != OP_ANYNL && type != OP_EXTUNI)
11889*22dc650dSSadaf Ebrahimi {
11890*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
11891*22dc650dSSadaf Ebrahimi add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
11892*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
11893*22dc650dSSadaf Ebrahimi label = LABEL();
11894*22dc650dSSadaf Ebrahimi compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, FALSE);
11895*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11896*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_NOT_ZERO, label);
11897*22dc650dSSadaf Ebrahimi }
11898*22dc650dSSadaf Ebrahimi else
11899*22dc650dSSadaf Ebrahimi {
/* General counted loop: the scratch counter starts at `exact` and the loop
   repeats until it reaches zero. */
11900*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
11901*22dc650dSSadaf Ebrahimi label = LABEL();
11902*22dc650dSSadaf Ebrahimi compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);
11903*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11904*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_NOT_ZERO, label);
11905*22dc650dSSadaf Ebrahimi }
11906*22dc650dSSadaf Ebrahimi }
11907*22dc650dSSadaf Ebrahimi else if (exact == 1)
11908*22dc650dSSadaf Ebrahimi compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);
11909*22dc650dSSadaf Ebrahimi
/* type_fail_range: two consecutive words at early_fail_ptr are read. The
   backtrack path is taken when (STR_PTR - second word) is less than or equal
   to (first word - second word); otherwise both words are reset to STR_PTR. */
11910*22dc650dSSadaf Ebrahimi if (early_fail_type == type_fail_range)
11911*22dc650dSSadaf Ebrahimi {
11912*22dc650dSSadaf Ebrahimi /* Range end first, followed by range start. */
11913*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr);
11914*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw));
11915*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0);
11916*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0);
11917*22dc650dSSadaf Ebrahimi add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0));
11918*22dc650dSSadaf Ebrahimi
11919*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11920*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw), STR_PTR, 0);
11921*22dc650dSSadaf Ebrahimi }
11922*22dc650dSSadaf Ebrahimi
/* Optional part. `opcode` is the normalised opcode written by
   get_iterator_parameters() (the visible part of that function rewrites the
   PLUS forms to STAR forms with exact = 1). */
11923*22dc650dSSadaf Ebrahimi switch(opcode)
11924*22dc650dSSadaf Ebrahimi {
/* Greedy forms. */
11925*22dc650dSSadaf Ebrahimi case OP_STAR:
11926*22dc650dSSadaf Ebrahimi case OP_UPTO:
11927*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(early_fail_ptr == 0 || opcode == OP_STAR);
11928*22dc650dSSadaf Ebrahimi
/* OP_ANYNL / OP_EXTUNI: every position reached is pushed individually.
   Two words are allocated first (the starting STR_PTR and a zero), then each
   successful match allocates one more word holding the new STR_PTR. For
   OP_UPTO the remaining count lives in the POSSESSIVE0 slot. A failing item
   jumps through the u.backtracks list of the backtrack record. */
11929*22dc650dSSadaf Ebrahimi if (type == OP_ANYNL || type == OP_EXTUNI)
11930*22dc650dSSadaf Ebrahimi {
11931*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(private_data_ptr == 0);
11932*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(early_fail_ptr == 0);
11933*22dc650dSSadaf Ebrahimi
11934*22dc650dSSadaf Ebrahimi allocate_stack(common, 2);
11935*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11936*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
11937*22dc650dSSadaf Ebrahimi
11938*22dc650dSSadaf Ebrahimi if (opcode == OP_UPTO)
11939*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max);
11940*22dc650dSSadaf Ebrahimi
11941*22dc650dSSadaf Ebrahimi label = LABEL();
11942*22dc650dSSadaf Ebrahimi compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
11943*22dc650dSSadaf Ebrahimi if (opcode == OP_UPTO)
11944*22dc650dSSadaf Ebrahimi {
/* Decrement the remaining count; when it reaches zero leave the loop without
   pushing the final position (`jump` is resolved after the loop). */
11945*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
11946*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
11947*22dc650dSSadaf Ebrahimi jump = JUMP(SLJIT_ZERO);
11948*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
11949*22dc650dSSadaf Ebrahimi }
11950*22dc650dSSadaf Ebrahimi
11951*22dc650dSSadaf Ebrahimi /* We cannot use TMP3 because of allocate_stack. */
11952*22dc650dSSadaf Ebrahimi allocate_stack(common, 1);
11953*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11954*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_JUMP, label);
11955*22dc650dSSadaf Ebrahimi if (jump != NULL)
11956*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
11957*22dc650dSSadaf Ebrahimi BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11958*22dc650dSSadaf Ebrahimi break;
11959*22dc650dSSadaf Ebrahimi }
/* OP_ALLANY fast paths (skipped when common->invalid_utf is set in Unicode
   builds): no per-character loop is emitted. */
11960*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
11961*22dc650dSSadaf Ebrahimi else if (type == OP_ALLANY && !common->invalid_utf)
11962*22dc650dSSadaf Ebrahimi #else
11963*22dc650dSSadaf Ebrahimi else if (type == OP_ALLANY)
11964*22dc650dSSadaf Ebrahimi #endif
11965*22dc650dSSadaf Ebrahimi {
11966*22dc650dSSadaf Ebrahimi if (opcode == OP_STAR)
11967*22dc650dSSadaf Ebrahimi {
11968*22dc650dSSadaf Ebrahimi if (private_data_ptr == 0)
11969*22dc650dSSadaf Ebrahimi allocate_stack(common, 2);
11970*22dc650dSSadaf Ebrahimi
/* State word 0 receives STR_END, state word 1 the starting STR_PTR; the
   subject pointer then moves straight to STR_END. */
11971*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, base, offset0, STR_END, 0);
11972*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11973*22dc650dSSadaf Ebrahimi
11974*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
11975*22dc650dSSadaf Ebrahimi process_partial_match(common);
11976*22dc650dSSadaf Ebrahimi
11977*22dc650dSSadaf Ebrahimi if (early_fail_ptr != 0)
11978*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
11979*22dc650dSSadaf Ebrahimi BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11980*22dc650dSSadaf Ebrahimi break;
11981*22dc650dSSadaf Ebrahimi }
/* OP_UPTO with OP_ALLANY, non-UTF only: advance by `max` code units in one
   step. When UTF is enabled this branch is not taken and control falls
   through to the generic code after this if-chain. */
11982*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
11983*22dc650dSSadaf Ebrahimi else if (!common->utf)
11984*22dc650dSSadaf Ebrahimi #else
11985*22dc650dSSadaf Ebrahimi else
11986*22dc650dSSadaf Ebrahimi #endif
11987*22dc650dSSadaf Ebrahimi {
11988*22dc650dSSadaf Ebrahimi if (private_data_ptr == 0)
11989*22dc650dSSadaf Ebrahimi allocate_stack(common, 2);
11990*22dc650dSSadaf Ebrahimi
11991*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11992*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
11993*22dc650dSSadaf Ebrahimi
/* Complete mode: clamp STR_PTR to STR_END with a conditional select.
   Other modes: process_partial_match() is only reached when the advanced
   STR_PTR is greater than STR_END.
   NOTE(review): process_partial_match() presumably also brings STR_PTR back
   to STR_END - confirm against its definition. */
11994*22dc650dSSadaf Ebrahimi if (common->mode == PCRE2_JIT_COMPLETE)
11995*22dc650dSSadaf Ebrahimi {
11996*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
11997*22dc650dSSadaf Ebrahimi SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
11998*22dc650dSSadaf Ebrahimi }
11999*22dc650dSSadaf Ebrahimi else
12000*22dc650dSSadaf Ebrahimi {
12001*22dc650dSSadaf Ebrahimi jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
12002*22dc650dSSadaf Ebrahimi process_partial_match(common);
12003*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
12004*22dc650dSSadaf Ebrahimi }
12005*22dc650dSSadaf Ebrahimi
12006*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12007*22dc650dSSadaf Ebrahimi
12008*22dc650dSSadaf Ebrahimi if (early_fail_ptr != 0)
12009*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
12010*22dc650dSSadaf Ebrahimi BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
12011*22dc650dSSadaf Ebrahimi break;
12012*22dc650dSSadaf Ebrahimi }
12013*22dc650dSSadaf Ebrahimi }
12014*22dc650dSSadaf Ebrahimi
/* "charpos" optimisation: when the iterated item is not itself OP_CHAR /
   OP_CHARI and the iterator is directly followed by OP_CHAR / OP_CHARI, that
   following character is absorbed here (end is advanced past it) and only
   positions where it occurs are recorded. */
12015*22dc650dSSadaf Ebrahimi charpos_enabled = FALSE;
12016*22dc650dSSadaf Ebrahimi charpos_char = 0;
12017*22dc650dSSadaf Ebrahimi charpos_othercasebit = 0;
12018*22dc650dSSadaf Ebrahimi
12019*22dc650dSSadaf Ebrahimi if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
12020*22dc650dSSadaf Ebrahimi {
/* In UTF mode the optimisation is only used when HAS_EXTRALEN(end[1]) is
   false (presumably: the following character is a single code unit). */
12021*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
12022*22dc650dSSadaf Ebrahimi charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
12023*22dc650dSSadaf Ebrahimi #else
12024*22dc650dSSadaf Ebrahimi charpos_enabled = TRUE;
12025*22dc650dSSadaf Ebrahimi #endif
/* Caseless following character: the optimisation is kept only when
   char_get_othercase_bit() returns a non-zero value. */
12026*22dc650dSSadaf Ebrahimi if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
12027*22dc650dSSadaf Ebrahimi {
12028*22dc650dSSadaf Ebrahimi charpos_othercasebit = char_get_othercase_bit(common, end + 1);
12029*22dc650dSSadaf Ebrahimi if (charpos_othercasebit == 0)
12030*22dc650dSSadaf Ebrahimi charpos_enabled = FALSE;
12031*22dc650dSSadaf Ebrahimi }
12032*22dc650dSSadaf Ebrahimi
12033*22dc650dSSadaf Ebrahimi if (charpos_enabled)
12034*22dc650dSSadaf Ebrahimi {
12035*22dc650dSSadaf Ebrahimi charpos_char = end[1];
12036*22dc650dSSadaf Ebrahimi /* Consume the OP_CHAR opcode. */
12037*22dc650dSSadaf Ebrahimi end += 2;
/* For 16/32-bit code units, a set 0x100 flag in the returned value means the
   low byte is moved up by eight bits to form the actual mask. */
12038*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8
12039*22dc650dSSadaf Ebrahimi SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
12040*22dc650dSSadaf Ebrahimi #elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
12041*22dc650dSSadaf Ebrahimi SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
12042*22dc650dSSadaf Ebrahimi if ((charpos_othercasebit & 0x100) != 0)
12043*22dc650dSSadaf Ebrahimi charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
12044*22dc650dSSadaf Ebrahimi #endif
/* The stored character has the case bit forced on; the emitted code ORs the
   same bit into each subject character before comparing. */
12045*22dc650dSSadaf Ebrahimi if (charpos_othercasebit != 0)
12046*22dc650dSSadaf Ebrahimi charpos_char |= charpos_othercasebit;
12047*22dc650dSSadaf Ebrahimi
12048*22dc650dSSadaf Ebrahimi BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE;
12049*22dc650dSSadaf Ebrahimi BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char;
12050*22dc650dSSadaf Ebrahimi BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit;
12051*22dc650dSSadaf Ebrahimi }
12052*22dc650dSSadaf Ebrahimi }
12053*22dc650dSSadaf Ebrahimi
12054*22dc650dSSadaf Ebrahimi if (charpos_enabled)
12055*22dc650dSSadaf Ebrahimi {
/* The counter starts at max + 1 because it is decremented once per loop
   iteration below and once more after the first occurrence is found. */
12056*22dc650dSSadaf Ebrahimi if (opcode == OP_UPTO)
12057*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1);
12058*22dc650dSSadaf Ebrahimi
12059*22dc650dSSadaf Ebrahimi /* Search the first instance of charpos_char. */
/* The initial jump enters the loop at the comparison, so the current
   position is tested before any item is consumed. Any failure in this first
   loop goes to the record's own backtrack list. */
12060*22dc650dSSadaf Ebrahimi jump = JUMP(SLJIT_JUMP);
12061*22dc650dSSadaf Ebrahimi label = LABEL();
12062*22dc650dSSadaf Ebrahimi if (opcode == OP_UPTO)
12063*22dc650dSSadaf Ebrahimi {
12064*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
12065*22dc650dSSadaf Ebrahimi add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_ZERO));
12066*22dc650dSSadaf Ebrahimi }
12067*22dc650dSSadaf Ebrahimi compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, FALSE);
12068*22dc650dSSadaf Ebrahimi if (early_fail_ptr != 0)
12069*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
12070*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
12071*22dc650dSSadaf Ebrahimi
12072*22dc650dSSadaf Ebrahimi detect_partial_match(common, &backtrack->own_backtracks);
12073*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
12074*22dc650dSSadaf Ebrahimi if (charpos_othercasebit != 0)
12075*22dc650dSSadaf Ebrahimi OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
12076*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
12077*22dc650dSSadaf Ebrahimi
/* First occurrence found: both state words start out as this position.
   Word 0 is updated below with each later occurrence; word 1 is not written
   again in this function. */
12078*22dc650dSSadaf Ebrahimi if (private_data_ptr == 0)
12079*22dc650dSSadaf Ebrahimi allocate_stack(common, 2);
12080*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12081*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
12082*22dc650dSSadaf Ebrahimi
12083*22dc650dSSadaf Ebrahimi if (opcode == OP_UPTO)
12084*22dc650dSSadaf Ebrahimi {
12085*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
12086*22dc650dSSadaf Ebrahimi add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
12087*22dc650dSSadaf Ebrahimi }
12088*22dc650dSSadaf Ebrahimi
12089*22dc650dSSadaf Ebrahimi /* Search the last instance of charpos_char. */
/* From here failures are collected in the local no_match list instead of the
   backtrack list, since at least one usable position has been recorded. */
12090*22dc650dSSadaf Ebrahimi label = LABEL();
12091*22dc650dSSadaf Ebrahimi compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
12092*22dc650dSSadaf Ebrahimi if (early_fail_ptr != 0)
12093*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
12094*22dc650dSSadaf Ebrahimi detect_partial_match(common, &no_match);
12095*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
12096*22dc650dSSadaf Ebrahimi if (charpos_othercasebit != 0)
12097*22dc650dSSadaf Ebrahimi OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
12098*22dc650dSSadaf Ebrahimi
12099*22dc650dSSadaf Ebrahimi if (opcode == OP_STAR)
12100*22dc650dSSadaf Ebrahimi {
/* Unbounded: loop until an item fails; record the position in word 0
   whenever the current character equals charpos_char. */
12101*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
12102*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12103*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_JUMP, label);
12104*22dc650dSSadaf Ebrahimi }
12105*22dc650dSSadaf Ebrahimi else
12106*22dc650dSSadaf Ebrahimi {
/* Bounded: same recording step, but the loop also stops when the counter
   reaches zero. */
12107*22dc650dSSadaf Ebrahimi jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char);
12108*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12109*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
12110*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
12111*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_NOT_ZERO, label);
12112*22dc650dSSadaf Ebrahimi }
12113*22dc650dSSadaf Ebrahimi
/* Continue one code unit past the last recorded occurrence (i.e. the
   absorbed character counts as matched) and store that position back in
   word 0. */
12114*22dc650dSSadaf Ebrahimi set_jumps(no_match, LABEL());
12115*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, base, offset0, SLJIT_IMM, IN_UCHARS(1));
12116*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12117*22dc650dSSadaf Ebrahimi }
12118*22dc650dSSadaf Ebrahimi else
12119*22dc650dSSadaf Ebrahimi {
/* Generic greedy loop. Word 1 holds the starting position; word 0 ends up
   holding the position after the last item that matched. */
12120*22dc650dSSadaf Ebrahimi if (private_data_ptr == 0)
12121*22dc650dSSadaf Ebrahimi allocate_stack(common, 2);
12122*22dc650dSSadaf Ebrahimi
12123*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
/* UTF with 8/16-bit code units: the last good position is saved after every
   successful item, in TMP3 when use_tmp is set (OP_STAR without virtual
   registers) and directly in word 0 otherwise. */
12124*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
12125*22dc650dSSadaf Ebrahimi use_tmp = (!HAS_VIRTUAL_REGISTERS && opcode == OP_STAR);
12126*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(!use_tmp || tmp_base == TMP3);
12127*22dc650dSSadaf Ebrahimi
12128*22dc650dSSadaf Ebrahimi if (common->utf)
12129*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
12130*22dc650dSSadaf Ebrahimi #endif
12131*22dc650dSSadaf Ebrahimi if (opcode == OP_UPTO)
12132*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
12133*22dc650dSSadaf Ebrahimi
12134*22dc650dSSadaf Ebrahimi detect_partial_match(common, &no_match);
12135*22dc650dSSadaf Ebrahimi label = LABEL();
12136*22dc650dSSadaf Ebrahimi compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
12137*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
12138*22dc650dSSadaf Ebrahimi if (common->utf)
12139*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
12140*22dc650dSSadaf Ebrahimi #endif
12141*22dc650dSSadaf Ebrahimi
12142*22dc650dSSadaf Ebrahimi if (opcode == OP_UPTO)
12143*22dc650dSSadaf Ebrahimi {
12144*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
12145*22dc650dSSadaf Ebrahimi add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
12146*22dc650dSSadaf Ebrahimi }
12147*22dc650dSSadaf Ebrahimi
/* The one-unit increment on the fall-through path cancels the one-unit
   decrement applied below in the non-UTF case for the no_char1_match path.
   NOTE(review): this implies that, in non-UTF mode, a failing
   compile_char1_matchingpath() leaves STR_PTR one code unit past the tested
   character - confirm against that helper. */
12148*22dc650dSSadaf Ebrahimi detect_partial_match_to(common, label);
12149*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
12150*22dc650dSSadaf Ebrahimi
12151*22dc650dSSadaf Ebrahimi set_jumps(no_char1_match, LABEL());
12152*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
12153*22dc650dSSadaf Ebrahimi if (common->utf)
12154*22dc650dSSadaf Ebrahimi {
/* UTF: every exit restores STR_PTR from the saved last good position. */
12155*22dc650dSSadaf Ebrahimi set_jumps(no_match, LABEL());
12156*22dc650dSSadaf Ebrahimi if (use_tmp)
12157*22dc650dSSadaf Ebrahimi {
12158*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
12159*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, base, offset0, TMP3, 0);
12160*22dc650dSSadaf Ebrahimi }
12161*22dc650dSSadaf Ebrahimi else
12162*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12163*22dc650dSSadaf Ebrahimi }
12164*22dc650dSSadaf Ebrahimi else
12165*22dc650dSSadaf Ebrahimi #endif
12166*22dc650dSSadaf Ebrahimi {
12167*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
12168*22dc650dSSadaf Ebrahimi set_jumps(no_match, LABEL());
12169*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12170*22dc650dSSadaf Ebrahimi }
12171*22dc650dSSadaf Ebrahimi
12172*22dc650dSSadaf Ebrahimi if (early_fail_ptr != 0)
12173*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
12174*22dc650dSSadaf Ebrahimi }
12175*22dc650dSSadaf Ebrahimi
12176*22dc650dSSadaf Ebrahimi BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
12177*22dc650dSSadaf Ebrahimi break;
12178*22dc650dSSadaf Ebrahimi
/* Lazy star: nothing is consumed here; only the current position is saved in
   word 0. The matchingpath label is taken before the early-fail store, so
   that store is part of the code reached through the label. */
12179*22dc650dSSadaf Ebrahimi case OP_MINSTAR:
12180*22dc650dSSadaf Ebrahimi if (private_data_ptr == 0)
12181*22dc650dSSadaf Ebrahimi allocate_stack(common, 1);
12182*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12183*22dc650dSSadaf Ebrahimi BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
12184*22dc650dSSadaf Ebrahimi if (early_fail_ptr != 0)
12185*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
12186*22dc650dSSadaf Ebrahimi break;
12187*22dc650dSSadaf Ebrahimi
/* Lazy bounded repeat: word 0 holds the current position, word 1 the
   counter, initialised to max + 1. */
12188*22dc650dSSadaf Ebrahimi case OP_MINUPTO:
12189*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(early_fail_ptr == 0);
12190*22dc650dSSadaf Ebrahimi if (private_data_ptr == 0)
12191*22dc650dSSadaf Ebrahimi allocate_stack(common, 2);
12192*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12193*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
12194*22dc650dSSadaf Ebrahimi BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
12195*22dc650dSSadaf Ebrahimi break;
12196*22dc650dSSadaf Ebrahimi
/* Optional single item: the position is saved in word 0; only the greedy
   form attempts the item immediately, with failures routed to the
   u.backtracks list. */
12197*22dc650dSSadaf Ebrahimi case OP_QUERY:
12198*22dc650dSSadaf Ebrahimi case OP_MINQUERY:
12199*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(early_fail_ptr == 0);
12200*22dc650dSSadaf Ebrahimi if (private_data_ptr == 0)
12201*22dc650dSSadaf Ebrahimi allocate_stack(common, 1);
12202*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12203*22dc650dSSadaf Ebrahimi if (opcode == OP_QUERY)
12204*22dc650dSSadaf Ebrahimi compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
12205*22dc650dSSadaf Ebrahimi BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
12206*22dc650dSSadaf Ebrahimi break;
12207*22dc650dSSadaf Ebrahimi
/* Nothing further to emit: the fixed repetitions were generated above. */
12208*22dc650dSSadaf Ebrahimi case OP_EXACT:
12209*22dc650dSSadaf Ebrahimi break;
12210*22dc650dSSadaf Ebrahimi
/* Possessive forms: no state words are allocated and no matchingpath label
   is recorded in any of the cases below. */
12211*22dc650dSSadaf Ebrahimi case OP_POSSTAR:
12212*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE
12213*22dc650dSSadaf Ebrahimi if (type == OP_ALLANY && !common->invalid_utf)
12214*22dc650dSSadaf Ebrahimi #else
12215*22dc650dSSadaf Ebrahimi if (type == OP_ALLANY)
12216*22dc650dSSadaf Ebrahimi #endif
12217*22dc650dSSadaf Ebrahimi {
/* OP_ALLANY: move directly to the end of the subject. */
12218*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
12219*22dc650dSSadaf Ebrahimi process_partial_match(common);
12220*22dc650dSSadaf Ebrahimi if (early_fail_ptr != 0)
12221*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
12222*22dc650dSSadaf Ebrahimi break;
12223*22dc650dSSadaf Ebrahimi }
12224*22dc650dSSadaf Ebrahimi
12225*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
12226*22dc650dSSadaf Ebrahimi if (type == OP_EXTUNI || common->utf)
12227*22dc650dSSadaf Ebrahimi {
/* The scratch location tracks the position after the last successful item;
   STR_PTR is restored from it once the loop ends. */
12228*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
12229*22dc650dSSadaf Ebrahimi detect_partial_match(common, &no_match);
12230*22dc650dSSadaf Ebrahimi label = LABEL();
12231*22dc650dSSadaf Ebrahimi compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
12232*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
12233*22dc650dSSadaf Ebrahimi detect_partial_match_to(common, label);
12234*22dc650dSSadaf Ebrahimi
12235*22dc650dSSadaf Ebrahimi set_jumps(no_match, LABEL());
12236*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
12237*22dc650dSSadaf Ebrahimi if (early_fail_ptr != 0)
12238*22dc650dSSadaf Ebrahimi {
/* Both branches store the same value (STR_PTR was just loaded from the
   scratch location); only the source operand differs. */
12239*22dc650dSSadaf Ebrahimi if (!HAS_VIRTUAL_REGISTERS && tmp_base == TMP3)
12240*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, TMP3, 0);
12241*22dc650dSSadaf Ebrahimi else
12242*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
12243*22dc650dSSadaf Ebrahimi }
12244*22dc650dSSadaf Ebrahimi break;
12245*22dc650dSSadaf Ebrahimi }
12246*22dc650dSSadaf Ebrahimi #endif
12247*22dc650dSSadaf Ebrahimi
/* Remaining case: same add-one / subtract-one exit arrangement as the
   generic greedy loop above, without saving any state. */
12248*22dc650dSSadaf Ebrahimi detect_partial_match(common, &no_match);
12249*22dc650dSSadaf Ebrahimi label = LABEL();
12250*22dc650dSSadaf Ebrahimi compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
12251*22dc650dSSadaf Ebrahimi detect_partial_match_to(common, label);
12252*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
12253*22dc650dSSadaf Ebrahimi
12254*22dc650dSSadaf Ebrahimi set_jumps(no_char1_match, LABEL());
12255*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
12256*22dc650dSSadaf Ebrahimi set_jumps(no_match, LABEL());
12257*22dc650dSSadaf Ebrahimi if (early_fail_ptr != 0)
12258*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
12259*22dc650dSSadaf Ebrahimi break;
12260*22dc650dSSadaf Ebrahimi
12261*22dc650dSSadaf Ebrahimi case OP_POSUPTO:
12262*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(early_fail_ptr == 0);
12263*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
12264*22dc650dSSadaf Ebrahimi if (common->utf)
12265*22dc650dSSadaf Ebrahimi {
/* UTF: the POSSESSIVE1 slot holds the last good position while the scratch
   location holds the remaining count. */
12266*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
12267*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
12268*22dc650dSSadaf Ebrahimi
12269*22dc650dSSadaf Ebrahimi detect_partial_match(common, &no_match);
12270*22dc650dSSadaf Ebrahimi label = LABEL();
12271*22dc650dSSadaf Ebrahimi compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
12272*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
12273*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
12274*22dc650dSSadaf Ebrahimi add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
12275*22dc650dSSadaf Ebrahimi detect_partial_match_to(common, label);
12276*22dc650dSSadaf Ebrahimi
12277*22dc650dSSadaf Ebrahimi set_jumps(no_match, LABEL());
12278*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
12279*22dc650dSSadaf Ebrahimi break;
12280*22dc650dSSadaf Ebrahimi }
12281*22dc650dSSadaf Ebrahimi #endif
12282*22dc650dSSadaf Ebrahimi
/* OP_ALLANY: advance by `max` code units in one step, limited to STR_END.
   NOTE(review): unlike the OP_STAR and OP_POSSTAR fast paths, this one does
   not test common->invalid_utf, and with 32-bit code units the UTF block
   above is compiled out, so UTF-32 patterns can reach this point - confirm
   that this is intended. */
12283*22dc650dSSadaf Ebrahimi if (type == OP_ALLANY)
12284*22dc650dSSadaf Ebrahimi {
12285*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
12286*22dc650dSSadaf Ebrahimi
12287*22dc650dSSadaf Ebrahimi if (common->mode == PCRE2_JIT_COMPLETE)
12288*22dc650dSSadaf Ebrahimi {
12289*22dc650dSSadaf Ebrahimi OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
12290*22dc650dSSadaf Ebrahimi SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
12291*22dc650dSSadaf Ebrahimi }
12292*22dc650dSSadaf Ebrahimi else
12293*22dc650dSSadaf Ebrahimi {
12294*22dc650dSSadaf Ebrahimi jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
12295*22dc650dSSadaf Ebrahimi process_partial_match(common);
12296*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
12297*22dc650dSSadaf Ebrahimi }
12298*22dc650dSSadaf Ebrahimi break;
12299*22dc650dSSadaf Ebrahimi }
12300*22dc650dSSadaf Ebrahimi
/* Remaining case: counted loop with the scratch location as the counter and
   the same add-one / subtract-one exit arrangement as above. */
12301*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
12302*22dc650dSSadaf Ebrahimi
12303*22dc650dSSadaf Ebrahimi detect_partial_match(common, &no_match);
12304*22dc650dSSadaf Ebrahimi label = LABEL();
12305*22dc650dSSadaf Ebrahimi compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
12306*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
12307*22dc650dSSadaf Ebrahimi add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
12308*22dc650dSSadaf Ebrahimi detect_partial_match_to(common, label);
12309*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
12310*22dc650dSSadaf Ebrahimi
12311*22dc650dSSadaf Ebrahimi set_jumps(no_char1_match, LABEL());
12312*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
12313*22dc650dSSadaf Ebrahimi set_jumps(no_match, LABEL());
12314*22dc650dSSadaf Ebrahimi break;
12315*22dc650dSSadaf Ebrahimi
/* Possessive optional item: the scratch location holds the position to
   continue from - the start position if the item fails, the position after
   the item if it matches. */
12316*22dc650dSSadaf Ebrahimi case OP_POSQUERY:
12317*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(early_fail_ptr == 0);
12318*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
12319*22dc650dSSadaf Ebrahimi compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
12320*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
12321*22dc650dSSadaf Ebrahimi set_jumps(no_match, LABEL());
12322*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
12323*22dc650dSSadaf Ebrahimi break;
12324*22dc650dSSadaf Ebrahimi
12325*22dc650dSSadaf Ebrahimi default:
12326*22dc650dSSadaf Ebrahimi SLJIT_UNREACHABLE();
12327*22dc650dSSadaf Ebrahimi break;
12328*22dc650dSSadaf Ebrahimi }
12329*22dc650dSSadaf Ebrahimi
/* `end` points past the iterator (and past the absorbed OP_CHAR / OP_CHARI
   when the charpos optimisation was applied). */
12330*22dc650dSSadaf Ebrahimi count_match(common);
12331*22dc650dSSadaf Ebrahimi return end;
12332*22dc650dSSadaf Ebrahimi }
12333*22dc650dSSadaf Ebrahimi
/* Emits the matching-path code for OP_FAIL, OP_ACCEPT and OP_ASSERT_ACCEPT;
the opcode is read from *cc. Every return statement visible in this body
returns cc + 1. `parent` is not referenced directly here; presumably the
PUSH_BACKTRACK macro (defined outside this chunk) consumes it - confirm. */
compile_fail_accept_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)12334*22dc650dSSadaf Ebrahimi static SLJIT_INLINE PCRE2_SPTR compile_fail_accept_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
12335*22dc650dSSadaf Ebrahimi {
12336*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
12337*22dc650dSSadaf Ebrahimi backtrack_common *backtrack;
12338*22dc650dSSadaf Ebrahimi
12339*22dc650dSSadaf Ebrahimi PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
12340*22dc650dSSadaf Ebrahimi
/* OP_FAIL: an unconditional jump is queued on this item's own_backtracks
list and nothing else is emitted. */
12341*22dc650dSSadaf Ebrahimi if (*cc == OP_FAIL)
12342*22dc650dSSadaf Ebrahimi {
12343*22dc650dSSadaf Ebrahimi add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_JUMP));
12344*22dc650dSSadaf Ebrahimi return cc + 1;
12345*22dc650dSSadaf Ebrahimi }
12346*22dc650dSSadaf Ebrahimi
/* OP_ACCEPT with currententry == NULL in a PCRE2_ENDANCHORED pattern: a jump
taken when STR_PTR != STR_END is queued on common->restart_match. */
12347*22dc650dSSadaf Ebrahimi if (*cc == OP_ACCEPT && common->currententry == NULL && (common->re->overall_options & PCRE2_ENDANCHORED) != 0)
12348*22dc650dSSadaf Ebrahimi add_jump(compiler, &common->restart_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
12349*22dc650dSSadaf Ebrahimi
/* Unconditional accept. The jump is queued on common->accept while
accept_label is still NULL, otherwise it targets accept_label directly. The
same two-way choice is repeated for every accept jump below. */
12350*22dc650dSSadaf Ebrahimi if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
12351*22dc650dSSadaf Ebrahimi {
12352*22dc650dSSadaf Ebrahimi /* No need to check notempty conditions. */
12353*22dc650dSSadaf Ebrahimi if (common->accept_label == NULL)
12354*22dc650dSSadaf Ebrahimi add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
12355*22dc650dSSadaf Ebrahimi else
12356*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_JUMP, common->accept_label);
12357*22dc650dSSadaf Ebrahimi return cc + 1;
12358*22dc650dSSadaf Ebrahimi }
12359*22dc650dSSadaf Ebrahimi
/* Accept when STR_PTR differs from the value stored at OVECTOR(0)
(presumably the match start, i.e. the match is non-empty - confirm). */
12360*22dc650dSSadaf Ebrahimi if (common->accept_label == NULL)
12361*22dc650dSSadaf Ebrahimi add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
12362*22dc650dSSadaf Ebrahimi else
12363*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
12364*22dc650dSSadaf Ebrahimi
/* Load the 32-bit jit_arguments.options field into TMP2. With virtual
registers ARGUMENTS is first copied into TMP1, which then serves as the base
register for this load and for the `str` load further down. */
12365*22dc650dSSadaf Ebrahimi if (HAS_VIRTUAL_REGISTERS)
12366*22dc650dSSadaf Ebrahimi {
12367*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
12368*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
12369*22dc650dSSadaf Ebrahimi }
12370*22dc650dSSadaf Ebrahimi else
12371*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options));
12372*22dc650dSSadaf Ebrahimi
/* PCRE2_NOTEMPTY set: jump queued on own_backtracks.
PCRE2_NOTEMPTY_ATSTART clear: accept. */
12373*22dc650dSSadaf Ebrahimi OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
12374*22dc650dSSadaf Ebrahimi add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_NOT_ZERO));
12375*22dc650dSSadaf Ebrahimi OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
12376*22dc650dSSadaf Ebrahimi if (common->accept_label == NULL)
12377*22dc650dSSadaf Ebrahimi add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO));
12378*22dc650dSSadaf Ebrahimi else
12379*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_ZERO, common->accept_label);
12380*22dc650dSSadaf Ebrahimi
/* Remaining case (PCRE2_NOTEMPTY_ATSTART set): accept when jit_arguments.str
differs from STR_PTR; otherwise fall through to the own_backtracks jump. */
12381*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
12382*22dc650dSSadaf Ebrahimi if (common->accept_label == NULL)
12383*22dc650dSSadaf Ebrahimi add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
12384*22dc650dSSadaf Ebrahimi else
12385*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
12386*22dc650dSSadaf Ebrahimi add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_JUMP));
12387*22dc650dSSadaf Ebrahimi return cc + 1;
12388*22dc650dSSadaf Ebrahimi }
12389*22dc650dSSadaf Ebrahimi
/* Emits the matching-path code for the opcode at cc (dispatched from the
OP_CLOSE case of compile_matchingpath). The operand read with GET2(cc, 1) is
used as an index into common->optimized_cbracket and, doubled, into OVECTOR
(presumably a capture group number - confirm). Returns cc + 1 + IMM2_SIZE. */
compile_close_matchingpath(compiler_common * common,PCRE2_SPTR cc)12390*22dc650dSSadaf Ebrahimi static SLJIT_INLINE PCRE2_SPTR compile_close_matchingpath(compiler_common *common, PCRE2_SPTR cc)
12391*22dc650dSSadaf Ebrahimi {
12392*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
12393*22dc650dSSadaf Ebrahimi int offset = GET2(cc, 1);
12394*22dc650dSSadaf Ebrahimi BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
12395*22dc650dSSadaf Ebrahimi
/* When common->currententry is non-NULL no code is emitted at all. */
12396*22dc650dSSadaf Ebrahimi /* Data will be discarded anyway... */
12397*22dc650dSSadaf Ebrahimi if (common->currententry != NULL)
12398*22dc650dSSadaf Ebrahimi return cc + 1 + IMM2_SIZE;
12399*22dc650dSSadaf Ebrahimi
/* Unless the bracket is flagged in optimized_cbracket, the value held in
OVECTOR_PRIV(offset) is copied into OVECTOR(2 * offset) via TMP1. In all cases
OVECTOR(2 * offset + 1) receives the current STR_PTR. The load into TMP1 is
emitted before `offset` is doubled because OVECTOR_PRIV takes the undoubled
index. */
12400*22dc650dSSadaf Ebrahimi if (!optimized_cbracket)
12401*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
12402*22dc650dSSadaf Ebrahimi offset <<= 1;
12403*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
12404*22dc650dSSadaf Ebrahimi if (!optimized_cbracket)
12405*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12406*22dc650dSSadaf Ebrahimi return cc + 1 + IMM2_SIZE;
12407*22dc650dSSadaf Ebrahimi }
12408*22dc650dSSadaf Ebrahimi
/* Emits the matching-path code for a control verb opcode at cc (dispatched
from compile_matchingpath for OP_PRUNE, OP_SKIP, OP_THEN, OP_COMMIT and their
_ARG variants). Returns the position after the opcode: cc + 1, plus
2 + cc[1] more code units for the four _ARG variants. `parent` is not
referenced directly here; presumably PUSH_BACKTRACK consumes it - confirm. */
compile_control_verb_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)12409*22dc650dSSadaf Ebrahimi static SLJIT_INLINE PCRE2_SPTR compile_control_verb_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
12410*22dc650dSSadaf Ebrahimi {
12411*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
12412*22dc650dSSadaf Ebrahimi backtrack_common *backtrack;
12413*22dc650dSSadaf Ebrahimi PCRE2_UCHAR opcode = *cc;
12414*22dc650dSSadaf Ebrahimi PCRE2_SPTR ccend = cc + 1;
12415*22dc650dSSadaf Ebrahimi
/* _ARG variants carry extra data whose length depends on cc[1] (presumably a
length-prefixed, terminated name - confirm); skip over it. */
12416*22dc650dSSadaf Ebrahimi if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG ||
12417*22dc650dSSadaf Ebrahimi opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
12418*22dc650dSSadaf Ebrahimi ccend += 2 + cc[1];
12419*22dc650dSSadaf Ebrahimi
12420*22dc650dSSadaf Ebrahimi PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
12421*22dc650dSSadaf Ebrahimi
/* OP_SKIP: reserve one stack slot and store the current STR_PTR in it. */
12422*22dc650dSSadaf Ebrahimi if (opcode == OP_SKIP)
12423*22dc650dSSadaf Ebrahimi {
12424*22dc650dSSadaf Ebrahimi allocate_stack(common, 1);
12425*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
12426*22dc650dSSadaf Ebrahimi return ccend;
12427*22dc650dSSadaf Ebrahimi }
12428*22dc650dSSadaf Ebrahimi
/* COMMIT/PRUNE/THEN with an argument (OP_SKIP_ARG is not included): store the
address cc + 2 both in the local slot at common->mark_ptr and in
jit_arguments.mark_ptr. With virtual registers ARGUMENTS is copied into TMP1
first so TMP1 can be used as the base register. */
12429*22dc650dSSadaf Ebrahimi if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
12430*22dc650dSSadaf Ebrahimi {
12431*22dc650dSSadaf Ebrahimi if (HAS_VIRTUAL_REGISTERS)
12432*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
12433*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
12434*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
12435*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
12436*22dc650dSSadaf Ebrahimi }
12437*22dc650dSSadaf Ebrahimi
/* All other opcodes emit nothing in this function after PUSH_BACKTRACK. */
12438*22dc650dSSadaf Ebrahimi return ccend;
12439*22dc650dSSadaf Ebrahimi }
12440*22dc650dSSadaf Ebrahimi
/* One-element opcode buffer containing OP_THEN_TRAP. The then-trap backtrack
items created in this file point their common.cc at this buffer. */
12441*22dc650dSSadaf Ebrahimi static PCRE2_UCHAR then_trap_opcode[1] = { OP_THEN_TRAP };
12442*22dc650dSSadaf Ebrahimi
/* Pushes a then_trap_backtrack item for the range [cc, ccend), records it in
common->then_trap, and emits code that builds a type_then_trap record on the
stack and points the control head slot at it. `parent` is not referenced
directly here; presumably PUSH_BACKTRACK_NOVALUE consumes it - confirm. */
compile_then_trap_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,backtrack_common * parent)12443*22dc650dSSadaf Ebrahimi static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
12444*22dc650dSSadaf Ebrahimi {
12445*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
12446*22dc650dSSadaf Ebrahimi backtrack_common *backtrack;
/* Written by get_framesize() below; its value is not read in this function. */
12447*22dc650dSSadaf Ebrahimi BOOL needs_control_head;
12448*22dc650dSSadaf Ebrahimi int size;
12449*22dc650dSSadaf Ebrahimi
/* The item's opcode pointer is redirected to then_trap_opcode, `start` holds
the offset of cc from common->start, and framesize comes from get_framesize. */
12450*22dc650dSSadaf Ebrahimi PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
12451*22dc650dSSadaf Ebrahimi common->then_trap = BACKTRACK_AS(then_trap_backtrack);
12452*22dc650dSSadaf Ebrahimi BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
12453*22dc650dSSadaf Ebrahimi BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
12454*22dc650dSSadaf Ebrahimi BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
12455*22dc650dSSadaf Ebrahimi
/* Stack slots needed: three for the trap record plus the frame size when it
is non-negative. */
12456*22dc650dSSadaf Ebrahimi size = BACKTRACK_AS(then_trap_backtrack)->framesize;
12457*22dc650dSSadaf Ebrahimi size = 3 + (size < 0 ? 0 : size);
12458*22dc650dSSadaf Ebrahimi
/* TMP2 keeps the previous control head value. After allocation the control
head slot is set to STACK_TOP, offset by (size - 3) words when extra frame
slots were reserved. */
12459*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
12460*22dc650dSSadaf Ebrahimi allocate_stack(common, size);
12461*22dc650dSSadaf Ebrahimi if (size > 3)
12462*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
12463*22dc650dSSadaf Ebrahimi else
12464*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
/* Trap record: STACK(size - 1) = start offset, STACK(size - 2) =
type_then_trap, STACK(size - 3) = previous control head. */
12465*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
12466*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
12467*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
12468*22dc650dSSadaf Ebrahimi
/* `size` is reloaded with the raw framesize; a frame is initialised only when
it is non-negative. */
12469*22dc650dSSadaf Ebrahimi size = BACKTRACK_AS(then_trap_backtrack)->framesize;
12470*22dc650dSSadaf Ebrahimi if (size >= 0)
12471*22dc650dSSadaf Ebrahimi init_frame(common, cc, ccend, size - 1, 0);
12472*22dc650dSSadaf Ebrahimi }
12473*22dc650dSSadaf Ebrahimi
/* Emits the matching-path code for the opcode sequence [cc, ccend). Each
iteration dispatches on *cc to the matching compile_* helper, which returns
the position of the next opcode. The function returns early when a helper
returns NULL or when an unknown opcode is met.

For the simple single-item opcodes the failure jumps are collected on
parent->top->simple_backtracks when parent->top is non-NULL, otherwise on
parent->own_backtracks. */
compile_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,backtrack_common * parent)12474*22dc650dSSadaf Ebrahimi static void compile_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
12475*22dc650dSSadaf Ebrahimi {
12476*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
12477*22dc650dSSadaf Ebrahimi backtrack_common *backtrack;
12478*22dc650dSSadaf Ebrahimi BOOL has_then_trap = FALSE;
12479*22dc650dSSadaf Ebrahimi then_trap_backtrack *save_then_trap = NULL;
12480*22dc650dSSadaf Ebrahimi
/* ccend must point at OP_END or at an opcode in the OP_ALT..OP_KETRPOS range. */
12481*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));
12482*22dc650dSSadaf Ebrahimi
/* When then_offsets marks this start position, install a then-trap before
compiling the sequence; the previous common->then_trap is saved so it can be
restored at the end of this function. */
12483*22dc650dSSadaf Ebrahimi if (common->has_then && common->then_offsets[cc - common->start] != 0)
12484*22dc650dSSadaf Ebrahimi {
12485*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
12486*22dc650dSSadaf Ebrahimi has_then_trap = TRUE;
12487*22dc650dSSadaf Ebrahimi save_then_trap = common->then_trap;
12488*22dc650dSSadaf Ebrahimi /* Tail item on backtrack. */
12489*22dc650dSSadaf Ebrahimi compile_then_trap_matchingpath(common, cc, ccend, parent);
12490*22dc650dSSadaf Ebrahimi }
12491*22dc650dSSadaf Ebrahimi
12492*22dc650dSSadaf Ebrahimi while (cc < ccend)
12493*22dc650dSSadaf Ebrahimi {
12494*22dc650dSSadaf Ebrahimi switch(*cc)
12495*22dc650dSSadaf Ebrahimi {
/* Position and boundary assertions. */
12496*22dc650dSSadaf Ebrahimi case OP_SOD:
12497*22dc650dSSadaf Ebrahimi case OP_SOM:
12498*22dc650dSSadaf Ebrahimi case OP_NOT_WORD_BOUNDARY:
12499*22dc650dSSadaf Ebrahimi case OP_WORD_BOUNDARY:
12500*22dc650dSSadaf Ebrahimi case OP_EODN:
12501*22dc650dSSadaf Ebrahimi case OP_EOD:
12502*22dc650dSSadaf Ebrahimi case OP_DOLL:
12503*22dc650dSSadaf Ebrahimi case OP_DOLLM:
12504*22dc650dSSadaf Ebrahimi case OP_CIRC:
12505*22dc650dSSadaf Ebrahimi case OP_CIRCM:
12506*22dc650dSSadaf Ebrahimi case OP_NOT_UCP_WORD_BOUNDARY:
12507*22dc650dSSadaf Ebrahimi case OP_UCP_WORD_BOUNDARY:
12508*22dc650dSSadaf Ebrahimi cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);
12509*22dc650dSSadaf Ebrahimi break;
12510*22dc650dSSadaf Ebrahimi
/* Non-repeated character types and negated single characters. */
12511*22dc650dSSadaf Ebrahimi case OP_NOT_DIGIT:
12512*22dc650dSSadaf Ebrahimi case OP_DIGIT:
12513*22dc650dSSadaf Ebrahimi case OP_NOT_WHITESPACE:
12514*22dc650dSSadaf Ebrahimi case OP_WHITESPACE:
12515*22dc650dSSadaf Ebrahimi case OP_NOT_WORDCHAR:
12516*22dc650dSSadaf Ebrahimi case OP_WORDCHAR:
12517*22dc650dSSadaf Ebrahimi case OP_ANY:
12518*22dc650dSSadaf Ebrahimi case OP_ALLANY:
12519*22dc650dSSadaf Ebrahimi case OP_ANYBYTE:
12520*22dc650dSSadaf Ebrahimi case OP_NOTPROP:
12521*22dc650dSSadaf Ebrahimi case OP_PROP:
12522*22dc650dSSadaf Ebrahimi case OP_ANYNL:
12523*22dc650dSSadaf Ebrahimi case OP_NOT_HSPACE:
12524*22dc650dSSadaf Ebrahimi case OP_HSPACE:
12525*22dc650dSSadaf Ebrahimi case OP_NOT_VSPACE:
12526*22dc650dSSadaf Ebrahimi case OP_VSPACE:
12527*22dc650dSSadaf Ebrahimi case OP_EXTUNI:
12528*22dc650dSSadaf Ebrahimi case OP_NOT:
12529*22dc650dSSadaf Ebrahimi case OP_NOTI:
12530*22dc650dSSadaf Ebrahimi cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
12531*22dc650dSSadaf Ebrahimi break;
12532*22dc650dSSadaf Ebrahimi
/* OP_SET_SOM: the old OVECTOR(0) value is saved in a new stack slot and
OVECTOR(0) is overwritten with the current STR_PTR. */
12533*22dc650dSSadaf Ebrahimi case OP_SET_SOM:
12534*22dc650dSSadaf Ebrahimi PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
12535*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
12536*22dc650dSSadaf Ebrahimi allocate_stack(common, 1);
12537*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
12538*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
12539*22dc650dSSadaf Ebrahimi cc++;
12540*22dc650dSSadaf Ebrahimi break;
12541*22dc650dSSadaf Ebrahimi
/* Literal characters: in PCRE2_JIT_COMPLETE mode compile_charn_matchingpath
is given ccend (presumably so it can handle a run of characters at once -
confirm); in the other modes each character goes through
compile_char1_matchingpath. */
12542*22dc650dSSadaf Ebrahimi case OP_CHAR:
12543*22dc650dSSadaf Ebrahimi case OP_CHARI:
12544*22dc650dSSadaf Ebrahimi if (common->mode == PCRE2_JIT_COMPLETE)
12545*22dc650dSSadaf Ebrahimi cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);
12546*22dc650dSSadaf Ebrahimi else
12547*22dc650dSSadaf Ebrahimi cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
12548*22dc650dSSadaf Ebrahimi break;
12549*22dc650dSSadaf Ebrahimi
/* Repeated single characters, negated characters and character types. */
12550*22dc650dSSadaf Ebrahimi case OP_STAR:
12551*22dc650dSSadaf Ebrahimi case OP_MINSTAR:
12552*22dc650dSSadaf Ebrahimi case OP_PLUS:
12553*22dc650dSSadaf Ebrahimi case OP_MINPLUS:
12554*22dc650dSSadaf Ebrahimi case OP_QUERY:
12555*22dc650dSSadaf Ebrahimi case OP_MINQUERY:
12556*22dc650dSSadaf Ebrahimi case OP_UPTO:
12557*22dc650dSSadaf Ebrahimi case OP_MINUPTO:
12558*22dc650dSSadaf Ebrahimi case OP_EXACT:
12559*22dc650dSSadaf Ebrahimi case OP_POSSTAR:
12560*22dc650dSSadaf Ebrahimi case OP_POSPLUS:
12561*22dc650dSSadaf Ebrahimi case OP_POSQUERY:
12562*22dc650dSSadaf Ebrahimi case OP_POSUPTO:
12563*22dc650dSSadaf Ebrahimi case OP_STARI:
12564*22dc650dSSadaf Ebrahimi case OP_MINSTARI:
12565*22dc650dSSadaf Ebrahimi case OP_PLUSI:
12566*22dc650dSSadaf Ebrahimi case OP_MINPLUSI:
12567*22dc650dSSadaf Ebrahimi case OP_QUERYI:
12568*22dc650dSSadaf Ebrahimi case OP_MINQUERYI:
12569*22dc650dSSadaf Ebrahimi case OP_UPTOI:
12570*22dc650dSSadaf Ebrahimi case OP_MINUPTOI:
12571*22dc650dSSadaf Ebrahimi case OP_EXACTI:
12572*22dc650dSSadaf Ebrahimi case OP_POSSTARI:
12573*22dc650dSSadaf Ebrahimi case OP_POSPLUSI:
12574*22dc650dSSadaf Ebrahimi case OP_POSQUERYI:
12575*22dc650dSSadaf Ebrahimi case OP_POSUPTOI:
12576*22dc650dSSadaf Ebrahimi case OP_NOTSTAR:
12577*22dc650dSSadaf Ebrahimi case OP_NOTMINSTAR:
12578*22dc650dSSadaf Ebrahimi case OP_NOTPLUS:
12579*22dc650dSSadaf Ebrahimi case OP_NOTMINPLUS:
12580*22dc650dSSadaf Ebrahimi case OP_NOTQUERY:
12581*22dc650dSSadaf Ebrahimi case OP_NOTMINQUERY:
12582*22dc650dSSadaf Ebrahimi case OP_NOTUPTO:
12583*22dc650dSSadaf Ebrahimi case OP_NOTMINUPTO:
12584*22dc650dSSadaf Ebrahimi case OP_NOTEXACT:
12585*22dc650dSSadaf Ebrahimi case OP_NOTPOSSTAR:
12586*22dc650dSSadaf Ebrahimi case OP_NOTPOSPLUS:
12587*22dc650dSSadaf Ebrahimi case OP_NOTPOSQUERY:
12588*22dc650dSSadaf Ebrahimi case OP_NOTPOSUPTO:
12589*22dc650dSSadaf Ebrahimi case OP_NOTSTARI:
12590*22dc650dSSadaf Ebrahimi case OP_NOTMINSTARI:
12591*22dc650dSSadaf Ebrahimi case OP_NOTPLUSI:
12592*22dc650dSSadaf Ebrahimi case OP_NOTMINPLUSI:
12593*22dc650dSSadaf Ebrahimi case OP_NOTQUERYI:
12594*22dc650dSSadaf Ebrahimi case OP_NOTMINQUERYI:
12595*22dc650dSSadaf Ebrahimi case OP_NOTUPTOI:
12596*22dc650dSSadaf Ebrahimi case OP_NOTMINUPTOI:
12597*22dc650dSSadaf Ebrahimi case OP_NOTEXACTI:
12598*22dc650dSSadaf Ebrahimi case OP_NOTPOSSTARI:
12599*22dc650dSSadaf Ebrahimi case OP_NOTPOSPLUSI:
12600*22dc650dSSadaf Ebrahimi case OP_NOTPOSQUERYI:
12601*22dc650dSSadaf Ebrahimi case OP_NOTPOSUPTOI:
12602*22dc650dSSadaf Ebrahimi case OP_TYPESTAR:
12603*22dc650dSSadaf Ebrahimi case OP_TYPEMINSTAR:
12604*22dc650dSSadaf Ebrahimi case OP_TYPEPLUS:
12605*22dc650dSSadaf Ebrahimi case OP_TYPEMINPLUS:
12606*22dc650dSSadaf Ebrahimi case OP_TYPEQUERY:
12607*22dc650dSSadaf Ebrahimi case OP_TYPEMINQUERY:
12608*22dc650dSSadaf Ebrahimi case OP_TYPEUPTO:
12609*22dc650dSSadaf Ebrahimi case OP_TYPEMINUPTO:
12610*22dc650dSSadaf Ebrahimi case OP_TYPEEXACT:
12611*22dc650dSSadaf Ebrahimi case OP_TYPEPOSSTAR:
12612*22dc650dSSadaf Ebrahimi case OP_TYPEPOSPLUS:
12613*22dc650dSSadaf Ebrahimi case OP_TYPEPOSQUERY:
12614*22dc650dSSadaf Ebrahimi case OP_TYPEPOSUPTO:
12615*22dc650dSSadaf Ebrahimi cc = compile_iterator_matchingpath(common, cc, parent);
12616*22dc650dSSadaf Ebrahimi break;
12617*22dc650dSSadaf Ebrahimi
/* Classes: the code unit located 32 bytes after cc[1] (presumably just past
the class bitmap - confirm) selects the iterator path when it is a repeat
opcode in the OP_CRSTAR..OP_CRPOSRANGE range; otherwise the class is matched
once. */
12618*22dc650dSSadaf Ebrahimi case OP_CLASS:
12619*22dc650dSSadaf Ebrahimi case OP_NCLASS:
12620*22dc650dSSadaf Ebrahimi if (cc[1 + (32 / sizeof(PCRE2_UCHAR))] >= OP_CRSTAR && cc[1 + (32 / sizeof(PCRE2_UCHAR))] <= OP_CRPOSRANGE)
12621*22dc650dSSadaf Ebrahimi cc = compile_iterator_matchingpath(common, cc, parent);
12622*22dc650dSSadaf Ebrahimi else
12623*22dc650dSSadaf Ebrahimi cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
12624*22dc650dSSadaf Ebrahimi break;
12625*22dc650dSSadaf Ebrahimi
/* Extended classes: same decision as above, with the following opcode found
at the offset read by GET(cc, 1). */
12626*22dc650dSSadaf Ebrahimi #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
12627*22dc650dSSadaf Ebrahimi case OP_XCLASS:
12628*22dc650dSSadaf Ebrahimi if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
12629*22dc650dSSadaf Ebrahimi cc = compile_iterator_matchingpath(common, cc, parent);
12630*22dc650dSSadaf Ebrahimi else
12631*22dc650dSSadaf Ebrahimi cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
12632*22dc650dSSadaf Ebrahimi break;
12633*22dc650dSSadaf Ebrahimi #endif
12634*22dc650dSSadaf Ebrahimi
/* Back references: a repeat opcode right after the operand selects the
reference iterator; otherwise a single reference match is emitted and cc is
advanced past the opcode and its operand here. */
12635*22dc650dSSadaf Ebrahimi case OP_REF:
12636*22dc650dSSadaf Ebrahimi case OP_REFI:
12637*22dc650dSSadaf Ebrahimi if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
12638*22dc650dSSadaf Ebrahimi cc = compile_ref_iterator_matchingpath(common, cc, parent);
12639*22dc650dSSadaf Ebrahimi else
12640*22dc650dSSadaf Ebrahimi {
12641*22dc650dSSadaf Ebrahimi compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE, FALSE);
12642*22dc650dSSadaf Ebrahimi cc += 1 + IMM2_SIZE;
12643*22dc650dSSadaf Ebrahimi }
12644*22dc650dSSadaf Ebrahimi break;
12645*22dc650dSSadaf Ebrahimi
/* OP_DNREF/OP_DNREFI carry two IMM2 operands; the non-iterated form runs
compile_dnref_search before the reference match. */
12646*22dc650dSSadaf Ebrahimi case OP_DNREF:
12647*22dc650dSSadaf Ebrahimi case OP_DNREFI:
12648*22dc650dSSadaf Ebrahimi if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
12649*22dc650dSSadaf Ebrahimi cc = compile_ref_iterator_matchingpath(common, cc, parent);
12650*22dc650dSSadaf Ebrahimi else
12651*22dc650dSSadaf Ebrahimi {
12652*22dc650dSSadaf Ebrahimi compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);
12653*22dc650dSSadaf Ebrahimi compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE, FALSE);
12654*22dc650dSSadaf Ebrahimi cc += 1 + 2 * IMM2_SIZE;
12655*22dc650dSSadaf Ebrahimi }
12656*22dc650dSSadaf Ebrahimi break;
12657*22dc650dSSadaf Ebrahimi
12658*22dc650dSSadaf Ebrahimi case OP_RECURSE:
12659*22dc650dSSadaf Ebrahimi cc = compile_recurse_matchingpath(common, cc, parent);
12660*22dc650dSSadaf Ebrahimi break;
12661*22dc650dSSadaf Ebrahimi
12662*22dc650dSSadaf Ebrahimi case OP_CALLOUT:
12663*22dc650dSSadaf Ebrahimi case OP_CALLOUT_STR:
12664*22dc650dSSadaf Ebrahimi cc = compile_callout_matchingpath(common, cc, parent);
12665*22dc650dSSadaf Ebrahimi break;
12666*22dc650dSSadaf Ebrahimi
/* Assertions get their own assert_backtrack item, which is passed to
compile_assert_matchingpath. */
12667*22dc650dSSadaf Ebrahimi case OP_ASSERT:
12668*22dc650dSSadaf Ebrahimi case OP_ASSERT_NOT:
12669*22dc650dSSadaf Ebrahimi case OP_ASSERTBACK:
12670*22dc650dSSadaf Ebrahimi case OP_ASSERTBACK_NOT:
12671*22dc650dSSadaf Ebrahimi PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
12672*22dc650dSSadaf Ebrahimi cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
12673*22dc650dSSadaf Ebrahimi break;
12674*22dc650dSSadaf Ebrahimi
/* OP_BRAMINZERO: the following bracket is skipped on the matching path
(cc jumps to bracketend(cc + 1)). The opcode at cc - 1 - LINK_SIZE is the
bracket's closing opcode: unless it is OP_KETRMIN one slot holding STR_PTR is
pushed; for OP_KETRMIN two slots are pushed (0 and STR_PTR). The label of the
code that follows is recorded in the backtrack item's matchingpath field. */
12675*22dc650dSSadaf Ebrahimi case OP_BRAMINZERO:
12676*22dc650dSSadaf Ebrahimi PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
12677*22dc650dSSadaf Ebrahimi cc = bracketend(cc + 1);
12678*22dc650dSSadaf Ebrahimi if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
12679*22dc650dSSadaf Ebrahimi {
12680*22dc650dSSadaf Ebrahimi allocate_stack(common, 1);
12681*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
12682*22dc650dSSadaf Ebrahimi }
12683*22dc650dSSadaf Ebrahimi else
12684*22dc650dSSadaf Ebrahimi {
12685*22dc650dSSadaf Ebrahimi allocate_stack(common, 2);
12686*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
12687*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
12688*22dc650dSSadaf Ebrahimi }
12689*22dc650dSSadaf Ebrahimi BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
12690*22dc650dSSadaf Ebrahimi count_match(common);
12691*22dc650dSSadaf Ebrahimi break;
12692*22dc650dSSadaf Ebrahimi
/* Bracket-like groups handled by compile_bracket_matchingpath. */
12693*22dc650dSSadaf Ebrahimi case OP_ASSERT_NA:
12694*22dc650dSSadaf Ebrahimi case OP_ASSERTBACK_NA:
12695*22dc650dSSadaf Ebrahimi case OP_ONCE:
12696*22dc650dSSadaf Ebrahimi case OP_SCRIPT_RUN:
12697*22dc650dSSadaf Ebrahimi case OP_BRA:
12698*22dc650dSSadaf Ebrahimi case OP_CBRA:
12699*22dc650dSSadaf Ebrahimi case OP_COND:
12700*22dc650dSSadaf Ebrahimi case OP_SBRA:
12701*22dc650dSSadaf Ebrahimi case OP_SCBRA:
12702*22dc650dSSadaf Ebrahimi case OP_SCOND:
12703*22dc650dSSadaf Ebrahimi cc = compile_bracket_matchingpath(common, cc, parent);
12704*22dc650dSSadaf Ebrahimi break;
12705*22dc650dSSadaf Ebrahimi
/* OP_BRAZERO: the opcode that follows decides the handler - values above
OP_ASSERTBACK_NOT go to the bracket compiler, the rest to the assertion
compiler. */
12706*22dc650dSSadaf Ebrahimi case OP_BRAZERO:
12707*22dc650dSSadaf Ebrahimi if (cc[1] > OP_ASSERTBACK_NOT)
12708*22dc650dSSadaf Ebrahimi cc = compile_bracket_matchingpath(common, cc, parent);
12709*22dc650dSSadaf Ebrahimi else
12710*22dc650dSSadaf Ebrahimi {
12711*22dc650dSSadaf Ebrahimi PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
12712*22dc650dSSadaf Ebrahimi cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
12713*22dc650dSSadaf Ebrahimi }
12714*22dc650dSSadaf Ebrahimi break;
12715*22dc650dSSadaf Ebrahimi
12716*22dc650dSSadaf Ebrahimi case OP_BRAPOS:
12717*22dc650dSSadaf Ebrahimi case OP_CBRAPOS:
12718*22dc650dSSadaf Ebrahimi case OP_SBRAPOS:
12719*22dc650dSSadaf Ebrahimi case OP_SCBRAPOS:
12720*22dc650dSSadaf Ebrahimi case OP_BRAPOSZERO:
12721*22dc650dSSadaf Ebrahimi cc = compile_bracketpos_matchingpath(common, cc, parent);
12722*22dc650dSSadaf Ebrahimi break;
12723*22dc650dSSadaf Ebrahimi
/* OP_MARK: the previous value of the local mark slot is saved on the stack
(slot 4 of a 5-slot record when has_skip_arg is set, otherwise slot 0 of a
single slot). The address cc + 2 is then stored in the local mark slot and in
jit_arguments.mark_ptr. With has_skip_arg the record is also linked into the
control head: STACK(0) = previous control head, STACK(1) = type_mark,
STACK(2) = cc + 2, STACK(3) = STR_PTR. */
12724*22dc650dSSadaf Ebrahimi case OP_MARK:
12725*22dc650dSSadaf Ebrahimi PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
12726*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(common->mark_ptr != 0);
12727*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
12728*22dc650dSSadaf Ebrahimi allocate_stack(common, common->has_skip_arg ? 5 : 1);
12729*22dc650dSSadaf Ebrahimi if (HAS_VIRTUAL_REGISTERS)
12730*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
12731*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
12732*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
12733*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
12734*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
12735*22dc650dSSadaf Ebrahimi if (common->has_skip_arg)
12736*22dc650dSSadaf Ebrahimi {
12737*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
12738*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
12739*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
12740*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
12741*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
12742*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
12743*22dc650dSSadaf Ebrahimi }
/* Skip the opcode plus 2 + cc[1] code units of argument data. */
12744*22dc650dSSadaf Ebrahimi cc += 1 + 2 + cc[1];
12745*22dc650dSSadaf Ebrahimi break;
12746*22dc650dSSadaf Ebrahimi
/* Backtracking control verbs. */
12747*22dc650dSSadaf Ebrahimi case OP_PRUNE:
12748*22dc650dSSadaf Ebrahimi case OP_PRUNE_ARG:
12749*22dc650dSSadaf Ebrahimi case OP_SKIP:
12750*22dc650dSSadaf Ebrahimi case OP_SKIP_ARG:
12751*22dc650dSSadaf Ebrahimi case OP_THEN:
12752*22dc650dSSadaf Ebrahimi case OP_THEN_ARG:
12753*22dc650dSSadaf Ebrahimi case OP_COMMIT:
12754*22dc650dSSadaf Ebrahimi case OP_COMMIT_ARG:
12755*22dc650dSSadaf Ebrahimi cc = compile_control_verb_matchingpath(common, cc, parent);
12756*22dc650dSSadaf Ebrahimi break;
12757*22dc650dSSadaf Ebrahimi
12758*22dc650dSSadaf Ebrahimi case OP_FAIL:
12759*22dc650dSSadaf Ebrahimi case OP_ACCEPT:
12760*22dc650dSSadaf Ebrahimi case OP_ASSERT_ACCEPT:
12761*22dc650dSSadaf Ebrahimi cc = compile_fail_accept_matchingpath(common, cc, parent);
12762*22dc650dSSadaf Ebrahimi break;
12763*22dc650dSSadaf Ebrahimi
12764*22dc650dSSadaf Ebrahimi case OP_CLOSE:
12765*22dc650dSSadaf Ebrahimi cc = compile_close_matchingpath(common, cc);
12766*22dc650dSSadaf Ebrahimi break;
12767*22dc650dSSadaf Ebrahimi
/* OP_SKIPZERO: no code is emitted; cc moves to bracketend(cc + 1). */
12768*22dc650dSSadaf Ebrahimi case OP_SKIPZERO:
12769*22dc650dSSadaf Ebrahimi cc = bracketend(cc + 1);
12770*22dc650dSSadaf Ebrahimi break;
12771*22dc650dSSadaf Ebrahimi
12772*22dc650dSSadaf Ebrahimi default:
12773*22dc650dSSadaf Ebrahimi SLJIT_UNREACHABLE();
12774*22dc650dSSadaf Ebrahimi return;
12775*22dc650dSSadaf Ebrahimi }
/* A NULL position from a helper stops compilation of this sequence
(presumably signalling a compile failure - confirm against the helpers). */
12776*22dc650dSSadaf Ebrahimi if (cc == NULL)
12777*22dc650dSSadaf Ebrahimi return;
12778*22dc650dSSadaf Ebrahimi }
12779*22dc650dSSadaf Ebrahimi
/* Close the then-trap opened at the top: push a second then_trap_backtrack
that remembers the trap installed for this sequence, then restore the trap
that was active before. */
12780*22dc650dSSadaf Ebrahimi if (has_then_trap)
12781*22dc650dSSadaf Ebrahimi {
12782*22dc650dSSadaf Ebrahimi /* Head item on backtrack. */
12783*22dc650dSSadaf Ebrahimi PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
12784*22dc650dSSadaf Ebrahimi BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
12785*22dc650dSSadaf Ebrahimi BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
12786*22dc650dSSadaf Ebrahimi common->then_trap = save_then_trap;
12787*22dc650dSSadaf Ebrahimi }
12788*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(cc == ccend);
12789*22dc650dSSadaf Ebrahimi }
12790*22dc650dSSadaf Ebrahimi
/* The backtrack-pushing helper macros are removed here, so they are not
available to the code that follows. */
12791*22dc650dSSadaf Ebrahimi #undef PUSH_BACKTRACK
12792*22dc650dSSadaf Ebrahimi #undef PUSH_BACKTRACK_NOVALUE
12793*22dc650dSSadaf Ebrahimi #undef BACKTRACK_AS
12794*22dc650dSSadaf Ebrahimi
/* Calls compile_backtrackingpath() for `current` and returns from the
enclosing function if the sljit compiler reports an error afterwards. The
expansion uses a bare `return;`, so it can only be used inside functions
returning void, and it expects `common` and `compiler` to be in scope. */
12795*22dc650dSSadaf Ebrahimi #define COMPILE_BACKTRACKINGPATH(current) \
12796*22dc650dSSadaf Ebrahimi do \
12797*22dc650dSSadaf Ebrahimi { \
12798*22dc650dSSadaf Ebrahimi compile_backtrackingpath(common, (current)); \
12799*22dc650dSSadaf Ebrahimi if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
12800*22dc650dSSadaf Ebrahimi return; \
12801*22dc650dSSadaf Ebrahimi } \
12802*22dc650dSSadaf Ebrahimi while (0)
12803*22dc650dSSadaf Ebrahimi
/* Casts the variable named `current` at the expansion site to `type *`. */
12804*22dc650dSSadaf Ebrahimi #define CURRENT_AS(type) ((type *)current)
12805*22dc650dSSadaf Ebrahimi
/* Emits the backtracking-path code for a character iterator.

The iterator's opcode, character type and repeat parameters are decoded from
current->cc by get_iterator_parameters(), and the emitted code depends on the
decoded opcode. Every case that retries jumps back to the matchingpath label
stored in the char_iterator_backtrack view of "current". Finally, the jumps
collected in current->own_backtracks are bound to the label at the end of the
emitted code.

Arguments:
  common    the compiler state (also provides the sljit compiler)
  current   the backtrack record of the iterator being compiled
*/
compile_iterator_backtrackingpath(compiler_common * common,struct backtrack_common * current)12806*22dc650dSSadaf Ebrahimi static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12807*22dc650dSSadaf Ebrahimi {
12808*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
12809*22dc650dSSadaf Ebrahimi PCRE2_SPTR cc = current->cc;
12810*22dc650dSSadaf Ebrahimi PCRE2_UCHAR opcode;
12811*22dc650dSSadaf Ebrahimi PCRE2_UCHAR type;
12812*22dc650dSSadaf Ebrahimi sljit_u32 max = 0, exact;
12813*22dc650dSSadaf Ebrahimi struct sljit_label *label = NULL;
12814*22dc650dSSadaf Ebrahimi struct sljit_jump *jump = NULL;
12815*22dc650dSSadaf Ebrahimi jump_list *jumplist = NULL;
12816*22dc650dSSadaf Ebrahimi PCRE2_SPTR end;
12817*22dc650dSSadaf Ebrahimi int private_data_ptr = PRIVATE_DATA(cc);
/* The two state slots of the iterator: when no private data is assigned
(private_data_ptr == 0) they are STACK(0) and STACK(1) relative to STACK_TOP,
otherwise they are two consecutive words at private_data_ptr relative to
SLJIT_SP. */
12818*22dc650dSSadaf Ebrahimi int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
12819*22dc650dSSadaf Ebrahimi int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
12820*22dc650dSSadaf Ebrahimi int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + SSIZE_OF(sw);
12821*22dc650dSSadaf Ebrahimi
12822*22dc650dSSadaf Ebrahimi cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
12823*22dc650dSSadaf Ebrahimi
12824*22dc650dSSadaf Ebrahimi switch(opcode)
12825*22dc650dSSadaf Ebrahimi {
12826*22dc650dSSadaf Ebrahimi case OP_STAR:
12827*22dc650dSSadaf Ebrahimi case OP_UPTO:
12828*22dc650dSSadaf Ebrahimi if (type == OP_ANYNL || type == OP_EXTUNI)
12829*22dc650dSSadaf Ebrahimi {
/* These types always keep their state on the stack. The saved STR_PTR is
popped and matching is retried unless the popped value is zero. */
12830*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(private_data_ptr == 0);
12831*22dc650dSSadaf Ebrahimi set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
12832*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12833*22dc650dSSadaf Ebrahimi free_stack(common, 1);
12834*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12835*22dc650dSSadaf Ebrahimi }
12836*22dc650dSSadaf Ebrahimi else
12837*22dc650dSSadaf Ebrahimi {
12838*22dc650dSSadaf Ebrahimi if (CURRENT_AS(char_iterator_backtrack)->u.charpos.enabled)
12839*22dc650dSSadaf Ebrahimi {
/* Character-position variant: STR_PTR comes from slot 0 and the limit (TMP2)
from slot 1. STR_PTR is stepped back by IN_UCHARS(1) first; when it is not
above the limit, the whole search is skipped through "jump". */
12840*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12841*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, base, offset1);
12842*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
12843*22dc650dSSadaf Ebrahimi
12844*22dc650dSSadaf Ebrahimi jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
/* Loop head: the character before STR_PTR is loaded and the current STR_PTR
is written back to slot 0. When the loaded character (after OR-ing in
othercasebit, if any) equals charpos.chr, matching is retried. */
12845*22dc650dSSadaf Ebrahimi label = LABEL();
12846*22dc650dSSadaf Ebrahimi OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
12847*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12848*22dc650dSSadaf Ebrahimi if (CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit != 0)
12849*22dc650dSSadaf Ebrahimi OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit);
12850*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath);
/* No match at this position: move_back() is called (presumably stepping
STR_PTR back by one character - confirm in move_back) and the loop repeats
while STR_PTR is still above the limit. */
12851*22dc650dSSadaf Ebrahimi move_back(common, NULL, TRUE);
12852*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, label);
12853*22dc650dSSadaf Ebrahimi }
12854*22dc650dSSadaf Ebrahimi else
12855*22dc650dSSadaf Ebrahimi {
/* Generic variant: unless STR_PTR (slot 0) is already at or below the value
in slot 1, call move_back(), store the new STR_PTR and retry the matching
path unconditionally. */
12856*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12857*22dc650dSSadaf Ebrahimi jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
12858*22dc650dSSadaf Ebrahimi move_back(common, NULL, TRUE);
12859*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12860*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12861*22dc650dSSadaf Ebrahimi }
/* Both variants arrive here when nothing more can be given back. The two
stack slots are released only when the state was kept on the stack. */
12862*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
12863*22dc650dSSadaf Ebrahimi if (private_data_ptr == 0)
12864*22dc650dSSadaf Ebrahimi free_stack(common, 2);
12865*22dc650dSSadaf Ebrahimi }
12866*22dc650dSSadaf Ebrahimi break;
12867*22dc650dSSadaf Ebrahimi
12868*22dc650dSSadaf Ebrahimi case OP_MINSTAR:
/* Reload STR_PTR from slot 0, emit one more character match (failures are
collected in jumplist), store the advanced STR_PTR and retry. The failure
jumps land after the retry jump, where the stack slot is released. */
12869*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12870*22dc650dSSadaf Ebrahimi compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
12871*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12872*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12873*22dc650dSSadaf Ebrahimi set_jumps(jumplist, LABEL());
12874*22dc650dSSadaf Ebrahimi if (private_data_ptr == 0)
12875*22dc650dSSadaf Ebrahimi free_stack(common, 1);
12876*22dc650dSSadaf Ebrahimi break;
12877*22dc650dSSadaf Ebrahimi
12878*22dc650dSSadaf Ebrahimi case OP_MINUPTO:
/* Slot 1 holds a value that is decremented on every retry (presumably the
number of repeats still allowed - confirm in the matching path). When the
decrement reaches zero the code jumps straight to the failure label. */
12879*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, base, offset1);
12880*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12881*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
12882*22dc650dSSadaf Ebrahimi add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO));
12883*22dc650dSSadaf Ebrahimi
/* Otherwise the decremented value is stored back, one more character is
matched, the advanced STR_PTR is saved in slot 0 and matching is retried. */
12884*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, base, offset1, TMP1, 0);
12885*22dc650dSSadaf Ebrahimi compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
12886*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12887*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12888*22dc650dSSadaf Ebrahimi
12889*22dc650dSSadaf Ebrahimi set_jumps(jumplist, LABEL());
12890*22dc650dSSadaf Ebrahimi if (private_data_ptr == 0)
12891*22dc650dSSadaf Ebrahimi free_stack(common, 2);
12892*22dc650dSSadaf Ebrahimi break;
12893*22dc650dSSadaf Ebrahimi
12894*22dc650dSSadaf Ebrahimi case OP_QUERY:
/* Fall-in entry: take the saved STR_PTR from slot 0 and clear the slot. A
non-zero value means there is a position to retry from; zero skips to the
end through "jump". */
12895*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12896*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
12897*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12898*22dc650dSSadaf Ebrahimi jump = JUMP(SLJIT_JUMP);
/* Entry for the jumps recorded in u.backtracks: the same load-and-clear is
done, but the matching path is resumed unconditionally. */
12899*22dc650dSSadaf Ebrahimi set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
12900*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12901*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
12902*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12903*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
12904*22dc650dSSadaf Ebrahimi if (private_data_ptr == 0)
12905*22dc650dSSadaf Ebrahimi free_stack(common, 1);
12906*22dc650dSSadaf Ebrahimi break;
12907*22dc650dSSadaf Ebrahimi
12908*22dc650dSSadaf Ebrahimi case OP_MINQUERY:
/* Take the saved STR_PTR from slot 0 and clear the slot. A zero value skips
the retry; otherwise one character is matched and the matching path is
resumed. Match failures and the skip both end up at the same place. */
12909*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12910*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
12911*22dc650dSSadaf Ebrahimi jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
12912*22dc650dSSadaf Ebrahimi compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
12913*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12914*22dc650dSSadaf Ebrahimi set_jumps(jumplist, LABEL());
12915*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
12916*22dc650dSSadaf Ebrahimi if (private_data_ptr == 0)
12917*22dc650dSSadaf Ebrahimi free_stack(common, 1);
12918*22dc650dSSadaf Ebrahimi break;
12919*22dc650dSSadaf Ebrahimi
/* No backtracking code is emitted for these opcodes. */
12920*22dc650dSSadaf Ebrahimi case OP_EXACT:
12921*22dc650dSSadaf Ebrahimi case OP_POSSTAR:
12922*22dc650dSSadaf Ebrahimi case OP_POSQUERY:
12923*22dc650dSSadaf Ebrahimi case OP_POSUPTO:
12924*22dc650dSSadaf Ebrahimi break;
12925*22dc650dSSadaf Ebrahimi
12926*22dc650dSSadaf Ebrahimi default:
12927*22dc650dSSadaf Ebrahimi SLJIT_UNREACHABLE();
12928*22dc650dSSadaf Ebrahimi break;
12929*22dc650dSSadaf Ebrahimi }
12930*22dc650dSSadaf Ebrahimi
/* Jumps collected in own_backtracks continue after everything emitted
above. */
12931*22dc650dSSadaf Ebrahimi set_jumps(current->own_backtracks, LABEL());
12932*22dc650dSSadaf Ebrahimi }
12933*22dc650dSSadaf Ebrahimi
compile_ref_iterator_backtrackingpath(compiler_common * common,struct backtrack_common * current)12934*22dc650dSSadaf Ebrahimi static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12935*22dc650dSSadaf Ebrahimi {
12936*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
12937*22dc650dSSadaf Ebrahimi PCRE2_SPTR cc = current->cc;
12938*22dc650dSSadaf Ebrahimi BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
12939*22dc650dSSadaf Ebrahimi PCRE2_UCHAR type;
12940*22dc650dSSadaf Ebrahimi
12941*22dc650dSSadaf Ebrahimi type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
12942*22dc650dSSadaf Ebrahimi
12943*22dc650dSSadaf Ebrahimi if ((type & 0x1) == 0)
12944*22dc650dSSadaf Ebrahimi {
12945*22dc650dSSadaf Ebrahimi /* Maximize case. */
12946*22dc650dSSadaf Ebrahimi set_jumps(current->own_backtracks, LABEL());
12947*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12948*22dc650dSSadaf Ebrahimi free_stack(common, 1);
12949*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12950*22dc650dSSadaf Ebrahimi return;
12951*22dc650dSSadaf Ebrahimi }
12952*22dc650dSSadaf Ebrahimi
12953*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12954*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12955*22dc650dSSadaf Ebrahimi set_jumps(current->own_backtracks, LABEL());
12956*22dc650dSSadaf Ebrahimi free_stack(common, ref ? 2 : 3);
12957*22dc650dSSadaf Ebrahimi }
12958*22dc650dSSadaf Ebrahimi
compile_recurse_backtrackingpath(compiler_common * common,struct backtrack_common * current)12959*22dc650dSSadaf Ebrahimi static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12960*22dc650dSSadaf Ebrahimi {
12961*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
12962*22dc650dSSadaf Ebrahimi recurse_entry *entry;
12963*22dc650dSSadaf Ebrahimi
12964*22dc650dSSadaf Ebrahimi if (!CURRENT_AS(recurse_backtrack)->inlined_pattern)
12965*22dc650dSSadaf Ebrahimi {
12966*22dc650dSSadaf Ebrahimi entry = CURRENT_AS(recurse_backtrack)->entry;
12967*22dc650dSSadaf Ebrahimi if (entry->backtrack_label == NULL)
12968*22dc650dSSadaf Ebrahimi add_jump(compiler, &entry->backtrack_calls, JUMP(SLJIT_FAST_CALL));
12969*22dc650dSSadaf Ebrahimi else
12970*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_FAST_CALL, entry->backtrack_label);
12971*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(recurse_backtrack)->matchingpath);
12972*22dc650dSSadaf Ebrahimi }
12973*22dc650dSSadaf Ebrahimi else
12974*22dc650dSSadaf Ebrahimi compile_backtrackingpath(common, current->top);
12975*22dc650dSSadaf Ebrahimi
12976*22dc650dSSadaf Ebrahimi set_jumps(current->own_backtracks, LABEL());
12977*22dc650dSSadaf Ebrahimi }
12978*22dc650dSSadaf Ebrahimi
/* Emits the backtracking-path code for an assertion, which may be preceded
by OP_BRAZERO (OP_BRAMINZERO is asserted not to occur).

The emitted code depends on three things read below: whether the assertion
is preceded by OP_BRAZERO, whether the framesize stored in the
assert_backtrack view of "current" is negative, and whether the assertion
opcode is positive (OP_ASSERT / OP_ASSERTBACK) or negative
(OP_ASSERT_NOT / OP_ASSERTBACK_NOT).

Arguments:
  common    the compiler state (also provides the sljit compiler)
  current   the backtrack record of the assertion
*/
compile_assert_backtrackingpath(compiler_common * common,struct backtrack_common * current)12979*22dc650dSSadaf Ebrahimi static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12980*22dc650dSSadaf Ebrahimi {
12981*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
12982*22dc650dSSadaf Ebrahimi PCRE2_SPTR cc = current->cc;
12983*22dc650dSSadaf Ebrahimi PCRE2_UCHAR bra = OP_BRA;
12984*22dc650dSSadaf Ebrahimi struct sljit_jump *brajump = NULL;
12985*22dc650dSSadaf Ebrahimi
/* Step over a leading OP_BRAZERO so that cc points at the assertion opcode;
"bra" remembers whether one was present. */
12986*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(*cc != OP_BRAMINZERO);
12987*22dc650dSSadaf Ebrahimi if (*cc == OP_BRAZERO)
12988*22dc650dSSadaf Ebrahimi {
12989*22dc650dSSadaf Ebrahimi bra = *cc;
12990*22dc650dSSadaf Ebrahimi cc++;
12991*22dc650dSSadaf Ebrahimi }
12992*22dc650dSSadaf Ebrahimi
/* With OP_BRAZERO the saved STR_PTR is read from the top stack slot (the
slot itself is not released yet). */
12993*22dc650dSSadaf Ebrahimi if (bra == OP_BRAZERO)
12994*22dc650dSSadaf Ebrahimi {
12995*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(current->own_backtracks == NULL);
12996*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12997*22dc650dSSadaf Ebrahimi }
12998*22dc650dSSadaf Ebrahimi
/* Negative framesize: there is no frame to revert. Own backtracks are bound
here; with OP_BRAZERO the top slot is cleared and the matching path is
retried if the saved STR_PTR was non-zero, otherwise the slot is released. */
12999*22dc650dSSadaf Ebrahimi if (CURRENT_AS(assert_backtrack)->framesize < 0)
13000*22dc650dSSadaf Ebrahimi {
13001*22dc650dSSadaf Ebrahimi set_jumps(current->own_backtracks, LABEL());
13002*22dc650dSSadaf Ebrahimi
13003*22dc650dSSadaf Ebrahimi if (bra == OP_BRAZERO)
13004*22dc650dSSadaf Ebrahimi {
13005*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
13006*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
13007*22dc650dSSadaf Ebrahimi free_stack(common, 1);
13008*22dc650dSSadaf Ebrahimi }
13009*22dc650dSSadaf Ebrahimi return;
13010*22dc650dSSadaf Ebrahimi }
13011*22dc650dSSadaf Ebrahimi
13012*22dc650dSSadaf Ebrahimi if (bra == OP_BRAZERO)
13013*22dc650dSSadaf Ebrahimi {
/* Negative assertions with OP_BRAZERO use the same clear / retry / release
sequence as above and need nothing else. */
13014*22dc650dSSadaf Ebrahimi if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
13015*22dc650dSSadaf Ebrahimi {
13016*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
13017*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
13018*22dc650dSSadaf Ebrahimi free_stack(common, 1);
13019*22dc650dSSadaf Ebrahimi return;
13020*22dc650dSSadaf Ebrahimi }
/* Otherwise the slot is released now, and a zero saved STR_PTR skips the
rest of the emitted code through brajump (bound at the very end). */
13021*22dc650dSSadaf Ebrahimi free_stack(common, 1);
13022*22dc650dSSadaf Ebrahimi brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
13023*22dc650dSSadaf Ebrahimi }
13024*22dc650dSSadaf Ebrahimi
13025*22dc650dSSadaf Ebrahimi if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
13026*22dc650dSSadaf Ebrahimi {
/* Positive assertion with a frame: STACK_TOP is reloaded from the private
data slot, the shared revertframes routine is called, and then the value at
STACK(-2) is written back to the private data slot after STACK_TOP has been
advanced by (framesize - 1) words. */
13027*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr);
13028*22dc650dSSadaf Ebrahimi add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13029*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
13030*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(assert_backtrack)->framesize - 1) * sizeof(sljit_sw));
13031*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, TMP1, 0);
13032*22dc650dSSadaf Ebrahimi
13033*22dc650dSSadaf Ebrahimi set_jumps(current->own_backtracks, LABEL());
13034*22dc650dSSadaf Ebrahimi }
13035*22dc650dSSadaf Ebrahimi else
13036*22dc650dSSadaf Ebrahimi set_jumps(current->own_backtracks, LABEL());
13037*22dc650dSSadaf Ebrahimi
/* With OP_BRAZERO one stack slot is taken back by adjusting STACK_TOP
directly, a zero is stored in it, and the matching path is resumed. The
brajump emitted earlier lands after this sequence. */
13038*22dc650dSSadaf Ebrahimi if (bra == OP_BRAZERO)
13039*22dc650dSSadaf Ebrahimi {
13040*22dc650dSSadaf Ebrahimi /* We know there is enough place on the stack. */
13041*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
13042*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
13043*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath);
13044*22dc650dSSadaf Ebrahimi JUMPHERE(brajump);
13045*22dc650dSSadaf Ebrahimi }
13046*22dc650dSSadaf Ebrahimi }
13047*22dc650dSSadaf Ebrahimi
compile_bracket_backtrackingpath(compiler_common * common,struct backtrack_common * current)13048*22dc650dSSadaf Ebrahimi static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13049*22dc650dSSadaf Ebrahimi {
13050*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
13051*22dc650dSSadaf Ebrahimi int opcode, stacksize, alt_count, alt_max;
13052*22dc650dSSadaf Ebrahimi int offset = 0;
13053*22dc650dSSadaf Ebrahimi int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
13054*22dc650dSSadaf Ebrahimi int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
13055*22dc650dSSadaf Ebrahimi PCRE2_SPTR cc = current->cc;
13056*22dc650dSSadaf Ebrahimi PCRE2_SPTR ccbegin;
13057*22dc650dSSadaf Ebrahimi PCRE2_SPTR ccprev;
13058*22dc650dSSadaf Ebrahimi PCRE2_UCHAR bra = OP_BRA;
13059*22dc650dSSadaf Ebrahimi PCRE2_UCHAR ket;
13060*22dc650dSSadaf Ebrahimi assert_backtrack *assert;
13061*22dc650dSSadaf Ebrahimi BOOL has_alternatives;
13062*22dc650dSSadaf Ebrahimi BOOL needs_control_head = FALSE;
13063*22dc650dSSadaf Ebrahimi BOOL has_vreverse;
13064*22dc650dSSadaf Ebrahimi struct sljit_jump *brazero = NULL;
13065*22dc650dSSadaf Ebrahimi struct sljit_jump *next_alt = NULL;
13066*22dc650dSSadaf Ebrahimi struct sljit_jump *once = NULL;
13067*22dc650dSSadaf Ebrahimi struct sljit_jump *cond = NULL;
13068*22dc650dSSadaf Ebrahimi struct sljit_label *rmin_label = NULL;
13069*22dc650dSSadaf Ebrahimi struct sljit_label *exact_label = NULL;
13070*22dc650dSSadaf Ebrahimi struct sljit_jump *mov_addr = NULL;
13071*22dc650dSSadaf Ebrahimi
13072*22dc650dSSadaf Ebrahimi if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
13073*22dc650dSSadaf Ebrahimi {
13074*22dc650dSSadaf Ebrahimi bra = *cc;
13075*22dc650dSSadaf Ebrahimi cc++;
13076*22dc650dSSadaf Ebrahimi }
13077*22dc650dSSadaf Ebrahimi
13078*22dc650dSSadaf Ebrahimi opcode = *cc;
13079*22dc650dSSadaf Ebrahimi ccbegin = bracketend(cc) - 1 - LINK_SIZE;
13080*22dc650dSSadaf Ebrahimi ket = *ccbegin;
13081*22dc650dSSadaf Ebrahimi if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
13082*22dc650dSSadaf Ebrahimi {
13083*22dc650dSSadaf Ebrahimi repeat_ptr = PRIVATE_DATA(ccbegin);
13084*22dc650dSSadaf Ebrahimi repeat_type = PRIVATE_DATA(ccbegin + 2);
13085*22dc650dSSadaf Ebrahimi repeat_count = PRIVATE_DATA(ccbegin + 3);
13086*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
13087*22dc650dSSadaf Ebrahimi if (repeat_type == OP_UPTO)
13088*22dc650dSSadaf Ebrahimi ket = OP_KETRMAX;
13089*22dc650dSSadaf Ebrahimi if (repeat_type == OP_MINUPTO)
13090*22dc650dSSadaf Ebrahimi ket = OP_KETRMIN;
13091*22dc650dSSadaf Ebrahimi }
13092*22dc650dSSadaf Ebrahimi ccbegin = cc;
13093*22dc650dSSadaf Ebrahimi cc += GET(cc, 1);
13094*22dc650dSSadaf Ebrahimi has_alternatives = *cc == OP_ALT;
13095*22dc650dSSadaf Ebrahimi if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
13096*22dc650dSSadaf Ebrahimi has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
13097*22dc650dSSadaf Ebrahimi if (opcode == OP_CBRA || opcode == OP_SCBRA)
13098*22dc650dSSadaf Ebrahimi offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
13099*22dc650dSSadaf Ebrahimi if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
13100*22dc650dSSadaf Ebrahimi opcode = OP_SCOND;
13101*22dc650dSSadaf Ebrahimi
13102*22dc650dSSadaf Ebrahimi alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;
13103*22dc650dSSadaf Ebrahimi
13104*22dc650dSSadaf Ebrahimi /* Decoding the needs_control_head in framesize. */
13105*22dc650dSSadaf Ebrahimi if (opcode == OP_ONCE)
13106*22dc650dSSadaf Ebrahimi {
13107*22dc650dSSadaf Ebrahimi needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
13108*22dc650dSSadaf Ebrahimi CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
13109*22dc650dSSadaf Ebrahimi }
13110*22dc650dSSadaf Ebrahimi
13111*22dc650dSSadaf Ebrahimi if (ket != OP_KET && repeat_type != 0)
13112*22dc650dSSadaf Ebrahimi {
13113*22dc650dSSadaf Ebrahimi /* TMP1 is used in OP_KETRMIN below. */
13114*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13115*22dc650dSSadaf Ebrahimi free_stack(common, 1);
13116*22dc650dSSadaf Ebrahimi if (repeat_type == OP_UPTO)
13117*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
13118*22dc650dSSadaf Ebrahimi else
13119*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
13120*22dc650dSSadaf Ebrahimi }
13121*22dc650dSSadaf Ebrahimi
13122*22dc650dSSadaf Ebrahimi if (ket == OP_KETRMAX)
13123*22dc650dSSadaf Ebrahimi {
13124*22dc650dSSadaf Ebrahimi if (bra == OP_BRAZERO)
13125*22dc650dSSadaf Ebrahimi {
13126*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13127*22dc650dSSadaf Ebrahimi free_stack(common, 1);
13128*22dc650dSSadaf Ebrahimi brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
13129*22dc650dSSadaf Ebrahimi }
13130*22dc650dSSadaf Ebrahimi }
13131*22dc650dSSadaf Ebrahimi else if (ket == OP_KETRMIN)
13132*22dc650dSSadaf Ebrahimi {
13133*22dc650dSSadaf Ebrahimi if (bra != OP_BRAMINZERO)
13134*22dc650dSSadaf Ebrahimi {
13135*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13136*22dc650dSSadaf Ebrahimi if (repeat_type != 0)
13137*22dc650dSSadaf Ebrahimi {
13138*22dc650dSSadaf Ebrahimi /* TMP1 was set a few lines above. */
13139*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
13140*22dc650dSSadaf Ebrahimi /* Drop STR_PTR for non-greedy plus quantifier. */
13141*22dc650dSSadaf Ebrahimi if (opcode != OP_ONCE)
13142*22dc650dSSadaf Ebrahimi free_stack(common, 1);
13143*22dc650dSSadaf Ebrahimi }
13144*22dc650dSSadaf Ebrahimi else if (opcode >= OP_SBRA || opcode == OP_ONCE)
13145*22dc650dSSadaf Ebrahimi {
13146*22dc650dSSadaf Ebrahimi /* Checking zero-length iteration. */
13147*22dc650dSSadaf Ebrahimi if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
13148*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
13149*22dc650dSSadaf Ebrahimi else
13150*22dc650dSSadaf Ebrahimi {
13151*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
13152*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
13153*22dc650dSSadaf Ebrahimi }
13154*22dc650dSSadaf Ebrahimi /* Drop STR_PTR for non-greedy plus quantifier. */
13155*22dc650dSSadaf Ebrahimi if (opcode != OP_ONCE)
13156*22dc650dSSadaf Ebrahimi free_stack(common, 1);
13157*22dc650dSSadaf Ebrahimi }
13158*22dc650dSSadaf Ebrahimi else
13159*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
13160*22dc650dSSadaf Ebrahimi }
13161*22dc650dSSadaf Ebrahimi rmin_label = LABEL();
13162*22dc650dSSadaf Ebrahimi if (repeat_type != 0)
13163*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
13164*22dc650dSSadaf Ebrahimi }
13165*22dc650dSSadaf Ebrahimi else if (bra == OP_BRAZERO)
13166*22dc650dSSadaf Ebrahimi {
13167*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13168*22dc650dSSadaf Ebrahimi free_stack(common, 1);
13169*22dc650dSSadaf Ebrahimi brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
13170*22dc650dSSadaf Ebrahimi }
13171*22dc650dSSadaf Ebrahimi else if (repeat_type == OP_EXACT)
13172*22dc650dSSadaf Ebrahimi {
13173*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
13174*22dc650dSSadaf Ebrahimi exact_label = LABEL();
13175*22dc650dSSadaf Ebrahimi }
13176*22dc650dSSadaf Ebrahimi
13177*22dc650dSSadaf Ebrahimi if (offset != 0)
13178*22dc650dSSadaf Ebrahimi {
13179*22dc650dSSadaf Ebrahimi if (common->capture_last_ptr != 0)
13180*22dc650dSSadaf Ebrahimi {
13181*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
13182*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13183*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13184*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
13185*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
13186*22dc650dSSadaf Ebrahimi free_stack(common, 3);
13187*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
13188*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
13189*22dc650dSSadaf Ebrahimi }
13190*22dc650dSSadaf Ebrahimi else if (common->optimized_cbracket[offset >> 1] == 0)
13191*22dc650dSSadaf Ebrahimi {
13192*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13193*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13194*22dc650dSSadaf Ebrahimi free_stack(common, 2);
13195*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
13196*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
13197*22dc650dSSadaf Ebrahimi }
13198*22dc650dSSadaf Ebrahimi }
13199*22dc650dSSadaf Ebrahimi
13200*22dc650dSSadaf Ebrahimi if (SLJIT_UNLIKELY(opcode == OP_ONCE))
13201*22dc650dSSadaf Ebrahimi {
13202*22dc650dSSadaf Ebrahimi if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
13203*22dc650dSSadaf Ebrahimi {
13204*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
13205*22dc650dSSadaf Ebrahimi add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13206*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
13207*22dc650dSSadaf Ebrahimi }
13208*22dc650dSSadaf Ebrahimi once = JUMP(SLJIT_JUMP);
13209*22dc650dSSadaf Ebrahimi }
13210*22dc650dSSadaf Ebrahimi else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
13211*22dc650dSSadaf Ebrahimi {
13212*22dc650dSSadaf Ebrahimi if (has_alternatives)
13213*22dc650dSSadaf Ebrahimi {
13214*22dc650dSSadaf Ebrahimi /* Always exactly one alternative. */
13215*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13216*22dc650dSSadaf Ebrahimi free_stack(common, 1);
13217*22dc650dSSadaf Ebrahimi
13218*22dc650dSSadaf Ebrahimi alt_max = 2;
13219*22dc650dSSadaf Ebrahimi next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
13220*22dc650dSSadaf Ebrahimi }
13221*22dc650dSSadaf Ebrahimi }
13222*22dc650dSSadaf Ebrahimi else if (has_alternatives)
13223*22dc650dSSadaf Ebrahimi {
13224*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13225*22dc650dSSadaf Ebrahimi free_stack(common, 1);
13226*22dc650dSSadaf Ebrahimi
13227*22dc650dSSadaf Ebrahimi if (alt_max > 3)
13228*22dc650dSSadaf Ebrahimi {
13229*22dc650dSSadaf Ebrahimi sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
13230*22dc650dSSadaf Ebrahimi
13231*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(CURRENT_AS(bracket_backtrack)->u.matching_mov_addr);
13232*22dc650dSSadaf Ebrahimi sljit_set_label(CURRENT_AS(bracket_backtrack)->u.matching_mov_addr, LABEL());
13233*22dc650dSSadaf Ebrahimi sljit_emit_op0(compiler, SLJIT_ENDBR);
13234*22dc650dSSadaf Ebrahimi }
13235*22dc650dSSadaf Ebrahimi else
13236*22dc650dSSadaf Ebrahimi next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
13237*22dc650dSSadaf Ebrahimi }
13238*22dc650dSSadaf Ebrahimi
13239*22dc650dSSadaf Ebrahimi COMPILE_BACKTRACKINGPATH(current->top);
13240*22dc650dSSadaf Ebrahimi if (current->own_backtracks)
13241*22dc650dSSadaf Ebrahimi set_jumps(current->own_backtracks, LABEL());
13242*22dc650dSSadaf Ebrahimi
13243*22dc650dSSadaf Ebrahimi if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
13244*22dc650dSSadaf Ebrahimi {
13245*22dc650dSSadaf Ebrahimi /* Conditional block always has at most one alternative. */
13246*22dc650dSSadaf Ebrahimi if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
13247*22dc650dSSadaf Ebrahimi {
13248*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(has_alternatives);
13249*22dc650dSSadaf Ebrahimi assert = CURRENT_AS(bracket_backtrack)->u.assert;
13250*22dc650dSSadaf Ebrahimi if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
13251*22dc650dSSadaf Ebrahimi {
13252*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
13253*22dc650dSSadaf Ebrahimi add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13254*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
13255*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
13256*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
13257*22dc650dSSadaf Ebrahimi }
13258*22dc650dSSadaf Ebrahimi cond = JUMP(SLJIT_JUMP);
13259*22dc650dSSadaf Ebrahimi set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
13260*22dc650dSSadaf Ebrahimi }
13261*22dc650dSSadaf Ebrahimi else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
13262*22dc650dSSadaf Ebrahimi {
13263*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(has_alternatives);
13264*22dc650dSSadaf Ebrahimi cond = JUMP(SLJIT_JUMP);
13265*22dc650dSSadaf Ebrahimi set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
13266*22dc650dSSadaf Ebrahimi }
13267*22dc650dSSadaf Ebrahimi else
13268*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(!has_alternatives);
13269*22dc650dSSadaf Ebrahimi }
13270*22dc650dSSadaf Ebrahimi
13271*22dc650dSSadaf Ebrahimi if (has_alternatives)
13272*22dc650dSSadaf Ebrahimi {
13273*22dc650dSSadaf Ebrahimi alt_count = 1;
13274*22dc650dSSadaf Ebrahimi do
13275*22dc650dSSadaf Ebrahimi {
13276*22dc650dSSadaf Ebrahimi current->top = NULL;
13277*22dc650dSSadaf Ebrahimi current->own_backtracks = NULL;
13278*22dc650dSSadaf Ebrahimi current->simple_backtracks = NULL;
13279*22dc650dSSadaf Ebrahimi /* Conditional blocks always have an additional alternative, even if it is empty. */
13280*22dc650dSSadaf Ebrahimi if (*cc == OP_ALT)
13281*22dc650dSSadaf Ebrahimi {
13282*22dc650dSSadaf Ebrahimi ccprev = cc + 1 + LINK_SIZE;
13283*22dc650dSSadaf Ebrahimi cc += GET(cc, 1);
13284*22dc650dSSadaf Ebrahimi
13285*22dc650dSSadaf Ebrahimi has_vreverse = FALSE;
13286*22dc650dSSadaf Ebrahimi if (opcode == OP_ASSERTBACK || opcode == OP_ASSERTBACK_NA)
13287*22dc650dSSadaf Ebrahimi {
13288*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(private_data_ptr != 0);
13289*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
13290*22dc650dSSadaf Ebrahimi
13291*22dc650dSSadaf Ebrahimi has_vreverse = (*ccprev == OP_VREVERSE);
13292*22dc650dSSadaf Ebrahimi if (*ccprev == OP_REVERSE || has_vreverse)
13293*22dc650dSSadaf Ebrahimi ccprev = compile_reverse_matchingpath(common, ccprev, current);
13294*22dc650dSSadaf Ebrahimi }
13295*22dc650dSSadaf Ebrahimi else if (opcode != OP_COND && opcode != OP_SCOND)
13296*22dc650dSSadaf Ebrahimi {
13297*22dc650dSSadaf Ebrahimi if (opcode != OP_ONCE)
13298*22dc650dSSadaf Ebrahimi {
13299*22dc650dSSadaf Ebrahimi if (private_data_ptr != 0)
13300*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
13301*22dc650dSSadaf Ebrahimi else
13302*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13303*22dc650dSSadaf Ebrahimi }
13304*22dc650dSSadaf Ebrahimi else
13305*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
13306*22dc650dSSadaf Ebrahimi }
13307*22dc650dSSadaf Ebrahimi
13308*22dc650dSSadaf Ebrahimi compile_matchingpath(common, ccprev, cc, current);
13309*22dc650dSSadaf Ebrahimi if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13310*22dc650dSSadaf Ebrahimi return;
13311*22dc650dSSadaf Ebrahimi
13312*22dc650dSSadaf Ebrahimi switch (opcode)
13313*22dc650dSSadaf Ebrahimi {
13314*22dc650dSSadaf Ebrahimi case OP_ASSERTBACK_NA:
13315*22dc650dSSadaf Ebrahimi if (has_vreverse)
13316*22dc650dSSadaf Ebrahimi {
13317*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(current->top != NULL && PRIVATE_DATA(ccbegin + 1));
13318*22dc650dSSadaf Ebrahimi add_jump(compiler, &current->top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
13319*22dc650dSSadaf Ebrahimi }
13320*22dc650dSSadaf Ebrahimi
13321*22dc650dSSadaf Ebrahimi if (PRIVATE_DATA(ccbegin + 1))
13322*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
13323*22dc650dSSadaf Ebrahimi break;
13324*22dc650dSSadaf Ebrahimi case OP_ASSERT_NA:
13325*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
13326*22dc650dSSadaf Ebrahimi break;
13327*22dc650dSSadaf Ebrahimi case OP_SCRIPT_RUN:
13328*22dc650dSSadaf Ebrahimi match_script_run_common(common, private_data_ptr, current);
13329*22dc650dSSadaf Ebrahimi break;
13330*22dc650dSSadaf Ebrahimi }
13331*22dc650dSSadaf Ebrahimi }
13332*22dc650dSSadaf Ebrahimi
13333*22dc650dSSadaf Ebrahimi /* Instructions after the current alternative is successfully matched. */
13334*22dc650dSSadaf Ebrahimi /* There is a similar code in compile_bracket_matchingpath. */
13335*22dc650dSSadaf Ebrahimi if (opcode == OP_ONCE)
13336*22dc650dSSadaf Ebrahimi match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
13337*22dc650dSSadaf Ebrahimi
13338*22dc650dSSadaf Ebrahimi stacksize = 0;
13339*22dc650dSSadaf Ebrahimi if (repeat_type == OP_MINUPTO)
13340*22dc650dSSadaf Ebrahimi {
13341*22dc650dSSadaf Ebrahimi /* We need to preserve the counter. TMP2 will be used below. */
13342*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
13343*22dc650dSSadaf Ebrahimi stacksize++;
13344*22dc650dSSadaf Ebrahimi }
13345*22dc650dSSadaf Ebrahimi if (ket != OP_KET || bra != OP_BRA)
13346*22dc650dSSadaf Ebrahimi stacksize++;
13347*22dc650dSSadaf Ebrahimi if (offset != 0)
13348*22dc650dSSadaf Ebrahimi {
13349*22dc650dSSadaf Ebrahimi if (common->capture_last_ptr != 0)
13350*22dc650dSSadaf Ebrahimi stacksize++;
13351*22dc650dSSadaf Ebrahimi if (common->optimized_cbracket[offset >> 1] == 0)
13352*22dc650dSSadaf Ebrahimi stacksize += 2;
13353*22dc650dSSadaf Ebrahimi }
13354*22dc650dSSadaf Ebrahimi if (opcode != OP_ONCE)
13355*22dc650dSSadaf Ebrahimi stacksize++;
13356*22dc650dSSadaf Ebrahimi
13357*22dc650dSSadaf Ebrahimi if (stacksize > 0)
13358*22dc650dSSadaf Ebrahimi allocate_stack(common, stacksize);
13359*22dc650dSSadaf Ebrahimi
13360*22dc650dSSadaf Ebrahimi stacksize = 0;
13361*22dc650dSSadaf Ebrahimi if (repeat_type == OP_MINUPTO)
13362*22dc650dSSadaf Ebrahimi {
13363*22dc650dSSadaf Ebrahimi /* TMP2 was set above. */
13364*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
13365*22dc650dSSadaf Ebrahimi stacksize++;
13366*22dc650dSSadaf Ebrahimi }
13367*22dc650dSSadaf Ebrahimi
13368*22dc650dSSadaf Ebrahimi if (ket != OP_KET || bra != OP_BRA)
13369*22dc650dSSadaf Ebrahimi {
13370*22dc650dSSadaf Ebrahimi if (ket != OP_KET)
13371*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
13372*22dc650dSSadaf Ebrahimi else
13373*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
13374*22dc650dSSadaf Ebrahimi stacksize++;
13375*22dc650dSSadaf Ebrahimi }
13376*22dc650dSSadaf Ebrahimi
13377*22dc650dSSadaf Ebrahimi if (offset != 0)
13378*22dc650dSSadaf Ebrahimi stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
13379*22dc650dSSadaf Ebrahimi
13380*22dc650dSSadaf Ebrahimi if (opcode != OP_ONCE)
13381*22dc650dSSadaf Ebrahimi {
13382*22dc650dSSadaf Ebrahimi if (alt_max <= 3)
13383*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
13384*22dc650dSSadaf Ebrahimi else
13385*22dc650dSSadaf Ebrahimi mov_addr = sljit_emit_mov_addr(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
13386*22dc650dSSadaf Ebrahimi }
13387*22dc650dSSadaf Ebrahimi
13388*22dc650dSSadaf Ebrahimi if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
13389*22dc650dSSadaf Ebrahimi {
13390*22dc650dSSadaf Ebrahimi /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
13391*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
13392*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
13393*22dc650dSSadaf Ebrahimi }
13394*22dc650dSSadaf Ebrahimi
13395*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);
13396*22dc650dSSadaf Ebrahimi
13397*22dc650dSSadaf Ebrahimi if (opcode != OP_ONCE)
13398*22dc650dSSadaf Ebrahimi {
13399*22dc650dSSadaf Ebrahimi if (alt_max <= 3)
13400*22dc650dSSadaf Ebrahimi {
13401*22dc650dSSadaf Ebrahimi JUMPHERE(next_alt);
13402*22dc650dSSadaf Ebrahimi alt_count++;
13403*22dc650dSSadaf Ebrahimi if (alt_count < alt_max)
13404*22dc650dSSadaf Ebrahimi {
13405*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(alt_count == 2 && alt_max == 3);
13406*22dc650dSSadaf Ebrahimi next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
13407*22dc650dSSadaf Ebrahimi }
13408*22dc650dSSadaf Ebrahimi }
13409*22dc650dSSadaf Ebrahimi else
13410*22dc650dSSadaf Ebrahimi {
13411*22dc650dSSadaf Ebrahimi sljit_set_label(mov_addr, LABEL());
13412*22dc650dSSadaf Ebrahimi sljit_emit_op0(compiler, SLJIT_ENDBR);
13413*22dc650dSSadaf Ebrahimi }
13414*22dc650dSSadaf Ebrahimi }
13415*22dc650dSSadaf Ebrahimi
13416*22dc650dSSadaf Ebrahimi COMPILE_BACKTRACKINGPATH(current->top);
13417*22dc650dSSadaf Ebrahimi if (current->own_backtracks)
13418*22dc650dSSadaf Ebrahimi set_jumps(current->own_backtracks, LABEL());
13419*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(!current->simple_backtracks);
13420*22dc650dSSadaf Ebrahimi }
13421*22dc650dSSadaf Ebrahimi while (*cc == OP_ALT);
13422*22dc650dSSadaf Ebrahimi
13423*22dc650dSSadaf Ebrahimi if (cond != NULL)
13424*22dc650dSSadaf Ebrahimi {
13425*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
13426*22dc650dSSadaf Ebrahimi assert = CURRENT_AS(bracket_backtrack)->u.assert;
13427*22dc650dSSadaf Ebrahimi if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
13428*22dc650dSSadaf Ebrahimi {
13429*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
13430*22dc650dSSadaf Ebrahimi add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13431*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
13432*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
13433*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
13434*22dc650dSSadaf Ebrahimi }
13435*22dc650dSSadaf Ebrahimi JUMPHERE(cond);
13436*22dc650dSSadaf Ebrahimi }
13437*22dc650dSSadaf Ebrahimi
13438*22dc650dSSadaf Ebrahimi /* Free the STR_PTR. */
13439*22dc650dSSadaf Ebrahimi if (private_data_ptr == 0)
13440*22dc650dSSadaf Ebrahimi free_stack(common, 1);
13441*22dc650dSSadaf Ebrahimi }
13442*22dc650dSSadaf Ebrahimi
13443*22dc650dSSadaf Ebrahimi if (offset != 0)
13444*22dc650dSSadaf Ebrahimi {
13445*22dc650dSSadaf Ebrahimi /* Using both tmp register is better for instruction scheduling. */
13446*22dc650dSSadaf Ebrahimi if (common->optimized_cbracket[offset >> 1] != 0)
13447*22dc650dSSadaf Ebrahimi {
13448*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13449*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13450*22dc650dSSadaf Ebrahimi free_stack(common, 2);
13451*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
13452*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
13453*22dc650dSSadaf Ebrahimi }
13454*22dc650dSSadaf Ebrahimi else
13455*22dc650dSSadaf Ebrahimi {
13456*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13457*22dc650dSSadaf Ebrahimi free_stack(common, 1);
13458*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
13459*22dc650dSSadaf Ebrahimi }
13460*22dc650dSSadaf Ebrahimi }
13461*22dc650dSSadaf Ebrahimi else if (opcode == OP_ASSERTBACK_NA && PRIVATE_DATA(ccbegin + 1))
13462*22dc650dSSadaf Ebrahimi {
13463*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13464*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13465*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
13466*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
13467*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP2, 0);
13468*22dc650dSSadaf Ebrahimi free_stack(common, 4);
13469*22dc650dSSadaf Ebrahimi }
13470*22dc650dSSadaf Ebrahimi else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
13471*22dc650dSSadaf Ebrahimi {
13472*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
13473*22dc650dSSadaf Ebrahimi free_stack(common, 1);
13474*22dc650dSSadaf Ebrahimi }
13475*22dc650dSSadaf Ebrahimi else if (opcode == OP_ONCE)
13476*22dc650dSSadaf Ebrahimi {
13477*22dc650dSSadaf Ebrahimi cc = ccbegin + GET(ccbegin, 1);
13478*22dc650dSSadaf Ebrahimi stacksize = needs_control_head ? 1 : 0;
13479*22dc650dSSadaf Ebrahimi
13480*22dc650dSSadaf Ebrahimi if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
13481*22dc650dSSadaf Ebrahimi {
13482*22dc650dSSadaf Ebrahimi /* Reset head and drop saved frame. */
13483*22dc650dSSadaf Ebrahimi stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
13484*22dc650dSSadaf Ebrahimi }
13485*22dc650dSSadaf Ebrahimi else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
13486*22dc650dSSadaf Ebrahimi {
13487*22dc650dSSadaf Ebrahimi /* The STR_PTR must be released. */
13488*22dc650dSSadaf Ebrahimi stacksize++;
13489*22dc650dSSadaf Ebrahimi }
13490*22dc650dSSadaf Ebrahimi
13491*22dc650dSSadaf Ebrahimi if (stacksize > 0)
13492*22dc650dSSadaf Ebrahimi free_stack(common, stacksize);
13493*22dc650dSSadaf Ebrahimi
13494*22dc650dSSadaf Ebrahimi JUMPHERE(once);
13495*22dc650dSSadaf Ebrahimi /* Restore previous private_data_ptr */
13496*22dc650dSSadaf Ebrahimi if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
13497*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1));
13498*22dc650dSSadaf Ebrahimi else if (ket == OP_KETRMIN)
13499*22dc650dSSadaf Ebrahimi {
13500*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13501*22dc650dSSadaf Ebrahimi /* See the comment below. */
13502*22dc650dSSadaf Ebrahimi free_stack(common, 2);
13503*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
13504*22dc650dSSadaf Ebrahimi }
13505*22dc650dSSadaf Ebrahimi }
13506*22dc650dSSadaf Ebrahimi
13507*22dc650dSSadaf Ebrahimi if (repeat_type == OP_EXACT)
13508*22dc650dSSadaf Ebrahimi {
13509*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
13510*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
13511*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
13512*22dc650dSSadaf Ebrahimi }
13513*22dc650dSSadaf Ebrahimi else if (ket == OP_KETRMAX)
13514*22dc650dSSadaf Ebrahimi {
13515*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13516*22dc650dSSadaf Ebrahimi if (bra != OP_BRAZERO)
13517*22dc650dSSadaf Ebrahimi free_stack(common, 1);
13518*22dc650dSSadaf Ebrahimi
13519*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
13520*22dc650dSSadaf Ebrahimi if (bra == OP_BRAZERO)
13521*22dc650dSSadaf Ebrahimi {
13522*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13523*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
13524*22dc650dSSadaf Ebrahimi JUMPHERE(brazero);
13525*22dc650dSSadaf Ebrahimi free_stack(common, 1);
13526*22dc650dSSadaf Ebrahimi }
13527*22dc650dSSadaf Ebrahimi }
13528*22dc650dSSadaf Ebrahimi else if (ket == OP_KETRMIN)
13529*22dc650dSSadaf Ebrahimi {
13530*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13531*22dc650dSSadaf Ebrahimi
13532*22dc650dSSadaf Ebrahimi /* OP_ONCE removes everything in case of a backtrack, so we don't
13533*22dc650dSSadaf Ebrahimi need to explicitly release the STR_PTR. The extra release would
13534*22dc650dSSadaf Ebrahimi affect badly the free_stack(2) above. */
13535*22dc650dSSadaf Ebrahimi if (opcode != OP_ONCE)
13536*22dc650dSSadaf Ebrahimi free_stack(common, 1);
13537*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
13538*22dc650dSSadaf Ebrahimi if (opcode == OP_ONCE)
13539*22dc650dSSadaf Ebrahimi free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
13540*22dc650dSSadaf Ebrahimi else if (bra == OP_BRAMINZERO)
13541*22dc650dSSadaf Ebrahimi free_stack(common, 1);
13542*22dc650dSSadaf Ebrahimi }
13543*22dc650dSSadaf Ebrahimi else if (bra == OP_BRAZERO)
13544*22dc650dSSadaf Ebrahimi {
13545*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13546*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
13547*22dc650dSSadaf Ebrahimi JUMPHERE(brazero);
13548*22dc650dSSadaf Ebrahimi }
13549*22dc650dSSadaf Ebrahimi }
13550*22dc650dSSadaf Ebrahimi
compile_bracketpos_backtrackingpath(compiler_common * common,struct backtrack_common * current)13551*22dc650dSSadaf Ebrahimi static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13552*22dc650dSSadaf Ebrahimi {
13553*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
13554*22dc650dSSadaf Ebrahimi int offset;
13555*22dc650dSSadaf Ebrahimi struct sljit_jump *jump;
13556*22dc650dSSadaf Ebrahimi PCRE2_SPTR cc;
13557*22dc650dSSadaf Ebrahimi
13558*22dc650dSSadaf Ebrahimi /* No retry on backtrack, just drop everything. */
13559*22dc650dSSadaf Ebrahimi if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
13560*22dc650dSSadaf Ebrahimi {
13561*22dc650dSSadaf Ebrahimi cc = current->cc;
13562*22dc650dSSadaf Ebrahimi
13563*22dc650dSSadaf Ebrahimi if (*cc == OP_BRAPOSZERO)
13564*22dc650dSSadaf Ebrahimi cc++;
13565*22dc650dSSadaf Ebrahimi
13566*22dc650dSSadaf Ebrahimi if (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS)
13567*22dc650dSSadaf Ebrahimi {
13568*22dc650dSSadaf Ebrahimi offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
13569*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13570*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13571*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
13572*22dc650dSSadaf Ebrahimi if (common->capture_last_ptr != 0)
13573*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
13574*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
13575*22dc650dSSadaf Ebrahimi if (common->capture_last_ptr != 0)
13576*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
13577*22dc650dSSadaf Ebrahimi }
13578*22dc650dSSadaf Ebrahimi set_jumps(current->own_backtracks, LABEL());
13579*22dc650dSSadaf Ebrahimi free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
13580*22dc650dSSadaf Ebrahimi return;
13581*22dc650dSSadaf Ebrahimi }
13582*22dc650dSSadaf Ebrahimi
13583*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
13584*22dc650dSSadaf Ebrahimi add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13585*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracketpos_backtrack)->framesize - 1) * sizeof(sljit_sw));
13586*22dc650dSSadaf Ebrahimi
13587*22dc650dSSadaf Ebrahimi if (current->own_backtracks)
13588*22dc650dSSadaf Ebrahimi {
13589*22dc650dSSadaf Ebrahimi jump = JUMP(SLJIT_JUMP);
13590*22dc650dSSadaf Ebrahimi set_jumps(current->own_backtracks, LABEL());
13591*22dc650dSSadaf Ebrahimi /* Drop the stack frame. */
13592*22dc650dSSadaf Ebrahimi free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
13593*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
13594*22dc650dSSadaf Ebrahimi }
13595*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1));
13596*22dc650dSSadaf Ebrahimi }
13597*22dc650dSSadaf Ebrahimi
compile_braminzero_backtrackingpath(compiler_common * common,struct backtrack_common * current)13598*22dc650dSSadaf Ebrahimi static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13599*22dc650dSSadaf Ebrahimi {
13600*22dc650dSSadaf Ebrahimi assert_backtrack backtrack;
13601*22dc650dSSadaf Ebrahimi
13602*22dc650dSSadaf Ebrahimi current->top = NULL;
13603*22dc650dSSadaf Ebrahimi current->own_backtracks = NULL;
13604*22dc650dSSadaf Ebrahimi current->simple_backtracks = NULL;
13605*22dc650dSSadaf Ebrahimi if (current->cc[1] > OP_ASSERTBACK_NOT)
13606*22dc650dSSadaf Ebrahimi {
13607*22dc650dSSadaf Ebrahimi /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
13608*22dc650dSSadaf Ebrahimi compile_bracket_matchingpath(common, current->cc, current);
13609*22dc650dSSadaf Ebrahimi compile_bracket_backtrackingpath(common, current->top);
13610*22dc650dSSadaf Ebrahimi }
13611*22dc650dSSadaf Ebrahimi else
13612*22dc650dSSadaf Ebrahimi {
13613*22dc650dSSadaf Ebrahimi memset(&backtrack, 0, sizeof(backtrack));
13614*22dc650dSSadaf Ebrahimi backtrack.common.cc = current->cc;
13615*22dc650dSSadaf Ebrahimi backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
13616*22dc650dSSadaf Ebrahimi /* Manual call of compile_assert_matchingpath. */
13617*22dc650dSSadaf Ebrahimi compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
13618*22dc650dSSadaf Ebrahimi }
13619*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(!current->simple_backtracks && !current->own_backtracks);
13620*22dc650dSSadaf Ebrahimi }
13621*22dc650dSSadaf Ebrahimi
compile_control_verb_backtrackingpath(compiler_common * common,struct backtrack_common * current)13622*22dc650dSSadaf Ebrahimi static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13623*22dc650dSSadaf Ebrahimi {
13624*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
13625*22dc650dSSadaf Ebrahimi PCRE2_UCHAR opcode = *current->cc;
13626*22dc650dSSadaf Ebrahimi struct sljit_label *loop;
13627*22dc650dSSadaf Ebrahimi struct sljit_jump *jump;
13628*22dc650dSSadaf Ebrahimi
13629*22dc650dSSadaf Ebrahimi if (opcode == OP_THEN || opcode == OP_THEN_ARG)
13630*22dc650dSSadaf Ebrahimi {
13631*22dc650dSSadaf Ebrahimi if (common->then_trap != NULL)
13632*22dc650dSSadaf Ebrahimi {
13633*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(common->control_head_ptr != 0);
13634*22dc650dSSadaf Ebrahimi
13635*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
13636*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
13637*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
13638*22dc650dSSadaf Ebrahimi jump = JUMP(SLJIT_JUMP);
13639*22dc650dSSadaf Ebrahimi
13640*22dc650dSSadaf Ebrahimi loop = LABEL();
13641*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13642*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
13643*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, loop);
13644*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, loop);
13645*22dc650dSSadaf Ebrahimi add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
13646*22dc650dSSadaf Ebrahimi return;
13647*22dc650dSSadaf Ebrahimi }
13648*22dc650dSSadaf Ebrahimi else if (!common->local_quit_available && common->in_positive_assertion)
13649*22dc650dSSadaf Ebrahimi {
13650*22dc650dSSadaf Ebrahimi add_jump(compiler, &common->positive_assertion_quit, JUMP(SLJIT_JUMP));
13651*22dc650dSSadaf Ebrahimi return;
13652*22dc650dSSadaf Ebrahimi }
13653*22dc650dSSadaf Ebrahimi }
13654*22dc650dSSadaf Ebrahimi
13655*22dc650dSSadaf Ebrahimi if (common->local_quit_available)
13656*22dc650dSSadaf Ebrahimi {
13657*22dc650dSSadaf Ebrahimi /* Abort match with a fail. */
13658*22dc650dSSadaf Ebrahimi if (common->quit_label == NULL)
13659*22dc650dSSadaf Ebrahimi add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
13660*22dc650dSSadaf Ebrahimi else
13661*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_JUMP, common->quit_label);
13662*22dc650dSSadaf Ebrahimi return;
13663*22dc650dSSadaf Ebrahimi }
13664*22dc650dSSadaf Ebrahimi
13665*22dc650dSSadaf Ebrahimi if (opcode == OP_SKIP_ARG)
13666*22dc650dSSadaf Ebrahimi {
13667*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(common->control_head_ptr != 0 && TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
13668*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
13669*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
13670*22dc650dSSadaf Ebrahimi sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_search_mark));
13671*22dc650dSSadaf Ebrahimi
13672*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0);
13673*22dc650dSSadaf Ebrahimi add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0));
13674*22dc650dSSadaf Ebrahimi return;
13675*22dc650dSSadaf Ebrahimi }
13676*22dc650dSSadaf Ebrahimi
13677*22dc650dSSadaf Ebrahimi if (opcode == OP_SKIP)
13678*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13679*22dc650dSSadaf Ebrahimi else
13680*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
13681*22dc650dSSadaf Ebrahimi add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
13682*22dc650dSSadaf Ebrahimi }
13683*22dc650dSSadaf Ebrahimi
compile_vreverse_backtrackingpath(compiler_common * common,struct backtrack_common * current)13684*22dc650dSSadaf Ebrahimi static SLJIT_INLINE void compile_vreverse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13685*22dc650dSSadaf Ebrahimi {
13686*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
13687*22dc650dSSadaf Ebrahimi struct sljit_jump *jump;
13688*22dc650dSSadaf Ebrahimi struct sljit_label *label;
13689*22dc650dSSadaf Ebrahimi
13690*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
13691*22dc650dSSadaf Ebrahimi jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(3));
13692*22dc650dSSadaf Ebrahimi skip_valid_char(common);
13693*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
13694*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_JUMP, CURRENT_AS(vreverse_backtrack)->matchingpath);
13695*22dc650dSSadaf Ebrahimi
13696*22dc650dSSadaf Ebrahimi label = LABEL();
13697*22dc650dSSadaf Ebrahimi sljit_set_label(jump, label);
13698*22dc650dSSadaf Ebrahimi set_jumps(current->own_backtracks, label);
13699*22dc650dSSadaf Ebrahimi }
13700*22dc650dSSadaf Ebrahimi
compile_then_trap_backtrackingpath(compiler_common * common,struct backtrack_common * current)13701*22dc650dSSadaf Ebrahimi static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13702*22dc650dSSadaf Ebrahimi {
13703*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
13704*22dc650dSSadaf Ebrahimi struct sljit_jump *jump;
13705*22dc650dSSadaf Ebrahimi int size;
13706*22dc650dSSadaf Ebrahimi
13707*22dc650dSSadaf Ebrahimi if (CURRENT_AS(then_trap_backtrack)->then_trap)
13708*22dc650dSSadaf Ebrahimi {
13709*22dc650dSSadaf Ebrahimi common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
13710*22dc650dSSadaf Ebrahimi return;
13711*22dc650dSSadaf Ebrahimi }
13712*22dc650dSSadaf Ebrahimi
13713*22dc650dSSadaf Ebrahimi size = CURRENT_AS(then_trap_backtrack)->framesize;
13714*22dc650dSSadaf Ebrahimi size = 3 + (size < 0 ? 0 : size);
13715*22dc650dSSadaf Ebrahimi
13716*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
13717*22dc650dSSadaf Ebrahimi free_stack(common, size);
13718*22dc650dSSadaf Ebrahimi jump = JUMP(SLJIT_JUMP);
13719*22dc650dSSadaf Ebrahimi
13720*22dc650dSSadaf Ebrahimi set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
13721*22dc650dSSadaf Ebrahimi /* STACK_TOP is set by THEN. */
13722*22dc650dSSadaf Ebrahimi if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
13723*22dc650dSSadaf Ebrahimi {
13724*22dc650dSSadaf Ebrahimi add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13725*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(then_trap_backtrack)->framesize - 1) * sizeof(sljit_sw));
13726*22dc650dSSadaf Ebrahimi }
13727*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13728*22dc650dSSadaf Ebrahimi free_stack(common, 3);
13729*22dc650dSSadaf Ebrahimi
13730*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
13731*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
13732*22dc650dSSadaf Ebrahimi }
13733*22dc650dSSadaf Ebrahimi
/* Emits the backtracking code for a chain of backtrack records.

Arguments:
  common     the JIT compiler state
  current    the first record to process; the chain is followed through
             the prev links until it ends

For every record, its simple_backtracks are bound to the current position
and then the handler selected by the record's opcode (*current->cc) is
invoked. The handlers may change common->then_trap, so the value seen on
entry is saved and put back before returning.
*/
static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
then_trap_backtrack *save_then_trap = common->then_trap;

while (current)
  {
  if (current->simple_backtracks != NULL)
    set_jumps(current->simple_backtracks, LABEL());
  switch(*current->cc)
    {
    case OP_SET_SOM:
    /* Pop the saved word back into OVECTOR(0). */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
    break;

    /* Single character, negated character, character type and class
    iterators share one handler. */
    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    case OP_TYPESTAR:
    case OP_TYPEMINSTAR:
    case OP_TYPEPLUS:
    case OP_TYPEMINPLUS:
    case OP_TYPEQUERY:
    case OP_TYPEMINQUERY:
    case OP_TYPEUPTO:
    case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:
    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
#endif
    compile_iterator_backtrackingpath(common, current);
    break;

    case OP_REF:
    case OP_REFI:
    case OP_DNREF:
    case OP_DNREFI:
    compile_ref_iterator_backtrackingpath(common, current);
    break;

    case OP_RECURSE:
    compile_recurse_backtrackingpath(common, current);
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    compile_assert_backtrackingpath(common, current);
    break;

    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRA:
    case OP_CBRA:
    case OP_COND:
    case OP_SBRA:
    case OP_SCBRA:
    case OP_SCOND:
    compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAZERO:
    /* The opcode after OP_BRAZERO decides between bracket and assertion. */
    if (current->cc[1] > OP_ASSERTBACK_NOT)
      compile_bracket_backtrackingpath(common, current);
    else
      compile_assert_backtrackingpath(common, current);
    break;

    case OP_BRAPOS:
    case OP_CBRAPOS:
    case OP_SBRAPOS:
    case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    compile_bracketpos_backtrackingpath(common, current);
    break;

    case OP_BRAMINZERO:
    compile_braminzero_backtrackingpath(common, current);
    break;

    case OP_MARK:
    /* With has_skip_arg set, five slots are released: the mark value is
    read from STACK(4) and the control head from STACK(0). Without it only
    the mark value at STACK(0) is restored and one slot is released. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0));
    if (common->has_skip_arg)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, common->has_skip_arg ? 5 : 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
    if (common->has_skip_arg)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
    break;

    case OP_THEN:
    case OP_THEN_ARG:
    case OP_PRUNE:
    case OP_PRUNE_ARG:
    case OP_SKIP:
    case OP_SKIP_ARG:
    compile_control_verb_backtrackingpath(common, current);
    break;

    case OP_COMMIT:
    case OP_COMMIT_ARG:
    /* Without a local quit, the return register is set to
    PCRE2_ERROR_NOMATCH before jumping to the quit point. */
    if (!common->local_quit_available)
      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
    if (common->quit_label == NULL)
      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
    else
      JUMPTO(SLJIT_JUMP, common->quit_label);
    break;

    case OP_CALLOUT:
    case OP_CALLOUT_STR:
    case OP_FAIL:
    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    /* Nothing to undo: only bind the record's own backtrack jumps. */
    set_jumps(current->own_backtracks, LABEL());
    break;

    case OP_VREVERSE:
    compile_vreverse_backtrackingpath(common, current);
    break;

    case OP_THEN_TRAP:
    /* A virtual opcode for then traps. */
    compile_then_trap_backtrackingpath(common, current);
    break;

    default:
    SLJIT_UNREACHABLE();
    break;
    }
  current = current->prev;
  }
common->then_trap = save_then_trap;
}
13928*22dc650dSSadaf Ebrahimi
/* Emits the JIT code for the recursion target described by
common->currententry: the group that starts at
common->start + common->currententry->start (one of OP_BRA, OP_CBRA,
OP_CBRAPOS, OP_SCBRA or OP_SCBRAPOS, see the assertion below).

Two entry points are generated and the pending jumps recorded in the entry are
bound to them:
  - entry_label: matching path, entered through SLJIT_FAST_ENTER with the
    return address delivered in TMP2;
  - backtrack_label: backtracking path, entered through SLJIT_FAST_ENTER with
    the return address delivered in TMP1.

Every exit uses SLJIT_FAST_RETURN after loading TMP1: 1 on the path reached
through the `match` jump list, 0 on the "no alternative matched" path and on
the accept-exit path.

Side effects on `common`: then_trap, quit_label, accept_label, quit and accept
are reset to NULL before the alternatives are compiled. The function returns
early (leaving the code incomplete) when sljit reports a compiler error after
compile_matchingpath() or compile_backtrackingpath(). */
compile_recurse(compiler_common * common)13929*22dc650dSSadaf Ebrahimi static SLJIT_INLINE void compile_recurse(compiler_common *common)
13930*22dc650dSSadaf Ebrahimi {
13931*22dc650dSSadaf Ebrahimi DEFINE_COMPILER;
13932*22dc650dSSadaf Ebrahimi PCRE2_SPTR cc = common->start + common->currententry->start;
/* First opcode of the group body: skips the opcode and its link, plus an
IMM2_SIZE field for every opcode other than OP_BRA (presumably the capture
number - confirm against the opcode layout). */
13933*22dc650dSSadaf Ebrahimi PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
/* bracketend() minus (1 + LINK_SIZE); jit_compile() asserts that the same
offset from bracketend() of the root group holds OP_KET, so this presumably
points at the closing opcode of the group. */
13934*22dc650dSSadaf Ebrahimi PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE);
13935*22dc650dSSadaf Ebrahimi uint32_t recurse_flags = 0;
13936*22dc650dSSadaf Ebrahimi int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &recurse_flags);
13937*22dc650dSSadaf Ebrahimi int alt_count, alt_max, local_size;
13938*22dc650dSSadaf Ebrahimi backtrack_common altbacktrack;
/* Jumps taken after an alternative matched; bound to the common epilogue at
the end of this function. */
13939*22dc650dSSadaf Ebrahimi jump_list *match = NULL;
13940*22dc650dSSadaf Ebrahimi struct sljit_jump *next_alt = NULL;
13941*22dc650dSSadaf Ebrahimi struct sljit_jump *accept_exit = NULL;
13942*22dc650dSSadaf Ebrahimi struct sljit_label *quit;
13943*22dc650dSSadaf Ebrahimi struct sljit_jump *mov_addr = NULL;
13944*22dc650dSSadaf Ebrahimi
13945*22dc650dSSadaf Ebrahimi /* Recurse captures then. */
13946*22dc650dSSadaf Ebrahimi common->then_trap = NULL;
13947*22dc650dSSadaf Ebrahimi
13948*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
13949*22dc650dSSadaf Ebrahimi
13950*22dc650dSSadaf Ebrahimi alt_max = no_alternatives(cc);
13951*22dc650dSSadaf Ebrahimi alt_count = 0;
13952*22dc650dSSadaf Ebrahimi
13953*22dc650dSSadaf Ebrahimi /* Matching path. */
13954*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(common->currententry->entry_label == NULL && common->recursive_head_ptr != 0);
13955*22dc650dSSadaf Ebrahimi common->currententry->entry_label = LABEL();
13956*22dc650dSSadaf Ebrahimi set_jumps(common->currententry->entry_calls, common->currententry->entry_label);
13957*22dc650dSSadaf Ebrahimi
/* The caller's return address arrives in TMP2. */
13958*22dc650dSSadaf Ebrahimi sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, TMP2, 0);
13959*22dc650dSSadaf Ebrahimi count_match(common);
13960*22dc650dSSadaf Ebrahimi
/* Local slots in the frame: the return address always, plus a saved STR_PTR
(slot 0) when the group has more than one alternative. */
13961*22dc650dSSadaf Ebrahimi local_size = (alt_max > 1) ? 2 : 1;
13962*22dc650dSSadaf Ebrahimi
13963*22dc650dSSadaf Ebrahimi /* (Reversed) stack layout:
13964*22dc650dSSadaf Ebrahimi [private data][return address][optional: str ptr] ... [optional: alternative index][recursive_head_ptr] */
13965*22dc650dSSadaf Ebrahimi
13966*22dc650dSSadaf Ebrahimi allocate_stack(common, private_data_size + local_size);
13967*22dc650dSSadaf Ebrahimi /* Save return address. */
13968*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP2, 0);
13969*22dc650dSSadaf Ebrahimi
13970*22dc650dSSadaf Ebrahimi copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, recurse_flags);
13971*22dc650dSSadaf Ebrahimi
13972*22dc650dSSadaf Ebrahimi /* This variable is saved and restored every time we enter or exit a recursive context. */
13973*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
13974*22dc650dSSadaf Ebrahimi
/* Start the recursion with a cleared control head when the scan flagged one. */
13975*22dc650dSSadaf Ebrahimi if (recurse_flags & recurse_flag_control_head_found)
13976*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
13977*22dc650dSSadaf Ebrahimi
/* Remember the subject position so that the second and later alternatives can
reload it (see the top of the loop below). */
13978*22dc650dSSadaf Ebrahimi if (alt_max > 1)
13979*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
13980*22dc650dSSadaf Ebrahimi
13981*22dc650dSSadaf Ebrahimi memset(&altbacktrack, 0, sizeof(backtrack_common));
/* With quit_label cleared, emitters that need to quit (e.g. the OP_COMMIT
case of compile_backtrackingpath) append to common->quit instead of jumping to
a fixed label; those jumps are bound after the loop. */
13982*22dc650dSSadaf Ebrahimi common->quit_label = NULL;
13983*22dc650dSSadaf Ebrahimi common->accept_label = NULL;
13984*22dc650dSSadaf Ebrahimi common->quit = NULL;
13985*22dc650dSSadaf Ebrahimi common->accept = NULL;
13986*22dc650dSSadaf Ebrahimi altbacktrack.cc = ccbegin;
13987*22dc650dSSadaf Ebrahimi cc += GET(cc, 1);
/* One iteration per alternative: altbacktrack.cc is the start of the current
alternative and cc is its end (an OP_ALT, or the opcode that closes the group). */
13988*22dc650dSSadaf Ebrahimi while (1)
13989*22dc650dSSadaf Ebrahimi {
13990*22dc650dSSadaf Ebrahimi altbacktrack.top = NULL;
13991*22dc650dSSadaf Ebrahimi altbacktrack.own_backtracks = NULL;
13992*22dc650dSSadaf Ebrahimi
/* Not the first alternative: restart from the saved subject position. */
13993*22dc650dSSadaf Ebrahimi if (altbacktrack.cc != ccbegin)
13994*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13995*22dc650dSSadaf Ebrahimi
13996*22dc650dSSadaf Ebrahimi compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
13997*22dc650dSSadaf Ebrahimi if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13998*22dc650dSSadaf Ebrahimi return;
13999*22dc650dSSadaf Ebrahimi
/* The alternative matched. Reserve the trailing slots: STACK(0) is filled at
the match epilogue, STACK(1) (when present) identifies the alternative. TMP2
receives the frame address stored in recursive_head_ptr. */
14000*22dc650dSSadaf Ebrahimi allocate_stack(common, (alt_max > 1 || (recurse_flags & recurse_flag_accept_found)) ? 2 : 1);
14001*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
14002*22dc650dSSadaf Ebrahimi
14003*22dc650dSSadaf Ebrahimi if (alt_max > 1 || (recurse_flags & recurse_flag_accept_found))
14004*22dc650dSSadaf Ebrahimi {
/* More than three alternatives: store a code address (its label is set below,
where this alternative's backtracking code starts) for an indirect jump.
Otherwise store the alternative's index, which is tested with compares. */
14005*22dc650dSSadaf Ebrahimi if (alt_max > 3)
14006*22dc650dSSadaf Ebrahimi mov_addr = sljit_emit_mov_addr(compiler, SLJIT_MEM1(STACK_TOP), STACK(1));
14007*22dc650dSSadaf Ebrahimi else
14008*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);
14009*22dc650dSSadaf Ebrahimi }
14010*22dc650dSSadaf Ebrahimi
14011*22dc650dSSadaf Ebrahimi add_jump(compiler, &match, JUMP(SLJIT_JUMP));
14012*22dc650dSSadaf Ebrahimi
14013*22dc650dSSadaf Ebrahimi if (alt_count == 0)
14014*22dc650dSSadaf Ebrahimi {
14015*22dc650dSSadaf Ebrahimi /* Backtracking path entry. */
14016*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(common->currententry->backtrack_label == NULL);
14017*22dc650dSSadaf Ebrahimi common->currententry->backtrack_label = LABEL();
14018*22dc650dSSadaf Ebrahimi set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label);
14019*22dc650dSSadaf Ebrahimi
/* The caller's return address arrives in TMP1 on this entry. */
14020*22dc650dSSadaf Ebrahimi sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, TMP1, 0);
14021*22dc650dSSadaf Ebrahimi
/* STACK(1) == -1 is the marker written by the accept path near the end of this
function; such a result is left through the accept-exit code. */
14022*22dc650dSSadaf Ebrahimi if (recurse_flags & recurse_flag_accept_found)
14023*22dc650dSSadaf Ebrahimi accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
14024*22dc650dSSadaf Ebrahimi
/* STACK(0) holds the frame address stored by the match epilogue. */
14025*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
14026*22dc650dSSadaf Ebrahimi /* Save return address. */
14027*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(local_size - 1), TMP1, 0);
14028*22dc650dSSadaf Ebrahimi
14029*22dc650dSSadaf Ebrahimi copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, recurse_flags);
14030*22dc650dSSadaf Ebrahimi
14031*22dc650dSSadaf Ebrahimi if (alt_max > 1)
14032*22dc650dSSadaf Ebrahimi {
/* TMP1 = alternative index or backtracking code address saved at match time. */
14033*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
14034*22dc650dSSadaf Ebrahimi free_stack(common, 2);
14035*22dc650dSSadaf Ebrahimi
14036*22dc650dSSadaf Ebrahimi if (alt_max > 3)
14037*22dc650dSSadaf Ebrahimi {
/* Dispatch through the saved address; the first alternative's backtracking
code starts right after the jump. */
14038*22dc650dSSadaf Ebrahimi sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
14039*22dc650dSSadaf Ebrahimi sljit_set_label(mov_addr, LABEL());
14040*22dc650dSSadaf Ebrahimi sljit_emit_op0(compiler, SLJIT_ENDBR);
14041*22dc650dSSadaf Ebrahimi }
14042*22dc650dSSadaf Ebrahimi else
/* Index 0 falls through into the first alternative's backtracking code. */
14043*22dc650dSSadaf Ebrahimi next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
14044*22dc650dSSadaf Ebrahimi }
14045*22dc650dSSadaf Ebrahimi else
14046*22dc650dSSadaf Ebrahimi free_stack(common, (recurse_flags & recurse_flag_accept_found) ? 2 : 1);
14047*22dc650dSSadaf Ebrahimi }
14048*22dc650dSSadaf Ebrahimi else if (alt_max > 3)
14049*22dc650dSSadaf Ebrahimi {
/* Later alternative, address dispatch: the address stored above resolves here. */
14050*22dc650dSSadaf Ebrahimi sljit_set_label(mov_addr, LABEL());
14051*22dc650dSSadaf Ebrahimi sljit_emit_op0(compiler, SLJIT_ENDBR);
14052*22dc650dSSadaf Ebrahimi }
14053*22dc650dSSadaf Ebrahimi else
14054*22dc650dSSadaf Ebrahimi {
/* Later alternative, index dispatch: the previous compare lands here; only the
middle one of three alternatives needs another compare. */
14055*22dc650dSSadaf Ebrahimi JUMPHERE(next_alt);
14056*22dc650dSSadaf Ebrahimi if (alt_count + 1 < alt_max)
14057*22dc650dSSadaf Ebrahimi {
14058*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(alt_count == 1 && alt_max == 3);
14059*22dc650dSSadaf Ebrahimi next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
14060*22dc650dSSadaf Ebrahimi }
14061*22dc650dSSadaf Ebrahimi }
14062*22dc650dSSadaf Ebrahimi
14063*22dc650dSSadaf Ebrahimi alt_count++;
14064*22dc650dSSadaf Ebrahimi
/* Backtracking code of this alternative; its own failure jumps are bound to
the position that follows it (the next alternative, or the quit code). */
14065*22dc650dSSadaf Ebrahimi compile_backtrackingpath(common, altbacktrack.top);
14066*22dc650dSSadaf Ebrahimi if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14067*22dc650dSSadaf Ebrahimi return;
14068*22dc650dSSadaf Ebrahimi set_jumps(altbacktrack.own_backtracks, LABEL());
14069*22dc650dSSadaf Ebrahimi
14070*22dc650dSSadaf Ebrahimi if (*cc != OP_ALT)
14071*22dc650dSSadaf Ebrahimi break;
14072*22dc650dSSadaf Ebrahimi
14073*22dc650dSSadaf Ebrahimi altbacktrack.cc = cc + 1 + LINK_SIZE;
14074*22dc650dSSadaf Ebrahimi cc += GET(cc, 1);
14075*22dc650dSSadaf Ebrahimi }
14076*22dc650dSSadaf Ebrahimi
14077*22dc650dSSadaf Ebrahimi /* No alternative is matched. */
14078*22dc650dSSadaf Ebrahimi
14079*22dc650dSSadaf Ebrahimi quit = LABEL();
14080*22dc650dSSadaf Ebrahimi
14081*22dc650dSSadaf Ebrahimi copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, recurse_flags);
14082*22dc650dSSadaf Ebrahimi
/* Reload the return address, drop the whole frame and return with TMP1 = 0. */
14083*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
14084*22dc650dSSadaf Ebrahimi free_stack(common, private_data_size + local_size);
14085*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
14086*22dc650dSSadaf Ebrahimi OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
14087*22dc650dSSadaf Ebrahimi
/* Quit jumps collected while compiling the alternatives: reset STACK_TOP to
the frame, copy the shared data back and leave through the quit code above. */
14088*22dc650dSSadaf Ebrahimi if (common->quit != NULL)
14089*22dc650dSSadaf Ebrahimi {
14090*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(recurse_flags & recurse_flag_quit_found);
14091*22dc650dSSadaf Ebrahimi
14092*22dc650dSSadaf Ebrahimi set_jumps(common->quit, LABEL());
14093*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
14094*22dc650dSSadaf Ebrahimi copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, recurse_flags);
14095*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_JUMP, quit);
14096*22dc650dSSadaf Ebrahimi }
14097*22dc650dSSadaf Ebrahimi
/* Target of the "STACK(1) == -1" test on the backtracking entry: drop the two
trailing slots, store the return address (TMP1) in the frame, copy the kept
shared data back, then return with TMP1 = 0. */
14098*22dc650dSSadaf Ebrahimi if (recurse_flags & recurse_flag_accept_found)
14099*22dc650dSSadaf Ebrahimi {
14100*22dc650dSSadaf Ebrahimi JUMPHERE(accept_exit);
14101*22dc650dSSadaf Ebrahimi free_stack(common, 2);
14102*22dc650dSSadaf Ebrahimi
14103*22dc650dSSadaf Ebrahimi /* Save return address. */
14104*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP1, 0);
14105*22dc650dSSadaf Ebrahimi
14106*22dc650dSSadaf Ebrahimi copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, recurse_flags);
14107*22dc650dSSadaf Ebrahimi
14108*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
14109*22dc650dSSadaf Ebrahimi free_stack(common, private_data_size + local_size);
14110*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
14111*22dc650dSSadaf Ebrahimi OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
14112*22dc650dSSadaf Ebrahimi }
14113*22dc650dSSadaf Ebrahimi
/* Accept jumps collected while compiling the alternatives: reset STACK_TOP to
the frame, keep its address in TMP2, push two slots and mark STACK(1) with -1,
then fall through into the match epilogue. */
14114*22dc650dSSadaf Ebrahimi if (common->accept != NULL)
14115*22dc650dSSadaf Ebrahimi {
14116*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(recurse_flags & recurse_flag_accept_found);
14117*22dc650dSSadaf Ebrahimi
14118*22dc650dSSadaf Ebrahimi set_jumps(common->accept, LABEL());
14119*22dc650dSSadaf Ebrahimi
14120*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
14121*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);
14122*22dc650dSSadaf Ebrahimi
14123*22dc650dSSadaf Ebrahimi allocate_stack(common, 2);
14124*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
14125*22dc650dSSadaf Ebrahimi }
14126*22dc650dSSadaf Ebrahimi
/* Match epilogue, shared by every alternative and by the accept path. TMP2
holds the frame address: it is stored in STACK(0) for a later backtrack, the
data is swapped, and the return address is read from the frame. TMP1 = 1 on
return. */
14127*22dc650dSSadaf Ebrahimi set_jumps(match, LABEL());
14128*22dc650dSSadaf Ebrahimi
14129*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
14130*22dc650dSSadaf Ebrahimi
14131*22dc650dSSadaf Ebrahimi copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, recurse_flags);
14132*22dc650dSSadaf Ebrahimi
14133*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(local_size - 1));
14134*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
14135*22dc650dSSadaf Ebrahimi OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
14136*22dc650dSSadaf Ebrahimi }
14137*22dc650dSSadaf Ebrahimi
/* Retire two helper macros so they are not visible to the code that follows
(their definitions are earlier in the file). */
14138*22dc650dSSadaf Ebrahimi #undef COMPILE_BACKTRACKINGPATH
14139*22dc650dSSadaf Ebrahimi #undef CURRENT_AS
14140*22dc650dSSadaf Ebrahimi
/* Configuration bits that may accompany the JIT compile mode. jit_compile()
reads PCRE2_JIT_INVALID_UTF from `mode` first and then clears every bit of this
mask, so only the plain mode value is stored in common->mode. */
14141*22dc650dSSadaf Ebrahimi #define PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS \
14142*22dc650dSSadaf Ebrahimi (PCRE2_JIT_INVALID_UTF)
14143*22dc650dSSadaf Ebrahimi
jit_compile(pcre2_code * code,sljit_u32 mode)14144*22dc650dSSadaf Ebrahimi static int jit_compile(pcre2_code *code, sljit_u32 mode)
14145*22dc650dSSadaf Ebrahimi {
14146*22dc650dSSadaf Ebrahimi pcre2_real_code *re = (pcre2_real_code *)code;
14147*22dc650dSSadaf Ebrahimi struct sljit_compiler *compiler;
14148*22dc650dSSadaf Ebrahimi backtrack_common rootbacktrack;
14149*22dc650dSSadaf Ebrahimi compiler_common common_data;
14150*22dc650dSSadaf Ebrahimi compiler_common *common = &common_data;
14151*22dc650dSSadaf Ebrahimi const sljit_u8 *tables = re->tables;
14152*22dc650dSSadaf Ebrahimi void *allocator_data = &re->memctl;
14153*22dc650dSSadaf Ebrahimi int private_data_size;
14154*22dc650dSSadaf Ebrahimi PCRE2_SPTR ccend;
14155*22dc650dSSadaf Ebrahimi executable_functions *functions;
14156*22dc650dSSadaf Ebrahimi void *executable_func;
14157*22dc650dSSadaf Ebrahimi sljit_uw executable_size;
14158*22dc650dSSadaf Ebrahimi sljit_uw total_length;
14159*22dc650dSSadaf Ebrahimi struct sljit_label *mainloop_label = NULL;
14160*22dc650dSSadaf Ebrahimi struct sljit_label *continue_match_label;
14161*22dc650dSSadaf Ebrahimi struct sljit_label *empty_match_found_label = NULL;
14162*22dc650dSSadaf Ebrahimi struct sljit_label *empty_match_backtrack_label = NULL;
14163*22dc650dSSadaf Ebrahimi struct sljit_label *reset_match_label;
14164*22dc650dSSadaf Ebrahimi struct sljit_label *quit_label;
14165*22dc650dSSadaf Ebrahimi struct sljit_jump *jump;
14166*22dc650dSSadaf Ebrahimi struct sljit_jump *minlength_check_failed = NULL;
14167*22dc650dSSadaf Ebrahimi struct sljit_jump *empty_match = NULL;
14168*22dc650dSSadaf Ebrahimi struct sljit_jump *end_anchor_failed = NULL;
14169*22dc650dSSadaf Ebrahimi jump_list *reqcu_not_found = NULL;
14170*22dc650dSSadaf Ebrahimi
14171*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(tables);
14172*22dc650dSSadaf Ebrahimi
14173*22dc650dSSadaf Ebrahimi #if HAS_VIRTUAL_REGISTERS == 1
14174*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) < 0 && sljit_get_register_index(SLJIT_GP_REGISTER, ARGUMENTS) < 0 && sljit_get_register_index(SLJIT_GP_REGISTER, RETURN_ADDR) < 0);
14175*22dc650dSSadaf Ebrahimi #elif HAS_VIRTUAL_REGISTERS == 0
14176*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) >= 0 && sljit_get_register_index(SLJIT_GP_REGISTER, ARGUMENTS) >= 0 && sljit_get_register_index(SLJIT_GP_REGISTER, RETURN_ADDR) >= 0);
14177*22dc650dSSadaf Ebrahimi #else
14178*22dc650dSSadaf Ebrahimi #error "Invalid value for HAS_VIRTUAL_REGISTERS"
14179*22dc650dSSadaf Ebrahimi #endif
14180*22dc650dSSadaf Ebrahimi
14181*22dc650dSSadaf Ebrahimi memset(&rootbacktrack, 0, sizeof(backtrack_common));
14182*22dc650dSSadaf Ebrahimi memset(common, 0, sizeof(compiler_common));
14183*22dc650dSSadaf Ebrahimi common->re = re;
14184*22dc650dSSadaf Ebrahimi common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
14185*22dc650dSSadaf Ebrahimi rootbacktrack.cc = common->name_table + re->name_count * re->name_entry_size;
14186*22dc650dSSadaf Ebrahimi
14187*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
14188*22dc650dSSadaf Ebrahimi common->invalid_utf = (mode & PCRE2_JIT_INVALID_UTF) != 0;
14189*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE */
14190*22dc650dSSadaf Ebrahimi mode &= ~PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS;
14191*22dc650dSSadaf Ebrahimi
14192*22dc650dSSadaf Ebrahimi common->start = rootbacktrack.cc;
14193*22dc650dSSadaf Ebrahimi common->read_only_data_head = NULL;
14194*22dc650dSSadaf Ebrahimi common->fcc = tables + fcc_offset;
14195*22dc650dSSadaf Ebrahimi common->lcc = (sljit_sw)(tables + lcc_offset);
14196*22dc650dSSadaf Ebrahimi common->mode = mode;
14197*22dc650dSSadaf Ebrahimi common->might_be_empty = (re->minlength == 0) || (re->flags & PCRE2_MATCH_EMPTY);
14198*22dc650dSSadaf Ebrahimi common->allow_empty_partial = (re->max_lookbehind > 0) || (re->flags & PCRE2_MATCH_EMPTY);
14199*22dc650dSSadaf Ebrahimi common->nltype = NLTYPE_FIXED;
14200*22dc650dSSadaf Ebrahimi switch(re->newline_convention)
14201*22dc650dSSadaf Ebrahimi {
14202*22dc650dSSadaf Ebrahimi case PCRE2_NEWLINE_CR: common->newline = CHAR_CR; break;
14203*22dc650dSSadaf Ebrahimi case PCRE2_NEWLINE_LF: common->newline = CHAR_NL; break;
14204*22dc650dSSadaf Ebrahimi case PCRE2_NEWLINE_CRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
14205*22dc650dSSadaf Ebrahimi case PCRE2_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
14206*22dc650dSSadaf Ebrahimi case PCRE2_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
14207*22dc650dSSadaf Ebrahimi case PCRE2_NEWLINE_NUL: common->newline = CHAR_NUL; break;
14208*22dc650dSSadaf Ebrahimi default: return PCRE2_ERROR_INTERNAL;
14209*22dc650dSSadaf Ebrahimi }
14210*22dc650dSSadaf Ebrahimi common->nlmax = READ_CHAR_MAX;
14211*22dc650dSSadaf Ebrahimi common->nlmin = 0;
14212*22dc650dSSadaf Ebrahimi if (re->bsr_convention == PCRE2_BSR_UNICODE)
14213*22dc650dSSadaf Ebrahimi common->bsr_nltype = NLTYPE_ANY;
14214*22dc650dSSadaf Ebrahimi else if (re->bsr_convention == PCRE2_BSR_ANYCRLF)
14215*22dc650dSSadaf Ebrahimi common->bsr_nltype = NLTYPE_ANYCRLF;
14216*22dc650dSSadaf Ebrahimi else
14217*22dc650dSSadaf Ebrahimi {
14218*22dc650dSSadaf Ebrahimi #ifdef BSR_ANYCRLF
14219*22dc650dSSadaf Ebrahimi common->bsr_nltype = NLTYPE_ANYCRLF;
14220*22dc650dSSadaf Ebrahimi #else
14221*22dc650dSSadaf Ebrahimi common->bsr_nltype = NLTYPE_ANY;
14222*22dc650dSSadaf Ebrahimi #endif
14223*22dc650dSSadaf Ebrahimi }
14224*22dc650dSSadaf Ebrahimi common->bsr_nlmax = READ_CHAR_MAX;
14225*22dc650dSSadaf Ebrahimi common->bsr_nlmin = 0;
14226*22dc650dSSadaf Ebrahimi common->endonly = (re->overall_options & PCRE2_DOLLAR_ENDONLY) != 0;
14227*22dc650dSSadaf Ebrahimi common->ctypes = (sljit_sw)(tables + ctypes_offset);
14228*22dc650dSSadaf Ebrahimi common->name_count = re->name_count;
14229*22dc650dSSadaf Ebrahimi common->name_entry_size = re->name_entry_size;
14230*22dc650dSSadaf Ebrahimi common->unset_backref = (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) != 0;
14231*22dc650dSSadaf Ebrahimi common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0;
14232*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
14233*22dc650dSSadaf Ebrahimi /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
14234*22dc650dSSadaf Ebrahimi common->utf = (re->overall_options & PCRE2_UTF) != 0;
14235*22dc650dSSadaf Ebrahimi common->ucp = (re->overall_options & PCRE2_UCP) != 0;
14236*22dc650dSSadaf Ebrahimi if (common->utf)
14237*22dc650dSSadaf Ebrahimi {
14238*22dc650dSSadaf Ebrahimi if (common->nltype == NLTYPE_ANY)
14239*22dc650dSSadaf Ebrahimi common->nlmax = 0x2029;
14240*22dc650dSSadaf Ebrahimi else if (common->nltype == NLTYPE_ANYCRLF)
14241*22dc650dSSadaf Ebrahimi common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
14242*22dc650dSSadaf Ebrahimi else
14243*22dc650dSSadaf Ebrahimi {
14244*22dc650dSSadaf Ebrahimi /* We only care about the first newline character. */
14245*22dc650dSSadaf Ebrahimi common->nlmax = common->newline & 0xff;
14246*22dc650dSSadaf Ebrahimi }
14247*22dc650dSSadaf Ebrahimi
14248*22dc650dSSadaf Ebrahimi if (common->nltype == NLTYPE_FIXED)
14249*22dc650dSSadaf Ebrahimi common->nlmin = common->newline & 0xff;
14250*22dc650dSSadaf Ebrahimi else
14251*22dc650dSSadaf Ebrahimi common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
14252*22dc650dSSadaf Ebrahimi
14253*22dc650dSSadaf Ebrahimi if (common->bsr_nltype == NLTYPE_ANY)
14254*22dc650dSSadaf Ebrahimi common->bsr_nlmax = 0x2029;
14255*22dc650dSSadaf Ebrahimi else
14256*22dc650dSSadaf Ebrahimi common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
14257*22dc650dSSadaf Ebrahimi common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
14258*22dc650dSSadaf Ebrahimi }
14259*22dc650dSSadaf Ebrahimi else
14260*22dc650dSSadaf Ebrahimi common->invalid_utf = FALSE;
14261*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE */
14262*22dc650dSSadaf Ebrahimi ccend = bracketend(common->start);
14263*22dc650dSSadaf Ebrahimi
14264*22dc650dSSadaf Ebrahimi /* Calculate the local space size on the stack. */
14265*22dc650dSSadaf Ebrahimi common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
14266*22dc650dSSadaf Ebrahimi common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, allocator_data);
14267*22dc650dSSadaf Ebrahimi if (!common->optimized_cbracket)
14268*22dc650dSSadaf Ebrahimi return PCRE2_ERROR_NOMEMORY;
14269*22dc650dSSadaf Ebrahimi #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
14270*22dc650dSSadaf Ebrahimi memset(common->optimized_cbracket, 0, re->top_bracket + 1);
14271*22dc650dSSadaf Ebrahimi #else
14272*22dc650dSSadaf Ebrahimi memset(common->optimized_cbracket, 1, re->top_bracket + 1);
14273*22dc650dSSadaf Ebrahimi #endif
14274*22dc650dSSadaf Ebrahimi
14275*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
14276*22dc650dSSadaf Ebrahimi #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
14277*22dc650dSSadaf Ebrahimi common->capture_last_ptr = common->ovector_start;
14278*22dc650dSSadaf Ebrahimi common->ovector_start += sizeof(sljit_sw);
14279*22dc650dSSadaf Ebrahimi #endif
14280*22dc650dSSadaf Ebrahimi if (!check_opcode_types(common, common->start, ccend))
14281*22dc650dSSadaf Ebrahimi {
14282*22dc650dSSadaf Ebrahimi SLJIT_FREE(common->optimized_cbracket, allocator_data);
14283*22dc650dSSadaf Ebrahimi return PCRE2_ERROR_NOMEMORY;
14284*22dc650dSSadaf Ebrahimi }
14285*22dc650dSSadaf Ebrahimi
14286*22dc650dSSadaf Ebrahimi /* Checking flags and updating ovector_start. */
14287*22dc650dSSadaf Ebrahimi if (mode == PCRE2_JIT_COMPLETE && (re->flags & PCRE2_LASTSET) != 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
14288*22dc650dSSadaf Ebrahimi {
14289*22dc650dSSadaf Ebrahimi common->req_char_ptr = common->ovector_start;
14290*22dc650dSSadaf Ebrahimi common->ovector_start += sizeof(sljit_sw);
14291*22dc650dSSadaf Ebrahimi }
14292*22dc650dSSadaf Ebrahimi if (mode != PCRE2_JIT_COMPLETE)
14293*22dc650dSSadaf Ebrahimi {
14294*22dc650dSSadaf Ebrahimi common->start_used_ptr = common->ovector_start;
14295*22dc650dSSadaf Ebrahimi common->ovector_start += sizeof(sljit_sw);
14296*22dc650dSSadaf Ebrahimi if (mode == PCRE2_JIT_PARTIAL_SOFT)
14297*22dc650dSSadaf Ebrahimi {
14298*22dc650dSSadaf Ebrahimi common->hit_start = common->ovector_start;
14299*22dc650dSSadaf Ebrahimi common->ovector_start += sizeof(sljit_sw);
14300*22dc650dSSadaf Ebrahimi }
14301*22dc650dSSadaf Ebrahimi }
14302*22dc650dSSadaf Ebrahimi if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0)
14303*22dc650dSSadaf Ebrahimi {
14304*22dc650dSSadaf Ebrahimi common->match_end_ptr = common->ovector_start;
14305*22dc650dSSadaf Ebrahimi common->ovector_start += sizeof(sljit_sw);
14306*22dc650dSSadaf Ebrahimi }
14307*22dc650dSSadaf Ebrahimi #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
14308*22dc650dSSadaf Ebrahimi common->control_head_ptr = 1;
14309*22dc650dSSadaf Ebrahimi #endif
14310*22dc650dSSadaf Ebrahimi if (common->control_head_ptr != 0)
14311*22dc650dSSadaf Ebrahimi {
14312*22dc650dSSadaf Ebrahimi common->control_head_ptr = common->ovector_start;
14313*22dc650dSSadaf Ebrahimi common->ovector_start += sizeof(sljit_sw);
14314*22dc650dSSadaf Ebrahimi }
14315*22dc650dSSadaf Ebrahimi if (common->has_set_som)
14316*22dc650dSSadaf Ebrahimi {
14317*22dc650dSSadaf Ebrahimi /* Saving the real start pointer is necessary. */
14318*22dc650dSSadaf Ebrahimi common->start_ptr = common->ovector_start;
14319*22dc650dSSadaf Ebrahimi common->ovector_start += sizeof(sljit_sw);
14320*22dc650dSSadaf Ebrahimi }
14321*22dc650dSSadaf Ebrahimi
14322*22dc650dSSadaf Ebrahimi /* Aligning ovector to even number of sljit words. */
14323*22dc650dSSadaf Ebrahimi if ((common->ovector_start & sizeof(sljit_sw)) != 0)
14324*22dc650dSSadaf Ebrahimi common->ovector_start += sizeof(sljit_sw);
14325*22dc650dSSadaf Ebrahimi
14326*22dc650dSSadaf Ebrahimi if (common->start_ptr == 0)
14327*22dc650dSSadaf Ebrahimi common->start_ptr = OVECTOR(0);
14328*22dc650dSSadaf Ebrahimi
14329*22dc650dSSadaf Ebrahimi /* Capturing brackets cannot be optimized if callouts are allowed. */
14330*22dc650dSSadaf Ebrahimi if (common->capture_last_ptr != 0)
14331*22dc650dSSadaf Ebrahimi memset(common->optimized_cbracket, 0, re->top_bracket + 1);
14332*22dc650dSSadaf Ebrahimi
14333*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
14334*22dc650dSSadaf Ebrahimi common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
14335*22dc650dSSadaf Ebrahimi
14336*22dc650dSSadaf Ebrahimi total_length = ccend - common->start;
14337*22dc650dSSadaf Ebrahimi common->private_data_ptrs = (sljit_s32*)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data);
14338*22dc650dSSadaf Ebrahimi if (!common->private_data_ptrs)
14339*22dc650dSSadaf Ebrahimi {
14340*22dc650dSSadaf Ebrahimi SLJIT_FREE(common->optimized_cbracket, allocator_data);
14341*22dc650dSSadaf Ebrahimi return PCRE2_ERROR_NOMEMORY;
14342*22dc650dSSadaf Ebrahimi }
14343*22dc650dSSadaf Ebrahimi memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));
14344*22dc650dSSadaf Ebrahimi
14345*22dc650dSSadaf Ebrahimi private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
14346*22dc650dSSadaf Ebrahimi
14347*22dc650dSSadaf Ebrahimi if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && !common->has_skip_in_assert_back)
14348*22dc650dSSadaf Ebrahimi detect_early_fail(common, common->start, &private_data_size, 0, 0);
14349*22dc650dSSadaf Ebrahimi
14350*22dc650dSSadaf Ebrahimi set_private_data_ptrs(common, &private_data_size, ccend);
14351*22dc650dSSadaf Ebrahimi
14352*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr);
14353*22dc650dSSadaf Ebrahimi
14354*22dc650dSSadaf Ebrahimi if (private_data_size > 65536)
14355*22dc650dSSadaf Ebrahimi {
14356*22dc650dSSadaf Ebrahimi SLJIT_FREE(common->private_data_ptrs, allocator_data);
14357*22dc650dSSadaf Ebrahimi SLJIT_FREE(common->optimized_cbracket, allocator_data);
14358*22dc650dSSadaf Ebrahimi return PCRE2_ERROR_NOMEMORY;
14359*22dc650dSSadaf Ebrahimi }
14360*22dc650dSSadaf Ebrahimi
14361*22dc650dSSadaf Ebrahimi if (common->has_then)
14362*22dc650dSSadaf Ebrahimi {
14363*22dc650dSSadaf Ebrahimi common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length);
14364*22dc650dSSadaf Ebrahimi memset(common->then_offsets, 0, total_length);
14365*22dc650dSSadaf Ebrahimi set_then_offsets(common, common->start, NULL);
14366*22dc650dSSadaf Ebrahimi }
14367*22dc650dSSadaf Ebrahimi
14368*22dc650dSSadaf Ebrahimi compiler = sljit_create_compiler(allocator_data);
14369*22dc650dSSadaf Ebrahimi if (!compiler)
14370*22dc650dSSadaf Ebrahimi {
14371*22dc650dSSadaf Ebrahimi SLJIT_FREE(common->optimized_cbracket, allocator_data);
14372*22dc650dSSadaf Ebrahimi SLJIT_FREE(common->private_data_ptrs, allocator_data);
14373*22dc650dSSadaf Ebrahimi return PCRE2_ERROR_NOMEMORY;
14374*22dc650dSSadaf Ebrahimi }
14375*22dc650dSSadaf Ebrahimi common->compiler = compiler;
14376*22dc650dSSadaf Ebrahimi
14377*22dc650dSSadaf Ebrahimi /* Main pcre2_jit_exec entry. */
14378*22dc650dSSadaf Ebrahimi SLJIT_ASSERT((private_data_size & (sizeof(sljit_sw) - 1)) == 0);
14379*22dc650dSSadaf Ebrahimi sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, W), 5, 5, SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS, 0, private_data_size);
14380*22dc650dSSadaf Ebrahimi
14381*22dc650dSSadaf Ebrahimi /* Register init. */
14382*22dc650dSSadaf Ebrahimi reset_ovector(common, (re->top_bracket + 1) * 2);
14383*22dc650dSSadaf Ebrahimi if (common->req_char_ptr != 0)
14384*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
14385*22dc650dSSadaf Ebrahimi
14386*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
14387*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
14388*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
14389*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
14390*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
14391*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
14392*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end));
14393*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start));
14394*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
14395*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
14396*22dc650dSSadaf Ebrahimi
14397*22dc650dSSadaf Ebrahimi if (common->early_fail_start_ptr < common->early_fail_end_ptr)
14398*22dc650dSSadaf Ebrahimi reset_early_fail(common);
14399*22dc650dSSadaf Ebrahimi
14400*22dc650dSSadaf Ebrahimi if (mode == PCRE2_JIT_PARTIAL_SOFT)
14401*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
14402*22dc650dSSadaf Ebrahimi if (common->mark_ptr != 0)
14403*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
14404*22dc650dSSadaf Ebrahimi if (common->control_head_ptr != 0)
14405*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
14406*22dc650dSSadaf Ebrahimi
14407*22dc650dSSadaf Ebrahimi /* Main part of the matching */
14408*22dc650dSSadaf Ebrahimi if ((re->overall_options & PCRE2_ANCHORED) == 0)
14409*22dc650dSSadaf Ebrahimi {
14410*22dc650dSSadaf Ebrahimi mainloop_label = mainloop_entry(common);
14411*22dc650dSSadaf Ebrahimi continue_match_label = LABEL();
14412*22dc650dSSadaf Ebrahimi /* Forward search if possible. */
14413*22dc650dSSadaf Ebrahimi if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
14414*22dc650dSSadaf Ebrahimi {
14415*22dc650dSSadaf Ebrahimi if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common))
14416*22dc650dSSadaf Ebrahimi ;
14417*22dc650dSSadaf Ebrahimi else if ((re->flags & PCRE2_FIRSTSET) != 0)
14418*22dc650dSSadaf Ebrahimi fast_forward_first_char(common);
14419*22dc650dSSadaf Ebrahimi else if ((re->flags & PCRE2_STARTLINE) != 0)
14420*22dc650dSSadaf Ebrahimi fast_forward_newline(common);
14421*22dc650dSSadaf Ebrahimi else if ((re->flags & PCRE2_FIRSTMAPSET) != 0)
14422*22dc650dSSadaf Ebrahimi fast_forward_start_bits(common);
14423*22dc650dSSadaf Ebrahimi }
14424*22dc650dSSadaf Ebrahimi }
14425*22dc650dSSadaf Ebrahimi else
14426*22dc650dSSadaf Ebrahimi continue_match_label = LABEL();
14427*22dc650dSSadaf Ebrahimi
14428*22dc650dSSadaf Ebrahimi if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
14429*22dc650dSSadaf Ebrahimi {
14430*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
14431*22dc650dSSadaf Ebrahimi OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(re->minlength));
14432*22dc650dSSadaf Ebrahimi minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
14433*22dc650dSSadaf Ebrahimi }
14434*22dc650dSSadaf Ebrahimi if (common->req_char_ptr != 0)
14435*22dc650dSSadaf Ebrahimi reqcu_not_found = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);
14436*22dc650dSSadaf Ebrahimi
14437*22dc650dSSadaf Ebrahimi /* Store the current STR_PTR in OVECTOR(0). */
14438*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
14439*22dc650dSSadaf Ebrahimi /* Copy the limit of allowed recursions. */
14440*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
14441*22dc650dSSadaf Ebrahimi if (common->capture_last_ptr != 0)
14442*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, 0);
14443*22dc650dSSadaf Ebrahimi if (common->fast_forward_bc_ptr != NULL)
14444*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3, STR_PTR, 0);
14445*22dc650dSSadaf Ebrahimi
14446*22dc650dSSadaf Ebrahimi if (common->start_ptr != OVECTOR(0))
14447*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
14448*22dc650dSSadaf Ebrahimi
14449*22dc650dSSadaf Ebrahimi /* Copy the beginning of the string. */
14450*22dc650dSSadaf Ebrahimi if (mode == PCRE2_JIT_PARTIAL_SOFT)
14451*22dc650dSSadaf Ebrahimi {
14452*22dc650dSSadaf Ebrahimi jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
14453*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
14454*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
14455*22dc650dSSadaf Ebrahimi }
14456*22dc650dSSadaf Ebrahimi else if (mode == PCRE2_JIT_PARTIAL_HARD)
14457*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
14458*22dc650dSSadaf Ebrahimi
14459*22dc650dSSadaf Ebrahimi compile_matchingpath(common, common->start, ccend, &rootbacktrack);
14460*22dc650dSSadaf Ebrahimi if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14461*22dc650dSSadaf Ebrahimi {
14462*22dc650dSSadaf Ebrahimi sljit_free_compiler(compiler);
14463*22dc650dSSadaf Ebrahimi SLJIT_FREE(common->optimized_cbracket, allocator_data);
14464*22dc650dSSadaf Ebrahimi SLJIT_FREE(common->private_data_ptrs, allocator_data);
14465*22dc650dSSadaf Ebrahimi PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14466*22dc650dSSadaf Ebrahimi return PCRE2_ERROR_NOMEMORY;
14467*22dc650dSSadaf Ebrahimi }
14468*22dc650dSSadaf Ebrahimi
14469*22dc650dSSadaf Ebrahimi if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
14470*22dc650dSSadaf Ebrahimi end_anchor_failed = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0);
14471*22dc650dSSadaf Ebrahimi
14472*22dc650dSSadaf Ebrahimi if (common->might_be_empty)
14473*22dc650dSSadaf Ebrahimi {
14474*22dc650dSSadaf Ebrahimi empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
14475*22dc650dSSadaf Ebrahimi empty_match_found_label = LABEL();
14476*22dc650dSSadaf Ebrahimi }
14477*22dc650dSSadaf Ebrahimi
14478*22dc650dSSadaf Ebrahimi common->accept_label = LABEL();
14479*22dc650dSSadaf Ebrahimi if (common->accept != NULL)
14480*22dc650dSSadaf Ebrahimi set_jumps(common->accept, common->accept_label);
14481*22dc650dSSadaf Ebrahimi
14482*22dc650dSSadaf Ebrahimi /* This means we have a match. Update the ovector. */
14483*22dc650dSSadaf Ebrahimi copy_ovector(common, re->top_bracket + 1);
14484*22dc650dSSadaf Ebrahimi common->quit_label = common->abort_label = LABEL();
14485*22dc650dSSadaf Ebrahimi if (common->quit != NULL)
14486*22dc650dSSadaf Ebrahimi set_jumps(common->quit, common->quit_label);
14487*22dc650dSSadaf Ebrahimi if (common->abort != NULL)
14488*22dc650dSSadaf Ebrahimi set_jumps(common->abort, common->abort_label);
14489*22dc650dSSadaf Ebrahimi if (minlength_check_failed != NULL)
14490*22dc650dSSadaf Ebrahimi SET_LABEL(minlength_check_failed, common->abort_label);
14491*22dc650dSSadaf Ebrahimi
14492*22dc650dSSadaf Ebrahimi sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN);
14493*22dc650dSSadaf Ebrahimi sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
14494*22dc650dSSadaf Ebrahimi
14495*22dc650dSSadaf Ebrahimi if (common->failed_match != NULL)
14496*22dc650dSSadaf Ebrahimi {
14497*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
14498*22dc650dSSadaf Ebrahimi set_jumps(common->failed_match, LABEL());
14499*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
14500*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_JUMP, common->abort_label);
14501*22dc650dSSadaf Ebrahimi }
14502*22dc650dSSadaf Ebrahimi
14503*22dc650dSSadaf Ebrahimi if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
14504*22dc650dSSadaf Ebrahimi JUMPHERE(end_anchor_failed);
14505*22dc650dSSadaf Ebrahimi
14506*22dc650dSSadaf Ebrahimi if (mode != PCRE2_JIT_COMPLETE)
14507*22dc650dSSadaf Ebrahimi {
14508*22dc650dSSadaf Ebrahimi common->partialmatchlabel = LABEL();
14509*22dc650dSSadaf Ebrahimi set_jumps(common->partialmatch, common->partialmatchlabel);
14510*22dc650dSSadaf Ebrahimi return_with_partial_match(common, common->quit_label);
14511*22dc650dSSadaf Ebrahimi }
14512*22dc650dSSadaf Ebrahimi
14513*22dc650dSSadaf Ebrahimi if (common->might_be_empty)
14514*22dc650dSSadaf Ebrahimi empty_match_backtrack_label = LABEL();
14515*22dc650dSSadaf Ebrahimi compile_backtrackingpath(common, rootbacktrack.top);
14516*22dc650dSSadaf Ebrahimi if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14517*22dc650dSSadaf Ebrahimi {
14518*22dc650dSSadaf Ebrahimi sljit_free_compiler(compiler);
14519*22dc650dSSadaf Ebrahimi SLJIT_FREE(common->optimized_cbracket, allocator_data);
14520*22dc650dSSadaf Ebrahimi SLJIT_FREE(common->private_data_ptrs, allocator_data);
14521*22dc650dSSadaf Ebrahimi PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14522*22dc650dSSadaf Ebrahimi return PCRE2_ERROR_NOMEMORY;
14523*22dc650dSSadaf Ebrahimi }
14524*22dc650dSSadaf Ebrahimi
14525*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(rootbacktrack.prev == NULL);
14526*22dc650dSSadaf Ebrahimi reset_match_label = LABEL();
14527*22dc650dSSadaf Ebrahimi
14528*22dc650dSSadaf Ebrahimi if (mode == PCRE2_JIT_PARTIAL_SOFT)
14529*22dc650dSSadaf Ebrahimi {
14530*22dc650dSSadaf Ebrahimi /* Update hit_start only the first time. */
14531*22dc650dSSadaf Ebrahimi jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
14532*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
14533*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
14534*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
14535*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
14536*22dc650dSSadaf Ebrahimi }
14537*22dc650dSSadaf Ebrahimi
14538*22dc650dSSadaf Ebrahimi /* Check we have remaining characters. */
14539*22dc650dSSadaf Ebrahimi if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0)
14540*22dc650dSSadaf Ebrahimi {
14541*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
14542*22dc650dSSadaf Ebrahimi }
14543*22dc650dSSadaf Ebrahimi
14544*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
14545*22dc650dSSadaf Ebrahimi (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3) : common->start_ptr);
14546*22dc650dSSadaf Ebrahimi
14547*22dc650dSSadaf Ebrahimi if ((re->overall_options & PCRE2_ANCHORED) == 0)
14548*22dc650dSSadaf Ebrahimi {
14549*22dc650dSSadaf Ebrahimi if (common->ff_newline_shortcut != NULL)
14550*22dc650dSSadaf Ebrahimi {
14551*22dc650dSSadaf Ebrahimi /* There cannot be more newlines if PCRE2_FIRSTLINE is set. */
14552*22dc650dSSadaf Ebrahimi if ((re->overall_options & PCRE2_FIRSTLINE) == 0)
14553*22dc650dSSadaf Ebrahimi {
14554*22dc650dSSadaf Ebrahimi if (common->match_end_ptr != 0)
14555*22dc650dSSadaf Ebrahimi {
14556*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
14557*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
14558*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, common->ff_newline_shortcut);
14559*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
14560*22dc650dSSadaf Ebrahimi }
14561*22dc650dSSadaf Ebrahimi else
14562*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
14563*22dc650dSSadaf Ebrahimi }
14564*22dc650dSSadaf Ebrahimi }
14565*22dc650dSSadaf Ebrahimi else
14566*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_LESS, STR_PTR, 0, (common->match_end_ptr == 0) ? STR_END : TMP1, 0, mainloop_label);
14567*22dc650dSSadaf Ebrahimi }
14568*22dc650dSSadaf Ebrahimi
14569*22dc650dSSadaf Ebrahimi /* No more remaining characters. */
14570*22dc650dSSadaf Ebrahimi if (reqcu_not_found != NULL)
14571*22dc650dSSadaf Ebrahimi set_jumps(reqcu_not_found, LABEL());
14572*22dc650dSSadaf Ebrahimi
14573*22dc650dSSadaf Ebrahimi if (mode == PCRE2_JIT_PARTIAL_SOFT)
14574*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
14575*22dc650dSSadaf Ebrahimi
14576*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
14577*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_JUMP, common->quit_label);
14578*22dc650dSSadaf Ebrahimi
14579*22dc650dSSadaf Ebrahimi flush_stubs(common);
14580*22dc650dSSadaf Ebrahimi
14581*22dc650dSSadaf Ebrahimi if (common->might_be_empty)
14582*22dc650dSSadaf Ebrahimi {
14583*22dc650dSSadaf Ebrahimi JUMPHERE(empty_match);
14584*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
14585*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
14586*22dc650dSSadaf Ebrahimi OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
14587*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label);
14588*22dc650dSSadaf Ebrahimi OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
14589*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_ZERO, empty_match_found_label);
14590*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
14591*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
14592*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
14593*22dc650dSSadaf Ebrahimi }
14594*22dc650dSSadaf Ebrahimi
14595*22dc650dSSadaf Ebrahimi common->fast_forward_bc_ptr = NULL;
14596*22dc650dSSadaf Ebrahimi common->early_fail_start_ptr = 0;
14597*22dc650dSSadaf Ebrahimi common->early_fail_end_ptr = 0;
14598*22dc650dSSadaf Ebrahimi common->currententry = common->entries;
14599*22dc650dSSadaf Ebrahimi common->local_quit_available = TRUE;
14600*22dc650dSSadaf Ebrahimi quit_label = common->quit_label;
14601*22dc650dSSadaf Ebrahimi if (common->currententry != NULL)
14602*22dc650dSSadaf Ebrahimi {
14603*22dc650dSSadaf Ebrahimi /* A free bit for each private data. */
14604*22dc650dSSadaf Ebrahimi common->recurse_bitset_size = ((private_data_size / SSIZE_OF(sw)) + 7) >> 3;
14605*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(common->recurse_bitset_size > 0);
14606*22dc650dSSadaf Ebrahimi common->recurse_bitset = (sljit_u8*)SLJIT_MALLOC(common->recurse_bitset_size, allocator_data);;
14607*22dc650dSSadaf Ebrahimi
14608*22dc650dSSadaf Ebrahimi if (common->recurse_bitset != NULL)
14609*22dc650dSSadaf Ebrahimi {
14610*22dc650dSSadaf Ebrahimi do
14611*22dc650dSSadaf Ebrahimi {
14612*22dc650dSSadaf Ebrahimi /* Might add new entries. */
14613*22dc650dSSadaf Ebrahimi compile_recurse(common);
14614*22dc650dSSadaf Ebrahimi if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14615*22dc650dSSadaf Ebrahimi break;
14616*22dc650dSSadaf Ebrahimi flush_stubs(common);
14617*22dc650dSSadaf Ebrahimi common->currententry = common->currententry->next;
14618*22dc650dSSadaf Ebrahimi }
14619*22dc650dSSadaf Ebrahimi while (common->currententry != NULL);
14620*22dc650dSSadaf Ebrahimi
14621*22dc650dSSadaf Ebrahimi SLJIT_FREE(common->recurse_bitset, allocator_data);
14622*22dc650dSSadaf Ebrahimi }
14623*22dc650dSSadaf Ebrahimi
14624*22dc650dSSadaf Ebrahimi if (common->currententry != NULL)
14625*22dc650dSSadaf Ebrahimi {
14626*22dc650dSSadaf Ebrahimi /* The common->recurse_bitset has been freed. */
14627*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(sljit_get_compiler_error(compiler) || common->recurse_bitset == NULL);
14628*22dc650dSSadaf Ebrahimi
14629*22dc650dSSadaf Ebrahimi sljit_free_compiler(compiler);
14630*22dc650dSSadaf Ebrahimi SLJIT_FREE(common->optimized_cbracket, allocator_data);
14631*22dc650dSSadaf Ebrahimi SLJIT_FREE(common->private_data_ptrs, allocator_data);
14632*22dc650dSSadaf Ebrahimi PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14633*22dc650dSSadaf Ebrahimi return PCRE2_ERROR_NOMEMORY;
14634*22dc650dSSadaf Ebrahimi }
14635*22dc650dSSadaf Ebrahimi }
14636*22dc650dSSadaf Ebrahimi common->local_quit_available = FALSE;
14637*22dc650dSSadaf Ebrahimi common->quit_label = quit_label;
14638*22dc650dSSadaf Ebrahimi
14639*22dc650dSSadaf Ebrahimi /* Allocating stack, returns with PCRE2_ERROR_JIT_STACKLIMIT if it fails. */
14640*22dc650dSSadaf Ebrahimi /* This is a (really) rare case. */
14641*22dc650dSSadaf Ebrahimi set_jumps(common->stackalloc, LABEL());
14642*22dc650dSSadaf Ebrahimi /* RETURN_ADDR is not a saved register. */
14643*22dc650dSSadaf Ebrahimi sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0);
14644*22dc650dSSadaf Ebrahimi
14645*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
14646*22dc650dSSadaf Ebrahimi
14647*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
14648*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
14649*22dc650dSSadaf Ebrahimi OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE);
14650*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack));
14651*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0);
14652*22dc650dSSadaf Ebrahimi
14653*22dc650dSSadaf Ebrahimi sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(sljit_stack_resize));
14654*22dc650dSSadaf Ebrahimi
14655*22dc650dSSadaf Ebrahimi jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
14656*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0);
14657*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0);
14658*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
14659*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
14660*22dc650dSSadaf Ebrahimi OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
14661*22dc650dSSadaf Ebrahimi
14662*22dc650dSSadaf Ebrahimi /* Allocation failed. */
14663*22dc650dSSadaf Ebrahimi JUMPHERE(jump);
14664*22dc650dSSadaf Ebrahimi /* We break the return address cache here, but this is a really rare case. */
14665*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_JIT_STACKLIMIT);
14666*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_JUMP, common->quit_label);
14667*22dc650dSSadaf Ebrahimi
14668*22dc650dSSadaf Ebrahimi /* Call limit reached. */
14669*22dc650dSSadaf Ebrahimi set_jumps(common->calllimit, LABEL());
14670*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_MATCHLIMIT);
14671*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_JUMP, common->quit_label);
14672*22dc650dSSadaf Ebrahimi
14673*22dc650dSSadaf Ebrahimi if (common->revertframes != NULL)
14674*22dc650dSSadaf Ebrahimi {
14675*22dc650dSSadaf Ebrahimi set_jumps(common->revertframes, LABEL());
14676*22dc650dSSadaf Ebrahimi do_revertframes(common);
14677*22dc650dSSadaf Ebrahimi }
14678*22dc650dSSadaf Ebrahimi if (common->wordboundary != NULL)
14679*22dc650dSSadaf Ebrahimi {
14680*22dc650dSSadaf Ebrahimi set_jumps(common->wordboundary, LABEL());
14681*22dc650dSSadaf Ebrahimi check_wordboundary(common, FALSE);
14682*22dc650dSSadaf Ebrahimi }
14683*22dc650dSSadaf Ebrahimi if (common->ucp_wordboundary != NULL)
14684*22dc650dSSadaf Ebrahimi {
14685*22dc650dSSadaf Ebrahimi set_jumps(common->ucp_wordboundary, LABEL());
14686*22dc650dSSadaf Ebrahimi check_wordboundary(common, TRUE);
14687*22dc650dSSadaf Ebrahimi }
14688*22dc650dSSadaf Ebrahimi if (common->anynewline != NULL)
14689*22dc650dSSadaf Ebrahimi {
14690*22dc650dSSadaf Ebrahimi set_jumps(common->anynewline, LABEL());
14691*22dc650dSSadaf Ebrahimi check_anynewline(common);
14692*22dc650dSSadaf Ebrahimi }
14693*22dc650dSSadaf Ebrahimi if (common->hspace != NULL)
14694*22dc650dSSadaf Ebrahimi {
14695*22dc650dSSadaf Ebrahimi set_jumps(common->hspace, LABEL());
14696*22dc650dSSadaf Ebrahimi check_hspace(common);
14697*22dc650dSSadaf Ebrahimi }
14698*22dc650dSSadaf Ebrahimi if (common->vspace != NULL)
14699*22dc650dSSadaf Ebrahimi {
14700*22dc650dSSadaf Ebrahimi set_jumps(common->vspace, LABEL());
14701*22dc650dSSadaf Ebrahimi check_vspace(common);
14702*22dc650dSSadaf Ebrahimi }
14703*22dc650dSSadaf Ebrahimi if (common->casefulcmp != NULL)
14704*22dc650dSSadaf Ebrahimi {
14705*22dc650dSSadaf Ebrahimi set_jumps(common->casefulcmp, LABEL());
14706*22dc650dSSadaf Ebrahimi do_casefulcmp(common);
14707*22dc650dSSadaf Ebrahimi }
14708*22dc650dSSadaf Ebrahimi if (common->caselesscmp != NULL)
14709*22dc650dSSadaf Ebrahimi {
14710*22dc650dSSadaf Ebrahimi set_jumps(common->caselesscmp, LABEL());
14711*22dc650dSSadaf Ebrahimi do_caselesscmp(common);
14712*22dc650dSSadaf Ebrahimi }
14713*22dc650dSSadaf Ebrahimi if (common->reset_match != NULL || common->restart_match != NULL)
14714*22dc650dSSadaf Ebrahimi {
14715*22dc650dSSadaf Ebrahimi if (common->restart_match != NULL)
14716*22dc650dSSadaf Ebrahimi {
14717*22dc650dSSadaf Ebrahimi set_jumps(common->restart_match, LABEL());
14718*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
14719*22dc650dSSadaf Ebrahimi }
14720*22dc650dSSadaf Ebrahimi
14721*22dc650dSSadaf Ebrahimi set_jumps(common->reset_match, LABEL());
14722*22dc650dSSadaf Ebrahimi do_reset_match(common, (re->top_bracket + 1) * 2);
14723*22dc650dSSadaf Ebrahimi /* The value of restart_match is in TMP1. */
14724*22dc650dSSadaf Ebrahimi CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
14725*22dc650dSSadaf Ebrahimi OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
14726*22dc650dSSadaf Ebrahimi JUMPTO(SLJIT_JUMP, reset_match_label);
14727*22dc650dSSadaf Ebrahimi }
14728*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
14729*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8
14730*22dc650dSSadaf Ebrahimi if (common->utfreadchar != NULL)
14731*22dc650dSSadaf Ebrahimi {
14732*22dc650dSSadaf Ebrahimi set_jumps(common->utfreadchar, LABEL());
14733*22dc650dSSadaf Ebrahimi do_utfreadchar(common);
14734*22dc650dSSadaf Ebrahimi }
14735*22dc650dSSadaf Ebrahimi if (common->utfreadtype8 != NULL)
14736*22dc650dSSadaf Ebrahimi {
14737*22dc650dSSadaf Ebrahimi set_jumps(common->utfreadtype8, LABEL());
14738*22dc650dSSadaf Ebrahimi do_utfreadtype8(common);
14739*22dc650dSSadaf Ebrahimi }
14740*22dc650dSSadaf Ebrahimi if (common->utfpeakcharback != NULL)
14741*22dc650dSSadaf Ebrahimi {
14742*22dc650dSSadaf Ebrahimi set_jumps(common->utfpeakcharback, LABEL());
14743*22dc650dSSadaf Ebrahimi do_utfpeakcharback(common);
14744*22dc650dSSadaf Ebrahimi }
14745*22dc650dSSadaf Ebrahimi #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
14746*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
14747*22dc650dSSadaf Ebrahimi if (common->utfreadchar_invalid != NULL)
14748*22dc650dSSadaf Ebrahimi {
14749*22dc650dSSadaf Ebrahimi set_jumps(common->utfreadchar_invalid, LABEL());
14750*22dc650dSSadaf Ebrahimi do_utfreadchar_invalid(common);
14751*22dc650dSSadaf Ebrahimi }
14752*22dc650dSSadaf Ebrahimi if (common->utfreadnewline_invalid != NULL)
14753*22dc650dSSadaf Ebrahimi {
14754*22dc650dSSadaf Ebrahimi set_jumps(common->utfreadnewline_invalid, LABEL());
14755*22dc650dSSadaf Ebrahimi do_utfreadnewline_invalid(common);
14756*22dc650dSSadaf Ebrahimi }
14757*22dc650dSSadaf Ebrahimi if (common->utfmoveback_invalid)
14758*22dc650dSSadaf Ebrahimi {
14759*22dc650dSSadaf Ebrahimi set_jumps(common->utfmoveback_invalid, LABEL());
14760*22dc650dSSadaf Ebrahimi do_utfmoveback_invalid(common);
14761*22dc650dSSadaf Ebrahimi }
14762*22dc650dSSadaf Ebrahimi if (common->utfpeakcharback_invalid)
14763*22dc650dSSadaf Ebrahimi {
14764*22dc650dSSadaf Ebrahimi set_jumps(common->utfpeakcharback_invalid, LABEL());
14765*22dc650dSSadaf Ebrahimi do_utfpeakcharback_invalid(common);
14766*22dc650dSSadaf Ebrahimi }
14767*22dc650dSSadaf Ebrahimi #endif /* PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 */
14768*22dc650dSSadaf Ebrahimi if (common->getucd != NULL)
14769*22dc650dSSadaf Ebrahimi {
14770*22dc650dSSadaf Ebrahimi set_jumps(common->getucd, LABEL());
14771*22dc650dSSadaf Ebrahimi do_getucd(common);
14772*22dc650dSSadaf Ebrahimi }
14773*22dc650dSSadaf Ebrahimi if (common->getucdtype != NULL)
14774*22dc650dSSadaf Ebrahimi {
14775*22dc650dSSadaf Ebrahimi set_jumps(common->getucdtype, LABEL());
14776*22dc650dSSadaf Ebrahimi do_getucdtype(common);
14777*22dc650dSSadaf Ebrahimi }
14778*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE */
14779*22dc650dSSadaf Ebrahimi
14780*22dc650dSSadaf Ebrahimi SLJIT_FREE(common->optimized_cbracket, allocator_data);
14781*22dc650dSSadaf Ebrahimi SLJIT_FREE(common->private_data_ptrs, allocator_data);
14782*22dc650dSSadaf Ebrahimi
14783*22dc650dSSadaf Ebrahimi executable_func = sljit_generate_code(compiler, 0, NULL);
14784*22dc650dSSadaf Ebrahimi executable_size = sljit_get_generated_code_size(compiler);
14785*22dc650dSSadaf Ebrahimi sljit_free_compiler(compiler);
14786*22dc650dSSadaf Ebrahimi
14787*22dc650dSSadaf Ebrahimi if (executable_func == NULL)
14788*22dc650dSSadaf Ebrahimi {
14789*22dc650dSSadaf Ebrahimi PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14790*22dc650dSSadaf Ebrahimi return PCRE2_ERROR_NOMEMORY;
14791*22dc650dSSadaf Ebrahimi }
14792*22dc650dSSadaf Ebrahimi
14793*22dc650dSSadaf Ebrahimi /* Reuse the function descriptor if possible. */
14794*22dc650dSSadaf Ebrahimi if (re->executable_jit != NULL)
14795*22dc650dSSadaf Ebrahimi functions = (executable_functions *)re->executable_jit;
14796*22dc650dSSadaf Ebrahimi else
14797*22dc650dSSadaf Ebrahimi {
14798*22dc650dSSadaf Ebrahimi functions = SLJIT_MALLOC(sizeof(executable_functions), allocator_data);
14799*22dc650dSSadaf Ebrahimi if (functions == NULL)
14800*22dc650dSSadaf Ebrahimi {
14801*22dc650dSSadaf Ebrahimi /* This case is highly unlikely since we just recently
14802*22dc650dSSadaf Ebrahimi freed a lot of memory. Not impossible though. */
14803*22dc650dSSadaf Ebrahimi sljit_free_code(executable_func, NULL);
14804*22dc650dSSadaf Ebrahimi PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14805*22dc650dSSadaf Ebrahimi return PCRE2_ERROR_NOMEMORY;
14806*22dc650dSSadaf Ebrahimi }
14807*22dc650dSSadaf Ebrahimi memset(functions, 0, sizeof(executable_functions));
14808*22dc650dSSadaf Ebrahimi functions->top_bracket = re->top_bracket + 1;
14809*22dc650dSSadaf Ebrahimi functions->limit_match = re->limit_match;
14810*22dc650dSSadaf Ebrahimi re->executable_jit = functions;
14811*22dc650dSSadaf Ebrahimi }
14812*22dc650dSSadaf Ebrahimi
14813*22dc650dSSadaf Ebrahimi /* Turn mode into an index. */
14814*22dc650dSSadaf Ebrahimi if (mode == PCRE2_JIT_COMPLETE)
14815*22dc650dSSadaf Ebrahimi mode = 0;
14816*22dc650dSSadaf Ebrahimi else
14817*22dc650dSSadaf Ebrahimi mode = (mode == PCRE2_JIT_PARTIAL_SOFT) ? 1 : 2;
14818*22dc650dSSadaf Ebrahimi
14819*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(mode < JIT_NUMBER_OF_COMPILE_MODES);
14820*22dc650dSSadaf Ebrahimi functions->executable_funcs[mode] = executable_func;
14821*22dc650dSSadaf Ebrahimi functions->read_only_data_heads[mode] = common->read_only_data_head;
14822*22dc650dSSadaf Ebrahimi functions->executable_sizes[mode] = executable_size;
14823*22dc650dSSadaf Ebrahimi return 0;
14824*22dc650dSSadaf Ebrahimi }
14825*22dc650dSSadaf Ebrahimi
14826*22dc650dSSadaf Ebrahimi #endif
14827*22dc650dSSadaf Ebrahimi
14828*22dc650dSSadaf Ebrahimi /*************************************************
14829*22dc650dSSadaf Ebrahimi * JIT compile a Regular Expression *
14830*22dc650dSSadaf Ebrahimi *************************************************/
14831*22dc650dSSadaf Ebrahimi
14832*22dc650dSSadaf Ebrahimi /* This function uses JIT to convert a previously-compiled pattern into machine
14833*22dc650dSSadaf Ebrahimi code.
14834*22dc650dSSadaf Ebrahimi
14835*22dc650dSSadaf Ebrahimi Arguments:
14836*22dc650dSSadaf Ebrahimi code a compiled pattern
14837*22dc650dSSadaf Ebrahimi options JIT option bits
14838*22dc650dSSadaf Ebrahimi
14839*22dc650dSSadaf Ebrahimi Returns: 0: success or (*NOJIT) was used
14840*22dc650dSSadaf Ebrahimi <0: an error code
14841*22dc650dSSadaf Ebrahimi */
14842*22dc650dSSadaf Ebrahimi
/* Mask of every option bit that pcre2_jit_compile() accepts; a call with any
other bit set is rejected with PCRE2_ERROR_JIT_BADOPTION. */
14843*22dc650dSSadaf Ebrahimi #define PUBLIC_JIT_COMPILE_OPTIONS \
14844*22dc650dSSadaf Ebrahimi (PCRE2_JIT_COMPLETE|PCRE2_JIT_PARTIAL_SOFT|PCRE2_JIT_PARTIAL_HARD|PCRE2_JIT_INVALID_UTF)
14845*22dc650dSSadaf Ebrahimi
14846*22dc650dSSadaf Ebrahimi PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_jit_compile(pcre2_code * code,uint32_t options)14847*22dc650dSSadaf Ebrahimi pcre2_jit_compile(pcre2_code *code, uint32_t options)
14848*22dc650dSSadaf Ebrahimi {
14849*22dc650dSSadaf Ebrahimi pcre2_real_code *re = (pcre2_real_code *)code;
14850*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_JIT
14851*22dc650dSSadaf Ebrahimi executable_functions *functions;
14852*22dc650dSSadaf Ebrahimi static int executable_allocator_is_working = -1;
14853*22dc650dSSadaf Ebrahimi #endif
14854*22dc650dSSadaf Ebrahimi
14855*22dc650dSSadaf Ebrahimi if (code == NULL)
14856*22dc650dSSadaf Ebrahimi return PCRE2_ERROR_NULL;
14857*22dc650dSSadaf Ebrahimi
14858*22dc650dSSadaf Ebrahimi if ((options & ~PUBLIC_JIT_COMPILE_OPTIONS) != 0)
14859*22dc650dSSadaf Ebrahimi return PCRE2_ERROR_JIT_BADOPTION;
14860*22dc650dSSadaf Ebrahimi
14861*22dc650dSSadaf Ebrahimi /* Support for invalid UTF was first introduced in JIT, with the option
14862*22dc650dSSadaf Ebrahimi PCRE2_JIT_INVALID_UTF. Later, support was added to the interpreter, and the
14863*22dc650dSSadaf Ebrahimi compile-time option PCRE2_MATCH_INVALID_UTF was created. This is now the
14864*22dc650dSSadaf Ebrahimi preferred feature, with the earlier option deprecated. However, for backward
14865*22dc650dSSadaf Ebrahimi compatibility, if the earlier option is set, it forces the new option so that
14866*22dc650dSSadaf Ebrahimi if JIT matching falls back to the interpreter, there is still support for
14867*22dc650dSSadaf Ebrahimi invalid UTF. However, if this function has already been successfully called
14868*22dc650dSSadaf Ebrahimi without PCRE2_JIT_INVALID_UTF and without PCRE2_MATCH_INVALID_UTF (meaning that
14869*22dc650dSSadaf Ebrahimi non-invalid-supporting JIT code was compiled), give an error.
14870*22dc650dSSadaf Ebrahimi
14871*22dc650dSSadaf Ebrahimi If in the future support for PCRE2_JIT_INVALID_UTF is withdrawn, the following
14872*22dc650dSSadaf Ebrahimi actions are needed:
14873*22dc650dSSadaf Ebrahimi
14874*22dc650dSSadaf Ebrahimi 1. Remove the definition from pcre2.h.in and from the list in
14875*22dc650dSSadaf Ebrahimi PUBLIC_JIT_COMPILE_OPTIONS above.
14876*22dc650dSSadaf Ebrahimi
14877*22dc650dSSadaf Ebrahimi 2. Replace PCRE2_JIT_INVALID_UTF with a local flag in this module.
14878*22dc650dSSadaf Ebrahimi
14879*22dc650dSSadaf Ebrahimi 3. Replace PCRE2_JIT_INVALID_UTF in pcre2_jit_test.c.
14880*22dc650dSSadaf Ebrahimi
14881*22dc650dSSadaf Ebrahimi 4. Delete the following short block of code. The setting of "re" and
14882*22dc650dSSadaf Ebrahimi "functions" can be moved into the JIT-only block below, but if that is
14883*22dc650dSSadaf Ebrahimi done, (void)re and (void)functions will be needed in the non-JIT case, to
14884*22dc650dSSadaf Ebrahimi avoid compiler warnings.
14885*22dc650dSSadaf Ebrahimi */
14886*22dc650dSSadaf Ebrahimi
14887*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_JIT
14888*22dc650dSSadaf Ebrahimi functions = (executable_functions *)re->executable_jit;
14889*22dc650dSSadaf Ebrahimi #endif
14890*22dc650dSSadaf Ebrahimi
14891*22dc650dSSadaf Ebrahimi if ((options & PCRE2_JIT_INVALID_UTF) != 0)
14892*22dc650dSSadaf Ebrahimi {
14893*22dc650dSSadaf Ebrahimi if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) == 0)
14894*22dc650dSSadaf Ebrahimi {
14895*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_JIT
14896*22dc650dSSadaf Ebrahimi if (functions != NULL) return PCRE2_ERROR_JIT_BADOPTION;
14897*22dc650dSSadaf Ebrahimi #endif
14898*22dc650dSSadaf Ebrahimi re->overall_options |= PCRE2_MATCH_INVALID_UTF;
14899*22dc650dSSadaf Ebrahimi }
14900*22dc650dSSadaf Ebrahimi }
14901*22dc650dSSadaf Ebrahimi
14902*22dc650dSSadaf Ebrahimi /* The above tests are run with and without JIT support. This means that
14903*22dc650dSSadaf Ebrahimi PCRE2_JIT_INVALID_UTF propagates back into the regex options (ensuring
14904*22dc650dSSadaf Ebrahimi interpreter support) even in the absence of JIT. But now, if there is no JIT
14905*22dc650dSSadaf Ebrahimi support, give an error return. */
14906*22dc650dSSadaf Ebrahimi
14907*22dc650dSSadaf Ebrahimi #ifndef SUPPORT_JIT
14908*22dc650dSSadaf Ebrahimi return PCRE2_ERROR_JIT_BADOPTION;
14909*22dc650dSSadaf Ebrahimi #else /* SUPPORT_JIT */
14910*22dc650dSSadaf Ebrahimi
14911*22dc650dSSadaf Ebrahimi /* There is JIT support. Do the necessary. */
14912*22dc650dSSadaf Ebrahimi
14913*22dc650dSSadaf Ebrahimi if ((re->flags & PCRE2_NOJIT) != 0) return 0;
14914*22dc650dSSadaf Ebrahimi
14915*22dc650dSSadaf Ebrahimi if (executable_allocator_is_working == -1)
14916*22dc650dSSadaf Ebrahimi {
14917*22dc650dSSadaf Ebrahimi /* Checks whether the executable allocator is working. This check
14918*22dc650dSSadaf Ebrahimi might run multiple times in multi-threaded environments, but the
14919*22dc650dSSadaf Ebrahimi result should not be affected by it. */
14920*22dc650dSSadaf Ebrahimi void *ptr = SLJIT_MALLOC_EXEC(32, NULL);
14921*22dc650dSSadaf Ebrahimi if (ptr != NULL)
14922*22dc650dSSadaf Ebrahimi {
14923*22dc650dSSadaf Ebrahimi SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr), NULL);
14924*22dc650dSSadaf Ebrahimi executable_allocator_is_working = 1;
14925*22dc650dSSadaf Ebrahimi }
14926*22dc650dSSadaf Ebrahimi else executable_allocator_is_working = 0;
14927*22dc650dSSadaf Ebrahimi }
14928*22dc650dSSadaf Ebrahimi
14929*22dc650dSSadaf Ebrahimi if (!executable_allocator_is_working)
14930*22dc650dSSadaf Ebrahimi return PCRE2_ERROR_NOMEMORY;
14931*22dc650dSSadaf Ebrahimi
14932*22dc650dSSadaf Ebrahimi if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)
14933*22dc650dSSadaf Ebrahimi options |= PCRE2_JIT_INVALID_UTF;
14934*22dc650dSSadaf Ebrahimi
14935*22dc650dSSadaf Ebrahimi if ((options & PCRE2_JIT_COMPLETE) != 0 && (functions == NULL
14936*22dc650dSSadaf Ebrahimi || functions->executable_funcs[0] == NULL)) {
14937*22dc650dSSadaf Ebrahimi uint32_t excluded_options = (PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD);
14938*22dc650dSSadaf Ebrahimi int result = jit_compile(code, options & ~excluded_options);
14939*22dc650dSSadaf Ebrahimi if (result != 0)
14940*22dc650dSSadaf Ebrahimi return result;
14941*22dc650dSSadaf Ebrahimi }
14942*22dc650dSSadaf Ebrahimi
14943*22dc650dSSadaf Ebrahimi if ((options & PCRE2_JIT_PARTIAL_SOFT) != 0 && (functions == NULL
14944*22dc650dSSadaf Ebrahimi || functions->executable_funcs[1] == NULL)) {
14945*22dc650dSSadaf Ebrahimi uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_HARD);
14946*22dc650dSSadaf Ebrahimi int result = jit_compile(code, options & ~excluded_options);
14947*22dc650dSSadaf Ebrahimi if (result != 0)
14948*22dc650dSSadaf Ebrahimi return result;
14949*22dc650dSSadaf Ebrahimi }
14950*22dc650dSSadaf Ebrahimi
14951*22dc650dSSadaf Ebrahimi if ((options & PCRE2_JIT_PARTIAL_HARD) != 0 && (functions == NULL
14952*22dc650dSSadaf Ebrahimi || functions->executable_funcs[2] == NULL)) {
14953*22dc650dSSadaf Ebrahimi uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT);
14954*22dc650dSSadaf Ebrahimi int result = jit_compile(code, options & ~excluded_options);
14955*22dc650dSSadaf Ebrahimi if (result != 0)
14956*22dc650dSSadaf Ebrahimi return result;
14957*22dc650dSSadaf Ebrahimi }
14958*22dc650dSSadaf Ebrahimi
14959*22dc650dSSadaf Ebrahimi return 0;
14960*22dc650dSSadaf Ebrahimi
14961*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_JIT */
14962*22dc650dSSadaf Ebrahimi }
14963*22dc650dSSadaf Ebrahimi
14964*22dc650dSSadaf Ebrahimi /* The JIT compiler uses an all-in-one approach. This improves security,
14965*22dc650dSSadaf Ebrahimi since the code generator functions are not exported. */
14966*22dc650dSSadaf Ebrahimi
14967*22dc650dSSadaf Ebrahimi #define INCLUDED_FROM_PCRE2_JIT_COMPILE
14968*22dc650dSSadaf Ebrahimi
14969*22dc650dSSadaf Ebrahimi #include "pcre2_jit_match.c"
14970*22dc650dSSadaf Ebrahimi #include "pcre2_jit_misc.c"
14971*22dc650dSSadaf Ebrahimi
14972*22dc650dSSadaf Ebrahimi /* End of pcre2_jit_compile.c */
14973