xref: /aosp_15_r20/external/pcre/src/pcre2_substitute.c (revision 22dc650d8ae982c6770746019a6f94af92b0f024)
1*22dc650dSSadaf Ebrahimi /*************************************************
2*22dc650dSSadaf Ebrahimi *      Perl-Compatible Regular Expressions       *
3*22dc650dSSadaf Ebrahimi *************************************************/
4*22dc650dSSadaf Ebrahimi 
5*22dc650dSSadaf Ebrahimi /* PCRE is a library of functions to support regular expressions whose syntax
6*22dc650dSSadaf Ebrahimi and semantics are as close as possible to those of the Perl 5 language.
7*22dc650dSSadaf Ebrahimi 
8*22dc650dSSadaf Ebrahimi                        Written by Philip Hazel
9*22dc650dSSadaf Ebrahimi      Original API code Copyright (c) 1997-2012 University of Cambridge
10*22dc650dSSadaf Ebrahimi           New API code Copyright (c) 2016-2022 University of Cambridge
11*22dc650dSSadaf Ebrahimi 
12*22dc650dSSadaf Ebrahimi -----------------------------------------------------------------------------
13*22dc650dSSadaf Ebrahimi Redistribution and use in source and binary forms, with or without
14*22dc650dSSadaf Ebrahimi modification, are permitted provided that the following conditions are met:
15*22dc650dSSadaf Ebrahimi 
16*22dc650dSSadaf Ebrahimi     * Redistributions of source code must retain the above copyright notice,
17*22dc650dSSadaf Ebrahimi       this list of conditions and the following disclaimer.
18*22dc650dSSadaf Ebrahimi 
19*22dc650dSSadaf Ebrahimi     * Redistributions in binary form must reproduce the above copyright
20*22dc650dSSadaf Ebrahimi       notice, this list of conditions and the following disclaimer in the
21*22dc650dSSadaf Ebrahimi       documentation and/or other materials provided with the distribution.
22*22dc650dSSadaf Ebrahimi 
23*22dc650dSSadaf Ebrahimi     * Neither the name of the University of Cambridge nor the names of its
24*22dc650dSSadaf Ebrahimi       contributors may be used to endorse or promote products derived from
25*22dc650dSSadaf Ebrahimi       this software without specific prior written permission.
26*22dc650dSSadaf Ebrahimi 
27*22dc650dSSadaf Ebrahimi THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28*22dc650dSSadaf Ebrahimi AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29*22dc650dSSadaf Ebrahimi IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30*22dc650dSSadaf Ebrahimi ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31*22dc650dSSadaf Ebrahimi LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32*22dc650dSSadaf Ebrahimi CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33*22dc650dSSadaf Ebrahimi SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34*22dc650dSSadaf Ebrahimi INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35*22dc650dSSadaf Ebrahimi CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36*22dc650dSSadaf Ebrahimi ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37*22dc650dSSadaf Ebrahimi POSSIBILITY OF SUCH DAMAGE.
38*22dc650dSSadaf Ebrahimi -----------------------------------------------------------------------------
39*22dc650dSSadaf Ebrahimi */
40*22dc650dSSadaf Ebrahimi 
41*22dc650dSSadaf Ebrahimi 
42*22dc650dSSadaf Ebrahimi #ifdef HAVE_CONFIG_H
43*22dc650dSSadaf Ebrahimi #include "config.h"
44*22dc650dSSadaf Ebrahimi #endif
45*22dc650dSSadaf Ebrahimi 
46*22dc650dSSadaf Ebrahimi #include "pcre2_internal.h"
47*22dc650dSSadaf Ebrahimi 
/* Number of slots in the pointer stack that pcre2_substitute() uses while
processing nested replacement substrings. Two slots (current pointer and end
pointer) are popped for each nesting level. */

#define PTR_STACK_SIZE 20

/* All the option bits that belong to pcre2_substitute() itself. They are
saved separately and removed from the options before pcre2_match() is
called. */

#define SUBSTITUTE_OPTIONS \
  (PCRE2_SUBSTITUTE_EXTENDED         | \
   PCRE2_SUBSTITUTE_GLOBAL           | \
   PCRE2_SUBSTITUTE_LITERAL          | \
   PCRE2_SUBSTITUTE_MATCHED          | \
   PCRE2_SUBSTITUTE_OVERFLOW_LENGTH  | \
   PCRE2_SUBSTITUTE_REPLACEMENT_ONLY | \
   PCRE2_SUBSTITUTE_UNKNOWN_UNSET    | \
   PCRE2_SUBSTITUTE_UNSET_EMPTY)
55*22dc650dSSadaf Ebrahimi 
56*22dc650dSSadaf Ebrahimi 
57*22dc650dSSadaf Ebrahimi 
58*22dc650dSSadaf Ebrahimi /*************************************************
59*22dc650dSSadaf Ebrahimi *           Find end of substitute text          *
60*22dc650dSSadaf Ebrahimi *************************************************/
61*22dc650dSSadaf Ebrahimi 
62*22dc650dSSadaf Ebrahimi /* In extended mode, we recognize ${name:+set text:unset text} and similar
63*22dc650dSSadaf Ebrahimi constructions. This requires the identification of unescaped : and }
64*22dc650dSSadaf Ebrahimi characters. This function scans for such. It must deal with nested ${
65*22dc650dSSadaf Ebrahimi constructions. The pointer to the text is updated, either to the required end
66*22dc650dSSadaf Ebrahimi character, or to where an error was detected.
67*22dc650dSSadaf Ebrahimi 
68*22dc650dSSadaf Ebrahimi Arguments:
69*22dc650dSSadaf Ebrahimi   code      points to the compiled expression (for options)
70*22dc650dSSadaf Ebrahimi   ptrptr    points to the pointer to the start of the text (updated)
71*22dc650dSSadaf Ebrahimi   ptrend    end of the whole string
72*22dc650dSSadaf Ebrahimi   last      TRUE if the last expected string (only } recognized)
73*22dc650dSSadaf Ebrahimi 
74*22dc650dSSadaf Ebrahimi Returns:    0 on success
75*22dc650dSSadaf Ebrahimi             negative error code on failure
76*22dc650dSSadaf Ebrahimi */
77*22dc650dSSadaf Ebrahimi 
78*22dc650dSSadaf Ebrahimi static int
find_text_end(const pcre2_code * code,PCRE2_SPTR * ptrptr,PCRE2_SPTR ptrend,BOOL last)79*22dc650dSSadaf Ebrahimi find_text_end(const pcre2_code *code, PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend,
80*22dc650dSSadaf Ebrahimi   BOOL last)
81*22dc650dSSadaf Ebrahimi {
82*22dc650dSSadaf Ebrahimi int rc = 0;
83*22dc650dSSadaf Ebrahimi uint32_t nestlevel = 0;
84*22dc650dSSadaf Ebrahimi BOOL literal = FALSE;
85*22dc650dSSadaf Ebrahimi PCRE2_SPTR ptr = *ptrptr;
86*22dc650dSSadaf Ebrahimi 
87*22dc650dSSadaf Ebrahimi for (; ptr < ptrend; ptr++)
88*22dc650dSSadaf Ebrahimi   {
89*22dc650dSSadaf Ebrahimi   if (literal)
90*22dc650dSSadaf Ebrahimi     {
91*22dc650dSSadaf Ebrahimi     if (ptr[0] == CHAR_BACKSLASH && ptr < ptrend - 1 && ptr[1] == CHAR_E)
92*22dc650dSSadaf Ebrahimi       {
93*22dc650dSSadaf Ebrahimi       literal = FALSE;
94*22dc650dSSadaf Ebrahimi       ptr += 1;
95*22dc650dSSadaf Ebrahimi       }
96*22dc650dSSadaf Ebrahimi     }
97*22dc650dSSadaf Ebrahimi 
98*22dc650dSSadaf Ebrahimi   else if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
99*22dc650dSSadaf Ebrahimi     {
100*22dc650dSSadaf Ebrahimi     if (nestlevel == 0) goto EXIT;
101*22dc650dSSadaf Ebrahimi     nestlevel--;
102*22dc650dSSadaf Ebrahimi     }
103*22dc650dSSadaf Ebrahimi 
104*22dc650dSSadaf Ebrahimi   else if (*ptr == CHAR_COLON && !last && nestlevel == 0) goto EXIT;
105*22dc650dSSadaf Ebrahimi 
106*22dc650dSSadaf Ebrahimi   else if (*ptr == CHAR_DOLLAR_SIGN)
107*22dc650dSSadaf Ebrahimi     {
108*22dc650dSSadaf Ebrahimi     if (ptr < ptrend - 1 && ptr[1] == CHAR_LEFT_CURLY_BRACKET)
109*22dc650dSSadaf Ebrahimi       {
110*22dc650dSSadaf Ebrahimi       nestlevel++;
111*22dc650dSSadaf Ebrahimi       ptr += 1;
112*22dc650dSSadaf Ebrahimi       }
113*22dc650dSSadaf Ebrahimi     }
114*22dc650dSSadaf Ebrahimi 
115*22dc650dSSadaf Ebrahimi   else if (*ptr == CHAR_BACKSLASH)
116*22dc650dSSadaf Ebrahimi     {
117*22dc650dSSadaf Ebrahimi     int erc;
118*22dc650dSSadaf Ebrahimi     int errorcode;
119*22dc650dSSadaf Ebrahimi     uint32_t ch;
120*22dc650dSSadaf Ebrahimi 
121*22dc650dSSadaf Ebrahimi     if (ptr < ptrend - 1) switch (ptr[1])
122*22dc650dSSadaf Ebrahimi       {
123*22dc650dSSadaf Ebrahimi       case CHAR_L:
124*22dc650dSSadaf Ebrahimi       case CHAR_l:
125*22dc650dSSadaf Ebrahimi       case CHAR_U:
126*22dc650dSSadaf Ebrahimi       case CHAR_u:
127*22dc650dSSadaf Ebrahimi       ptr += 1;
128*22dc650dSSadaf Ebrahimi       continue;
129*22dc650dSSadaf Ebrahimi       }
130*22dc650dSSadaf Ebrahimi 
131*22dc650dSSadaf Ebrahimi     ptr += 1;  /* Must point after \ */
132*22dc650dSSadaf Ebrahimi     erc = PRIV(check_escape)(&ptr, ptrend, &ch, &errorcode,
133*22dc650dSSadaf Ebrahimi       code->overall_options, code->extra_options, FALSE, NULL);
134*22dc650dSSadaf Ebrahimi     ptr -= 1;  /* Back to last code unit of escape */
135*22dc650dSSadaf Ebrahimi     if (errorcode != 0)
136*22dc650dSSadaf Ebrahimi       {
137*22dc650dSSadaf Ebrahimi       rc = errorcode;
138*22dc650dSSadaf Ebrahimi       goto EXIT;
139*22dc650dSSadaf Ebrahimi       }
140*22dc650dSSadaf Ebrahimi 
141*22dc650dSSadaf Ebrahimi     switch(erc)
142*22dc650dSSadaf Ebrahimi       {
143*22dc650dSSadaf Ebrahimi       case 0:      /* Data character */
144*22dc650dSSadaf Ebrahimi       case ESC_E:  /* Isolated \E is ignored */
145*22dc650dSSadaf Ebrahimi       break;
146*22dc650dSSadaf Ebrahimi 
147*22dc650dSSadaf Ebrahimi       case ESC_Q:
148*22dc650dSSadaf Ebrahimi       literal = TRUE;
149*22dc650dSSadaf Ebrahimi       break;
150*22dc650dSSadaf Ebrahimi 
151*22dc650dSSadaf Ebrahimi       default:
152*22dc650dSSadaf Ebrahimi       rc = PCRE2_ERROR_BADREPESCAPE;
153*22dc650dSSadaf Ebrahimi       goto EXIT;
154*22dc650dSSadaf Ebrahimi       }
155*22dc650dSSadaf Ebrahimi     }
156*22dc650dSSadaf Ebrahimi   }
157*22dc650dSSadaf Ebrahimi 
158*22dc650dSSadaf Ebrahimi rc = PCRE2_ERROR_REPMISSINGBRACE;   /* Terminator not found */
159*22dc650dSSadaf Ebrahimi 
160*22dc650dSSadaf Ebrahimi EXIT:
161*22dc650dSSadaf Ebrahimi *ptrptr = ptr;
162*22dc650dSSadaf Ebrahimi return rc;
163*22dc650dSSadaf Ebrahimi }
164*22dc650dSSadaf Ebrahimi 
165*22dc650dSSadaf Ebrahimi 
166*22dc650dSSadaf Ebrahimi 
167*22dc650dSSadaf Ebrahimi /*************************************************
168*22dc650dSSadaf Ebrahimi *              Match and substitute              *
169*22dc650dSSadaf Ebrahimi *************************************************/
170*22dc650dSSadaf Ebrahimi 
/* This function applies a compiled re to a subject string and creates a new
string with substitutions. The first 7 arguments are the same as for
pcre2_match(). Either string length may be PCRE2_ZERO_TERMINATED.

Arguments:
  code            points to the compiled expression
  subject         points to the subject string
  length          length of subject string (may contain binary zeros)
  start_offset    where to start in the subject string
  options         option bits
  match_data      points to a match_data block, or is NULL
  mcontext        points to a PCRE2 match context, or is NULL
  replacement     points to the replacement string
  rlength         length of replacement string
  buffer          where to put the substituted string
  blength         points to length of buffer; updated to length of string

Returns:          >= 0 number of substitutions made
                  < 0 an error code
                  PCRE2_ERROR_BADREPLACEMENT means invalid use of $
*/
192*22dc650dSSadaf Ebrahimi 
/* This macro checks for space in the buffer before copying into it. On
overflow, either give an error immediately (by jumping to NOROOM), or, if
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set, keep on, accumulating in extra_needed
the additional length that would have been required. Once overflow has
happened nothing more is copied.

The macro relies on these names being in scope where it is used: buffer,
buff_offset, lengthleft, extra_needed, overflowed, suboptions, and the label
NOROOM. The arguments are parenthesized so that any expression can be passed,
but they are evaluated more than once and so must not have side effects. */

#define CHECKMEMCPY(from,length) \
  { \
  if (!overflowed && lengthleft < (length)) \
    { \
    if ((suboptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) == 0) goto NOROOM; \
    overflowed = TRUE; \
    extra_needed = (length) - lengthleft; \
    } \
  else if (overflowed) \
    { \
    extra_needed += (length); \
    }  \
  else \
    {  \
    memcpy(buffer + buff_offset, (from), CU2BYTES(length)); \
    buff_offset += (length); \
    lengthleft -= (length); \
    } \
  }
216*22dc650dSSadaf Ebrahimi 
217*22dc650dSSadaf Ebrahimi /* Here's the function */
218*22dc650dSSadaf Ebrahimi 
219*22dc650dSSadaf Ebrahimi PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substitute(const pcre2_code * code,PCRE2_SPTR subject,PCRE2_SIZE length,PCRE2_SIZE start_offset,uint32_t options,pcre2_match_data * match_data,pcre2_match_context * mcontext,PCRE2_SPTR replacement,PCRE2_SIZE rlength,PCRE2_UCHAR * buffer,PCRE2_SIZE * blength)220*22dc650dSSadaf Ebrahimi pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
221*22dc650dSSadaf Ebrahimi   PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
222*22dc650dSSadaf Ebrahimi   pcre2_match_context *mcontext, PCRE2_SPTR replacement, PCRE2_SIZE rlength,
223*22dc650dSSadaf Ebrahimi   PCRE2_UCHAR *buffer, PCRE2_SIZE *blength)
224*22dc650dSSadaf Ebrahimi {
225*22dc650dSSadaf Ebrahimi int rc;
226*22dc650dSSadaf Ebrahimi int subs;
227*22dc650dSSadaf Ebrahimi int forcecase = 0;
228*22dc650dSSadaf Ebrahimi int forcecasereset = 0;
229*22dc650dSSadaf Ebrahimi uint32_t ovector_count;
230*22dc650dSSadaf Ebrahimi uint32_t goptions = 0;
231*22dc650dSSadaf Ebrahimi uint32_t suboptions;
232*22dc650dSSadaf Ebrahimi pcre2_match_data *internal_match_data = NULL;
233*22dc650dSSadaf Ebrahimi BOOL escaped_literal = FALSE;
234*22dc650dSSadaf Ebrahimi BOOL overflowed = FALSE;
235*22dc650dSSadaf Ebrahimi BOOL use_existing_match;
236*22dc650dSSadaf Ebrahimi BOOL replacement_only;
237*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
238*22dc650dSSadaf Ebrahimi BOOL utf = (code->overall_options & PCRE2_UTF) != 0;
239*22dc650dSSadaf Ebrahimi BOOL ucp = (code->overall_options & PCRE2_UCP) != 0;
240*22dc650dSSadaf Ebrahimi #endif
241*22dc650dSSadaf Ebrahimi PCRE2_UCHAR temp[6];
242*22dc650dSSadaf Ebrahimi PCRE2_SPTR ptr;
243*22dc650dSSadaf Ebrahimi PCRE2_SPTR repend;
244*22dc650dSSadaf Ebrahimi PCRE2_SIZE extra_needed = 0;
245*22dc650dSSadaf Ebrahimi PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength;
246*22dc650dSSadaf Ebrahimi PCRE2_SIZE *ovector;
247*22dc650dSSadaf Ebrahimi PCRE2_SIZE ovecsave[3];
248*22dc650dSSadaf Ebrahimi pcre2_substitute_callout_block scb;
249*22dc650dSSadaf Ebrahimi 
250*22dc650dSSadaf Ebrahimi /* General initialization */
251*22dc650dSSadaf Ebrahimi 
252*22dc650dSSadaf Ebrahimi buff_offset = 0;
253*22dc650dSSadaf Ebrahimi lengthleft = buff_length = *blength;
254*22dc650dSSadaf Ebrahimi *blength = PCRE2_UNSET;
255*22dc650dSSadaf Ebrahimi ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET;
256*22dc650dSSadaf Ebrahimi 
257*22dc650dSSadaf Ebrahimi /* Partial matching is not valid. This must come after setting *blength to
258*22dc650dSSadaf Ebrahimi PCRE2_UNSET, so as not to imply an offset in the replacement. */
259*22dc650dSSadaf Ebrahimi 
260*22dc650dSSadaf Ebrahimi if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
261*22dc650dSSadaf Ebrahimi   return PCRE2_ERROR_BADOPTION;
262*22dc650dSSadaf Ebrahimi 
263*22dc650dSSadaf Ebrahimi /* Validate length and find the end of the replacement. A NULL replacement of
264*22dc650dSSadaf Ebrahimi zero length is interpreted as an empty string. */
265*22dc650dSSadaf Ebrahimi 
266*22dc650dSSadaf Ebrahimi if (replacement == NULL)
267*22dc650dSSadaf Ebrahimi   {
268*22dc650dSSadaf Ebrahimi   if (rlength != 0) return PCRE2_ERROR_NULL;
269*22dc650dSSadaf Ebrahimi   replacement = (PCRE2_SPTR)"";
270*22dc650dSSadaf Ebrahimi   }
271*22dc650dSSadaf Ebrahimi 
272*22dc650dSSadaf Ebrahimi if (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement);
273*22dc650dSSadaf Ebrahimi repend = replacement + rlength;
274*22dc650dSSadaf Ebrahimi 
275*22dc650dSSadaf Ebrahimi /* Check for using a match that has already happened. Note that the subject
276*22dc650dSSadaf Ebrahimi pointer in the match data may be NULL after a no-match. */
277*22dc650dSSadaf Ebrahimi 
278*22dc650dSSadaf Ebrahimi use_existing_match = ((options & PCRE2_SUBSTITUTE_MATCHED) != 0);
279*22dc650dSSadaf Ebrahimi replacement_only = ((options & PCRE2_SUBSTITUTE_REPLACEMENT_ONLY) != 0);
280*22dc650dSSadaf Ebrahimi 
281*22dc650dSSadaf Ebrahimi /* If starting from an existing match, there must be an externally provided
282*22dc650dSSadaf Ebrahimi match data block. We create an internal match_data block in two cases: (a) an
283*22dc650dSSadaf Ebrahimi external one is not supplied (and we are not starting from an existing match);
284*22dc650dSSadaf Ebrahimi (b) an existing match is to be used for the first substitution. In the latter
285*22dc650dSSadaf Ebrahimi case, we copy the existing match into the internal block, except for any cached
286*22dc650dSSadaf Ebrahimi heap frame size and pointer. This ensures that no changes are made to the
287*22dc650dSSadaf Ebrahimi external match data block. */
288*22dc650dSSadaf Ebrahimi 
289*22dc650dSSadaf Ebrahimi if (match_data == NULL)
290*22dc650dSSadaf Ebrahimi   {
291*22dc650dSSadaf Ebrahimi   pcre2_general_context *gcontext;
292*22dc650dSSadaf Ebrahimi   if (use_existing_match) return PCRE2_ERROR_NULL;
293*22dc650dSSadaf Ebrahimi   gcontext = (mcontext == NULL)?
294*22dc650dSSadaf Ebrahimi     (pcre2_general_context *)code :
295*22dc650dSSadaf Ebrahimi     (pcre2_general_context *)mcontext;
296*22dc650dSSadaf Ebrahimi   match_data = internal_match_data =
297*22dc650dSSadaf Ebrahimi     pcre2_match_data_create_from_pattern(code, gcontext);
298*22dc650dSSadaf Ebrahimi   if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY;
299*22dc650dSSadaf Ebrahimi   }
300*22dc650dSSadaf Ebrahimi 
301*22dc650dSSadaf Ebrahimi else if (use_existing_match)
302*22dc650dSSadaf Ebrahimi   {
303*22dc650dSSadaf Ebrahimi   pcre2_general_context *gcontext = (mcontext == NULL)?
304*22dc650dSSadaf Ebrahimi     (pcre2_general_context *)code :
305*22dc650dSSadaf Ebrahimi     (pcre2_general_context *)mcontext;
306*22dc650dSSadaf Ebrahimi   int pairs = (code->top_bracket + 1 < match_data->oveccount)?
307*22dc650dSSadaf Ebrahimi     code->top_bracket + 1 : match_data->oveccount;
308*22dc650dSSadaf Ebrahimi   internal_match_data = pcre2_match_data_create(match_data->oveccount,
309*22dc650dSSadaf Ebrahimi     gcontext);
310*22dc650dSSadaf Ebrahimi   if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY;
311*22dc650dSSadaf Ebrahimi   memcpy(internal_match_data, match_data, offsetof(pcre2_match_data, ovector)
312*22dc650dSSadaf Ebrahimi     + 2*pairs*sizeof(PCRE2_SIZE));
313*22dc650dSSadaf Ebrahimi   internal_match_data->heapframes = NULL;
314*22dc650dSSadaf Ebrahimi   internal_match_data->heapframes_size = 0;
315*22dc650dSSadaf Ebrahimi   match_data = internal_match_data;
316*22dc650dSSadaf Ebrahimi   }
317*22dc650dSSadaf Ebrahimi 
318*22dc650dSSadaf Ebrahimi /* Remember ovector details */
319*22dc650dSSadaf Ebrahimi 
320*22dc650dSSadaf Ebrahimi ovector = pcre2_get_ovector_pointer(match_data);
321*22dc650dSSadaf Ebrahimi ovector_count = pcre2_get_ovector_count(match_data);
322*22dc650dSSadaf Ebrahimi 
323*22dc650dSSadaf Ebrahimi /* Fixed things in the callout block */
324*22dc650dSSadaf Ebrahimi 
325*22dc650dSSadaf Ebrahimi scb.version = 0;
326*22dc650dSSadaf Ebrahimi scb.input = subject;
327*22dc650dSSadaf Ebrahimi scb.output = (PCRE2_SPTR)buffer;
328*22dc650dSSadaf Ebrahimi scb.ovector = ovector;
329*22dc650dSSadaf Ebrahimi 
330*22dc650dSSadaf Ebrahimi /* A NULL subject of zero length is treated as an empty string. */
331*22dc650dSSadaf Ebrahimi 
332*22dc650dSSadaf Ebrahimi if (subject == NULL)
333*22dc650dSSadaf Ebrahimi   {
334*22dc650dSSadaf Ebrahimi   if (length != 0) return PCRE2_ERROR_NULL;
335*22dc650dSSadaf Ebrahimi   subject = (PCRE2_SPTR)"";
336*22dc650dSSadaf Ebrahimi   }
337*22dc650dSSadaf Ebrahimi 
338*22dc650dSSadaf Ebrahimi /* Find length of zero-terminated subject */
339*22dc650dSSadaf Ebrahimi 
340*22dc650dSSadaf Ebrahimi if (length == PCRE2_ZERO_TERMINATED)
341*22dc650dSSadaf Ebrahimi   length = subject? PRIV(strlen)(subject) : 0;
342*22dc650dSSadaf Ebrahimi 
343*22dc650dSSadaf Ebrahimi /* Check UTF replacement string if necessary. */
344*22dc650dSSadaf Ebrahimi 
345*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
346*22dc650dSSadaf Ebrahimi if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
347*22dc650dSSadaf Ebrahimi   {
348*22dc650dSSadaf Ebrahimi   rc = PRIV(valid_utf)(replacement, rlength, &(match_data->startchar));
349*22dc650dSSadaf Ebrahimi   if (rc != 0)
350*22dc650dSSadaf Ebrahimi     {
351*22dc650dSSadaf Ebrahimi     match_data->leftchar = 0;
352*22dc650dSSadaf Ebrahimi     goto EXIT;
353*22dc650dSSadaf Ebrahimi     }
354*22dc650dSSadaf Ebrahimi   }
355*22dc650dSSadaf Ebrahimi #endif  /* SUPPORT_UNICODE */
356*22dc650dSSadaf Ebrahimi 
357*22dc650dSSadaf Ebrahimi /* Save the substitute options and remove them from the match options. */
358*22dc650dSSadaf Ebrahimi 
359*22dc650dSSadaf Ebrahimi suboptions = options & SUBSTITUTE_OPTIONS;
360*22dc650dSSadaf Ebrahimi options &= ~SUBSTITUTE_OPTIONS;
361*22dc650dSSadaf Ebrahimi 
362*22dc650dSSadaf Ebrahimi /* Error if the start match offset is greater than the length of the subject. */
363*22dc650dSSadaf Ebrahimi 
364*22dc650dSSadaf Ebrahimi if (start_offset > length)
365*22dc650dSSadaf Ebrahimi   {
366*22dc650dSSadaf Ebrahimi   match_data->leftchar = 0;
367*22dc650dSSadaf Ebrahimi   rc = PCRE2_ERROR_BADOFFSET;
368*22dc650dSSadaf Ebrahimi   goto EXIT;
369*22dc650dSSadaf Ebrahimi   }
370*22dc650dSSadaf Ebrahimi 
371*22dc650dSSadaf Ebrahimi /* Copy up to the start offset, unless only the replacement is required. */
372*22dc650dSSadaf Ebrahimi 
373*22dc650dSSadaf Ebrahimi if (!replacement_only) CHECKMEMCPY(subject, start_offset);
374*22dc650dSSadaf Ebrahimi 
375*22dc650dSSadaf Ebrahimi /* Loop for global substituting. If PCRE2_SUBSTITUTE_MATCHED is set, the first
376*22dc650dSSadaf Ebrahimi match is taken from the match_data that was passed in. */
377*22dc650dSSadaf Ebrahimi 
378*22dc650dSSadaf Ebrahimi subs = 0;
379*22dc650dSSadaf Ebrahimi do
380*22dc650dSSadaf Ebrahimi   {
381*22dc650dSSadaf Ebrahimi   PCRE2_SPTR ptrstack[PTR_STACK_SIZE];
382*22dc650dSSadaf Ebrahimi   uint32_t ptrstackptr = 0;
383*22dc650dSSadaf Ebrahimi 
384*22dc650dSSadaf Ebrahimi   if (use_existing_match)
385*22dc650dSSadaf Ebrahimi     {
386*22dc650dSSadaf Ebrahimi     rc = match_data->rc;
387*22dc650dSSadaf Ebrahimi     use_existing_match = FALSE;
388*22dc650dSSadaf Ebrahimi     }
389*22dc650dSSadaf Ebrahimi   else rc = pcre2_match(code, subject, length, start_offset, options|goptions,
390*22dc650dSSadaf Ebrahimi     match_data, mcontext);
391*22dc650dSSadaf Ebrahimi 
392*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
393*22dc650dSSadaf Ebrahimi   if (utf) options |= PCRE2_NO_UTF_CHECK;  /* Only need to check once */
394*22dc650dSSadaf Ebrahimi #endif
395*22dc650dSSadaf Ebrahimi 
396*22dc650dSSadaf Ebrahimi   /* Any error other than no match returns the error code. No match when not
397*22dc650dSSadaf Ebrahimi   doing the special after-empty-match global rematch, or when at the end of the
398*22dc650dSSadaf Ebrahimi   subject, breaks the global loop. Otherwise, advance the starting point by one
399*22dc650dSSadaf Ebrahimi   character, copying it to the output, and try again. */
400*22dc650dSSadaf Ebrahimi 
401*22dc650dSSadaf Ebrahimi   if (rc < 0)
402*22dc650dSSadaf Ebrahimi     {
403*22dc650dSSadaf Ebrahimi     PCRE2_SIZE save_start;
404*22dc650dSSadaf Ebrahimi 
405*22dc650dSSadaf Ebrahimi     if (rc != PCRE2_ERROR_NOMATCH) goto EXIT;
406*22dc650dSSadaf Ebrahimi     if (goptions == 0 || start_offset >= length) break;
407*22dc650dSSadaf Ebrahimi 
408*22dc650dSSadaf Ebrahimi     /* Advance by one code point. Then, if CRLF is a valid newline sequence and
409*22dc650dSSadaf Ebrahimi     we have advanced into the middle of it, advance one more code point. In
410*22dc650dSSadaf Ebrahimi     other words, do not start in the middle of CRLF, even if CR and LF on their
411*22dc650dSSadaf Ebrahimi     own are valid newlines. */
412*22dc650dSSadaf Ebrahimi 
413*22dc650dSSadaf Ebrahimi     save_start = start_offset++;
414*22dc650dSSadaf Ebrahimi     if (subject[start_offset-1] == CHAR_CR &&
415*22dc650dSSadaf Ebrahimi         code->newline_convention != PCRE2_NEWLINE_CR &&
416*22dc650dSSadaf Ebrahimi         code->newline_convention != PCRE2_NEWLINE_LF &&
417*22dc650dSSadaf Ebrahimi         start_offset < length &&
418*22dc650dSSadaf Ebrahimi         subject[start_offset] == CHAR_LF)
419*22dc650dSSadaf Ebrahimi       start_offset++;
420*22dc650dSSadaf Ebrahimi 
421*22dc650dSSadaf Ebrahimi     /* Otherwise, in UTF mode, advance past any secondary code points. */
422*22dc650dSSadaf Ebrahimi 
423*22dc650dSSadaf Ebrahimi     else if ((code->overall_options & PCRE2_UTF) != 0)
424*22dc650dSSadaf Ebrahimi       {
425*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8
426*22dc650dSSadaf Ebrahimi       while (start_offset < length && (subject[start_offset] & 0xc0) == 0x80)
427*22dc650dSSadaf Ebrahimi         start_offset++;
428*22dc650dSSadaf Ebrahimi #elif PCRE2_CODE_UNIT_WIDTH == 16
429*22dc650dSSadaf Ebrahimi       while (start_offset < length &&
430*22dc650dSSadaf Ebrahimi             (subject[start_offset] & 0xfc00) == 0xdc00)
431*22dc650dSSadaf Ebrahimi         start_offset++;
432*22dc650dSSadaf Ebrahimi #endif
433*22dc650dSSadaf Ebrahimi       }
434*22dc650dSSadaf Ebrahimi 
435*22dc650dSSadaf Ebrahimi     /* Copy what we have advanced past (unless not required), reset the special
436*22dc650dSSadaf Ebrahimi     global options, and continue to the next match. */
437*22dc650dSSadaf Ebrahimi 
438*22dc650dSSadaf Ebrahimi     fraglength = start_offset - save_start;
439*22dc650dSSadaf Ebrahimi     if (!replacement_only) CHECKMEMCPY(subject + save_start, fraglength);
440*22dc650dSSadaf Ebrahimi     goptions = 0;
441*22dc650dSSadaf Ebrahimi     continue;
442*22dc650dSSadaf Ebrahimi     }
443*22dc650dSSadaf Ebrahimi 
444*22dc650dSSadaf Ebrahimi   /* Handle a successful match. Matches that use \K to end before they start
445*22dc650dSSadaf Ebrahimi   or start before the current point in the subject are not supported. */
446*22dc650dSSadaf Ebrahimi 
447*22dc650dSSadaf Ebrahimi   if (ovector[1] < ovector[0] || ovector[0] < start_offset)
448*22dc650dSSadaf Ebrahimi     {
449*22dc650dSSadaf Ebrahimi     rc = PCRE2_ERROR_BADSUBSPATTERN;
450*22dc650dSSadaf Ebrahimi     goto EXIT;
451*22dc650dSSadaf Ebrahimi     }
452*22dc650dSSadaf Ebrahimi 
453*22dc650dSSadaf Ebrahimi   /* Check for the same match as previous. This is legitimate after matching an
454*22dc650dSSadaf Ebrahimi   empty string that starts after the initial match offset. We have tried again
455*22dc650dSSadaf Ebrahimi   at the match point in case the pattern is one like /(?<=\G.)/ which can never
456*22dc650dSSadaf Ebrahimi   match at its starting point, so running the match achieves the bumpalong. If
457*22dc650dSSadaf Ebrahimi   we do get the same (null) match at the original match point, it isn't such a
458*22dc650dSSadaf Ebrahimi   pattern, so we now do the empty string magic. In all other cases, a repeat
459*22dc650dSSadaf Ebrahimi   match should never occur. */
460*22dc650dSSadaf Ebrahimi 
461*22dc650dSSadaf Ebrahimi   if (ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1])
462*22dc650dSSadaf Ebrahimi     {
463*22dc650dSSadaf Ebrahimi     if (ovector[0] == ovector[1] && ovecsave[2] != start_offset)
464*22dc650dSSadaf Ebrahimi       {
465*22dc650dSSadaf Ebrahimi       goptions = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
466*22dc650dSSadaf Ebrahimi       ovecsave[2] = start_offset;
467*22dc650dSSadaf Ebrahimi       continue;    /* Back to the top of the loop */
468*22dc650dSSadaf Ebrahimi       }
469*22dc650dSSadaf Ebrahimi     rc = PCRE2_ERROR_INTERNAL_DUPMATCH;
470*22dc650dSSadaf Ebrahimi     goto EXIT;
471*22dc650dSSadaf Ebrahimi     }
472*22dc650dSSadaf Ebrahimi 
473*22dc650dSSadaf Ebrahimi   /* Count substitutions with a paranoid check for integer overflow; surely no
474*22dc650dSSadaf Ebrahimi   real call to this function would ever hit this! */
475*22dc650dSSadaf Ebrahimi 
476*22dc650dSSadaf Ebrahimi   if (subs == INT_MAX)
477*22dc650dSSadaf Ebrahimi     {
478*22dc650dSSadaf Ebrahimi     rc = PCRE2_ERROR_TOOMANYREPLACE;
479*22dc650dSSadaf Ebrahimi     goto EXIT;
480*22dc650dSSadaf Ebrahimi     }
481*22dc650dSSadaf Ebrahimi   subs++;
482*22dc650dSSadaf Ebrahimi 
483*22dc650dSSadaf Ebrahimi   /* Copy the text leading up to the match (unless not required), and remember
484*22dc650dSSadaf Ebrahimi   where the insert begins and how many ovector pairs are set. */
485*22dc650dSSadaf Ebrahimi 
486*22dc650dSSadaf Ebrahimi   if (rc == 0) rc = ovector_count;
487*22dc650dSSadaf Ebrahimi   fraglength = ovector[0] - start_offset;
488*22dc650dSSadaf Ebrahimi   if (!replacement_only) CHECKMEMCPY(subject + start_offset, fraglength);
489*22dc650dSSadaf Ebrahimi   scb.output_offsets[0] = buff_offset;
490*22dc650dSSadaf Ebrahimi   scb.oveccount = rc;
491*22dc650dSSadaf Ebrahimi 
492*22dc650dSSadaf Ebrahimi   /* Process the replacement string. If the entire replacement is literal, just
493*22dc650dSSadaf Ebrahimi   copy it with length check. */
494*22dc650dSSadaf Ebrahimi 
495*22dc650dSSadaf Ebrahimi   ptr = replacement;
496*22dc650dSSadaf Ebrahimi   if ((suboptions & PCRE2_SUBSTITUTE_LITERAL) != 0)
497*22dc650dSSadaf Ebrahimi     {
498*22dc650dSSadaf Ebrahimi     CHECKMEMCPY(ptr, rlength);
499*22dc650dSSadaf Ebrahimi     }
500*22dc650dSSadaf Ebrahimi 
501*22dc650dSSadaf Ebrahimi   /* Within a non-literal replacement, which must be scanned character by
502*22dc650dSSadaf Ebrahimi   character, local literal mode can be set by \Q, but only in extended mode
503*22dc650dSSadaf Ebrahimi   when backslashes are being interpreted. In extended mode we must handle
504*22dc650dSSadaf Ebrahimi   nested substrings that are to be reprocessed. */
505*22dc650dSSadaf Ebrahimi 
506*22dc650dSSadaf Ebrahimi   else for (;;)
507*22dc650dSSadaf Ebrahimi     {
508*22dc650dSSadaf Ebrahimi     uint32_t ch;
509*22dc650dSSadaf Ebrahimi     unsigned int chlen;
510*22dc650dSSadaf Ebrahimi 
511*22dc650dSSadaf Ebrahimi     /* If at the end of a nested substring, pop the stack. */
512*22dc650dSSadaf Ebrahimi 
513*22dc650dSSadaf Ebrahimi     if (ptr >= repend)
514*22dc650dSSadaf Ebrahimi       {
515*22dc650dSSadaf Ebrahimi       if (ptrstackptr == 0) break;       /* End of replacement string */
516*22dc650dSSadaf Ebrahimi       repend = ptrstack[--ptrstackptr];
517*22dc650dSSadaf Ebrahimi       ptr = ptrstack[--ptrstackptr];
518*22dc650dSSadaf Ebrahimi       continue;
519*22dc650dSSadaf Ebrahimi       }
520*22dc650dSSadaf Ebrahimi 
521*22dc650dSSadaf Ebrahimi     /* Handle the next character */
522*22dc650dSSadaf Ebrahimi 
523*22dc650dSSadaf Ebrahimi     if (escaped_literal)
524*22dc650dSSadaf Ebrahimi       {
525*22dc650dSSadaf Ebrahimi       if (ptr[0] == CHAR_BACKSLASH && ptr < repend - 1 && ptr[1] == CHAR_E)
526*22dc650dSSadaf Ebrahimi         {
527*22dc650dSSadaf Ebrahimi         escaped_literal = FALSE;
528*22dc650dSSadaf Ebrahimi         ptr += 2;
529*22dc650dSSadaf Ebrahimi         continue;
530*22dc650dSSadaf Ebrahimi         }
531*22dc650dSSadaf Ebrahimi       goto LOADLITERAL;
532*22dc650dSSadaf Ebrahimi       }
533*22dc650dSSadaf Ebrahimi 
534*22dc650dSSadaf Ebrahimi     /* Not in literal mode. */
535*22dc650dSSadaf Ebrahimi 
536*22dc650dSSadaf Ebrahimi     if (*ptr == CHAR_DOLLAR_SIGN)
537*22dc650dSSadaf Ebrahimi       {
538*22dc650dSSadaf Ebrahimi       int group, n;
539*22dc650dSSadaf Ebrahimi       uint32_t special = 0;
540*22dc650dSSadaf Ebrahimi       BOOL inparens;
541*22dc650dSSadaf Ebrahimi       BOOL star;
542*22dc650dSSadaf Ebrahimi       PCRE2_SIZE sublength;
543*22dc650dSSadaf Ebrahimi       PCRE2_SPTR text1_start = NULL;
544*22dc650dSSadaf Ebrahimi       PCRE2_SPTR text1_end = NULL;
545*22dc650dSSadaf Ebrahimi       PCRE2_SPTR text2_start = NULL;
546*22dc650dSSadaf Ebrahimi       PCRE2_SPTR text2_end = NULL;
547*22dc650dSSadaf Ebrahimi       PCRE2_UCHAR next;
548*22dc650dSSadaf Ebrahimi       PCRE2_UCHAR name[33];
549*22dc650dSSadaf Ebrahimi 
550*22dc650dSSadaf Ebrahimi       if (++ptr >= repend) goto BAD;
551*22dc650dSSadaf Ebrahimi       if ((next = *ptr) == CHAR_DOLLAR_SIGN) goto LOADLITERAL;
552*22dc650dSSadaf Ebrahimi 
553*22dc650dSSadaf Ebrahimi       group = -1;
554*22dc650dSSadaf Ebrahimi       n = 0;
555*22dc650dSSadaf Ebrahimi       inparens = FALSE;
556*22dc650dSSadaf Ebrahimi       star = FALSE;
557*22dc650dSSadaf Ebrahimi 
558*22dc650dSSadaf Ebrahimi       if (next == CHAR_LEFT_CURLY_BRACKET)
559*22dc650dSSadaf Ebrahimi         {
560*22dc650dSSadaf Ebrahimi         if (++ptr >= repend) goto BAD;
561*22dc650dSSadaf Ebrahimi         next = *ptr;
562*22dc650dSSadaf Ebrahimi         inparens = TRUE;
563*22dc650dSSadaf Ebrahimi         }
564*22dc650dSSadaf Ebrahimi 
565*22dc650dSSadaf Ebrahimi       if (next == CHAR_ASTERISK)
566*22dc650dSSadaf Ebrahimi         {
567*22dc650dSSadaf Ebrahimi         if (++ptr >= repend) goto BAD;
568*22dc650dSSadaf Ebrahimi         next = *ptr;
569*22dc650dSSadaf Ebrahimi         star = TRUE;
570*22dc650dSSadaf Ebrahimi         }
571*22dc650dSSadaf Ebrahimi 
572*22dc650dSSadaf Ebrahimi       if (!star && next >= CHAR_0 && next <= CHAR_9)
573*22dc650dSSadaf Ebrahimi         {
574*22dc650dSSadaf Ebrahimi         group = next - CHAR_0;
575*22dc650dSSadaf Ebrahimi         while (++ptr < repend)
576*22dc650dSSadaf Ebrahimi           {
577*22dc650dSSadaf Ebrahimi           next = *ptr;
578*22dc650dSSadaf Ebrahimi           if (next < CHAR_0 || next > CHAR_9) break;
579*22dc650dSSadaf Ebrahimi           group = group * 10 + next - CHAR_0;
580*22dc650dSSadaf Ebrahimi 
581*22dc650dSSadaf Ebrahimi           /* A check for a number greater than the highest captured group
582*22dc650dSSadaf Ebrahimi           is sufficient here; no need for a separate overflow check. If unknown
583*22dc650dSSadaf Ebrahimi           groups are to be treated as unset, just skip over any remaining
584*22dc650dSSadaf Ebrahimi           digits and carry on. */
585*22dc650dSSadaf Ebrahimi 
586*22dc650dSSadaf Ebrahimi           if (group > code->top_bracket)
587*22dc650dSSadaf Ebrahimi             {
588*22dc650dSSadaf Ebrahimi             if ((suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
589*22dc650dSSadaf Ebrahimi               {
590*22dc650dSSadaf Ebrahimi               while (++ptr < repend && *ptr >= CHAR_0 && *ptr <= CHAR_9);
591*22dc650dSSadaf Ebrahimi               break;
592*22dc650dSSadaf Ebrahimi               }
593*22dc650dSSadaf Ebrahimi             else
594*22dc650dSSadaf Ebrahimi               {
595*22dc650dSSadaf Ebrahimi               rc = PCRE2_ERROR_NOSUBSTRING;
596*22dc650dSSadaf Ebrahimi               goto PTREXIT;
597*22dc650dSSadaf Ebrahimi               }
598*22dc650dSSadaf Ebrahimi             }
599*22dc650dSSadaf Ebrahimi           }
600*22dc650dSSadaf Ebrahimi         }
601*22dc650dSSadaf Ebrahimi       else
602*22dc650dSSadaf Ebrahimi         {
603*22dc650dSSadaf Ebrahimi         const uint8_t *ctypes = code->tables + ctypes_offset;
604*22dc650dSSadaf Ebrahimi         while (MAX_255(next) && (ctypes[next] & ctype_word) != 0)
605*22dc650dSSadaf Ebrahimi           {
606*22dc650dSSadaf Ebrahimi           name[n++] = next;
607*22dc650dSSadaf Ebrahimi           if (n > 32) goto BAD;
608*22dc650dSSadaf Ebrahimi           if (++ptr >= repend) break;
609*22dc650dSSadaf Ebrahimi           next = *ptr;
610*22dc650dSSadaf Ebrahimi           }
611*22dc650dSSadaf Ebrahimi         if (n == 0) goto BAD;
612*22dc650dSSadaf Ebrahimi         name[n] = 0;
613*22dc650dSSadaf Ebrahimi         }
614*22dc650dSSadaf Ebrahimi 
615*22dc650dSSadaf Ebrahimi       /* In extended mode we recognize ${name:+set text:unset text} and
616*22dc650dSSadaf Ebrahimi       ${name:-default text}. */
617*22dc650dSSadaf Ebrahimi 
618*22dc650dSSadaf Ebrahimi       if (inparens)
619*22dc650dSSadaf Ebrahimi         {
620*22dc650dSSadaf Ebrahimi         if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 &&
621*22dc650dSSadaf Ebrahimi              !star && ptr < repend - 2 && next == CHAR_COLON)
622*22dc650dSSadaf Ebrahimi           {
623*22dc650dSSadaf Ebrahimi           special = *(++ptr);
624*22dc650dSSadaf Ebrahimi           if (special != CHAR_PLUS && special != CHAR_MINUS)
625*22dc650dSSadaf Ebrahimi             {
626*22dc650dSSadaf Ebrahimi             rc = PCRE2_ERROR_BADSUBSTITUTION;
627*22dc650dSSadaf Ebrahimi             goto PTREXIT;
628*22dc650dSSadaf Ebrahimi             }
629*22dc650dSSadaf Ebrahimi 
630*22dc650dSSadaf Ebrahimi           text1_start = ++ptr;
631*22dc650dSSadaf Ebrahimi           rc = find_text_end(code, &ptr, repend, special == CHAR_MINUS);
632*22dc650dSSadaf Ebrahimi           if (rc != 0) goto PTREXIT;
633*22dc650dSSadaf Ebrahimi           text1_end = ptr;
634*22dc650dSSadaf Ebrahimi 
635*22dc650dSSadaf Ebrahimi           if (special == CHAR_PLUS && *ptr == CHAR_COLON)
636*22dc650dSSadaf Ebrahimi             {
637*22dc650dSSadaf Ebrahimi             text2_start = ++ptr;
638*22dc650dSSadaf Ebrahimi             rc = find_text_end(code, &ptr, repend, TRUE);
639*22dc650dSSadaf Ebrahimi             if (rc != 0) goto PTREXIT;
640*22dc650dSSadaf Ebrahimi             text2_end = ptr;
641*22dc650dSSadaf Ebrahimi             }
642*22dc650dSSadaf Ebrahimi           }
643*22dc650dSSadaf Ebrahimi 
644*22dc650dSSadaf Ebrahimi         else
645*22dc650dSSadaf Ebrahimi           {
646*22dc650dSSadaf Ebrahimi           if (ptr >= repend || *ptr != CHAR_RIGHT_CURLY_BRACKET)
647*22dc650dSSadaf Ebrahimi             {
648*22dc650dSSadaf Ebrahimi             rc = PCRE2_ERROR_REPMISSINGBRACE;
649*22dc650dSSadaf Ebrahimi             goto PTREXIT;
650*22dc650dSSadaf Ebrahimi             }
651*22dc650dSSadaf Ebrahimi           }
652*22dc650dSSadaf Ebrahimi 
653*22dc650dSSadaf Ebrahimi         ptr++;
654*22dc650dSSadaf Ebrahimi         }
655*22dc650dSSadaf Ebrahimi 
656*22dc650dSSadaf Ebrahimi       /* Have found a syntactically correct group number or name, or *name.
657*22dc650dSSadaf Ebrahimi       Only *MARK is currently recognized. */
658*22dc650dSSadaf Ebrahimi 
659*22dc650dSSadaf Ebrahimi       if (star)
660*22dc650dSSadaf Ebrahimi         {
661*22dc650dSSadaf Ebrahimi         if (PRIV(strcmp_c8)(name, STRING_MARK) == 0)
662*22dc650dSSadaf Ebrahimi           {
663*22dc650dSSadaf Ebrahimi           PCRE2_SPTR mark = pcre2_get_mark(match_data);
664*22dc650dSSadaf Ebrahimi           if (mark != NULL)
665*22dc650dSSadaf Ebrahimi             {
666*22dc650dSSadaf Ebrahimi             PCRE2_SPTR mark_start = mark;
667*22dc650dSSadaf Ebrahimi             while (*mark != 0) mark++;
668*22dc650dSSadaf Ebrahimi             fraglength = mark - mark_start;
669*22dc650dSSadaf Ebrahimi             CHECKMEMCPY(mark_start, fraglength);
670*22dc650dSSadaf Ebrahimi             }
671*22dc650dSSadaf Ebrahimi           }
672*22dc650dSSadaf Ebrahimi         else goto BAD;
673*22dc650dSSadaf Ebrahimi         }
674*22dc650dSSadaf Ebrahimi 
675*22dc650dSSadaf Ebrahimi       /* Substitute the contents of a group. We don't use substring_copy
676*22dc650dSSadaf Ebrahimi       functions any more, in order to support case forcing. */
677*22dc650dSSadaf Ebrahimi 
678*22dc650dSSadaf Ebrahimi       else
679*22dc650dSSadaf Ebrahimi         {
680*22dc650dSSadaf Ebrahimi         PCRE2_SPTR subptr, subptrend;
681*22dc650dSSadaf Ebrahimi 
682*22dc650dSSadaf Ebrahimi         /* Find a number for a named group. In case there are duplicate names,
683*22dc650dSSadaf Ebrahimi         search for the first one that is set. If the name is not found when
684*22dc650dSSadaf Ebrahimi         PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set, set the group number to a
685*22dc650dSSadaf Ebrahimi         non-existent group. */
686*22dc650dSSadaf Ebrahimi 
687*22dc650dSSadaf Ebrahimi         if (group < 0)
688*22dc650dSSadaf Ebrahimi           {
689*22dc650dSSadaf Ebrahimi           PCRE2_SPTR first, last, entry;
690*22dc650dSSadaf Ebrahimi           rc = pcre2_substring_nametable_scan(code, name, &first, &last);
691*22dc650dSSadaf Ebrahimi           if (rc == PCRE2_ERROR_NOSUBSTRING &&
692*22dc650dSSadaf Ebrahimi               (suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
693*22dc650dSSadaf Ebrahimi             {
694*22dc650dSSadaf Ebrahimi             group = code->top_bracket + 1;
695*22dc650dSSadaf Ebrahimi             }
696*22dc650dSSadaf Ebrahimi           else
697*22dc650dSSadaf Ebrahimi             {
698*22dc650dSSadaf Ebrahimi             if (rc < 0) goto PTREXIT;
699*22dc650dSSadaf Ebrahimi             for (entry = first; entry <= last; entry += rc)
700*22dc650dSSadaf Ebrahimi               {
701*22dc650dSSadaf Ebrahimi               uint32_t ng = GET2(entry, 0);
702*22dc650dSSadaf Ebrahimi               if (ng < ovector_count)
703*22dc650dSSadaf Ebrahimi                 {
704*22dc650dSSadaf Ebrahimi                 if (group < 0) group = ng;          /* First in ovector */
705*22dc650dSSadaf Ebrahimi                 if (ovector[ng*2] != PCRE2_UNSET)
706*22dc650dSSadaf Ebrahimi                   {
707*22dc650dSSadaf Ebrahimi                   group = ng;                       /* First that is set */
708*22dc650dSSadaf Ebrahimi                   break;
709*22dc650dSSadaf Ebrahimi                   }
710*22dc650dSSadaf Ebrahimi                 }
711*22dc650dSSadaf Ebrahimi               }
712*22dc650dSSadaf Ebrahimi 
713*22dc650dSSadaf Ebrahimi             /* If group is still negative, it means we did not find a group
714*22dc650dSSadaf Ebrahimi             that is in the ovector. Just set the first group. */
715*22dc650dSSadaf Ebrahimi 
716*22dc650dSSadaf Ebrahimi             if (group < 0) group = GET2(first, 0);
717*22dc650dSSadaf Ebrahimi             }
718*22dc650dSSadaf Ebrahimi           }
719*22dc650dSSadaf Ebrahimi 
720*22dc650dSSadaf Ebrahimi         /* We now have a group that is identified by number. Find the length of
721*22dc650dSSadaf Ebrahimi         the captured string. If a group in a non-special substitution is unset
722*22dc650dSSadaf Ebrahimi         when PCRE2_SUBSTITUTE_UNSET_EMPTY is set, substitute nothing. */
723*22dc650dSSadaf Ebrahimi 
724*22dc650dSSadaf Ebrahimi         rc = pcre2_substring_length_bynumber(match_data, group, &sublength);
725*22dc650dSSadaf Ebrahimi         if (rc < 0)
726*22dc650dSSadaf Ebrahimi           {
727*22dc650dSSadaf Ebrahimi           if (rc == PCRE2_ERROR_NOSUBSTRING &&
728*22dc650dSSadaf Ebrahimi               (suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
729*22dc650dSSadaf Ebrahimi             {
730*22dc650dSSadaf Ebrahimi             rc = PCRE2_ERROR_UNSET;
731*22dc650dSSadaf Ebrahimi             }
732*22dc650dSSadaf Ebrahimi           if (rc != PCRE2_ERROR_UNSET) goto PTREXIT;  /* Non-unset errors */
733*22dc650dSSadaf Ebrahimi           if (special == 0)                           /* Plain substitution */
734*22dc650dSSadaf Ebrahimi             {
735*22dc650dSSadaf Ebrahimi             if ((suboptions & PCRE2_SUBSTITUTE_UNSET_EMPTY) != 0) continue;
736*22dc650dSSadaf Ebrahimi             goto PTREXIT;                             /* Else error */
737*22dc650dSSadaf Ebrahimi             }
738*22dc650dSSadaf Ebrahimi           }
739*22dc650dSSadaf Ebrahimi 
740*22dc650dSSadaf Ebrahimi         /* If special is '+' we have a 'set' and possibly an 'unset' text,
741*22dc650dSSadaf Ebrahimi         both of which are reprocessed when used. If special is '-' we have a
742*22dc650dSSadaf Ebrahimi         default text for when the group is unset; it must be reprocessed. */
743*22dc650dSSadaf Ebrahimi 
744*22dc650dSSadaf Ebrahimi         if (special != 0)
745*22dc650dSSadaf Ebrahimi           {
746*22dc650dSSadaf Ebrahimi           if (special == CHAR_MINUS)
747*22dc650dSSadaf Ebrahimi             {
748*22dc650dSSadaf Ebrahimi             if (rc == 0) goto LITERAL_SUBSTITUTE;
749*22dc650dSSadaf Ebrahimi             text2_start = text1_start;
750*22dc650dSSadaf Ebrahimi             text2_end = text1_end;
751*22dc650dSSadaf Ebrahimi             }
752*22dc650dSSadaf Ebrahimi 
753*22dc650dSSadaf Ebrahimi           if (ptrstackptr >= PTR_STACK_SIZE) goto BAD;
754*22dc650dSSadaf Ebrahimi           ptrstack[ptrstackptr++] = ptr;
755*22dc650dSSadaf Ebrahimi           ptrstack[ptrstackptr++] = repend;
756*22dc650dSSadaf Ebrahimi 
757*22dc650dSSadaf Ebrahimi           if (rc == 0)
758*22dc650dSSadaf Ebrahimi             {
759*22dc650dSSadaf Ebrahimi             ptr = text1_start;
760*22dc650dSSadaf Ebrahimi             repend = text1_end;
761*22dc650dSSadaf Ebrahimi             }
762*22dc650dSSadaf Ebrahimi           else
763*22dc650dSSadaf Ebrahimi             {
764*22dc650dSSadaf Ebrahimi             ptr = text2_start;
765*22dc650dSSadaf Ebrahimi             repend = text2_end;
766*22dc650dSSadaf Ebrahimi             }
767*22dc650dSSadaf Ebrahimi           continue;
768*22dc650dSSadaf Ebrahimi           }
769*22dc650dSSadaf Ebrahimi 
770*22dc650dSSadaf Ebrahimi         /* Otherwise we have a literal substitution of a group's contents. */
771*22dc650dSSadaf Ebrahimi 
772*22dc650dSSadaf Ebrahimi         LITERAL_SUBSTITUTE:
773*22dc650dSSadaf Ebrahimi         subptr = subject + ovector[group*2];
774*22dc650dSSadaf Ebrahimi         subptrend = subject + ovector[group*2 + 1];
775*22dc650dSSadaf Ebrahimi 
776*22dc650dSSadaf Ebrahimi         /* Substitute a literal string, possibly forcing alphabetic case. */
777*22dc650dSSadaf Ebrahimi 
778*22dc650dSSadaf Ebrahimi         while (subptr < subptrend)
779*22dc650dSSadaf Ebrahimi           {
780*22dc650dSSadaf Ebrahimi           GETCHARINCTEST(ch, subptr);
781*22dc650dSSadaf Ebrahimi           if (forcecase != 0)
782*22dc650dSSadaf Ebrahimi             {
783*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
784*22dc650dSSadaf Ebrahimi             if (utf || ucp)
785*22dc650dSSadaf Ebrahimi               {
786*22dc650dSSadaf Ebrahimi               uint32_t type = UCD_CHARTYPE(ch);
787*22dc650dSSadaf Ebrahimi               if (PRIV(ucp_gentype)[type] == ucp_L &&
788*22dc650dSSadaf Ebrahimi                   type != ((forcecase > 0)? ucp_Lu : ucp_Ll))
789*22dc650dSSadaf Ebrahimi                 ch = UCD_OTHERCASE(ch);
790*22dc650dSSadaf Ebrahimi               }
791*22dc650dSSadaf Ebrahimi             else
792*22dc650dSSadaf Ebrahimi #endif
793*22dc650dSSadaf Ebrahimi               {
794*22dc650dSSadaf Ebrahimi               if (((code->tables + cbits_offset +
795*22dc650dSSadaf Ebrahimi                   ((forcecase > 0)? cbit_upper:cbit_lower)
796*22dc650dSSadaf Ebrahimi                   )[ch/8] & (1u << (ch%8))) == 0)
797*22dc650dSSadaf Ebrahimi                 ch = (code->tables + fcc_offset)[ch];
798*22dc650dSSadaf Ebrahimi               }
799*22dc650dSSadaf Ebrahimi             forcecase = forcecasereset;
800*22dc650dSSadaf Ebrahimi             }
801*22dc650dSSadaf Ebrahimi 
802*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
803*22dc650dSSadaf Ebrahimi           if (utf) chlen = PRIV(ord2utf)(ch, temp); else
804*22dc650dSSadaf Ebrahimi #endif
805*22dc650dSSadaf Ebrahimi             {
806*22dc650dSSadaf Ebrahimi             temp[0] = ch;
807*22dc650dSSadaf Ebrahimi             chlen = 1;
808*22dc650dSSadaf Ebrahimi             }
809*22dc650dSSadaf Ebrahimi           CHECKMEMCPY(temp, chlen);
810*22dc650dSSadaf Ebrahimi           }
811*22dc650dSSadaf Ebrahimi         }
812*22dc650dSSadaf Ebrahimi       }
813*22dc650dSSadaf Ebrahimi 
814*22dc650dSSadaf Ebrahimi     /* Handle an escape sequence in extended mode. We can use check_escape()
815*22dc650dSSadaf Ebrahimi     to process \Q, \E, \c, \o, \x and \ followed by non-alphanumerics, but
816*22dc650dSSadaf Ebrahimi     the case-forcing escapes are not supported in pcre2_compile() so must be
817*22dc650dSSadaf Ebrahimi     recognized here. */
818*22dc650dSSadaf Ebrahimi 
819*22dc650dSSadaf Ebrahimi     else if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 &&
820*22dc650dSSadaf Ebrahimi               *ptr == CHAR_BACKSLASH)
821*22dc650dSSadaf Ebrahimi       {
822*22dc650dSSadaf Ebrahimi       int errorcode;
823*22dc650dSSadaf Ebrahimi 
824*22dc650dSSadaf Ebrahimi       if (ptr < repend - 1) switch (ptr[1])
825*22dc650dSSadaf Ebrahimi         {
826*22dc650dSSadaf Ebrahimi         case CHAR_L:
827*22dc650dSSadaf Ebrahimi         forcecase = forcecasereset = -1;
828*22dc650dSSadaf Ebrahimi         ptr += 2;
829*22dc650dSSadaf Ebrahimi         continue;
830*22dc650dSSadaf Ebrahimi 
831*22dc650dSSadaf Ebrahimi         case CHAR_l:
832*22dc650dSSadaf Ebrahimi         forcecase = -1;
833*22dc650dSSadaf Ebrahimi         forcecasereset = 0;
834*22dc650dSSadaf Ebrahimi         ptr += 2;
835*22dc650dSSadaf Ebrahimi         continue;
836*22dc650dSSadaf Ebrahimi 
837*22dc650dSSadaf Ebrahimi         case CHAR_U:
838*22dc650dSSadaf Ebrahimi         forcecase = forcecasereset = 1;
839*22dc650dSSadaf Ebrahimi         ptr += 2;
840*22dc650dSSadaf Ebrahimi         continue;
841*22dc650dSSadaf Ebrahimi 
842*22dc650dSSadaf Ebrahimi         case CHAR_u:
843*22dc650dSSadaf Ebrahimi         forcecase = 1;
844*22dc650dSSadaf Ebrahimi         forcecasereset = 0;
845*22dc650dSSadaf Ebrahimi         ptr += 2;
846*22dc650dSSadaf Ebrahimi         continue;
847*22dc650dSSadaf Ebrahimi 
848*22dc650dSSadaf Ebrahimi         default:
849*22dc650dSSadaf Ebrahimi         break;
850*22dc650dSSadaf Ebrahimi         }
851*22dc650dSSadaf Ebrahimi 
852*22dc650dSSadaf Ebrahimi       ptr++;  /* Point after \ */
853*22dc650dSSadaf Ebrahimi       rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode,
854*22dc650dSSadaf Ebrahimi         code->overall_options, code->extra_options, FALSE, NULL);
855*22dc650dSSadaf Ebrahimi       if (errorcode != 0) goto BADESCAPE;
856*22dc650dSSadaf Ebrahimi 
857*22dc650dSSadaf Ebrahimi       switch(rc)
858*22dc650dSSadaf Ebrahimi         {
859*22dc650dSSadaf Ebrahimi         case ESC_E:
860*22dc650dSSadaf Ebrahimi         forcecase = forcecasereset = 0;
861*22dc650dSSadaf Ebrahimi         continue;
862*22dc650dSSadaf Ebrahimi 
863*22dc650dSSadaf Ebrahimi         case ESC_Q:
864*22dc650dSSadaf Ebrahimi         escaped_literal = TRUE;
865*22dc650dSSadaf Ebrahimi         continue;
866*22dc650dSSadaf Ebrahimi 
867*22dc650dSSadaf Ebrahimi         case 0:      /* Data character */
868*22dc650dSSadaf Ebrahimi         goto LITERAL;
869*22dc650dSSadaf Ebrahimi 
870*22dc650dSSadaf Ebrahimi         default:
871*22dc650dSSadaf Ebrahimi         goto BADESCAPE;
872*22dc650dSSadaf Ebrahimi         }
873*22dc650dSSadaf Ebrahimi       }
874*22dc650dSSadaf Ebrahimi 
875*22dc650dSSadaf Ebrahimi     /* Handle a literal code unit */
876*22dc650dSSadaf Ebrahimi 
877*22dc650dSSadaf Ebrahimi     else
878*22dc650dSSadaf Ebrahimi       {
879*22dc650dSSadaf Ebrahimi       LOADLITERAL:
880*22dc650dSSadaf Ebrahimi       GETCHARINCTEST(ch, ptr);    /* Get character value, increment pointer */
881*22dc650dSSadaf Ebrahimi 
882*22dc650dSSadaf Ebrahimi       LITERAL:
883*22dc650dSSadaf Ebrahimi       if (forcecase != 0)
884*22dc650dSSadaf Ebrahimi         {
885*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
886*22dc650dSSadaf Ebrahimi         if (utf || ucp)
887*22dc650dSSadaf Ebrahimi           {
888*22dc650dSSadaf Ebrahimi           uint32_t type = UCD_CHARTYPE(ch);
889*22dc650dSSadaf Ebrahimi           if (PRIV(ucp_gentype)[type] == ucp_L &&
890*22dc650dSSadaf Ebrahimi               type != ((forcecase > 0)? ucp_Lu : ucp_Ll))
891*22dc650dSSadaf Ebrahimi             ch = UCD_OTHERCASE(ch);
892*22dc650dSSadaf Ebrahimi           }
893*22dc650dSSadaf Ebrahimi         else
894*22dc650dSSadaf Ebrahimi #endif
895*22dc650dSSadaf Ebrahimi           {
896*22dc650dSSadaf Ebrahimi           if (((code->tables + cbits_offset +
897*22dc650dSSadaf Ebrahimi               ((forcecase > 0)? cbit_upper:cbit_lower)
898*22dc650dSSadaf Ebrahimi               )[ch/8] & (1u << (ch%8))) == 0)
899*22dc650dSSadaf Ebrahimi             ch = (code->tables + fcc_offset)[ch];
900*22dc650dSSadaf Ebrahimi           }
901*22dc650dSSadaf Ebrahimi         forcecase = forcecasereset;
902*22dc650dSSadaf Ebrahimi         }
903*22dc650dSSadaf Ebrahimi 
904*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
905*22dc650dSSadaf Ebrahimi       if (utf) chlen = PRIV(ord2utf)(ch, temp); else
906*22dc650dSSadaf Ebrahimi #endif
907*22dc650dSSadaf Ebrahimi         {
908*22dc650dSSadaf Ebrahimi         temp[0] = ch;
909*22dc650dSSadaf Ebrahimi         chlen = 1;
910*22dc650dSSadaf Ebrahimi         }
911*22dc650dSSadaf Ebrahimi       CHECKMEMCPY(temp, chlen);
912*22dc650dSSadaf Ebrahimi       } /* End handling a literal code unit */
913*22dc650dSSadaf Ebrahimi     }   /* End of loop for scanning the replacement. */
914*22dc650dSSadaf Ebrahimi 
915*22dc650dSSadaf Ebrahimi   /* The replacement has been copied to the output, or its size has been
916*22dc650dSSadaf Ebrahimi   remembered. Do the callout if there is one and we have done an actual
917*22dc650dSSadaf Ebrahimi   replacement. */
918*22dc650dSSadaf Ebrahimi 
919*22dc650dSSadaf Ebrahimi   if (!overflowed && mcontext != NULL && mcontext->substitute_callout != NULL)
920*22dc650dSSadaf Ebrahimi     {
921*22dc650dSSadaf Ebrahimi     scb.subscount = subs;
922*22dc650dSSadaf Ebrahimi     scb.output_offsets[1] = buff_offset;
923*22dc650dSSadaf Ebrahimi     rc = mcontext->substitute_callout(&scb, mcontext->substitute_callout_data);
924*22dc650dSSadaf Ebrahimi 
925*22dc650dSSadaf Ebrahimi     /* A non-zero return means cancel this substitution. Instead, copy the
926*22dc650dSSadaf Ebrahimi     matched string fragment. */
927*22dc650dSSadaf Ebrahimi 
928*22dc650dSSadaf Ebrahimi     if (rc != 0)
929*22dc650dSSadaf Ebrahimi       {
930*22dc650dSSadaf Ebrahimi       PCRE2_SIZE newlength = scb.output_offsets[1] - scb.output_offsets[0];
931*22dc650dSSadaf Ebrahimi       PCRE2_SIZE oldlength = ovector[1] - ovector[0];
932*22dc650dSSadaf Ebrahimi 
933*22dc650dSSadaf Ebrahimi       buff_offset -= newlength;
934*22dc650dSSadaf Ebrahimi       lengthleft += newlength;
935*22dc650dSSadaf Ebrahimi       if (!replacement_only) CHECKMEMCPY(subject + ovector[0], oldlength);
936*22dc650dSSadaf Ebrahimi 
937*22dc650dSSadaf Ebrahimi       /* A negative return means do not do any more. */
938*22dc650dSSadaf Ebrahimi 
939*22dc650dSSadaf Ebrahimi       if (rc < 0) suboptions &= (~PCRE2_SUBSTITUTE_GLOBAL);
940*22dc650dSSadaf Ebrahimi       }
941*22dc650dSSadaf Ebrahimi     }
942*22dc650dSSadaf Ebrahimi 
943*22dc650dSSadaf Ebrahimi   /* Save the details of this match. See above for how this data is used. If we
944*22dc650dSSadaf Ebrahimi   matched an empty string, do the magic for global matches. Update the start
945*22dc650dSSadaf Ebrahimi   offset to point to the rest of the subject string. If we re-used an existing
946*22dc650dSSadaf Ebrahimi   match for the first match, switch to the internal match data block. */
947*22dc650dSSadaf Ebrahimi 
948*22dc650dSSadaf Ebrahimi   ovecsave[0] = ovector[0];
949*22dc650dSSadaf Ebrahimi   ovecsave[1] = ovector[1];
950*22dc650dSSadaf Ebrahimi   ovecsave[2] = start_offset;
951*22dc650dSSadaf Ebrahimi 
952*22dc650dSSadaf Ebrahimi   goptions = (ovector[0] != ovector[1] || ovector[0] > start_offset)? 0 :
953*22dc650dSSadaf Ebrahimi     PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART;
954*22dc650dSSadaf Ebrahimi   start_offset = ovector[1];
955*22dc650dSSadaf Ebrahimi   } while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != 0);  /* Repeat "do" loop */
956*22dc650dSSadaf Ebrahimi 
957*22dc650dSSadaf Ebrahimi /* Copy the rest of the subject unless not required, and terminate the output
958*22dc650dSSadaf Ebrahimi with a binary zero. */
959*22dc650dSSadaf Ebrahimi 
960*22dc650dSSadaf Ebrahimi if (!replacement_only)
961*22dc650dSSadaf Ebrahimi   {
962*22dc650dSSadaf Ebrahimi   fraglength = length - start_offset;
963*22dc650dSSadaf Ebrahimi   CHECKMEMCPY(subject + start_offset, fraglength);
964*22dc650dSSadaf Ebrahimi   }
965*22dc650dSSadaf Ebrahimi 
966*22dc650dSSadaf Ebrahimi temp[0] = 0;
967*22dc650dSSadaf Ebrahimi CHECKMEMCPY(temp, 1);
968*22dc650dSSadaf Ebrahimi 
969*22dc650dSSadaf Ebrahimi /* If overflowed is set it means the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set,
970*22dc650dSSadaf Ebrahimi and matching has carried on after a full buffer, in order to compute the length
971*22dc650dSSadaf Ebrahimi needed. Otherwise, an overflow generates an immediate error return. */
972*22dc650dSSadaf Ebrahimi 
973*22dc650dSSadaf Ebrahimi if (overflowed)
974*22dc650dSSadaf Ebrahimi   {
975*22dc650dSSadaf Ebrahimi   rc = PCRE2_ERROR_NOMEMORY;
976*22dc650dSSadaf Ebrahimi   *blength = buff_length + extra_needed;
977*22dc650dSSadaf Ebrahimi   }
978*22dc650dSSadaf Ebrahimi 
979*22dc650dSSadaf Ebrahimi /* After a successful execution, return the number of substitutions and set the
980*22dc650dSSadaf Ebrahimi length of buffer used, excluding the trailing zero. */
981*22dc650dSSadaf Ebrahimi 
982*22dc650dSSadaf Ebrahimi else
983*22dc650dSSadaf Ebrahimi   {
984*22dc650dSSadaf Ebrahimi   rc = subs;
985*22dc650dSSadaf Ebrahimi   *blength = buff_offset - 1;
986*22dc650dSSadaf Ebrahimi   }
987*22dc650dSSadaf Ebrahimi 
988*22dc650dSSadaf Ebrahimi EXIT:
989*22dc650dSSadaf Ebrahimi if (internal_match_data != NULL) pcre2_match_data_free(internal_match_data);
990*22dc650dSSadaf Ebrahimi   else match_data->rc = rc;
991*22dc650dSSadaf Ebrahimi return rc;
992*22dc650dSSadaf Ebrahimi 
993*22dc650dSSadaf Ebrahimi NOROOM:
994*22dc650dSSadaf Ebrahimi rc = PCRE2_ERROR_NOMEMORY;
995*22dc650dSSadaf Ebrahimi goto EXIT;
996*22dc650dSSadaf Ebrahimi 
997*22dc650dSSadaf Ebrahimi BAD:
998*22dc650dSSadaf Ebrahimi rc = PCRE2_ERROR_BADREPLACEMENT;
999*22dc650dSSadaf Ebrahimi goto PTREXIT;
1000*22dc650dSSadaf Ebrahimi 
1001*22dc650dSSadaf Ebrahimi BADESCAPE:
1002*22dc650dSSadaf Ebrahimi rc = PCRE2_ERROR_BADREPESCAPE;
1003*22dc650dSSadaf Ebrahimi 
1004*22dc650dSSadaf Ebrahimi PTREXIT:
1005*22dc650dSSadaf Ebrahimi *blength = (PCRE2_SIZE)(ptr - replacement);
1006*22dc650dSSadaf Ebrahimi goto EXIT;
1007*22dc650dSSadaf Ebrahimi }
1008*22dc650dSSadaf Ebrahimi 
1009*22dc650dSSadaf Ebrahimi /* End of pcre2_substitute.c */
1010