1*22dc650dSSadaf Ebrahimi /*************************************************
2*22dc650dSSadaf Ebrahimi * Perl-Compatible Regular Expressions *
3*22dc650dSSadaf Ebrahimi *************************************************/
4*22dc650dSSadaf Ebrahimi
5*22dc650dSSadaf Ebrahimi /* PCRE is a library of functions to support regular expressions whose syntax
6*22dc650dSSadaf Ebrahimi and semantics are as close as possible to those of the Perl 5 language.
7*22dc650dSSadaf Ebrahimi
8*22dc650dSSadaf Ebrahimi Written by Philip Hazel
9*22dc650dSSadaf Ebrahimi Original API code Copyright (c) 1997-2012 University of Cambridge
10*22dc650dSSadaf Ebrahimi New API code Copyright (c) 2016-2022 University of Cambridge
11*22dc650dSSadaf Ebrahimi
12*22dc650dSSadaf Ebrahimi -----------------------------------------------------------------------------
13*22dc650dSSadaf Ebrahimi Redistribution and use in source and binary forms, with or without
14*22dc650dSSadaf Ebrahimi modification, are permitted provided that the following conditions are met:
15*22dc650dSSadaf Ebrahimi
16*22dc650dSSadaf Ebrahimi * Redistributions of source code must retain the above copyright notice,
17*22dc650dSSadaf Ebrahimi this list of conditions and the following disclaimer.
18*22dc650dSSadaf Ebrahimi
19*22dc650dSSadaf Ebrahimi * Redistributions in binary form must reproduce the above copyright
20*22dc650dSSadaf Ebrahimi notice, this list of conditions and the following disclaimer in the
21*22dc650dSSadaf Ebrahimi documentation and/or other materials provided with the distribution.
22*22dc650dSSadaf Ebrahimi
23*22dc650dSSadaf Ebrahimi * Neither the name of the University of Cambridge nor the names of its
24*22dc650dSSadaf Ebrahimi contributors may be used to endorse or promote products derived from
25*22dc650dSSadaf Ebrahimi this software without specific prior written permission.
26*22dc650dSSadaf Ebrahimi
27*22dc650dSSadaf Ebrahimi THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28*22dc650dSSadaf Ebrahimi AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29*22dc650dSSadaf Ebrahimi IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30*22dc650dSSadaf Ebrahimi ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31*22dc650dSSadaf Ebrahimi LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32*22dc650dSSadaf Ebrahimi CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33*22dc650dSSadaf Ebrahimi SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34*22dc650dSSadaf Ebrahimi INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35*22dc650dSSadaf Ebrahimi CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36*22dc650dSSadaf Ebrahimi ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37*22dc650dSSadaf Ebrahimi POSSIBILITY OF SUCH DAMAGE.
38*22dc650dSSadaf Ebrahimi -----------------------------------------------------------------------------
39*22dc650dSSadaf Ebrahimi */
40*22dc650dSSadaf Ebrahimi
41*22dc650dSSadaf Ebrahimi
42*22dc650dSSadaf Ebrahimi #ifdef HAVE_CONFIG_H
43*22dc650dSSadaf Ebrahimi #include "config.h"
44*22dc650dSSadaf Ebrahimi #endif
45*22dc650dSSadaf Ebrahimi
46*22dc650dSSadaf Ebrahimi #include "pcre2_internal.h"
47*22dc650dSSadaf Ebrahimi
/* Number of entries in the ptrstack array that pcre2_substitute() declares for
nested replacement substrings. Entries are popped in pairs (a saved end pointer
and a saved current pointer). */
48*22dc650dSSadaf Ebrahimi #define PTR_STACK_SIZE 20
49*22dc650dSSadaf Ebrahimi
/* Mask of the option bits that belong to pcre2_substitute() itself.
pcre2_substitute() copies these bits into its suboptions variable and clears
them from the options word that it passes on to pcre2_match(). */
50*22dc650dSSadaf Ebrahimi #define SUBSTITUTE_OPTIONS \
51*22dc650dSSadaf Ebrahimi (PCRE2_SUBSTITUTE_EXTENDED|PCRE2_SUBSTITUTE_GLOBAL| \
52*22dc650dSSadaf Ebrahimi PCRE2_SUBSTITUTE_LITERAL|PCRE2_SUBSTITUTE_MATCHED| \
53*22dc650dSSadaf Ebrahimi PCRE2_SUBSTITUTE_OVERFLOW_LENGTH|PCRE2_SUBSTITUTE_REPLACEMENT_ONLY| \
54*22dc650dSSadaf Ebrahimi PCRE2_SUBSTITUTE_UNKNOWN_UNSET|PCRE2_SUBSTITUTE_UNSET_EMPTY)
55*22dc650dSSadaf Ebrahimi
56*22dc650dSSadaf Ebrahimi
57*22dc650dSSadaf Ebrahimi
58*22dc650dSSadaf Ebrahimi /*************************************************
59*22dc650dSSadaf Ebrahimi * Find end of substitute text *
60*22dc650dSSadaf Ebrahimi *************************************************/
61*22dc650dSSadaf Ebrahimi
62*22dc650dSSadaf Ebrahimi /* In extended mode, we recognize ${name:+set text:unset text} and similar
63*22dc650dSSadaf Ebrahimi constructions. This requires the identification of unescaped : and }
64*22dc650dSSadaf Ebrahimi characters. This function scans for such. It must deal with nested ${
65*22dc650dSSadaf Ebrahimi constructions. The pointer to the text is updated, either to the required end
66*22dc650dSSadaf Ebrahimi character, or to where an error was detected.
67*22dc650dSSadaf Ebrahimi
68*22dc650dSSadaf Ebrahimi Arguments:
69*22dc650dSSadaf Ebrahimi code points to the compiled expression (for options)
70*22dc650dSSadaf Ebrahimi ptrptr points to the pointer to the start of the text (updated)
71*22dc650dSSadaf Ebrahimi ptrend end of the whole string
72*22dc650dSSadaf Ebrahimi last TRUE if the last expected string (only } recognized)
73*22dc650dSSadaf Ebrahimi
74*22dc650dSSadaf Ebrahimi Returns: 0 on success
75*22dc650dSSadaf Ebrahimi negative error code on failure
76*22dc650dSSadaf Ebrahimi */
77*22dc650dSSadaf Ebrahimi
78*22dc650dSSadaf Ebrahimi static int
find_text_end(const pcre2_code * code,PCRE2_SPTR * ptrptr,PCRE2_SPTR ptrend,BOOL last)79*22dc650dSSadaf Ebrahimi find_text_end(const pcre2_code *code, PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend,
80*22dc650dSSadaf Ebrahimi BOOL last)
81*22dc650dSSadaf Ebrahimi {
82*22dc650dSSadaf Ebrahimi int rc = 0;
83*22dc650dSSadaf Ebrahimi uint32_t nestlevel = 0;
84*22dc650dSSadaf Ebrahimi BOOL literal = FALSE;
85*22dc650dSSadaf Ebrahimi PCRE2_SPTR ptr = *ptrptr;
86*22dc650dSSadaf Ebrahimi
87*22dc650dSSadaf Ebrahimi for (; ptr < ptrend; ptr++)
88*22dc650dSSadaf Ebrahimi {
89*22dc650dSSadaf Ebrahimi if (literal)
90*22dc650dSSadaf Ebrahimi {
91*22dc650dSSadaf Ebrahimi if (ptr[0] == CHAR_BACKSLASH && ptr < ptrend - 1 && ptr[1] == CHAR_E)
92*22dc650dSSadaf Ebrahimi {
93*22dc650dSSadaf Ebrahimi literal = FALSE;
94*22dc650dSSadaf Ebrahimi ptr += 1;
95*22dc650dSSadaf Ebrahimi }
96*22dc650dSSadaf Ebrahimi }
97*22dc650dSSadaf Ebrahimi
98*22dc650dSSadaf Ebrahimi else if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
99*22dc650dSSadaf Ebrahimi {
100*22dc650dSSadaf Ebrahimi if (nestlevel == 0) goto EXIT;
101*22dc650dSSadaf Ebrahimi nestlevel--;
102*22dc650dSSadaf Ebrahimi }
103*22dc650dSSadaf Ebrahimi
104*22dc650dSSadaf Ebrahimi else if (*ptr == CHAR_COLON && !last && nestlevel == 0) goto EXIT;
105*22dc650dSSadaf Ebrahimi
106*22dc650dSSadaf Ebrahimi else if (*ptr == CHAR_DOLLAR_SIGN)
107*22dc650dSSadaf Ebrahimi {
108*22dc650dSSadaf Ebrahimi if (ptr < ptrend - 1 && ptr[1] == CHAR_LEFT_CURLY_BRACKET)
109*22dc650dSSadaf Ebrahimi {
110*22dc650dSSadaf Ebrahimi nestlevel++;
111*22dc650dSSadaf Ebrahimi ptr += 1;
112*22dc650dSSadaf Ebrahimi }
113*22dc650dSSadaf Ebrahimi }
114*22dc650dSSadaf Ebrahimi
115*22dc650dSSadaf Ebrahimi else if (*ptr == CHAR_BACKSLASH)
116*22dc650dSSadaf Ebrahimi {
117*22dc650dSSadaf Ebrahimi int erc;
118*22dc650dSSadaf Ebrahimi int errorcode;
119*22dc650dSSadaf Ebrahimi uint32_t ch;
120*22dc650dSSadaf Ebrahimi
121*22dc650dSSadaf Ebrahimi if (ptr < ptrend - 1) switch (ptr[1])
122*22dc650dSSadaf Ebrahimi {
123*22dc650dSSadaf Ebrahimi case CHAR_L:
124*22dc650dSSadaf Ebrahimi case CHAR_l:
125*22dc650dSSadaf Ebrahimi case CHAR_U:
126*22dc650dSSadaf Ebrahimi case CHAR_u:
127*22dc650dSSadaf Ebrahimi ptr += 1;
128*22dc650dSSadaf Ebrahimi continue;
129*22dc650dSSadaf Ebrahimi }
130*22dc650dSSadaf Ebrahimi
131*22dc650dSSadaf Ebrahimi ptr += 1; /* Must point after \ */
132*22dc650dSSadaf Ebrahimi erc = PRIV(check_escape)(&ptr, ptrend, &ch, &errorcode,
133*22dc650dSSadaf Ebrahimi code->overall_options, code->extra_options, FALSE, NULL);
134*22dc650dSSadaf Ebrahimi ptr -= 1; /* Back to last code unit of escape */
135*22dc650dSSadaf Ebrahimi if (errorcode != 0)
136*22dc650dSSadaf Ebrahimi {
137*22dc650dSSadaf Ebrahimi rc = errorcode;
138*22dc650dSSadaf Ebrahimi goto EXIT;
139*22dc650dSSadaf Ebrahimi }
140*22dc650dSSadaf Ebrahimi
141*22dc650dSSadaf Ebrahimi switch(erc)
142*22dc650dSSadaf Ebrahimi {
143*22dc650dSSadaf Ebrahimi case 0: /* Data character */
144*22dc650dSSadaf Ebrahimi case ESC_E: /* Isolated \E is ignored */
145*22dc650dSSadaf Ebrahimi break;
146*22dc650dSSadaf Ebrahimi
147*22dc650dSSadaf Ebrahimi case ESC_Q:
148*22dc650dSSadaf Ebrahimi literal = TRUE;
149*22dc650dSSadaf Ebrahimi break;
150*22dc650dSSadaf Ebrahimi
151*22dc650dSSadaf Ebrahimi default:
152*22dc650dSSadaf Ebrahimi rc = PCRE2_ERROR_BADREPESCAPE;
153*22dc650dSSadaf Ebrahimi goto EXIT;
154*22dc650dSSadaf Ebrahimi }
155*22dc650dSSadaf Ebrahimi }
156*22dc650dSSadaf Ebrahimi }
157*22dc650dSSadaf Ebrahimi
158*22dc650dSSadaf Ebrahimi rc = PCRE2_ERROR_REPMISSINGBRACE; /* Terminator not found */
159*22dc650dSSadaf Ebrahimi
160*22dc650dSSadaf Ebrahimi EXIT:
161*22dc650dSSadaf Ebrahimi *ptrptr = ptr;
162*22dc650dSSadaf Ebrahimi return rc;
163*22dc650dSSadaf Ebrahimi }
164*22dc650dSSadaf Ebrahimi
165*22dc650dSSadaf Ebrahimi
166*22dc650dSSadaf Ebrahimi
167*22dc650dSSadaf Ebrahimi /*************************************************
168*22dc650dSSadaf Ebrahimi * Match and substitute *
169*22dc650dSSadaf Ebrahimi *************************************************/
170*22dc650dSSadaf Ebrahimi
171*22dc650dSSadaf Ebrahimi /* This function applies a compiled re to a subject string and creates a new
172*22dc650dSSadaf Ebrahimi string with substitutions. The first 7 arguments are the same as for
173*22dc650dSSadaf Ebrahimi pcre2_match(). Either string length may be PCRE2_ZERO_TERMINATED.
174*22dc650dSSadaf Ebrahimi
175*22dc650dSSadaf Ebrahimi Arguments:
176*22dc650dSSadaf Ebrahimi code points to the compiled expression
177*22dc650dSSadaf Ebrahimi subject points to the subject string
178*22dc650dSSadaf Ebrahimi length length of subject string (may contain binary zeros)
179*22dc650dSSadaf Ebrahimi start_offset where to start in the subject string
180*22dc650dSSadaf Ebrahimi options option bits
181*22dc650dSSadaf Ebrahimi match_data points to a match_data block, or is NULL
182*22dc650dSSadaf Ebrahimi mcontext points to a PCRE2 match context
183*22dc650dSSadaf Ebrahimi replacement points to the replacement string
184*22dc650dSSadaf Ebrahimi rlength length of replacement string
185*22dc650dSSadaf Ebrahimi buffer where to put the substituted string
186*22dc650dSSadaf Ebrahimi blength points to length of buffer; updated to length of string
187*22dc650dSSadaf Ebrahimi
188*22dc650dSSadaf Ebrahimi Returns: >= 0 number of substitutions made
189*22dc650dSSadaf Ebrahimi < 0 an error code
190*22dc650dSSadaf Ebrahimi PCRE2_ERROR_BADREPLACEMENT means invalid use of $
191*22dc650dSSadaf Ebrahimi */
192*22dc650dSSadaf Ebrahimi
/* This macro checks for space in the buffer before copying into it. On
overflow, either give an error immediately (by jumping to NOROOM), or, when
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set in suboptions, keep on, accumulating in
extra_needed the number of additional code units that are required.

The expansion uses these names from the point of use: buffer, buff_offset,
lengthleft, extra_needed, overflowed, suboptions, and the label NOROOM. Both
arguments are parenthesized wherever they are expanded, so that expressions
may be passed safely. Note that "length" is evaluated more than once, so it
must not have side effects. The expansion is a brace block rather than a
do-while, so as not to change how existing uses parse. */

#define CHECKMEMCPY(from,length) \
  { \
  if (!overflowed && lengthleft < (length)) \
    { \
    if ((suboptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) == 0) goto NOROOM; \
    overflowed = TRUE; \
    extra_needed = (length) - lengthleft; \
    } \
  else if (overflowed) \
    { \
    extra_needed += (length); \
    } \
  else \
    { \
    memcpy(buffer + buff_offset, (from), CU2BYTES(length)); \
    buff_offset += (length); \
    lengthleft -= (length); \
    } \
  }
216*22dc650dSSadaf Ebrahimi
217*22dc650dSSadaf Ebrahimi /* Here's the function */
218*22dc650dSSadaf Ebrahimi
219*22dc650dSSadaf Ebrahimi PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substitute(const pcre2_code * code,PCRE2_SPTR subject,PCRE2_SIZE length,PCRE2_SIZE start_offset,uint32_t options,pcre2_match_data * match_data,pcre2_match_context * mcontext,PCRE2_SPTR replacement,PCRE2_SIZE rlength,PCRE2_UCHAR * buffer,PCRE2_SIZE * blength)220*22dc650dSSadaf Ebrahimi pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
221*22dc650dSSadaf Ebrahimi PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
222*22dc650dSSadaf Ebrahimi pcre2_match_context *mcontext, PCRE2_SPTR replacement, PCRE2_SIZE rlength,
223*22dc650dSSadaf Ebrahimi PCRE2_UCHAR *buffer, PCRE2_SIZE *blength)
224*22dc650dSSadaf Ebrahimi {
225*22dc650dSSadaf Ebrahimi int rc;
226*22dc650dSSadaf Ebrahimi int subs;
227*22dc650dSSadaf Ebrahimi int forcecase = 0;
228*22dc650dSSadaf Ebrahimi int forcecasereset = 0;
229*22dc650dSSadaf Ebrahimi uint32_t ovector_count;
230*22dc650dSSadaf Ebrahimi uint32_t goptions = 0;
231*22dc650dSSadaf Ebrahimi uint32_t suboptions;
232*22dc650dSSadaf Ebrahimi pcre2_match_data *internal_match_data = NULL;
233*22dc650dSSadaf Ebrahimi BOOL escaped_literal = FALSE;
234*22dc650dSSadaf Ebrahimi BOOL overflowed = FALSE;
235*22dc650dSSadaf Ebrahimi BOOL use_existing_match;
236*22dc650dSSadaf Ebrahimi BOOL replacement_only;
237*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
238*22dc650dSSadaf Ebrahimi BOOL utf = (code->overall_options & PCRE2_UTF) != 0;
239*22dc650dSSadaf Ebrahimi BOOL ucp = (code->overall_options & PCRE2_UCP) != 0;
240*22dc650dSSadaf Ebrahimi #endif
241*22dc650dSSadaf Ebrahimi PCRE2_UCHAR temp[6];
242*22dc650dSSadaf Ebrahimi PCRE2_SPTR ptr;
243*22dc650dSSadaf Ebrahimi PCRE2_SPTR repend;
244*22dc650dSSadaf Ebrahimi PCRE2_SIZE extra_needed = 0;
245*22dc650dSSadaf Ebrahimi PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength;
246*22dc650dSSadaf Ebrahimi PCRE2_SIZE *ovector;
247*22dc650dSSadaf Ebrahimi PCRE2_SIZE ovecsave[3];
248*22dc650dSSadaf Ebrahimi pcre2_substitute_callout_block scb;
249*22dc650dSSadaf Ebrahimi
250*22dc650dSSadaf Ebrahimi /* General initialization */
251*22dc650dSSadaf Ebrahimi
252*22dc650dSSadaf Ebrahimi buff_offset = 0;
253*22dc650dSSadaf Ebrahimi lengthleft = buff_length = *blength;
254*22dc650dSSadaf Ebrahimi *blength = PCRE2_UNSET;
255*22dc650dSSadaf Ebrahimi ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET;
256*22dc650dSSadaf Ebrahimi
257*22dc650dSSadaf Ebrahimi /* Partial matching is not valid. This must come after setting *blength to
258*22dc650dSSadaf Ebrahimi PCRE2_UNSET, so as not to imply an offset in the replacement. */
259*22dc650dSSadaf Ebrahimi
260*22dc650dSSadaf Ebrahimi if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
261*22dc650dSSadaf Ebrahimi return PCRE2_ERROR_BADOPTION;
262*22dc650dSSadaf Ebrahimi
263*22dc650dSSadaf Ebrahimi /* Validate length and find the end of the replacement. A NULL replacement of
264*22dc650dSSadaf Ebrahimi zero length is interpreted as an empty string. */
265*22dc650dSSadaf Ebrahimi
266*22dc650dSSadaf Ebrahimi if (replacement == NULL)
267*22dc650dSSadaf Ebrahimi {
268*22dc650dSSadaf Ebrahimi if (rlength != 0) return PCRE2_ERROR_NULL;
269*22dc650dSSadaf Ebrahimi replacement = (PCRE2_SPTR)"";
270*22dc650dSSadaf Ebrahimi }
271*22dc650dSSadaf Ebrahimi
272*22dc650dSSadaf Ebrahimi if (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement);
273*22dc650dSSadaf Ebrahimi repend = replacement + rlength;
274*22dc650dSSadaf Ebrahimi
275*22dc650dSSadaf Ebrahimi /* Check for using a match that has already happened. Note that the subject
276*22dc650dSSadaf Ebrahimi pointer in the match data may be NULL after a no-match. */
277*22dc650dSSadaf Ebrahimi
278*22dc650dSSadaf Ebrahimi use_existing_match = ((options & PCRE2_SUBSTITUTE_MATCHED) != 0);
279*22dc650dSSadaf Ebrahimi replacement_only = ((options & PCRE2_SUBSTITUTE_REPLACEMENT_ONLY) != 0);
280*22dc650dSSadaf Ebrahimi
281*22dc650dSSadaf Ebrahimi /* If starting from an existing match, there must be an externally provided
282*22dc650dSSadaf Ebrahimi match data block. We create an internal match_data block in two cases: (a) an
283*22dc650dSSadaf Ebrahimi external one is not supplied (and we are not starting from an existing match);
284*22dc650dSSadaf Ebrahimi (b) an existing match is to be used for the first substitution. In the latter
285*22dc650dSSadaf Ebrahimi case, we copy the existing match into the internal block, except for any cached
286*22dc650dSSadaf Ebrahimi heap frame size and pointer. This ensures that no changes are made to the
287*22dc650dSSadaf Ebrahimi external match data block. */
288*22dc650dSSadaf Ebrahimi
289*22dc650dSSadaf Ebrahimi if (match_data == NULL)
290*22dc650dSSadaf Ebrahimi {
291*22dc650dSSadaf Ebrahimi pcre2_general_context *gcontext;
292*22dc650dSSadaf Ebrahimi if (use_existing_match) return PCRE2_ERROR_NULL;
293*22dc650dSSadaf Ebrahimi gcontext = (mcontext == NULL)?
294*22dc650dSSadaf Ebrahimi (pcre2_general_context *)code :
295*22dc650dSSadaf Ebrahimi (pcre2_general_context *)mcontext;
296*22dc650dSSadaf Ebrahimi match_data = internal_match_data =
297*22dc650dSSadaf Ebrahimi pcre2_match_data_create_from_pattern(code, gcontext);
298*22dc650dSSadaf Ebrahimi if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY;
299*22dc650dSSadaf Ebrahimi }
300*22dc650dSSadaf Ebrahimi
301*22dc650dSSadaf Ebrahimi else if (use_existing_match)
302*22dc650dSSadaf Ebrahimi {
303*22dc650dSSadaf Ebrahimi pcre2_general_context *gcontext = (mcontext == NULL)?
304*22dc650dSSadaf Ebrahimi (pcre2_general_context *)code :
305*22dc650dSSadaf Ebrahimi (pcre2_general_context *)mcontext;
306*22dc650dSSadaf Ebrahimi int pairs = (code->top_bracket + 1 < match_data->oveccount)?
307*22dc650dSSadaf Ebrahimi code->top_bracket + 1 : match_data->oveccount;
308*22dc650dSSadaf Ebrahimi internal_match_data = pcre2_match_data_create(match_data->oveccount,
309*22dc650dSSadaf Ebrahimi gcontext);
310*22dc650dSSadaf Ebrahimi if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY;
311*22dc650dSSadaf Ebrahimi memcpy(internal_match_data, match_data, offsetof(pcre2_match_data, ovector)
312*22dc650dSSadaf Ebrahimi + 2*pairs*sizeof(PCRE2_SIZE));
313*22dc650dSSadaf Ebrahimi internal_match_data->heapframes = NULL;
314*22dc650dSSadaf Ebrahimi internal_match_data->heapframes_size = 0;
315*22dc650dSSadaf Ebrahimi match_data = internal_match_data;
316*22dc650dSSadaf Ebrahimi }
317*22dc650dSSadaf Ebrahimi
318*22dc650dSSadaf Ebrahimi /* Remember ovector details */
319*22dc650dSSadaf Ebrahimi
320*22dc650dSSadaf Ebrahimi ovector = pcre2_get_ovector_pointer(match_data);
321*22dc650dSSadaf Ebrahimi ovector_count = pcre2_get_ovector_count(match_data);
322*22dc650dSSadaf Ebrahimi
323*22dc650dSSadaf Ebrahimi /* Fixed things in the callout block */
324*22dc650dSSadaf Ebrahimi
325*22dc650dSSadaf Ebrahimi scb.version = 0;
326*22dc650dSSadaf Ebrahimi scb.input = subject;
327*22dc650dSSadaf Ebrahimi scb.output = (PCRE2_SPTR)buffer;
328*22dc650dSSadaf Ebrahimi scb.ovector = ovector;
329*22dc650dSSadaf Ebrahimi
330*22dc650dSSadaf Ebrahimi /* A NULL subject of zero length is treated as an empty string. */
331*22dc650dSSadaf Ebrahimi
332*22dc650dSSadaf Ebrahimi if (subject == NULL)
333*22dc650dSSadaf Ebrahimi {
334*22dc650dSSadaf Ebrahimi if (length != 0) return PCRE2_ERROR_NULL;
335*22dc650dSSadaf Ebrahimi subject = (PCRE2_SPTR)"";
336*22dc650dSSadaf Ebrahimi }
337*22dc650dSSadaf Ebrahimi
338*22dc650dSSadaf Ebrahimi /* Find length of zero-terminated subject */
339*22dc650dSSadaf Ebrahimi
340*22dc650dSSadaf Ebrahimi if (length == PCRE2_ZERO_TERMINATED)
341*22dc650dSSadaf Ebrahimi length = subject? PRIV(strlen)(subject) : 0;
342*22dc650dSSadaf Ebrahimi
343*22dc650dSSadaf Ebrahimi /* Check UTF replacement string if necessary. */
344*22dc650dSSadaf Ebrahimi
345*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
346*22dc650dSSadaf Ebrahimi if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
347*22dc650dSSadaf Ebrahimi {
348*22dc650dSSadaf Ebrahimi rc = PRIV(valid_utf)(replacement, rlength, &(match_data->startchar));
349*22dc650dSSadaf Ebrahimi if (rc != 0)
350*22dc650dSSadaf Ebrahimi {
351*22dc650dSSadaf Ebrahimi match_data->leftchar = 0;
352*22dc650dSSadaf Ebrahimi goto EXIT;
353*22dc650dSSadaf Ebrahimi }
354*22dc650dSSadaf Ebrahimi }
355*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE */
356*22dc650dSSadaf Ebrahimi
357*22dc650dSSadaf Ebrahimi /* Save the substitute options and remove them from the match options. */
358*22dc650dSSadaf Ebrahimi
359*22dc650dSSadaf Ebrahimi suboptions = options & SUBSTITUTE_OPTIONS;
360*22dc650dSSadaf Ebrahimi options &= ~SUBSTITUTE_OPTIONS;
361*22dc650dSSadaf Ebrahimi
362*22dc650dSSadaf Ebrahimi /* Error if the start match offset is greater than the length of the subject. */
363*22dc650dSSadaf Ebrahimi
364*22dc650dSSadaf Ebrahimi if (start_offset > length)
365*22dc650dSSadaf Ebrahimi {
366*22dc650dSSadaf Ebrahimi match_data->leftchar = 0;
367*22dc650dSSadaf Ebrahimi rc = PCRE2_ERROR_BADOFFSET;
368*22dc650dSSadaf Ebrahimi goto EXIT;
369*22dc650dSSadaf Ebrahimi }
370*22dc650dSSadaf Ebrahimi
371*22dc650dSSadaf Ebrahimi /* Copy up to the start offset, unless only the replacement is required. */
372*22dc650dSSadaf Ebrahimi
373*22dc650dSSadaf Ebrahimi if (!replacement_only) CHECKMEMCPY(subject, start_offset);
374*22dc650dSSadaf Ebrahimi
375*22dc650dSSadaf Ebrahimi /* Loop for global substituting. If PCRE2_SUBSTITUTE_MATCHED is set, the first
376*22dc650dSSadaf Ebrahimi match is taken from the match_data that was passed in. */
377*22dc650dSSadaf Ebrahimi
378*22dc650dSSadaf Ebrahimi subs = 0;
379*22dc650dSSadaf Ebrahimi do
380*22dc650dSSadaf Ebrahimi {
381*22dc650dSSadaf Ebrahimi PCRE2_SPTR ptrstack[PTR_STACK_SIZE];
382*22dc650dSSadaf Ebrahimi uint32_t ptrstackptr = 0;
383*22dc650dSSadaf Ebrahimi
384*22dc650dSSadaf Ebrahimi if (use_existing_match)
385*22dc650dSSadaf Ebrahimi {
386*22dc650dSSadaf Ebrahimi rc = match_data->rc;
387*22dc650dSSadaf Ebrahimi use_existing_match = FALSE;
388*22dc650dSSadaf Ebrahimi }
389*22dc650dSSadaf Ebrahimi else rc = pcre2_match(code, subject, length, start_offset, options|goptions,
390*22dc650dSSadaf Ebrahimi match_data, mcontext);
391*22dc650dSSadaf Ebrahimi
392*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
393*22dc650dSSadaf Ebrahimi if (utf) options |= PCRE2_NO_UTF_CHECK; /* Only need to check once */
394*22dc650dSSadaf Ebrahimi #endif
395*22dc650dSSadaf Ebrahimi
396*22dc650dSSadaf Ebrahimi /* Any error other than no match returns the error code. No match when not
397*22dc650dSSadaf Ebrahimi doing the special after-empty-match global rematch, or when at the end of the
398*22dc650dSSadaf Ebrahimi subject, breaks the global loop. Otherwise, advance the starting point by one
399*22dc650dSSadaf Ebrahimi character, copying it to the output, and try again. */
400*22dc650dSSadaf Ebrahimi
401*22dc650dSSadaf Ebrahimi if (rc < 0)
402*22dc650dSSadaf Ebrahimi {
403*22dc650dSSadaf Ebrahimi PCRE2_SIZE save_start;
404*22dc650dSSadaf Ebrahimi
405*22dc650dSSadaf Ebrahimi if (rc != PCRE2_ERROR_NOMATCH) goto EXIT;
406*22dc650dSSadaf Ebrahimi if (goptions == 0 || start_offset >= length) break;
407*22dc650dSSadaf Ebrahimi
408*22dc650dSSadaf Ebrahimi /* Advance by one code point. Then, if CRLF is a valid newline sequence and
409*22dc650dSSadaf Ebrahimi we have advanced into the middle of it, advance one more code point. In
410*22dc650dSSadaf Ebrahimi other words, do not start in the middle of CRLF, even if CR and LF on their
411*22dc650dSSadaf Ebrahimi own are valid newlines. */
412*22dc650dSSadaf Ebrahimi
413*22dc650dSSadaf Ebrahimi save_start = start_offset++;
414*22dc650dSSadaf Ebrahimi if (subject[start_offset-1] == CHAR_CR &&
415*22dc650dSSadaf Ebrahimi code->newline_convention != PCRE2_NEWLINE_CR &&
416*22dc650dSSadaf Ebrahimi code->newline_convention != PCRE2_NEWLINE_LF &&
417*22dc650dSSadaf Ebrahimi start_offset < length &&
418*22dc650dSSadaf Ebrahimi subject[start_offset] == CHAR_LF)
419*22dc650dSSadaf Ebrahimi start_offset++;
420*22dc650dSSadaf Ebrahimi
421*22dc650dSSadaf Ebrahimi /* Otherwise, in UTF mode, advance past any secondary code points. */
422*22dc650dSSadaf Ebrahimi
423*22dc650dSSadaf Ebrahimi else if ((code->overall_options & PCRE2_UTF) != 0)
424*22dc650dSSadaf Ebrahimi {
425*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8
426*22dc650dSSadaf Ebrahimi while (start_offset < length && (subject[start_offset] & 0xc0) == 0x80)
427*22dc650dSSadaf Ebrahimi start_offset++;
428*22dc650dSSadaf Ebrahimi #elif PCRE2_CODE_UNIT_WIDTH == 16
429*22dc650dSSadaf Ebrahimi while (start_offset < length &&
430*22dc650dSSadaf Ebrahimi (subject[start_offset] & 0xfc00) == 0xdc00)
431*22dc650dSSadaf Ebrahimi start_offset++;
432*22dc650dSSadaf Ebrahimi #endif
433*22dc650dSSadaf Ebrahimi }
434*22dc650dSSadaf Ebrahimi
435*22dc650dSSadaf Ebrahimi /* Copy what we have advanced past (unless not required), reset the special
436*22dc650dSSadaf Ebrahimi global options, and continue to the next match. */
437*22dc650dSSadaf Ebrahimi
438*22dc650dSSadaf Ebrahimi fraglength = start_offset - save_start;
439*22dc650dSSadaf Ebrahimi if (!replacement_only) CHECKMEMCPY(subject + save_start, fraglength);
440*22dc650dSSadaf Ebrahimi goptions = 0;
441*22dc650dSSadaf Ebrahimi continue;
442*22dc650dSSadaf Ebrahimi }
443*22dc650dSSadaf Ebrahimi
444*22dc650dSSadaf Ebrahimi /* Handle a successful match. Matches that use \K to end before they start
445*22dc650dSSadaf Ebrahimi or start before the current point in the subject are not supported. */
446*22dc650dSSadaf Ebrahimi
447*22dc650dSSadaf Ebrahimi if (ovector[1] < ovector[0] || ovector[0] < start_offset)
448*22dc650dSSadaf Ebrahimi {
449*22dc650dSSadaf Ebrahimi rc = PCRE2_ERROR_BADSUBSPATTERN;
450*22dc650dSSadaf Ebrahimi goto EXIT;
451*22dc650dSSadaf Ebrahimi }
452*22dc650dSSadaf Ebrahimi
453*22dc650dSSadaf Ebrahimi /* Check for the same match as previous. This is legitimate after matching an
454*22dc650dSSadaf Ebrahimi empty string that starts after the initial match offset. We have tried again
455*22dc650dSSadaf Ebrahimi at the match point in case the pattern is one like /(?<=\G.)/ which can never
456*22dc650dSSadaf Ebrahimi match at its starting point, so running the match achieves the bumpalong. If
457*22dc650dSSadaf Ebrahimi we do get the same (null) match at the original match point, it isn't such a
458*22dc650dSSadaf Ebrahimi pattern, so we now do the empty string magic. In all other cases, a repeat
459*22dc650dSSadaf Ebrahimi match should never occur. */
460*22dc650dSSadaf Ebrahimi
461*22dc650dSSadaf Ebrahimi if (ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1])
462*22dc650dSSadaf Ebrahimi {
463*22dc650dSSadaf Ebrahimi if (ovector[0] == ovector[1] && ovecsave[2] != start_offset)
464*22dc650dSSadaf Ebrahimi {
465*22dc650dSSadaf Ebrahimi goptions = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
466*22dc650dSSadaf Ebrahimi ovecsave[2] = start_offset;
467*22dc650dSSadaf Ebrahimi continue; /* Back to the top of the loop */
468*22dc650dSSadaf Ebrahimi }
469*22dc650dSSadaf Ebrahimi rc = PCRE2_ERROR_INTERNAL_DUPMATCH;
470*22dc650dSSadaf Ebrahimi goto EXIT;
471*22dc650dSSadaf Ebrahimi }
472*22dc650dSSadaf Ebrahimi
473*22dc650dSSadaf Ebrahimi /* Count substitutions with a paranoid check for integer overflow; surely no
474*22dc650dSSadaf Ebrahimi real call to this function would ever hit this! */
475*22dc650dSSadaf Ebrahimi
476*22dc650dSSadaf Ebrahimi if (subs == INT_MAX)
477*22dc650dSSadaf Ebrahimi {
478*22dc650dSSadaf Ebrahimi rc = PCRE2_ERROR_TOOMANYREPLACE;
479*22dc650dSSadaf Ebrahimi goto EXIT;
480*22dc650dSSadaf Ebrahimi }
481*22dc650dSSadaf Ebrahimi subs++;
482*22dc650dSSadaf Ebrahimi
483*22dc650dSSadaf Ebrahimi /* Copy the text leading up to the match (unless not required), and remember
484*22dc650dSSadaf Ebrahimi where the insert begins and how many ovector pairs are set. */
485*22dc650dSSadaf Ebrahimi
486*22dc650dSSadaf Ebrahimi if (rc == 0) rc = ovector_count;
487*22dc650dSSadaf Ebrahimi fraglength = ovector[0] - start_offset;
488*22dc650dSSadaf Ebrahimi if (!replacement_only) CHECKMEMCPY(subject + start_offset, fraglength);
489*22dc650dSSadaf Ebrahimi scb.output_offsets[0] = buff_offset;
490*22dc650dSSadaf Ebrahimi scb.oveccount = rc;
491*22dc650dSSadaf Ebrahimi
492*22dc650dSSadaf Ebrahimi /* Process the replacement string. If the entire replacement is literal, just
493*22dc650dSSadaf Ebrahimi copy it with length check. */
494*22dc650dSSadaf Ebrahimi
495*22dc650dSSadaf Ebrahimi ptr = replacement;
496*22dc650dSSadaf Ebrahimi if ((suboptions & PCRE2_SUBSTITUTE_LITERAL) != 0)
497*22dc650dSSadaf Ebrahimi {
498*22dc650dSSadaf Ebrahimi CHECKMEMCPY(ptr, rlength);
499*22dc650dSSadaf Ebrahimi }
500*22dc650dSSadaf Ebrahimi
501*22dc650dSSadaf Ebrahimi /* Within a non-literal replacement, which must be scanned character by
502*22dc650dSSadaf Ebrahimi character, local literal mode can be set by \Q, but only in extended mode
503*22dc650dSSadaf Ebrahimi when backslashes are being interpreted. In extended mode we must handle
504*22dc650dSSadaf Ebrahimi nested substrings that are to be reprocessed. */
505*22dc650dSSadaf Ebrahimi
506*22dc650dSSadaf Ebrahimi else for (;;)
507*22dc650dSSadaf Ebrahimi {
508*22dc650dSSadaf Ebrahimi uint32_t ch;
509*22dc650dSSadaf Ebrahimi unsigned int chlen;
510*22dc650dSSadaf Ebrahimi
511*22dc650dSSadaf Ebrahimi /* If at the end of a nested substring, pop the stack. */
512*22dc650dSSadaf Ebrahimi
513*22dc650dSSadaf Ebrahimi if (ptr >= repend)
514*22dc650dSSadaf Ebrahimi {
515*22dc650dSSadaf Ebrahimi if (ptrstackptr == 0) break; /* End of replacement string */
516*22dc650dSSadaf Ebrahimi repend = ptrstack[--ptrstackptr];
517*22dc650dSSadaf Ebrahimi ptr = ptrstack[--ptrstackptr];
518*22dc650dSSadaf Ebrahimi continue;
519*22dc650dSSadaf Ebrahimi }
520*22dc650dSSadaf Ebrahimi
521*22dc650dSSadaf Ebrahimi /* Handle the next character */
522*22dc650dSSadaf Ebrahimi
523*22dc650dSSadaf Ebrahimi if (escaped_literal)
524*22dc650dSSadaf Ebrahimi {
525*22dc650dSSadaf Ebrahimi if (ptr[0] == CHAR_BACKSLASH && ptr < repend - 1 && ptr[1] == CHAR_E)
526*22dc650dSSadaf Ebrahimi {
527*22dc650dSSadaf Ebrahimi escaped_literal = FALSE;
528*22dc650dSSadaf Ebrahimi ptr += 2;
529*22dc650dSSadaf Ebrahimi continue;
530*22dc650dSSadaf Ebrahimi }
531*22dc650dSSadaf Ebrahimi goto LOADLITERAL;
532*22dc650dSSadaf Ebrahimi }
533*22dc650dSSadaf Ebrahimi
534*22dc650dSSadaf Ebrahimi /* Not in literal mode. */
535*22dc650dSSadaf Ebrahimi
536*22dc650dSSadaf Ebrahimi if (*ptr == CHAR_DOLLAR_SIGN)
537*22dc650dSSadaf Ebrahimi {
538*22dc650dSSadaf Ebrahimi int group, n;
539*22dc650dSSadaf Ebrahimi uint32_t special = 0;
540*22dc650dSSadaf Ebrahimi BOOL inparens;
541*22dc650dSSadaf Ebrahimi BOOL star;
542*22dc650dSSadaf Ebrahimi PCRE2_SIZE sublength;
543*22dc650dSSadaf Ebrahimi PCRE2_SPTR text1_start = NULL;
544*22dc650dSSadaf Ebrahimi PCRE2_SPTR text1_end = NULL;
545*22dc650dSSadaf Ebrahimi PCRE2_SPTR text2_start = NULL;
546*22dc650dSSadaf Ebrahimi PCRE2_SPTR text2_end = NULL;
547*22dc650dSSadaf Ebrahimi PCRE2_UCHAR next;
548*22dc650dSSadaf Ebrahimi PCRE2_UCHAR name[33];
549*22dc650dSSadaf Ebrahimi
550*22dc650dSSadaf Ebrahimi if (++ptr >= repend) goto BAD;
551*22dc650dSSadaf Ebrahimi if ((next = *ptr) == CHAR_DOLLAR_SIGN) goto LOADLITERAL;
552*22dc650dSSadaf Ebrahimi
553*22dc650dSSadaf Ebrahimi group = -1;
554*22dc650dSSadaf Ebrahimi n = 0;
555*22dc650dSSadaf Ebrahimi inparens = FALSE;
556*22dc650dSSadaf Ebrahimi star = FALSE;
557*22dc650dSSadaf Ebrahimi
558*22dc650dSSadaf Ebrahimi if (next == CHAR_LEFT_CURLY_BRACKET)
559*22dc650dSSadaf Ebrahimi {
560*22dc650dSSadaf Ebrahimi if (++ptr >= repend) goto BAD;
561*22dc650dSSadaf Ebrahimi next = *ptr;
562*22dc650dSSadaf Ebrahimi inparens = TRUE;
563*22dc650dSSadaf Ebrahimi }
564*22dc650dSSadaf Ebrahimi
565*22dc650dSSadaf Ebrahimi if (next == CHAR_ASTERISK)
566*22dc650dSSadaf Ebrahimi {
567*22dc650dSSadaf Ebrahimi if (++ptr >= repend) goto BAD;
568*22dc650dSSadaf Ebrahimi next = *ptr;
569*22dc650dSSadaf Ebrahimi star = TRUE;
570*22dc650dSSadaf Ebrahimi }
571*22dc650dSSadaf Ebrahimi
572*22dc650dSSadaf Ebrahimi if (!star && next >= CHAR_0 && next <= CHAR_9)
573*22dc650dSSadaf Ebrahimi {
574*22dc650dSSadaf Ebrahimi group = next - CHAR_0;
575*22dc650dSSadaf Ebrahimi while (++ptr < repend)
576*22dc650dSSadaf Ebrahimi {
577*22dc650dSSadaf Ebrahimi next = *ptr;
578*22dc650dSSadaf Ebrahimi if (next < CHAR_0 || next > CHAR_9) break;
579*22dc650dSSadaf Ebrahimi group = group * 10 + next - CHAR_0;
580*22dc650dSSadaf Ebrahimi
581*22dc650dSSadaf Ebrahimi /* A check for a number greater than the highest captured group
582*22dc650dSSadaf Ebrahimi is sufficient here; no need for a separate overflow check. If unknown
583*22dc650dSSadaf Ebrahimi groups are to be treated as unset, just skip over any remaining
584*22dc650dSSadaf Ebrahimi digits and carry on. */
585*22dc650dSSadaf Ebrahimi
586*22dc650dSSadaf Ebrahimi if (group > code->top_bracket)
587*22dc650dSSadaf Ebrahimi {
588*22dc650dSSadaf Ebrahimi if ((suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
589*22dc650dSSadaf Ebrahimi {
590*22dc650dSSadaf Ebrahimi while (++ptr < repend && *ptr >= CHAR_0 && *ptr <= CHAR_9);
591*22dc650dSSadaf Ebrahimi break;
592*22dc650dSSadaf Ebrahimi }
593*22dc650dSSadaf Ebrahimi else
594*22dc650dSSadaf Ebrahimi {
595*22dc650dSSadaf Ebrahimi rc = PCRE2_ERROR_NOSUBSTRING;
596*22dc650dSSadaf Ebrahimi goto PTREXIT;
597*22dc650dSSadaf Ebrahimi }
598*22dc650dSSadaf Ebrahimi }
599*22dc650dSSadaf Ebrahimi }
600*22dc650dSSadaf Ebrahimi }
601*22dc650dSSadaf Ebrahimi else
602*22dc650dSSadaf Ebrahimi {
603*22dc650dSSadaf Ebrahimi const uint8_t *ctypes = code->tables + ctypes_offset;
604*22dc650dSSadaf Ebrahimi while (MAX_255(next) && (ctypes[next] & ctype_word) != 0)
605*22dc650dSSadaf Ebrahimi {
606*22dc650dSSadaf Ebrahimi name[n++] = next;
607*22dc650dSSadaf Ebrahimi if (n > 32) goto BAD;
608*22dc650dSSadaf Ebrahimi if (++ptr >= repend) break;
609*22dc650dSSadaf Ebrahimi next = *ptr;
610*22dc650dSSadaf Ebrahimi }
611*22dc650dSSadaf Ebrahimi if (n == 0) goto BAD;
612*22dc650dSSadaf Ebrahimi name[n] = 0;
613*22dc650dSSadaf Ebrahimi }
614*22dc650dSSadaf Ebrahimi
615*22dc650dSSadaf Ebrahimi /* In extended mode we recognize ${name:+set text:unset text} and
616*22dc650dSSadaf Ebrahimi ${name:-default text}. */
617*22dc650dSSadaf Ebrahimi
618*22dc650dSSadaf Ebrahimi if (inparens)
619*22dc650dSSadaf Ebrahimi {
620*22dc650dSSadaf Ebrahimi if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 &&
621*22dc650dSSadaf Ebrahimi !star && ptr < repend - 2 && next == CHAR_COLON)
622*22dc650dSSadaf Ebrahimi {
623*22dc650dSSadaf Ebrahimi special = *(++ptr);
624*22dc650dSSadaf Ebrahimi if (special != CHAR_PLUS && special != CHAR_MINUS)
625*22dc650dSSadaf Ebrahimi {
626*22dc650dSSadaf Ebrahimi rc = PCRE2_ERROR_BADSUBSTITUTION;
627*22dc650dSSadaf Ebrahimi goto PTREXIT;
628*22dc650dSSadaf Ebrahimi }
629*22dc650dSSadaf Ebrahimi
630*22dc650dSSadaf Ebrahimi text1_start = ++ptr;
631*22dc650dSSadaf Ebrahimi rc = find_text_end(code, &ptr, repend, special == CHAR_MINUS);
632*22dc650dSSadaf Ebrahimi if (rc != 0) goto PTREXIT;
633*22dc650dSSadaf Ebrahimi text1_end = ptr;
634*22dc650dSSadaf Ebrahimi
635*22dc650dSSadaf Ebrahimi if (special == CHAR_PLUS && *ptr == CHAR_COLON)
636*22dc650dSSadaf Ebrahimi {
637*22dc650dSSadaf Ebrahimi text2_start = ++ptr;
638*22dc650dSSadaf Ebrahimi rc = find_text_end(code, &ptr, repend, TRUE);
639*22dc650dSSadaf Ebrahimi if (rc != 0) goto PTREXIT;
640*22dc650dSSadaf Ebrahimi text2_end = ptr;
641*22dc650dSSadaf Ebrahimi }
642*22dc650dSSadaf Ebrahimi }
643*22dc650dSSadaf Ebrahimi
644*22dc650dSSadaf Ebrahimi else
645*22dc650dSSadaf Ebrahimi {
646*22dc650dSSadaf Ebrahimi if (ptr >= repend || *ptr != CHAR_RIGHT_CURLY_BRACKET)
647*22dc650dSSadaf Ebrahimi {
648*22dc650dSSadaf Ebrahimi rc = PCRE2_ERROR_REPMISSINGBRACE;
649*22dc650dSSadaf Ebrahimi goto PTREXIT;
650*22dc650dSSadaf Ebrahimi }
651*22dc650dSSadaf Ebrahimi }
652*22dc650dSSadaf Ebrahimi
653*22dc650dSSadaf Ebrahimi ptr++;
654*22dc650dSSadaf Ebrahimi }
655*22dc650dSSadaf Ebrahimi
656*22dc650dSSadaf Ebrahimi /* Have found a syntactically correct group number or name, or *name.
657*22dc650dSSadaf Ebrahimi Only *MARK is currently recognized. */
658*22dc650dSSadaf Ebrahimi
659*22dc650dSSadaf Ebrahimi if (star)
660*22dc650dSSadaf Ebrahimi {
661*22dc650dSSadaf Ebrahimi if (PRIV(strcmp_c8)(name, STRING_MARK) == 0)
662*22dc650dSSadaf Ebrahimi {
663*22dc650dSSadaf Ebrahimi PCRE2_SPTR mark = pcre2_get_mark(match_data);
664*22dc650dSSadaf Ebrahimi if (mark != NULL)
665*22dc650dSSadaf Ebrahimi {
666*22dc650dSSadaf Ebrahimi PCRE2_SPTR mark_start = mark;
667*22dc650dSSadaf Ebrahimi while (*mark != 0) mark++;
668*22dc650dSSadaf Ebrahimi fraglength = mark - mark_start;
669*22dc650dSSadaf Ebrahimi CHECKMEMCPY(mark_start, fraglength);
670*22dc650dSSadaf Ebrahimi }
671*22dc650dSSadaf Ebrahimi }
672*22dc650dSSadaf Ebrahimi else goto BAD;
673*22dc650dSSadaf Ebrahimi }
674*22dc650dSSadaf Ebrahimi
675*22dc650dSSadaf Ebrahimi /* Substitute the contents of a group. We don't use substring_copy
676*22dc650dSSadaf Ebrahimi functions any more, in order to support case forcing. */
677*22dc650dSSadaf Ebrahimi
678*22dc650dSSadaf Ebrahimi else
679*22dc650dSSadaf Ebrahimi {
680*22dc650dSSadaf Ebrahimi PCRE2_SPTR subptr, subptrend;
681*22dc650dSSadaf Ebrahimi
682*22dc650dSSadaf Ebrahimi /* Find a number for a named group. In case there are duplicate names,
683*22dc650dSSadaf Ebrahimi search for the first one that is set. If the name is not found when
684*22dc650dSSadaf Ebrahimi PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set, set the group number to a
685*22dc650dSSadaf Ebrahimi non-existent group. */
686*22dc650dSSadaf Ebrahimi
687*22dc650dSSadaf Ebrahimi if (group < 0)
688*22dc650dSSadaf Ebrahimi {
689*22dc650dSSadaf Ebrahimi PCRE2_SPTR first, last, entry;
690*22dc650dSSadaf Ebrahimi rc = pcre2_substring_nametable_scan(code, name, &first, &last);
691*22dc650dSSadaf Ebrahimi if (rc == PCRE2_ERROR_NOSUBSTRING &&
692*22dc650dSSadaf Ebrahimi (suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
693*22dc650dSSadaf Ebrahimi {
694*22dc650dSSadaf Ebrahimi group = code->top_bracket + 1;
695*22dc650dSSadaf Ebrahimi }
696*22dc650dSSadaf Ebrahimi else
697*22dc650dSSadaf Ebrahimi {
698*22dc650dSSadaf Ebrahimi if (rc < 0) goto PTREXIT;
699*22dc650dSSadaf Ebrahimi for (entry = first; entry <= last; entry += rc)
700*22dc650dSSadaf Ebrahimi {
701*22dc650dSSadaf Ebrahimi uint32_t ng = GET2(entry, 0);
702*22dc650dSSadaf Ebrahimi if (ng < ovector_count)
703*22dc650dSSadaf Ebrahimi {
704*22dc650dSSadaf Ebrahimi if (group < 0) group = ng; /* First in ovector */
705*22dc650dSSadaf Ebrahimi if (ovector[ng*2] != PCRE2_UNSET)
706*22dc650dSSadaf Ebrahimi {
707*22dc650dSSadaf Ebrahimi group = ng; /* First that is set */
708*22dc650dSSadaf Ebrahimi break;
709*22dc650dSSadaf Ebrahimi }
710*22dc650dSSadaf Ebrahimi }
711*22dc650dSSadaf Ebrahimi }
712*22dc650dSSadaf Ebrahimi
713*22dc650dSSadaf Ebrahimi /* If group is still negative, it means we did not find a group
714*22dc650dSSadaf Ebrahimi that is in the ovector. Just set the first group. */
715*22dc650dSSadaf Ebrahimi
716*22dc650dSSadaf Ebrahimi if (group < 0) group = GET2(first, 0);
717*22dc650dSSadaf Ebrahimi }
718*22dc650dSSadaf Ebrahimi }
719*22dc650dSSadaf Ebrahimi
720*22dc650dSSadaf Ebrahimi /* We now have a group that is identified by number. Find the length of
721*22dc650dSSadaf Ebrahimi the captured string. If a group in a non-special substitution is unset
722*22dc650dSSadaf Ebrahimi when PCRE2_SUBSTITUTE_UNSET_EMPTY is set, substitute nothing. */
723*22dc650dSSadaf Ebrahimi
724*22dc650dSSadaf Ebrahimi rc = pcre2_substring_length_bynumber(match_data, group, &sublength);
725*22dc650dSSadaf Ebrahimi if (rc < 0)
726*22dc650dSSadaf Ebrahimi {
727*22dc650dSSadaf Ebrahimi if (rc == PCRE2_ERROR_NOSUBSTRING &&
728*22dc650dSSadaf Ebrahimi (suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
729*22dc650dSSadaf Ebrahimi {
730*22dc650dSSadaf Ebrahimi rc = PCRE2_ERROR_UNSET;
731*22dc650dSSadaf Ebrahimi }
732*22dc650dSSadaf Ebrahimi if (rc != PCRE2_ERROR_UNSET) goto PTREXIT; /* Non-unset errors */
733*22dc650dSSadaf Ebrahimi if (special == 0) /* Plain substitution */
734*22dc650dSSadaf Ebrahimi {
735*22dc650dSSadaf Ebrahimi if ((suboptions & PCRE2_SUBSTITUTE_UNSET_EMPTY) != 0) continue;
736*22dc650dSSadaf Ebrahimi goto PTREXIT; /* Else error */
737*22dc650dSSadaf Ebrahimi }
738*22dc650dSSadaf Ebrahimi }
739*22dc650dSSadaf Ebrahimi
740*22dc650dSSadaf Ebrahimi /* If special is '+' we have a 'set' and possibly an 'unset' text,
741*22dc650dSSadaf Ebrahimi both of which are reprocessed when used. If special is '-' we have a
742*22dc650dSSadaf Ebrahimi default text for when the group is unset; it must be reprocessed. */
743*22dc650dSSadaf Ebrahimi
744*22dc650dSSadaf Ebrahimi if (special != 0)
745*22dc650dSSadaf Ebrahimi {
746*22dc650dSSadaf Ebrahimi if (special == CHAR_MINUS)
747*22dc650dSSadaf Ebrahimi {
748*22dc650dSSadaf Ebrahimi if (rc == 0) goto LITERAL_SUBSTITUTE;
749*22dc650dSSadaf Ebrahimi text2_start = text1_start;
750*22dc650dSSadaf Ebrahimi text2_end = text1_end;
751*22dc650dSSadaf Ebrahimi }
752*22dc650dSSadaf Ebrahimi
753*22dc650dSSadaf Ebrahimi if (ptrstackptr >= PTR_STACK_SIZE) goto BAD;
754*22dc650dSSadaf Ebrahimi ptrstack[ptrstackptr++] = ptr;
755*22dc650dSSadaf Ebrahimi ptrstack[ptrstackptr++] = repend;
756*22dc650dSSadaf Ebrahimi
757*22dc650dSSadaf Ebrahimi if (rc == 0)
758*22dc650dSSadaf Ebrahimi {
759*22dc650dSSadaf Ebrahimi ptr = text1_start;
760*22dc650dSSadaf Ebrahimi repend = text1_end;
761*22dc650dSSadaf Ebrahimi }
762*22dc650dSSadaf Ebrahimi else
763*22dc650dSSadaf Ebrahimi {
764*22dc650dSSadaf Ebrahimi ptr = text2_start;
765*22dc650dSSadaf Ebrahimi repend = text2_end;
766*22dc650dSSadaf Ebrahimi }
767*22dc650dSSadaf Ebrahimi continue;
768*22dc650dSSadaf Ebrahimi }
769*22dc650dSSadaf Ebrahimi
770*22dc650dSSadaf Ebrahimi /* Otherwise we have a literal substitution of a group's contents. */
771*22dc650dSSadaf Ebrahimi
772*22dc650dSSadaf Ebrahimi LITERAL_SUBSTITUTE:
773*22dc650dSSadaf Ebrahimi subptr = subject + ovector[group*2];
774*22dc650dSSadaf Ebrahimi subptrend = subject + ovector[group*2 + 1];
775*22dc650dSSadaf Ebrahimi
776*22dc650dSSadaf Ebrahimi /* Substitute a literal string, possibly forcing alphabetic case. */
777*22dc650dSSadaf Ebrahimi
778*22dc650dSSadaf Ebrahimi while (subptr < subptrend)
779*22dc650dSSadaf Ebrahimi {
780*22dc650dSSadaf Ebrahimi GETCHARINCTEST(ch, subptr);
781*22dc650dSSadaf Ebrahimi if (forcecase != 0)
782*22dc650dSSadaf Ebrahimi {
783*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
784*22dc650dSSadaf Ebrahimi if (utf || ucp)
785*22dc650dSSadaf Ebrahimi {
786*22dc650dSSadaf Ebrahimi uint32_t type = UCD_CHARTYPE(ch);
787*22dc650dSSadaf Ebrahimi if (PRIV(ucp_gentype)[type] == ucp_L &&
788*22dc650dSSadaf Ebrahimi type != ((forcecase > 0)? ucp_Lu : ucp_Ll))
789*22dc650dSSadaf Ebrahimi ch = UCD_OTHERCASE(ch);
790*22dc650dSSadaf Ebrahimi }
791*22dc650dSSadaf Ebrahimi else
792*22dc650dSSadaf Ebrahimi #endif
793*22dc650dSSadaf Ebrahimi {
794*22dc650dSSadaf Ebrahimi if (((code->tables + cbits_offset +
795*22dc650dSSadaf Ebrahimi ((forcecase > 0)? cbit_upper:cbit_lower)
796*22dc650dSSadaf Ebrahimi )[ch/8] & (1u << (ch%8))) == 0)
797*22dc650dSSadaf Ebrahimi ch = (code->tables + fcc_offset)[ch];
798*22dc650dSSadaf Ebrahimi }
799*22dc650dSSadaf Ebrahimi forcecase = forcecasereset;
800*22dc650dSSadaf Ebrahimi }
801*22dc650dSSadaf Ebrahimi
802*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
803*22dc650dSSadaf Ebrahimi if (utf) chlen = PRIV(ord2utf)(ch, temp); else
804*22dc650dSSadaf Ebrahimi #endif
805*22dc650dSSadaf Ebrahimi {
806*22dc650dSSadaf Ebrahimi temp[0] = ch;
807*22dc650dSSadaf Ebrahimi chlen = 1;
808*22dc650dSSadaf Ebrahimi }
809*22dc650dSSadaf Ebrahimi CHECKMEMCPY(temp, chlen);
810*22dc650dSSadaf Ebrahimi }
811*22dc650dSSadaf Ebrahimi }
812*22dc650dSSadaf Ebrahimi }
813*22dc650dSSadaf Ebrahimi
814*22dc650dSSadaf Ebrahimi /* Handle an escape sequence in extended mode. We can use check_escape()
815*22dc650dSSadaf Ebrahimi to process \Q, \E, \c, \o, \x and \ followed by non-alphanumerics, but
816*22dc650dSSadaf Ebrahimi the case-forcing escapes are not supported in pcre2_compile() so must be
817*22dc650dSSadaf Ebrahimi recognized here. */
818*22dc650dSSadaf Ebrahimi
819*22dc650dSSadaf Ebrahimi else if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 &&
820*22dc650dSSadaf Ebrahimi *ptr == CHAR_BACKSLASH)
821*22dc650dSSadaf Ebrahimi {
822*22dc650dSSadaf Ebrahimi int errorcode;
823*22dc650dSSadaf Ebrahimi
824*22dc650dSSadaf Ebrahimi if (ptr < repend - 1) switch (ptr[1])
825*22dc650dSSadaf Ebrahimi {
826*22dc650dSSadaf Ebrahimi case CHAR_L:
827*22dc650dSSadaf Ebrahimi forcecase = forcecasereset = -1;
828*22dc650dSSadaf Ebrahimi ptr += 2;
829*22dc650dSSadaf Ebrahimi continue;
830*22dc650dSSadaf Ebrahimi
831*22dc650dSSadaf Ebrahimi case CHAR_l:
832*22dc650dSSadaf Ebrahimi forcecase = -1;
833*22dc650dSSadaf Ebrahimi forcecasereset = 0;
834*22dc650dSSadaf Ebrahimi ptr += 2;
835*22dc650dSSadaf Ebrahimi continue;
836*22dc650dSSadaf Ebrahimi
837*22dc650dSSadaf Ebrahimi case CHAR_U:
838*22dc650dSSadaf Ebrahimi forcecase = forcecasereset = 1;
839*22dc650dSSadaf Ebrahimi ptr += 2;
840*22dc650dSSadaf Ebrahimi continue;
841*22dc650dSSadaf Ebrahimi
842*22dc650dSSadaf Ebrahimi case CHAR_u:
843*22dc650dSSadaf Ebrahimi forcecase = 1;
844*22dc650dSSadaf Ebrahimi forcecasereset = 0;
845*22dc650dSSadaf Ebrahimi ptr += 2;
846*22dc650dSSadaf Ebrahimi continue;
847*22dc650dSSadaf Ebrahimi
848*22dc650dSSadaf Ebrahimi default:
849*22dc650dSSadaf Ebrahimi break;
850*22dc650dSSadaf Ebrahimi }
851*22dc650dSSadaf Ebrahimi
852*22dc650dSSadaf Ebrahimi ptr++; /* Point after \ */
853*22dc650dSSadaf Ebrahimi rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode,
854*22dc650dSSadaf Ebrahimi code->overall_options, code->extra_options, FALSE, NULL);
855*22dc650dSSadaf Ebrahimi if (errorcode != 0) goto BADESCAPE;
856*22dc650dSSadaf Ebrahimi
857*22dc650dSSadaf Ebrahimi switch(rc)
858*22dc650dSSadaf Ebrahimi {
859*22dc650dSSadaf Ebrahimi case ESC_E:
860*22dc650dSSadaf Ebrahimi forcecase = forcecasereset = 0;
861*22dc650dSSadaf Ebrahimi continue;
862*22dc650dSSadaf Ebrahimi
863*22dc650dSSadaf Ebrahimi case ESC_Q:
864*22dc650dSSadaf Ebrahimi escaped_literal = TRUE;
865*22dc650dSSadaf Ebrahimi continue;
866*22dc650dSSadaf Ebrahimi
867*22dc650dSSadaf Ebrahimi case 0: /* Data character */
868*22dc650dSSadaf Ebrahimi goto LITERAL;
869*22dc650dSSadaf Ebrahimi
870*22dc650dSSadaf Ebrahimi default:
871*22dc650dSSadaf Ebrahimi goto BADESCAPE;
872*22dc650dSSadaf Ebrahimi }
873*22dc650dSSadaf Ebrahimi }
874*22dc650dSSadaf Ebrahimi
875*22dc650dSSadaf Ebrahimi /* Handle a literal code unit */
876*22dc650dSSadaf Ebrahimi
877*22dc650dSSadaf Ebrahimi else
878*22dc650dSSadaf Ebrahimi {
879*22dc650dSSadaf Ebrahimi LOADLITERAL:
880*22dc650dSSadaf Ebrahimi GETCHARINCTEST(ch, ptr); /* Get character value, increment pointer */
881*22dc650dSSadaf Ebrahimi
882*22dc650dSSadaf Ebrahimi LITERAL:
883*22dc650dSSadaf Ebrahimi if (forcecase != 0)
884*22dc650dSSadaf Ebrahimi {
885*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
886*22dc650dSSadaf Ebrahimi if (utf || ucp)
887*22dc650dSSadaf Ebrahimi {
888*22dc650dSSadaf Ebrahimi uint32_t type = UCD_CHARTYPE(ch);
889*22dc650dSSadaf Ebrahimi if (PRIV(ucp_gentype)[type] == ucp_L &&
890*22dc650dSSadaf Ebrahimi type != ((forcecase > 0)? ucp_Lu : ucp_Ll))
891*22dc650dSSadaf Ebrahimi ch = UCD_OTHERCASE(ch);
892*22dc650dSSadaf Ebrahimi }
893*22dc650dSSadaf Ebrahimi else
894*22dc650dSSadaf Ebrahimi #endif
895*22dc650dSSadaf Ebrahimi {
896*22dc650dSSadaf Ebrahimi if (((code->tables + cbits_offset +
897*22dc650dSSadaf Ebrahimi ((forcecase > 0)? cbit_upper:cbit_lower)
898*22dc650dSSadaf Ebrahimi )[ch/8] & (1u << (ch%8))) == 0)
899*22dc650dSSadaf Ebrahimi ch = (code->tables + fcc_offset)[ch];
900*22dc650dSSadaf Ebrahimi }
901*22dc650dSSadaf Ebrahimi forcecase = forcecasereset;
902*22dc650dSSadaf Ebrahimi }
903*22dc650dSSadaf Ebrahimi
904*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
905*22dc650dSSadaf Ebrahimi if (utf) chlen = PRIV(ord2utf)(ch, temp); else
906*22dc650dSSadaf Ebrahimi #endif
907*22dc650dSSadaf Ebrahimi {
908*22dc650dSSadaf Ebrahimi temp[0] = ch;
909*22dc650dSSadaf Ebrahimi chlen = 1;
910*22dc650dSSadaf Ebrahimi }
911*22dc650dSSadaf Ebrahimi CHECKMEMCPY(temp, chlen);
912*22dc650dSSadaf Ebrahimi } /* End handling a literal code unit */
913*22dc650dSSadaf Ebrahimi } /* End of loop for scanning the replacement. */
914*22dc650dSSadaf Ebrahimi
915*22dc650dSSadaf Ebrahimi /* The replacement has been copied to the output, or its size has been
916*22dc650dSSadaf Ebrahimi remembered. Do the callout if there is one and we have done an actual
917*22dc650dSSadaf Ebrahimi replacement. */
918*22dc650dSSadaf Ebrahimi
919*22dc650dSSadaf Ebrahimi if (!overflowed && mcontext != NULL && mcontext->substitute_callout != NULL)
920*22dc650dSSadaf Ebrahimi {
921*22dc650dSSadaf Ebrahimi scb.subscount = subs;
922*22dc650dSSadaf Ebrahimi scb.output_offsets[1] = buff_offset;
923*22dc650dSSadaf Ebrahimi rc = mcontext->substitute_callout(&scb, mcontext->substitute_callout_data);
924*22dc650dSSadaf Ebrahimi
925*22dc650dSSadaf Ebrahimi /* A non-zero return means cancel this substitution. Instead, copy the
926*22dc650dSSadaf Ebrahimi matched string fragment. */
927*22dc650dSSadaf Ebrahimi
928*22dc650dSSadaf Ebrahimi if (rc != 0)
929*22dc650dSSadaf Ebrahimi {
930*22dc650dSSadaf Ebrahimi PCRE2_SIZE newlength = scb.output_offsets[1] - scb.output_offsets[0];
931*22dc650dSSadaf Ebrahimi PCRE2_SIZE oldlength = ovector[1] - ovector[0];
932*22dc650dSSadaf Ebrahimi
933*22dc650dSSadaf Ebrahimi buff_offset -= newlength;
934*22dc650dSSadaf Ebrahimi lengthleft += newlength;
935*22dc650dSSadaf Ebrahimi if (!replacement_only) CHECKMEMCPY(subject + ovector[0], oldlength);
936*22dc650dSSadaf Ebrahimi
937*22dc650dSSadaf Ebrahimi /* A negative return means do not do any more. */
938*22dc650dSSadaf Ebrahimi
939*22dc650dSSadaf Ebrahimi if (rc < 0) suboptions &= (~PCRE2_SUBSTITUTE_GLOBAL);
940*22dc650dSSadaf Ebrahimi }
941*22dc650dSSadaf Ebrahimi }
942*22dc650dSSadaf Ebrahimi
943*22dc650dSSadaf Ebrahimi /* Save the details of this match. See above for how this data is used. If we
944*22dc650dSSadaf Ebrahimi matched an empty string, do the magic for global matches. Update the start
945*22dc650dSSadaf Ebrahimi offset to point to the rest of the subject string. If we re-used an existing
946*22dc650dSSadaf Ebrahimi match for the first match, switch to the internal match data block. */
947*22dc650dSSadaf Ebrahimi
948*22dc650dSSadaf Ebrahimi ovecsave[0] = ovector[0];
949*22dc650dSSadaf Ebrahimi ovecsave[1] = ovector[1];
950*22dc650dSSadaf Ebrahimi ovecsave[2] = start_offset;
951*22dc650dSSadaf Ebrahimi
952*22dc650dSSadaf Ebrahimi goptions = (ovector[0] != ovector[1] || ovector[0] > start_offset)? 0 :
953*22dc650dSSadaf Ebrahimi PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART;
954*22dc650dSSadaf Ebrahimi start_offset = ovector[1];
955*22dc650dSSadaf Ebrahimi } while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != 0); /* Repeat "do" loop */
956*22dc650dSSadaf Ebrahimi
957*22dc650dSSadaf Ebrahimi /* Copy the rest of the subject unless not required, and terminate the output
958*22dc650dSSadaf Ebrahimi with a binary zero. */
959*22dc650dSSadaf Ebrahimi
960*22dc650dSSadaf Ebrahimi if (!replacement_only)
961*22dc650dSSadaf Ebrahimi {
962*22dc650dSSadaf Ebrahimi fraglength = length - start_offset;
963*22dc650dSSadaf Ebrahimi CHECKMEMCPY(subject + start_offset, fraglength);
964*22dc650dSSadaf Ebrahimi }
965*22dc650dSSadaf Ebrahimi
966*22dc650dSSadaf Ebrahimi temp[0] = 0;
967*22dc650dSSadaf Ebrahimi CHECKMEMCPY(temp, 1);
968*22dc650dSSadaf Ebrahimi
969*22dc650dSSadaf Ebrahimi /* If overflowed is set it means the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set,
970*22dc650dSSadaf Ebrahimi and matching has carried on after a full buffer, in order to compute the length
971*22dc650dSSadaf Ebrahimi needed. Otherwise, an overflow generates an immediate error return. */
972*22dc650dSSadaf Ebrahimi
973*22dc650dSSadaf Ebrahimi if (overflowed)
974*22dc650dSSadaf Ebrahimi {
975*22dc650dSSadaf Ebrahimi rc = PCRE2_ERROR_NOMEMORY;
976*22dc650dSSadaf Ebrahimi *blength = buff_length + extra_needed;
977*22dc650dSSadaf Ebrahimi }
978*22dc650dSSadaf Ebrahimi
979*22dc650dSSadaf Ebrahimi /* After a successful execution, return the number of substitutions and set the
980*22dc650dSSadaf Ebrahimi length of buffer used, excluding the trailing zero. */
981*22dc650dSSadaf Ebrahimi
982*22dc650dSSadaf Ebrahimi else
983*22dc650dSSadaf Ebrahimi {
984*22dc650dSSadaf Ebrahimi rc = subs;
985*22dc650dSSadaf Ebrahimi *blength = buff_offset - 1;
986*22dc650dSSadaf Ebrahimi }
987*22dc650dSSadaf Ebrahimi
988*22dc650dSSadaf Ebrahimi EXIT:
989*22dc650dSSadaf Ebrahimi if (internal_match_data != NULL) pcre2_match_data_free(internal_match_data);
990*22dc650dSSadaf Ebrahimi else match_data->rc = rc;
991*22dc650dSSadaf Ebrahimi return rc;
992*22dc650dSSadaf Ebrahimi
993*22dc650dSSadaf Ebrahimi NOROOM:
994*22dc650dSSadaf Ebrahimi rc = PCRE2_ERROR_NOMEMORY;
995*22dc650dSSadaf Ebrahimi goto EXIT;
996*22dc650dSSadaf Ebrahimi
997*22dc650dSSadaf Ebrahimi BAD:
998*22dc650dSSadaf Ebrahimi rc = PCRE2_ERROR_BADREPLACEMENT;
999*22dc650dSSadaf Ebrahimi goto PTREXIT;
1000*22dc650dSSadaf Ebrahimi
1001*22dc650dSSadaf Ebrahimi BADESCAPE:
1002*22dc650dSSadaf Ebrahimi rc = PCRE2_ERROR_BADREPESCAPE;
1003*22dc650dSSadaf Ebrahimi
1004*22dc650dSSadaf Ebrahimi PTREXIT:
1005*22dc650dSSadaf Ebrahimi *blength = (PCRE2_SIZE)(ptr - replacement);
1006*22dc650dSSadaf Ebrahimi goto EXIT;
1007*22dc650dSSadaf Ebrahimi }
1008*22dc650dSSadaf Ebrahimi
1009*22dc650dSSadaf Ebrahimi /* End of pcre2_substitute.c */
1010