1 /*
2  * Secret Labs' Regular Expression Engine
3  *
4  * regular expression matching engine
5  *
6  * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
7  *
8  * See the sre.c file for information on usage and redistribution.
9  */
10 
11 /* String matching engine */
12 
13 /* This file is included three times, with different character settings */
14 
15 LOCAL(int)
SRE(at)16 SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
17 {
18     /* check if pointer is at given position */
19 
20     Py_ssize_t thisp, thatp;
21 
22     switch (at) {
23 
24     case SRE_AT_BEGINNING:
25     case SRE_AT_BEGINNING_STRING:
26         return ((void*) ptr == state->beginning);
27 
28     case SRE_AT_BEGINNING_LINE:
29         return ((void*) ptr == state->beginning ||
30                 SRE_IS_LINEBREAK((int) ptr[-1]));
31 
32     case SRE_AT_END:
33         return (((SRE_CHAR *)state->end - ptr == 1 &&
34                  SRE_IS_LINEBREAK((int) ptr[0])) ||
35                 ((void*) ptr == state->end));
36 
37     case SRE_AT_END_LINE:
38         return ((void*) ptr == state->end ||
39                 SRE_IS_LINEBREAK((int) ptr[0]));
40 
41     case SRE_AT_END_STRING:
42         return ((void*) ptr == state->end);
43 
44     case SRE_AT_BOUNDARY:
45         if (state->beginning == state->end)
46             return 0;
47         thatp = ((void*) ptr > state->beginning) ?
48             SRE_IS_WORD((int) ptr[-1]) : 0;
49         thisp = ((void*) ptr < state->end) ?
50             SRE_IS_WORD((int) ptr[0]) : 0;
51         return thisp != thatp;
52 
53     case SRE_AT_NON_BOUNDARY:
54         if (state->beginning == state->end)
55             return 0;
56         thatp = ((void*) ptr > state->beginning) ?
57             SRE_IS_WORD((int) ptr[-1]) : 0;
58         thisp = ((void*) ptr < state->end) ?
59             SRE_IS_WORD((int) ptr[0]) : 0;
60         return thisp == thatp;
61 
62     case SRE_AT_LOC_BOUNDARY:
63         if (state->beginning == state->end)
64             return 0;
65         thatp = ((void*) ptr > state->beginning) ?
66             SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
67         thisp = ((void*) ptr < state->end) ?
68             SRE_LOC_IS_WORD((int) ptr[0]) : 0;
69         return thisp != thatp;
70 
71     case SRE_AT_LOC_NON_BOUNDARY:
72         if (state->beginning == state->end)
73             return 0;
74         thatp = ((void*) ptr > state->beginning) ?
75             SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
76         thisp = ((void*) ptr < state->end) ?
77             SRE_LOC_IS_WORD((int) ptr[0]) : 0;
78         return thisp == thatp;
79 
80     case SRE_AT_UNI_BOUNDARY:
81         if (state->beginning == state->end)
82             return 0;
83         thatp = ((void*) ptr > state->beginning) ?
84             SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
85         thisp = ((void*) ptr < state->end) ?
86             SRE_UNI_IS_WORD((int) ptr[0]) : 0;
87         return thisp != thatp;
88 
89     case SRE_AT_UNI_NON_BOUNDARY:
90         if (state->beginning == state->end)
91             return 0;
92         thatp = ((void*) ptr > state->beginning) ?
93             SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
94         thisp = ((void*) ptr < state->end) ?
95             SRE_UNI_IS_WORD((int) ptr[0]) : 0;
96         return thisp == thatp;
97 
98     }
99 
100     return 0;
101 }
102 
103 LOCAL(int)
SRE(charset)104 SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
105 {
106     /* check if character is a member of the given set */
107 
108     int ok = 1;
109 
110     for (;;) {
111         switch (*set++) {
112 
113         case SRE_OP_FAILURE:
114             return !ok;
115 
116         case SRE_OP_LITERAL:
117             /* <LITERAL> <code> */
118             if (ch == set[0])
119                 return ok;
120             set++;
121             break;
122 
123         case SRE_OP_CATEGORY:
124             /* <CATEGORY> <code> */
125             if (sre_category(set[0], (int) ch))
126                 return ok;
127             set++;
128             break;
129 
130         case SRE_OP_CHARSET:
131             /* <CHARSET> <bitmap> */
132             if (ch < 256 &&
133                 (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
134                 return ok;
135             set += 256/SRE_CODE_BITS;
136             break;
137 
138         case SRE_OP_RANGE:
139             /* <RANGE> <lower> <upper> */
140             if (set[0] <= ch && ch <= set[1])
141                 return ok;
142             set += 2;
143             break;
144 
145         case SRE_OP_RANGE_UNI_IGNORE:
146             /* <RANGE_UNI_IGNORE> <lower> <upper> */
147         {
148             SRE_CODE uch;
149             /* ch is already lower cased */
150             if (set[0] <= ch && ch <= set[1])
151                 return ok;
152             uch = sre_upper_unicode(ch);
153             if (set[0] <= uch && uch <= set[1])
154                 return ok;
155             set += 2;
156             break;
157         }
158 
159         case SRE_OP_NEGATE:
160             ok = !ok;
161             break;
162 
163         case SRE_OP_BIGCHARSET:
164             /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
165         {
166             Py_ssize_t count, block;
167             count = *(set++);
168 
169             if (ch < 0x10000u)
170                 block = ((unsigned char*)set)[ch >> 8];
171             else
172                 block = -1;
173             set += 256/sizeof(SRE_CODE);
174             if (block >=0 &&
175                 (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
176                     (1u << (ch & (SRE_CODE_BITS-1)))))
177                 return ok;
178             set += count * (256/SRE_CODE_BITS);
179             break;
180         }
181 
182         default:
183             /* internal error -- there's not much we can do about it
184                here, so let's just pretend it didn't match... */
185             return 0;
186         }
187     }
188 }
189 
190 LOCAL(int)
SRE(charset_loc_ignore)191 SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
192 {
193     SRE_CODE lo, up;
194     lo = sre_lower_locale(ch);
195     if (SRE(charset)(state, set, lo))
196        return 1;
197 
198     up = sre_upper_locale(ch);
199     return up != lo && SRE(charset)(state, set, up);
200 }
201 
202 LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel);
203 
204 LOCAL(Py_ssize_t)
SRE(count)205 SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount)
206 {
207     SRE_CODE chr;
208     SRE_CHAR c;
209     const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
210     const SRE_CHAR* end = (const SRE_CHAR *)state->end;
211     Py_ssize_t i;
212 
213     /* adjust end */
214     if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
215         end = ptr + maxcount;
216 
217     switch (pattern[0]) {
218 
219     case SRE_OP_IN:
220         /* repeated set */
221         TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
222         while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
223             ptr++;
224         break;
225 
226     case SRE_OP_ANY:
227         /* repeated dot wildcard. */
228         TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
229         while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
230             ptr++;
231         break;
232 
233     case SRE_OP_ANY_ALL:
234         /* repeated dot wildcard.  skip to the end of the target
235            string, and backtrack from there */
236         TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
237         ptr = end;
238         break;
239 
240     case SRE_OP_LITERAL:
241         /* repeated literal */
242         chr = pattern[1];
243         TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
244         c = (SRE_CHAR) chr;
245 #if SIZEOF_SRE_CHAR < 4
246         if ((SRE_CODE) c != chr)
247             ; /* literal can't match: doesn't fit in char width */
248         else
249 #endif
250         while (ptr < end && *ptr == c)
251             ptr++;
252         break;
253 
254     case SRE_OP_LITERAL_IGNORE:
255         /* repeated literal */
256         chr = pattern[1];
257         TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
258         while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
259             ptr++;
260         break;
261 
262     case SRE_OP_LITERAL_UNI_IGNORE:
263         /* repeated literal */
264         chr = pattern[1];
265         TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
266         while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
267             ptr++;
268         break;
269 
270     case SRE_OP_LITERAL_LOC_IGNORE:
271         /* repeated literal */
272         chr = pattern[1];
273         TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
274         while (ptr < end && char_loc_ignore(chr, *ptr))
275             ptr++;
276         break;
277 
278     case SRE_OP_NOT_LITERAL:
279         /* repeated non-literal */
280         chr = pattern[1];
281         TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
282         c = (SRE_CHAR) chr;
283 #if SIZEOF_SRE_CHAR < 4
284         if ((SRE_CODE) c != chr)
285             ptr = end; /* literal can't match: doesn't fit in char width */
286         else
287 #endif
288         while (ptr < end && *ptr != c)
289             ptr++;
290         break;
291 
292     case SRE_OP_NOT_LITERAL_IGNORE:
293         /* repeated non-literal */
294         chr = pattern[1];
295         TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
296         while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
297             ptr++;
298         break;
299 
300     case SRE_OP_NOT_LITERAL_UNI_IGNORE:
301         /* repeated non-literal */
302         chr = pattern[1];
303         TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
304         while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
305             ptr++;
306         break;
307 
308     case SRE_OP_NOT_LITERAL_LOC_IGNORE:
309         /* repeated non-literal */
310         chr = pattern[1];
311         TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
312         while (ptr < end && !char_loc_ignore(chr, *ptr))
313             ptr++;
314         break;
315 
316     default:
317         /* repeated single character pattern */
318         TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
319         while ((SRE_CHAR*) state->ptr < end) {
320             i = SRE(match)(state, pattern, 0);
321             if (i < 0)
322                 return i;
323             if (!i)
324                 break;
325         }
326         TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
327                (SRE_CHAR*) state->ptr - ptr));
328         return (SRE_CHAR*) state->ptr - ptr;
329     }
330 
331     TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
332            ptr - (SRE_CHAR*) state->ptr));
333     return ptr - (SRE_CHAR*) state->ptr;
334 }
335 
336 /* The macros below should be used to protect recursive SRE(match)()
337  * calls that *failed* and do *not* return immediately (IOW, those
338  * that will backtrack). Explaining:
339  *
340  * - Recursive SRE(match)() returned true: that's usually a success
341  *   (besides atypical cases like ASSERT_NOT), therefore there's no
342  *   reason to restore lastmark;
343  *
344  * - Recursive SRE(match)() returned false but the current SRE(match)()
345  *   is returning to the caller: If the current SRE(match)() is the
346  *   top function of the recursion, returning false will be a matching
347  *   failure, and it doesn't matter where lastmark is pointing to.
348  *   If it's *not* the top function, it will be a recursive SRE(match)()
349  *   failure by itself, and the calling SRE(match)() will have to deal
350  *   with the failure by the same rules explained here (it will restore
351  *   lastmark by itself if necessary);
352  *
353  * - Recursive SRE(match)() returned false, and will continue the
354  *   outside 'for' loop: must be protected when breaking, since the next
355  *   OP could potentially depend on lastmark;
356  *
357  * - Recursive SRE(match)() returned false, and will be called again
358  *   inside a local for/while loop: must be protected between each
359  *   loop iteration, since the recursive SRE(match)() could do anything,
360  *   and could potentially depend on lastmark.
361  *
362  * For more information, check the discussion at SF patch #712900.
363  */
364 #define LASTMARK_SAVE()     \
365     do { \
366         ctx->lastmark = state->lastmark; \
367         ctx->lastindex = state->lastindex; \
368     } while (0)
369 #define LASTMARK_RESTORE()  \
370     do { \
371         state->lastmark = ctx->lastmark; \
372         state->lastindex = ctx->lastindex; \
373     } while (0)
374 
375 #define RETURN_ERROR(i) do { return i; } while(0)
376 #define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
377 #define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
378 
379 #define RETURN_ON_ERROR(i) \
380     do { if (i < 0) RETURN_ERROR(i); } while (0)
381 #define RETURN_ON_SUCCESS(i) \
382     do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
383 #define RETURN_ON_FAILURE(i) \
384     do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
385 
386 #define DATA_STACK_ALLOC(state, type, ptr) \
387 do { \
388     alloc_pos = state->data_stack_base; \
389     TRACE(("allocating %s in %zd (%zd)\n", \
390            Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
391     if (sizeof(type) > state->data_stack_size - alloc_pos) { \
392         int j = data_stack_grow(state, sizeof(type)); \
393         if (j < 0) return j; \
394         if (ctx_pos != -1) \
395             DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
396     } \
397     ptr = (type*)(state->data_stack+alloc_pos); \
398     state->data_stack_base += sizeof(type); \
399 } while (0)
400 
401 #define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
402 do { \
403     TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), pos)); \
404     ptr = (type*)(state->data_stack+pos); \
405 } while (0)
406 
407 #define DATA_STACK_PUSH(state, data, size) \
408 do { \
409     TRACE(("copy data in %p to %zd (%zd)\n", \
410            data, state->data_stack_base, size)); \
411     if (size > state->data_stack_size - state->data_stack_base) { \
412         int j = data_stack_grow(state, size); \
413         if (j < 0) return j; \
414         if (ctx_pos != -1) \
415             DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
416     } \
417     memcpy(state->data_stack+state->data_stack_base, data, size); \
418     state->data_stack_base += size; \
419 } while (0)
420 
421 /* We add an explicit cast to memcpy here because MSVC has a bug when
422    compiling C code where it believes that `const void**` cannot be
423    safely casted to `void*`, see bpo-39943 for details. */
424 #define DATA_STACK_POP(state, data, size, discard) \
425 do { \
426     TRACE(("copy data to %p from %zd (%zd)\n", \
427            data, state->data_stack_base-size, size)); \
428     memcpy((void*) data, state->data_stack+state->data_stack_base-size, size); \
429     if (discard) \
430         state->data_stack_base -= size; \
431 } while (0)
432 
433 #define DATA_STACK_POP_DISCARD(state, size) \
434 do { \
435     TRACE(("discard data from %zd (%zd)\n", \
436            state->data_stack_base-size, size)); \
437     state->data_stack_base -= size; \
438 } while(0)
439 
440 #define DATA_PUSH(x) \
441     DATA_STACK_PUSH(state, (x), sizeof(*(x)))
442 #define DATA_POP(x) \
443     DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
444 #define DATA_POP_DISCARD(x) \
445     DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
446 #define DATA_ALLOC(t,p) \
447     DATA_STACK_ALLOC(state, t, p)
448 #define DATA_LOOKUP_AT(t,p,pos) \
449     DATA_STACK_LOOKUP_AT(state,t,p,pos)
450 
451 #define MARK_PUSH(lastmark) \
452     do if (lastmark >= 0) { \
453         size_t _marks_size = (lastmark+1) * sizeof(void*); \
454         DATA_STACK_PUSH(state, state->mark, _marks_size); \
455     } while (0)
456 #define MARK_POP(lastmark) \
457     do if (lastmark >= 0) { \
458         size_t _marks_size = (lastmark+1) * sizeof(void*); \
459         DATA_STACK_POP(state, state->mark, _marks_size, 1); \
460     } while (0)
461 #define MARK_POP_KEEP(lastmark) \
462     do if (lastmark >= 0) { \
463         size_t _marks_size = (lastmark+1) * sizeof(void*); \
464         DATA_STACK_POP(state, state->mark, _marks_size, 0); \
465     } while (0)
466 #define MARK_POP_DISCARD(lastmark) \
467     do if (lastmark >= 0) { \
468         size_t _marks_size = (lastmark+1) * sizeof(void*); \
469         DATA_STACK_POP_DISCARD(state, _marks_size); \
470     } while (0)
471 
472 #define JUMP_NONE            0
473 #define JUMP_MAX_UNTIL_1     1
474 #define JUMP_MAX_UNTIL_2     2
475 #define JUMP_MAX_UNTIL_3     3
476 #define JUMP_MIN_UNTIL_1     4
477 #define JUMP_MIN_UNTIL_2     5
478 #define JUMP_MIN_UNTIL_3     6
479 #define JUMP_REPEAT          7
480 #define JUMP_REPEAT_ONE_1    8
481 #define JUMP_REPEAT_ONE_2    9
482 #define JUMP_MIN_REPEAT_ONE  10
483 #define JUMP_BRANCH          11
484 #define JUMP_ASSERT          12
485 #define JUMP_ASSERT_NOT      13
486 #define JUMP_POSS_REPEAT_1   14
487 #define JUMP_POSS_REPEAT_2   15
488 #define JUMP_ATOMIC_GROUP    16
489 
490 #define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
491     ctx->pattern = pattern; \
492     ctx->ptr = ptr; \
493     DATA_ALLOC(SRE(match_context), nextctx); \
494     nextctx->pattern = nextpattern; \
495     nextctx->toplevel = toplevel_; \
496     nextctx->jump = jumpvalue; \
497     nextctx->last_ctx_pos = ctx_pos; \
498     pattern = nextpattern; \
499     ctx_pos = alloc_pos; \
500     ctx = nextctx; \
501     goto entrance; \
502     jumplabel: \
503     pattern = ctx->pattern; \
504     ptr = ctx->ptr;
505 
506 #define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
507     DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
508 
509 #define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
510     DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
511 
512 typedef struct {
513     Py_ssize_t count;
514     union {
515         SRE_CODE chr;
516         SRE_REPEAT* rep;
517     } u;
518     int lastmark;
519     int lastindex;
520     const SRE_CODE* pattern;
521     const SRE_CHAR* ptr;
522     int toplevel;
523     int jump;
524     Py_ssize_t last_ctx_pos;
525 } SRE(match_context);
526 
527 #define MAYBE_CHECK_SIGNALS                                        \
528     do {                                                           \
529         if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \
530             RETURN_ERROR(SRE_ERROR_INTERRUPTED);                   \
531         }                                                          \
532     } while (0)
533 
534 #ifdef HAVE_COMPUTED_GOTOS
535     #ifndef USE_COMPUTED_GOTOS
536     #define USE_COMPUTED_GOTOS 1
537     #endif
538 #elif defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
539     #error "Computed gotos are not supported on this compiler."
540 #else
541     #undef USE_COMPUTED_GOTOS
542     #define USE_COMPUTED_GOTOS 0
543 #endif
544 
545 #if USE_COMPUTED_GOTOS
546     #define TARGET(OP) TARGET_ ## OP
547     #define DISPATCH                       \
548         do {                               \
549             MAYBE_CHECK_SIGNALS;           \
550             goto *sre_targets[*pattern++]; \
551         } while (0)
552 #else
553     #define TARGET(OP) case OP
554     #define DISPATCH goto dispatch
555 #endif
556 
557 /* check if string matches the given pattern.  returns <0 for
558    error, 0 for failure, and 1 for success */
559 LOCAL(Py_ssize_t)
SRE(match)560 SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
561 {
562     const SRE_CHAR* end = (const SRE_CHAR *)state->end;
563     Py_ssize_t alloc_pos, ctx_pos = -1;
564     Py_ssize_t ret = 0;
565     int jump;
566     unsigned int sigcount=0;
567 
568     SRE(match_context)* ctx;
569     SRE(match_context)* nextctx;
570 
571     TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
572 
573     DATA_ALLOC(SRE(match_context), ctx);
574     ctx->last_ctx_pos = -1;
575     ctx->jump = JUMP_NONE;
576     ctx->toplevel = toplevel;
577     ctx_pos = alloc_pos;
578 
579 #if USE_COMPUTED_GOTOS
580 #include "sre_targets.h"
581 #endif
582 
583 entrance:
584 
585     ;  // Fashion statement.
586     const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
587 
588     if (pattern[0] == SRE_OP_INFO) {
589         /* optimization info block */
590         /* <INFO> <1=skip> <2=flags> <3=min> ... */
591         if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
592             TRACE(("reject (got %zd chars, need %zd)\n",
593                    end - ptr, (Py_ssize_t) pattern[3]));
594             RETURN_FAILURE;
595         }
596         pattern += pattern[1] + 1;
597     }
598 
599 #if USE_COMPUTED_GOTOS
600     DISPATCH;
601 #else
602 dispatch:
603     MAYBE_CHECK_SIGNALS;
604     switch (*pattern++)
605 #endif
606     {
607 
608         TARGET(SRE_OP_MARK):
609             /* set mark */
610             /* <MARK> <gid> */
611             TRACE(("|%p|%p|MARK %d\n", pattern,
612                    ptr, pattern[0]));
613             {
614                 int i = pattern[0];
615                 if (i & 1)
616                     state->lastindex = i/2 + 1;
617                 if (i > state->lastmark) {
618                     /* state->lastmark is the highest valid index in the
619                        state->mark array.  If it is increased by more than 1,
620                        the intervening marks must be set to NULL to signal
621                        that these marks have not been encountered. */
622                     int j = state->lastmark + 1;
623                     while (j < i)
624                         state->mark[j++] = NULL;
625                     state->lastmark = i;
626                 }
627                 state->mark[i] = ptr;
628             }
629             pattern++;
630             DISPATCH;
631 
632         TARGET(SRE_OP_LITERAL):
633             /* match literal string */
634             /* <LITERAL> <code> */
635             TRACE(("|%p|%p|LITERAL %d\n", pattern,
636                    ptr, *pattern));
637             if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
638                 RETURN_FAILURE;
639             pattern++;
640             ptr++;
641             DISPATCH;
642 
643         TARGET(SRE_OP_NOT_LITERAL):
644             /* match anything that is not literal character */
645             /* <NOT_LITERAL> <code> */
646             TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
647                    ptr, *pattern));
648             if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
649                 RETURN_FAILURE;
650             pattern++;
651             ptr++;
652             DISPATCH;
653 
654         TARGET(SRE_OP_SUCCESS):
655             /* end of pattern */
656             TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
657             if (ctx->toplevel &&
658                 ((state->match_all && ptr != state->end) ||
659                  (state->must_advance && ptr == state->start)))
660             {
661                 RETURN_FAILURE;
662             }
663             state->ptr = ptr;
664             RETURN_SUCCESS;
665 
666         TARGET(SRE_OP_AT):
667             /* match at given position */
668             /* <AT> <code> */
669             TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
670             if (!SRE(at)(state, ptr, *pattern))
671                 RETURN_FAILURE;
672             pattern++;
673             DISPATCH;
674 
675         TARGET(SRE_OP_CATEGORY):
676             /* match at given category */
677             /* <CATEGORY> <code> */
678             TRACE(("|%p|%p|CATEGORY %d\n", pattern,
679                    ptr, *pattern));
680             if (ptr >= end || !sre_category(pattern[0], ptr[0]))
681                 RETURN_FAILURE;
682             pattern++;
683             ptr++;
684             DISPATCH;
685 
686         TARGET(SRE_OP_ANY):
687             /* match anything (except a newline) */
688             /* <ANY> */
689             TRACE(("|%p|%p|ANY\n", pattern, ptr));
690             if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
691                 RETURN_FAILURE;
692             ptr++;
693             DISPATCH;
694 
695         TARGET(SRE_OP_ANY_ALL):
696             /* match anything */
697             /* <ANY_ALL> */
698             TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
699             if (ptr >= end)
700                 RETURN_FAILURE;
701             ptr++;
702             DISPATCH;
703 
704         TARGET(SRE_OP_IN):
705             /* match set member (or non_member) */
706             /* <IN> <skip> <set> */
707             TRACE(("|%p|%p|IN\n", pattern, ptr));
708             if (ptr >= end ||
709                 !SRE(charset)(state, pattern + 1, *ptr))
710                 RETURN_FAILURE;
711             pattern += pattern[0];
712             ptr++;
713             DISPATCH;
714 
715         TARGET(SRE_OP_LITERAL_IGNORE):
716             TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
717                    pattern, ptr, pattern[0]));
718             if (ptr >= end ||
719                 sre_lower_ascii(*ptr) != *pattern)
720                 RETURN_FAILURE;
721             pattern++;
722             ptr++;
723             DISPATCH;
724 
725         TARGET(SRE_OP_LITERAL_UNI_IGNORE):
726             TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
727                    pattern, ptr, pattern[0]));
728             if (ptr >= end ||
729                 sre_lower_unicode(*ptr) != *pattern)
730                 RETURN_FAILURE;
731             pattern++;
732             ptr++;
733             DISPATCH;
734 
735         TARGET(SRE_OP_LITERAL_LOC_IGNORE):
736             TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
737                    pattern, ptr, pattern[0]));
738             if (ptr >= end
739                 || !char_loc_ignore(*pattern, *ptr))
740                 RETURN_FAILURE;
741             pattern++;
742             ptr++;
743             DISPATCH;
744 
745         TARGET(SRE_OP_NOT_LITERAL_IGNORE):
746             TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
747                    pattern, ptr, *pattern));
748             if (ptr >= end ||
749                 sre_lower_ascii(*ptr) == *pattern)
750                 RETURN_FAILURE;
751             pattern++;
752             ptr++;
753             DISPATCH;
754 
755         TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
756             TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
757                    pattern, ptr, *pattern));
758             if (ptr >= end ||
759                 sre_lower_unicode(*ptr) == *pattern)
760                 RETURN_FAILURE;
761             pattern++;
762             ptr++;
763             DISPATCH;
764 
765         TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
766             TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
767                    pattern, ptr, *pattern));
768             if (ptr >= end
769                 || char_loc_ignore(*pattern, *ptr))
770                 RETURN_FAILURE;
771             pattern++;
772             ptr++;
773             DISPATCH;
774 
775         TARGET(SRE_OP_IN_IGNORE):
776             TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
777             if (ptr >= end
778                 || !SRE(charset)(state, pattern+1,
779                                  (SRE_CODE)sre_lower_ascii(*ptr)))
780                 RETURN_FAILURE;
781             pattern += pattern[0];
782             ptr++;
783             DISPATCH;
784 
785         TARGET(SRE_OP_IN_UNI_IGNORE):
786             TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
787             if (ptr >= end
788                 || !SRE(charset)(state, pattern+1,
789                                  (SRE_CODE)sre_lower_unicode(*ptr)))
790                 RETURN_FAILURE;
791             pattern += pattern[0];
792             ptr++;
793             DISPATCH;
794 
795         TARGET(SRE_OP_IN_LOC_IGNORE):
796             TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
797             if (ptr >= end
798                 || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
799                 RETURN_FAILURE;
800             pattern += pattern[0];
801             ptr++;
802             DISPATCH;
803 
804         TARGET(SRE_OP_JUMP):
805         TARGET(SRE_OP_INFO):
806             /* jump forward */
807             /* <JUMP> <offset> */
808             TRACE(("|%p|%p|JUMP %d\n", pattern,
809                    ptr, pattern[0]));
810             pattern += pattern[0];
811             DISPATCH;
812 
813         TARGET(SRE_OP_BRANCH):
814             /* alternation */
815             /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
816             TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
817             LASTMARK_SAVE();
818             if (state->repeat)
819                 MARK_PUSH(ctx->lastmark);
820             for (; pattern[0]; pattern += pattern[0]) {
821                 if (pattern[1] == SRE_OP_LITERAL &&
822                     (ptr >= end ||
823                      (SRE_CODE) *ptr != pattern[2]))
824                     continue;
825                 if (pattern[1] == SRE_OP_IN &&
826                     (ptr >= end ||
827                      !SRE(charset)(state, pattern + 3,
828                                    (SRE_CODE) *ptr)))
829                     continue;
830                 state->ptr = ptr;
831                 DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
832                 if (ret) {
833                     if (state->repeat)
834                         MARK_POP_DISCARD(ctx->lastmark);
835                     RETURN_ON_ERROR(ret);
836                     RETURN_SUCCESS;
837                 }
838                 if (state->repeat)
839                     MARK_POP_KEEP(ctx->lastmark);
840                 LASTMARK_RESTORE();
841             }
842             if (state->repeat)
843                 MARK_POP_DISCARD(ctx->lastmark);
844             RETURN_FAILURE;
845 
846         TARGET(SRE_OP_REPEAT_ONE):
847             /* match repeated sequence (maximizing regexp) */
848 
849             /* this operator only works if the repeated item is
850                exactly one character wide, and we're not already
851                collecting backtracking points.  for other cases,
852                use the MAX_REPEAT operator */
853 
854             /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
855 
856             TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
857                    pattern[1], pattern[2]));
858 
859             if ((Py_ssize_t) pattern[1] > end - ptr)
860                 RETURN_FAILURE; /* cannot match */
861 
862             state->ptr = ptr;
863 
864             ret = SRE(count)(state, pattern+3, pattern[2]);
865             RETURN_ON_ERROR(ret);
866             DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
867             ctx->count = ret;
868             ptr += ctx->count;
869 
870             /* when we arrive here, count contains the number of
871                matches, and ptr points to the tail of the target
872                string.  check if the rest of the pattern matches,
873                and backtrack if not. */
874 
875             if (ctx->count < (Py_ssize_t) pattern[1])
876                 RETURN_FAILURE;
877 
878             if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
879                 ptr == state->end &&
880                 !(ctx->toplevel && state->must_advance && ptr == state->start))
881             {
882                 /* tail is empty.  we're finished */
883                 state->ptr = ptr;
884                 RETURN_SUCCESS;
885             }
886 
887             LASTMARK_SAVE();
888             if (state->repeat)
889                 MARK_PUSH(ctx->lastmark);
890 
891             if (pattern[pattern[0]] == SRE_OP_LITERAL) {
892                 /* tail starts with a literal. skip positions where
893                    the rest of the pattern cannot possibly match */
894                 ctx->u.chr = pattern[pattern[0]+1];
895                 for (;;) {
896                     while (ctx->count >= (Py_ssize_t) pattern[1] &&
897                            (ptr >= end || *ptr != ctx->u.chr)) {
898                         ptr--;
899                         ctx->count--;
900                     }
901                     if (ctx->count < (Py_ssize_t) pattern[1])
902                         break;
903                     state->ptr = ptr;
904                     DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
905                             pattern+pattern[0]);
906                     if (ret) {
907                         if (state->repeat)
908                             MARK_POP_DISCARD(ctx->lastmark);
909                         RETURN_ON_ERROR(ret);
910                         RETURN_SUCCESS;
911                     }
912                     if (state->repeat)
913                         MARK_POP_KEEP(ctx->lastmark);
914                     LASTMARK_RESTORE();
915 
916                     ptr--;
917                     ctx->count--;
918                 }
919                 if (state->repeat)
920                     MARK_POP_DISCARD(ctx->lastmark);
921             } else {
922                 /* general case */
923                 while (ctx->count >= (Py_ssize_t) pattern[1]) {
924                     state->ptr = ptr;
925                     DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
926                             pattern+pattern[0]);
927                     if (ret) {
928                         if (state->repeat)
929                             MARK_POP_DISCARD(ctx->lastmark);
930                         RETURN_ON_ERROR(ret);
931                         RETURN_SUCCESS;
932                     }
933                     if (state->repeat)
934                         MARK_POP_KEEP(ctx->lastmark);
935                     LASTMARK_RESTORE();
936 
937                     ptr--;
938                     ctx->count--;
939                 }
940                 if (state->repeat)
941                     MARK_POP_DISCARD(ctx->lastmark);
942             }
943             RETURN_FAILURE;
944 
945         TARGET(SRE_OP_MIN_REPEAT_ONE):
946             /* match repeated sequence (minimizing regexp) */
947 
948             /* this operator only works if the repeated item is
949                exactly one character wide, and we're not already
950                collecting backtracking points.  for other cases,
951                use the MIN_REPEAT operator */
952 
953             /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
954 
955             TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
956                    pattern[1], pattern[2]));
957 
958             if ((Py_ssize_t) pattern[1] > end - ptr)
959                 RETURN_FAILURE; /* cannot match */
960 
961             state->ptr = ptr;
962 
963             if (pattern[1] == 0)
964                 ctx->count = 0;
965             else {
966                 /* count using pattern min as the maximum */
967                 ret = SRE(count)(state, pattern+3, pattern[1]);
968                 RETURN_ON_ERROR(ret);
969                 DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
970                 if (ret < (Py_ssize_t) pattern[1])
971                     /* didn't match minimum number of times */
972                     RETURN_FAILURE;
973                 /* advance past minimum matches of repeat */
974                 ctx->count = ret;
975                 ptr += ctx->count;
976             }
977 
978             if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
979                 !(ctx->toplevel &&
980                   ((state->match_all && ptr != state->end) ||
981                    (state->must_advance && ptr == state->start))))
982             {
983                 /* tail is empty.  we're finished */
984                 state->ptr = ptr;
985                 RETURN_SUCCESS;
986 
987             } else {
988                 /* general case */
989                 LASTMARK_SAVE();
990                 if (state->repeat)
991                     MARK_PUSH(ctx->lastmark);
992 
993                 while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
994                        || ctx->count <= (Py_ssize_t)pattern[2]) {
995                     state->ptr = ptr;
996                     DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
997                             pattern+pattern[0]);
998                     if (ret) {
999                         if (state->repeat)
1000                             MARK_POP_DISCARD(ctx->lastmark);
1001                         RETURN_ON_ERROR(ret);
1002                         RETURN_SUCCESS;
1003                     }
1004                     if (state->repeat)
1005                         MARK_POP_KEEP(ctx->lastmark);
1006                     LASTMARK_RESTORE();
1007 
1008                     state->ptr = ptr;
1009                     ret = SRE(count)(state, pattern+3, 1);
1010                     RETURN_ON_ERROR(ret);
1011                     DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1012                     if (ret == 0)
1013                         break;
1014                     assert(ret == 1);
1015                     ptr++;
1016                     ctx->count++;
1017                 }
1018                 if (state->repeat)
1019                     MARK_POP_DISCARD(ctx->lastmark);
1020             }
1021             RETURN_FAILURE;
1022 
1023         TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1024             /* match repeated sequence (maximizing regexp) without
1025                backtracking */
1026 
1027             /* this operator only works if the repeated item is
1028                exactly one character wide, and we're not already
1029                collecting backtracking points.  for other cases,
1030                use the MAX_REPEAT operator */
1031 
1032             /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1033                tail */
1034 
1035             TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1036                    ptr, pattern[1], pattern[2]));
1037 
1038             if (ptr + pattern[1] > end) {
1039                 RETURN_FAILURE; /* cannot match */
1040             }
1041 
1042             state->ptr = ptr;
1043 
1044             ret = SRE(count)(state, pattern + 3, pattern[2]);
1045             RETURN_ON_ERROR(ret);
1046             DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1047             ctx->count = ret;
1048             ptr += ctx->count;
1049 
1050             /* when we arrive here, count contains the number of
1051                matches, and ptr points to the tail of the target
1052                string.  check if the rest of the pattern matches,
1053                and fail if not. */
1054 
1055             /* Test for not enough repetitions in match */
1056             if (ctx->count < (Py_ssize_t) pattern[1]) {
1057                 RETURN_FAILURE;
1058             }
1059 
1060             /* Update the pattern to point to the next op code */
1061             pattern += pattern[0];
1062 
1063             /* Let the tail be evaluated separately and consider this
1064                match successful. */
1065             if (*pattern == SRE_OP_SUCCESS &&
1066                 ptr == state->end &&
1067                 !(ctx->toplevel && state->must_advance && ptr == state->start))
1068             {
1069                 /* tail is empty.  we're finished */
1070                 state->ptr = ptr;
1071                 RETURN_SUCCESS;
1072             }
1073 
1074             /* Attempt to match the rest of the string */
1075             DISPATCH;
1076 
1077         TARGET(SRE_OP_REPEAT):
1078             /* create repeat context.  all the hard work is done
1079                by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1080             /* <REPEAT> <skip> <1=min> <2=max>
1081                <3=repeat_index> item <UNTIL> tail */
1082             TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1083                    pattern[1], pattern[2]));
1084 
1085             /* install new repeat context */
1086             /* TODO(https://github.com/python/cpython/issues/67877): Fix this
1087              * potential memory leak. */
1088             ctx->u.rep = (SRE_REPEAT*) PyObject_Malloc(sizeof(*ctx->u.rep));
1089             if (!ctx->u.rep) {
1090                 PyErr_NoMemory();
1091                 RETURN_FAILURE;
1092             }
1093             ctx->u.rep->count = -1;
1094             ctx->u.rep->pattern = pattern;
1095             ctx->u.rep->prev = state->repeat;
1096             ctx->u.rep->last_ptr = NULL;
1097             state->repeat = ctx->u.rep;
1098 
1099             state->ptr = ptr;
1100             DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1101             state->repeat = ctx->u.rep->prev;
1102             PyObject_Free(ctx->u.rep);
1103 
1104             if (ret) {
1105                 RETURN_ON_ERROR(ret);
1106                 RETURN_SUCCESS;
1107             }
1108             RETURN_FAILURE;
1109 
1110         TARGET(SRE_OP_MAX_UNTIL):
1111             /* maximizing repeat */
1112             /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1113 
1114             /* FIXME: we probably need to deal with zero-width
1115                matches in here... */
1116 
1117             ctx->u.rep = state->repeat;
1118             if (!ctx->u.rep)
1119                 RETURN_ERROR(SRE_ERROR_STATE);
1120 
1121             state->ptr = ptr;
1122 
1123             ctx->count = ctx->u.rep->count+1;
1124 
1125             TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1126                    ptr, ctx->count));
1127 
1128             if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1129                 /* not enough matches */
1130                 ctx->u.rep->count = ctx->count;
1131                 DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1132                         ctx->u.rep->pattern+3);
1133                 if (ret) {
1134                     RETURN_ON_ERROR(ret);
1135                     RETURN_SUCCESS;
1136                 }
1137                 ctx->u.rep->count = ctx->count-1;
1138                 state->ptr = ptr;
1139                 RETURN_FAILURE;
1140             }
1141 
1142             if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1143                 ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1144                 state->ptr != ctx->u.rep->last_ptr) {
1145                 /* we may have enough matches, but if we can
1146                    match another item, do so */
1147                 ctx->u.rep->count = ctx->count;
1148                 LASTMARK_SAVE();
1149                 MARK_PUSH(ctx->lastmark);
1150                 /* zero-width match protection */
1151                 DATA_PUSH(&ctx->u.rep->last_ptr);
1152                 ctx->u.rep->last_ptr = state->ptr;
1153                 DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1154                         ctx->u.rep->pattern+3);
1155                 DATA_POP(&ctx->u.rep->last_ptr);
1156                 if (ret) {
1157                     MARK_POP_DISCARD(ctx->lastmark);
1158                     RETURN_ON_ERROR(ret);
1159                     RETURN_SUCCESS;
1160                 }
1161                 MARK_POP(ctx->lastmark);
1162                 LASTMARK_RESTORE();
1163                 ctx->u.rep->count = ctx->count-1;
1164                 state->ptr = ptr;
1165             }
1166 
1167             /* cannot match more repeated items here.  make sure the
1168                tail matches */
1169             state->repeat = ctx->u.rep->prev;
1170             DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1171             state->repeat = ctx->u.rep; // restore repeat before return
1172 
1173             RETURN_ON_SUCCESS(ret);
1174             state->ptr = ptr;
1175             RETURN_FAILURE;
1176 
1177         TARGET(SRE_OP_MIN_UNTIL):
1178             /* minimizing repeat */
1179             /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1180 
1181             ctx->u.rep = state->repeat;
1182             if (!ctx->u.rep)
1183                 RETURN_ERROR(SRE_ERROR_STATE);
1184 
1185             state->ptr = ptr;
1186 
1187             ctx->count = ctx->u.rep->count+1;
1188 
1189             TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1190                    ptr, ctx->count, ctx->u.rep->pattern));
1191 
1192             if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1193                 /* not enough matches */
1194                 ctx->u.rep->count = ctx->count;
1195                 DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1196                         ctx->u.rep->pattern+3);
1197                 if (ret) {
1198                     RETURN_ON_ERROR(ret);
1199                     RETURN_SUCCESS;
1200                 }
1201                 ctx->u.rep->count = ctx->count-1;
1202                 state->ptr = ptr;
1203                 RETURN_FAILURE;
1204             }
1205 
1206             /* see if the tail matches */
1207             state->repeat = ctx->u.rep->prev;
1208 
1209             LASTMARK_SAVE();
1210             if (state->repeat)
1211                 MARK_PUSH(ctx->lastmark);
1212 
1213             DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1214             SRE_REPEAT *repeat_of_tail = state->repeat;
1215             state->repeat = ctx->u.rep; // restore repeat before return
1216 
1217             if (ret) {
1218                 if (repeat_of_tail)
1219                     MARK_POP_DISCARD(ctx->lastmark);
1220                 RETURN_ON_ERROR(ret);
1221                 RETURN_SUCCESS;
1222             }
1223             if (repeat_of_tail)
1224                 MARK_POP(ctx->lastmark);
1225             LASTMARK_RESTORE();
1226 
1227             state->ptr = ptr;
1228 
1229             if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1230                 && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1231                 state->ptr == ctx->u.rep->last_ptr)
1232                 RETURN_FAILURE;
1233 
1234             ctx->u.rep->count = ctx->count;
1235             /* zero-width match protection */
1236             DATA_PUSH(&ctx->u.rep->last_ptr);
1237             ctx->u.rep->last_ptr = state->ptr;
1238             DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1239                     ctx->u.rep->pattern+3);
1240             DATA_POP(&ctx->u.rep->last_ptr);
1241             if (ret) {
1242                 RETURN_ON_ERROR(ret);
1243                 RETURN_SUCCESS;
1244             }
1245             ctx->u.rep->count = ctx->count-1;
1246             state->ptr = ptr;
1247             RETURN_FAILURE;
1248 
1249         TARGET(SRE_OP_POSSESSIVE_REPEAT):
1250             /* create possessive repeat contexts. */
1251             /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1252                <SUCCESS> tail */
1253             TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1254                    ptr, pattern[1], pattern[2]));
1255 
1256             /* Set the global Input pointer to this context's Input
1257                pointer */
1258             state->ptr = ptr;
1259 
1260             /* Initialize Count to 0 */
1261             ctx->count = 0;
1262 
1263             /* Check for minimum required matches. */
1264             while (ctx->count < (Py_ssize_t)pattern[1]) {
1265                 /* not enough matches */
1266                 DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1267                          &pattern[3]);
1268                 if (ret) {
1269                     RETURN_ON_ERROR(ret);
1270                     ctx->count++;
1271                 }
1272                 else {
1273                     state->ptr = ptr;
1274                     RETURN_FAILURE;
1275                 }
1276             }
1277 
1278             /* Clear the context's Input stream pointer so that it
1279                doesn't match the global state so that the while loop can
1280                be entered. */
1281             ptr = NULL;
1282 
1283             /* Keep trying to parse the <pattern> sub-pattern until the
1284                end is reached, creating a new context each time. */
1285             while ((ctx->count < (Py_ssize_t)pattern[2] ||
1286                     (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1287                    state->ptr != ptr) {
1288                 /* Save the Capture Group Marker state into the current
1289                    Context and back up the current highest number
1290                    Capture Group marker. */
1291                 LASTMARK_SAVE();
1292                 MARK_PUSH(ctx->lastmark);
1293 
1294                 /* zero-width match protection */
1295                 /* Set the context's Input Stream pointer to be the
1296                    current Input Stream pointer from the global
1297                    state.  When the loop reaches the next iteration,
1298                    the context will then store the last known good
1299                    position with the global state holding the Input
1300                    Input Stream position that has been updated with
1301                    the most recent match.  Thus, if state's Input
1302                    stream remains the same as the one stored in the
1303                    current Context, we know we have successfully
1304                    matched an empty string and that all subsequent
1305                    matches will also be the empty string until the
1306                    maximum number of matches are counted, and because
1307                    of this, we could immediately stop at that point and
1308                    consider this match successful. */
1309                 ptr = state->ptr;
1310 
1311                 /* We have not reached the maximin matches, so try to
1312                    match once more. */
1313                 DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1314                          &pattern[3]);
1315 
1316                 /* Check to see if the last attempted match
1317                    succeeded. */
1318                 if (ret) {
1319                     /* Drop the saved highest number Capture Group
1320                        marker saved above and use the newly updated
1321                        value. */
1322                     MARK_POP_DISCARD(ctx->lastmark);
1323                     RETURN_ON_ERROR(ret);
1324 
1325                     /* Success, increment the count. */
1326                     ctx->count++;
1327                 }
1328                 /* Last attempted match failed. */
1329                 else {
1330                     /* Restore the previously saved highest number
1331                        Capture Group marker since the last iteration
1332                        did not match, then restore that to the global
1333                        state. */
1334                     MARK_POP(ctx->lastmark);
1335                     LASTMARK_RESTORE();
1336 
1337                     /* We have sufficient matches, so exit loop. */
1338                     break;
1339                 }
1340             }
1341 
1342             /* Evaluate Tail */
1343             /* Jump to end of pattern indicated by skip, and then skip
1344                the SUCCESS op code that follows it. */
1345             pattern += pattern[0] + 1;
1346             ptr = state->ptr;
1347             DISPATCH;
1348 
1349         TARGET(SRE_OP_ATOMIC_GROUP):
1350             /* Atomic Group Sub Pattern */
1351             /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1352             TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1353 
1354             /* Set the global Input pointer to this context's Input
1355                pointer */
1356             state->ptr = ptr;
1357 
1358             /* Evaluate the Atomic Group in a new context, terminating
1359                when the end of the group, represented by a SUCCESS op
1360                code, is reached. */
1361             /* Group Pattern begins at an offset of 1 code. */
1362             DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1363                      &pattern[1]);
1364 
1365             /* Test Exit Condition */
1366             RETURN_ON_ERROR(ret);
1367 
1368             if (ret == 0) {
1369                 /* Atomic Group failed to Match. */
1370                 state->ptr = ptr;
1371                 RETURN_FAILURE;
1372             }
1373 
1374             /* Evaluate Tail */
1375             /* Jump to end of pattern indicated by skip, and then skip
1376                the SUCCESS op code that follows it. */
1377             pattern += pattern[0];
1378             ptr = state->ptr;
1379             DISPATCH;
1380 
1381         TARGET(SRE_OP_GROUPREF):
1382             /* match backreference */
1383             TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1384                    ptr, pattern[0]));
1385             {
1386                 int groupref = pattern[0] * 2;
1387                 if (groupref >= state->lastmark) {
1388                     RETURN_FAILURE;
1389                 } else {
1390                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1391                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1392                     if (!p || !e || e < p)
1393                         RETURN_FAILURE;
1394                     while (p < e) {
1395                         if (ptr >= end || *ptr != *p)
1396                             RETURN_FAILURE;
1397                         p++;
1398                         ptr++;
1399                     }
1400                 }
1401             }
1402             pattern++;
1403             DISPATCH;
1404 
1405         TARGET(SRE_OP_GROUPREF_IGNORE):
1406             /* match backreference */
1407             TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1408                    ptr, pattern[0]));
1409             {
1410                 int groupref = pattern[0] * 2;
1411                 if (groupref >= state->lastmark) {
1412                     RETURN_FAILURE;
1413                 } else {
1414                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1415                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1416                     if (!p || !e || e < p)
1417                         RETURN_FAILURE;
1418                     while (p < e) {
1419                         if (ptr >= end ||
1420                             sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1421                             RETURN_FAILURE;
1422                         p++;
1423                         ptr++;
1424                     }
1425                 }
1426             }
1427             pattern++;
1428             DISPATCH;
1429 
1430         TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1431             /* match backreference */
1432             TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1433                    ptr, pattern[0]));
1434             {
1435                 int groupref = pattern[0] * 2;
1436                 if (groupref >= state->lastmark) {
1437                     RETURN_FAILURE;
1438                 } else {
1439                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1440                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1441                     if (!p || !e || e < p)
1442                         RETURN_FAILURE;
1443                     while (p < e) {
1444                         if (ptr >= end ||
1445                             sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1446                             RETURN_FAILURE;
1447                         p++;
1448                         ptr++;
1449                     }
1450                 }
1451             }
1452             pattern++;
1453             DISPATCH;
1454 
1455         TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1456             /* match backreference */
1457             TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1458                    ptr, pattern[0]));
1459             {
1460                 int groupref = pattern[0] * 2;
1461                 if (groupref >= state->lastmark) {
1462                     RETURN_FAILURE;
1463                 } else {
1464                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1465                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1466                     if (!p || !e || e < p)
1467                         RETURN_FAILURE;
1468                     while (p < e) {
1469                         if (ptr >= end ||
1470                             sre_lower_locale(*ptr) != sre_lower_locale(*p))
1471                             RETURN_FAILURE;
1472                         p++;
1473                         ptr++;
1474                     }
1475                 }
1476             }
1477             pattern++;
1478             DISPATCH;
1479 
1480         TARGET(SRE_OP_GROUPREF_EXISTS):
1481             TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1482                    ptr, pattern[0]));
1483             /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1484             {
1485                 int groupref = pattern[0] * 2;
1486                 if (groupref >= state->lastmark) {
1487                     pattern += pattern[1];
1488                     DISPATCH;
1489                 } else {
1490                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1491                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1492                     if (!p || !e || e < p) {
1493                         pattern += pattern[1];
1494                         DISPATCH;
1495                     }
1496                 }
1497             }
1498             pattern += 2;
1499             DISPATCH;
1500 
1501         TARGET(SRE_OP_ASSERT):
1502             /* assert subpattern */
1503             /* <ASSERT> <skip> <back> <pattern> */
1504             TRACE(("|%p|%p|ASSERT %d\n", pattern,
1505                    ptr, pattern[1]));
1506             if (ptr - (SRE_CHAR *)state->beginning < (Py_ssize_t)pattern[1])
1507                 RETURN_FAILURE;
1508             state->ptr = ptr - pattern[1];
1509             DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1510             RETURN_ON_FAILURE(ret);
1511             pattern += pattern[0];
1512             DISPATCH;
1513 
1514         TARGET(SRE_OP_ASSERT_NOT):
1515             /* assert not subpattern */
1516             /* <ASSERT_NOT> <skip> <back> <pattern> */
1517             TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1518                    ptr, pattern[1]));
1519             if (ptr - (SRE_CHAR *)state->beginning >= (Py_ssize_t)pattern[1]) {
1520                 state->ptr = ptr - pattern[1];
1521                 LASTMARK_SAVE();
1522                 if (state->repeat)
1523                     MARK_PUSH(ctx->lastmark);
1524 
1525                 DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1526                 if (ret) {
1527                     if (state->repeat)
1528                         MARK_POP_DISCARD(ctx->lastmark);
1529                     RETURN_ON_ERROR(ret);
1530                     RETURN_FAILURE;
1531                 }
1532                 if (state->repeat)
1533                     MARK_POP(ctx->lastmark);
1534                 LASTMARK_RESTORE();
1535             }
1536             pattern += pattern[0];
1537             DISPATCH;
1538 
1539         TARGET(SRE_OP_FAILURE):
1540             /* immediate failure */
1541             TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1542             RETURN_FAILURE;
1543 
1544 #if !USE_COMPUTED_GOTOS
1545         default:
1546 #endif
1547         // Also any unused opcodes:
1548         TARGET(SRE_OP_RANGE_UNI_IGNORE):
1549         TARGET(SRE_OP_SUBPATTERN):
1550         TARGET(SRE_OP_RANGE):
1551         TARGET(SRE_OP_NEGATE):
1552         TARGET(SRE_OP_BIGCHARSET):
1553         TARGET(SRE_OP_CHARSET):
1554             TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1555                    pattern[-1]));
1556             RETURN_ERROR(SRE_ERROR_ILLEGAL);
1557 
1558     }
1559 
1560 exit:
1561     ctx_pos = ctx->last_ctx_pos;
1562     jump = ctx->jump;
1563     DATA_POP_DISCARD(ctx);
1564     if (ctx_pos == -1)
1565         return ret;
1566     DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1567 
1568     switch (jump) {
1569         case JUMP_MAX_UNTIL_2:
1570             TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1571             goto jump_max_until_2;
1572         case JUMP_MAX_UNTIL_3:
1573             TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1574             goto jump_max_until_3;
1575         case JUMP_MIN_UNTIL_2:
1576             TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1577             goto jump_min_until_2;
1578         case JUMP_MIN_UNTIL_3:
1579             TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1580             goto jump_min_until_3;
1581         case JUMP_BRANCH:
1582             TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1583             goto jump_branch;
1584         case JUMP_MAX_UNTIL_1:
1585             TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1586             goto jump_max_until_1;
1587         case JUMP_MIN_UNTIL_1:
1588             TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1589             goto jump_min_until_1;
1590         case JUMP_POSS_REPEAT_1:
1591             TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1592             goto jump_poss_repeat_1;
1593         case JUMP_POSS_REPEAT_2:
1594             TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1595             goto jump_poss_repeat_2;
1596         case JUMP_REPEAT:
1597             TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1598             goto jump_repeat;
1599         case JUMP_REPEAT_ONE_1:
1600             TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1601             goto jump_repeat_one_1;
1602         case JUMP_REPEAT_ONE_2:
1603             TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1604             goto jump_repeat_one_2;
1605         case JUMP_MIN_REPEAT_ONE:
1606             TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1607             goto jump_min_repeat_one;
1608         case JUMP_ATOMIC_GROUP:
1609             TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1610             goto jump_atomic_group;
1611         case JUMP_ASSERT:
1612             TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1613             goto jump_assert;
1614         case JUMP_ASSERT_NOT:
1615             TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1616             goto jump_assert_not;
1617         case JUMP_NONE:
1618             TRACE(("|%p|%p|RETURN %zd\n", pattern,
1619                    ptr, ret));
1620             break;
1621     }
1622 
1623     return ret; /* should never get here */
1624 }
1625 
1626 /* need to reset capturing groups between two SRE(match) callings in loops */
1627 #define RESET_CAPTURE_GROUP() \
1628     do { state->lastmark = state->lastindex = -1; } while (0)
1629 
1630 LOCAL(Py_ssize_t)
SRE(search)1631 SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1632 {
1633     SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1634     SRE_CHAR* end = (SRE_CHAR *)state->end;
1635     Py_ssize_t status = 0;
1636     Py_ssize_t prefix_len = 0;
1637     Py_ssize_t prefix_skip = 0;
1638     SRE_CODE* prefix = NULL;
1639     SRE_CODE* charset = NULL;
1640     SRE_CODE* overlap = NULL;
1641     int flags = 0;
1642 
1643     if (ptr > end)
1644         return 0;
1645 
1646     if (pattern[0] == SRE_OP_INFO) {
1647         /* optimization info block */
1648         /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1649 
1650         flags = pattern[2];
1651 
1652         if (pattern[3] && end - ptr < (Py_ssize_t)pattern[3]) {
1653             TRACE(("reject (got %u chars, need %u)\n",
1654                    (unsigned int)(end - ptr), pattern[3]));
1655             return 0;
1656         }
1657         if (pattern[3] > 1) {
1658             /* adjust end point (but make sure we leave at least one
1659                character in there, so literal search will work) */
1660             end -= pattern[3] - 1;
1661             if (end <= ptr)
1662                 end = ptr;
1663         }
1664 
1665         if (flags & SRE_INFO_PREFIX) {
1666             /* pattern starts with a known prefix */
1667             /* <length> <skip> <prefix data> <overlap data> */
1668             prefix_len = pattern[5];
1669             prefix_skip = pattern[6];
1670             prefix = pattern + 7;
1671             overlap = prefix + prefix_len - 1;
1672         } else if (flags & SRE_INFO_CHARSET)
1673             /* pattern starts with a character from a known set */
1674             /* <charset> */
1675             charset = pattern + 5;
1676 
1677         pattern += 1 + pattern[1];
1678     }
1679 
1680     TRACE(("prefix = %p %zd %zd\n",
1681            prefix, prefix_len, prefix_skip));
1682     TRACE(("charset = %p\n", charset));
1683 
1684     if (prefix_len == 1) {
1685         /* pattern starts with a literal character */
1686         SRE_CHAR c = (SRE_CHAR) prefix[0];
1687 #if SIZEOF_SRE_CHAR < 4
1688         if ((SRE_CODE) c != prefix[0])
1689             return 0; /* literal can't match: doesn't fit in char width */
1690 #endif
1691         end = (SRE_CHAR *)state->end;
1692         state->must_advance = 0;
1693         while (ptr < end) {
1694             while (*ptr != c) {
1695                 if (++ptr >= end)
1696                     return 0;
1697             }
1698             TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1699             state->start = ptr;
1700             state->ptr = ptr + prefix_skip;
1701             if (flags & SRE_INFO_LITERAL)
1702                 return 1; /* we got all of it */
1703             status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1704             if (status != 0)
1705                 return status;
1706             ++ptr;
1707             RESET_CAPTURE_GROUP();
1708         }
1709         return 0;
1710     }
1711 
1712     if (prefix_len > 1) {
1713         /* pattern starts with a known prefix.  use the overlap
1714            table to skip forward as fast as we possibly can */
1715         Py_ssize_t i = 0;
1716 
1717         end = (SRE_CHAR *)state->end;
1718         if (prefix_len > end - ptr)
1719             return 0;
1720 #if SIZEOF_SRE_CHAR < 4
1721         for (i = 0; i < prefix_len; i++)
1722             if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1723                 return 0; /* literal can't match: doesn't fit in char width */
1724 #endif
1725         while (ptr < end) {
1726             SRE_CHAR c = (SRE_CHAR) prefix[0];
1727             while (*ptr++ != c) {
1728                 if (ptr >= end)
1729                     return 0;
1730             }
1731             if (ptr >= end)
1732                 return 0;
1733 
1734             i = 1;
1735             state->must_advance = 0;
1736             do {
1737                 if (*ptr == (SRE_CHAR) prefix[i]) {
1738                     if (++i != prefix_len) {
1739                         if (++ptr >= end)
1740                             return 0;
1741                         continue;
1742                     }
1743                     /* found a potential match */
1744                     TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1745                     state->start = ptr - (prefix_len - 1);
1746                     state->ptr = ptr - (prefix_len - prefix_skip - 1);
1747                     if (flags & SRE_INFO_LITERAL)
1748                         return 1; /* we got all of it */
1749                     status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1750                     if (status != 0)
1751                         return status;
1752                     /* close but no cigar -- try again */
1753                     if (++ptr >= end)
1754                         return 0;
1755                     RESET_CAPTURE_GROUP();
1756                 }
1757                 i = overlap[i];
1758             } while (i != 0);
1759         }
1760         return 0;
1761     }
1762 
1763     if (charset) {
1764         /* pattern starts with a character from a known set */
1765         end = (SRE_CHAR *)state->end;
1766         state->must_advance = 0;
1767         for (;;) {
1768             while (ptr < end && !SRE(charset)(state, charset, *ptr))
1769                 ptr++;
1770             if (ptr >= end)
1771                 return 0;
1772             TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1773             state->start = ptr;
1774             state->ptr = ptr;
1775             status = SRE(match)(state, pattern, 0);
1776             if (status != 0)
1777                 break;
1778             ptr++;
1779             RESET_CAPTURE_GROUP();
1780         }
1781     } else {
1782         /* general case */
1783         assert(ptr <= end);
1784         TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1785         state->start = state->ptr = ptr;
1786         status = SRE(match)(state, pattern, 1);
1787         state->must_advance = 0;
1788         if (status == 0 && pattern[0] == SRE_OP_AT &&
1789             (pattern[1] == SRE_AT_BEGINNING ||
1790              pattern[1] == SRE_AT_BEGINNING_STRING))
1791         {
1792             state->start = state->ptr = ptr = end;
1793             return 0;
1794         }
1795         while (status == 0 && ptr < end) {
1796             ptr++;
1797             RESET_CAPTURE_GROUP();
1798             TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1799             state->start = state->ptr = ptr;
1800             status = SRE(match)(state, pattern, 0);
1801         }
1802     }
1803 
1804     return status;
1805 }
1806 
1807 #undef SRE_CHAR
1808 #undef SIZEOF_SRE_CHAR
1809 #undef SRE
1810 
1811 /* vim:ts=4:sw=4:et
1812 */
1813