1 /* stringlib: fastsearch implementation */
2 
3 #define STRINGLIB_FASTSEARCH_H
4 
5 /* fast search/count implementation, based on a mix between boyer-
6    moore and horspool, with a few more bells and whistles on the top.
7    for some more background, see:
8    https://web.archive.org/web/20201107074620/http://effbot.org/zone/stringlib.htm */
9 
10 /* note: fastsearch may access s[n], which isn't a problem when using
11    Python's ordinary string types, but may cause problems if you're
12    using this code in other contexts.  also, the count mode returns -1
13    if there cannot possibly be a match in the target string, and 0 if
14    it has actually checked for matches, but didn't find any.  callers
15    beware! */
16 
17 /* If the strings are long enough, use Crochemore and Perrin's Two-Way
18    algorithm, which has worst-case O(n) runtime and best-case O(n/k).
19    Also compute a table of shifts to achieve O(n/k) in more cases,
20    and often (data dependent) deduce larger shifts than pure C&P can
21    deduce. */
22 
23 #define FAST_COUNT 0
24 #define FAST_SEARCH 1
25 #define FAST_RSEARCH 2
26 
27 #if LONG_BIT >= 128
28 #define STRINGLIB_BLOOM_WIDTH 128
29 #elif LONG_BIT >= 64
30 #define STRINGLIB_BLOOM_WIDTH 64
31 #elif LONG_BIT >= 32
32 #define STRINGLIB_BLOOM_WIDTH 32
33 #else
34 #error "LONG_BIT is smaller than 32"
35 #endif
36 
37 #define STRINGLIB_BLOOM_ADD(mask, ch) \
38     ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
39 #define STRINGLIB_BLOOM(mask, ch)     \
40     ((mask &  (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
41 
42 #if STRINGLIB_SIZEOF_CHAR == 1
43 #  define MEMCHR_CUT_OFF 15
44 #else
45 #  define MEMCHR_CUT_OFF 40
46 #endif
47 
48 Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(find_char)49 STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
50 {
51     const STRINGLIB_CHAR *p, *e;
52 
53     p = s;
54     e = s + n;
55     if (n > MEMCHR_CUT_OFF) {
56 #if STRINGLIB_SIZEOF_CHAR == 1
57         p = memchr(s, ch, n);
58         if (p != NULL)
59             return (p - s);
60         return -1;
61 #else
62         /* use memchr if we can choose a needle without too many likely
63            false positives */
64         const STRINGLIB_CHAR *s1, *e1;
65         unsigned char needle = ch & 0xff;
66         /* If looking for a multiple of 256, we'd have too
67            many false positives looking for the '\0' byte in UCS2
68            and UCS4 representations. */
69         if (needle != 0) {
70             do {
71                 void *candidate = memchr(p, needle,
72                                          (e - p) * sizeof(STRINGLIB_CHAR));
73                 if (candidate == NULL)
74                     return -1;
75                 s1 = p;
76                 p = (const STRINGLIB_CHAR *)
77                         _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
78                 if (*p == ch)
79                     return (p - s);
80                 /* False positive */
81                 p++;
82                 if (p - s1 > MEMCHR_CUT_OFF)
83                     continue;
84                 if (e - p <= MEMCHR_CUT_OFF)
85                     break;
86                 e1 = p + MEMCHR_CUT_OFF;
87                 while (p != e1) {
88                     if (*p == ch)
89                         return (p - s);
90                     p++;
91                 }
92             }
93             while (e - p > MEMCHR_CUT_OFF);
94         }
95 #endif
96     }
97     while (p < e) {
98         if (*p == ch)
99             return (p - s);
100         p++;
101     }
102     return -1;
103 }
104 
105 Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(rfind_char)106 STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
107 {
108     const STRINGLIB_CHAR *p;
109 #ifdef HAVE_MEMRCHR
110     /* memrchr() is a GNU extension, available since glibc 2.1.91.
111        it doesn't seem as optimized as memchr(), but is still quite
112        faster than our hand-written loop below */
113 
114     if (n > MEMCHR_CUT_OFF) {
115 #if STRINGLIB_SIZEOF_CHAR == 1
116         p = memrchr(s, ch, n);
117         if (p != NULL)
118             return (p - s);
119         return -1;
120 #else
121         /* use memrchr if we can choose a needle without too many likely
122            false positives */
123         const STRINGLIB_CHAR *s1;
124         Py_ssize_t n1;
125         unsigned char needle = ch & 0xff;
126         /* If looking for a multiple of 256, we'd have too
127            many false positives looking for the '\0' byte in UCS2
128            and UCS4 representations. */
129         if (needle != 0) {
130             do {
131                 void *candidate = memrchr(s, needle,
132                                           n * sizeof(STRINGLIB_CHAR));
133                 if (candidate == NULL)
134                     return -1;
135                 n1 = n;
136                 p = (const STRINGLIB_CHAR *)
137                         _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
138                 n = p - s;
139                 if (*p == ch)
140                     return n;
141                 /* False positive */
142                 if (n1 - n > MEMCHR_CUT_OFF)
143                     continue;
144                 if (n <= MEMCHR_CUT_OFF)
145                     break;
146                 s1 = p - MEMCHR_CUT_OFF;
147                 while (p > s1) {
148                     p--;
149                     if (*p == ch)
150                         return (p - s);
151                 }
152                 n = p - s;
153             }
154             while (n > MEMCHR_CUT_OFF);
155         }
156 #endif
157     }
158 #endif  /* HAVE_MEMRCHR */
159     p = s + n;
160     while (p > s) {
161         p--;
162         if (*p == ch)
163             return (p - s);
164     }
165     return -1;
166 }
167 
168 #undef MEMCHR_CUT_OFF
169 
170 /* Change to a 1 to see logging comments walk through the algorithm. */
171 #if 0 && STRINGLIB_SIZEOF_CHAR == 1
172 # define LOG(...) printf(__VA_ARGS__)
173 # define LOG_STRING(s, n) printf("\"%.*s\"", (int)(n), s)
174 # define LOG_LINEUP() do {                                         \
175     LOG("> "); LOG_STRING(haystack, len_haystack); LOG("\n> ");    \
176     LOG("%*s",(int)(window_last - haystack + 1 - len_needle), ""); \
177     LOG_STRING(needle, len_needle); LOG("\n");                     \
178 } while(0)
179 #else
180 # define LOG(...)
181 # define LOG_STRING(s, n)
182 # define LOG_LINEUP()
183 #endif
184 
185 Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(_lex_search)186 STRINGLIB(_lex_search)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
187                        Py_ssize_t *return_period, int invert_alphabet)
188 {
189     /* Do a lexicographic search. Essentially this:
190            >>> max(needle[i:] for i in range(len(needle)+1))
191        Also find the period of the right half.   */
192     Py_ssize_t max_suffix = 0;
193     Py_ssize_t candidate = 1;
194     Py_ssize_t k = 0;
195     // The period of the right half.
196     Py_ssize_t period = 1;
197 
198     while (candidate + k < len_needle) {
199         // each loop increases candidate + k + max_suffix
200         STRINGLIB_CHAR a = needle[candidate + k];
201         STRINGLIB_CHAR b = needle[max_suffix + k];
202         // check if the suffix at candidate is better than max_suffix
203         if (invert_alphabet ? (b < a) : (a < b)) {
204             // Fell short of max_suffix.
205             // The next k + 1 characters are non-increasing
206             // from candidate, so they won't start a maximal suffix.
207             candidate += k + 1;
208             k = 0;
209             // We've ruled out any period smaller than what's
210             // been scanned since max_suffix.
211             period = candidate - max_suffix;
212         }
213         else if (a == b) {
214             if (k + 1 != period) {
215                 // Keep scanning the equal strings
216                 k++;
217             }
218             else {
219                 // Matched a whole period.
220                 // Start matching the next period.
221                 candidate += period;
222                 k = 0;
223             }
224         }
225         else {
226             // Did better than max_suffix, so replace it.
227             max_suffix = candidate;
228             candidate++;
229             k = 0;
230             period = 1;
231         }
232     }
233     *return_period = period;
234     return max_suffix;
235 }
236 
237 Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(_factorize)238 STRINGLIB(_factorize)(const STRINGLIB_CHAR *needle,
239                       Py_ssize_t len_needle,
240                       Py_ssize_t *return_period)
241 {
242     /* Do a "critical factorization", making it so that:
243        >>> needle = (left := needle[:cut]) + (right := needle[cut:])
244        where the "local period" of the cut is maximal.
245 
246        The local period of the cut is the minimal length of a string w
247        such that (left endswith w or w endswith left)
248        and (right startswith w or w startswith left).
249 
250        The Critical Factorization Theorem says that this maximal local
251        period is the global period of the string.
252 
253        Crochemore and Perrin (1991) show that this cut can be computed
254        as the later of two cuts: one that gives a lexicographically
255        maximal right half, and one that gives the same with the
256        with respect to a reversed alphabet-ordering.
257 
258        This is what we want to happen:
259            >>> x = "GCAGAGAG"
260            >>> cut, period = factorize(x)
261            >>> x[:cut], (right := x[cut:])
262            ('GC', 'AGAGAG')
263            >>> period  # right half period
264            2
265            >>> right[period:] == right[:-period]
266            True
267 
268        This is how the local period lines up in the above example:
269                 GC | AGAGAG
270            AGAGAGC = AGAGAGC
271        The length of this minimal repetition is 7, which is indeed the
272        period of the original string. */
273 
274     Py_ssize_t cut1, period1, cut2, period2, cut, period;
275     cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0);
276     cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1);
277 
278     // Take the later cut.
279     if (cut1 > cut2) {
280         period = period1;
281         cut = cut1;
282     }
283     else {
284         period = period2;
285         cut = cut2;
286     }
287 
288     LOG("split: "); LOG_STRING(needle, cut);
289     LOG(" + "); LOG_STRING(needle + cut, len_needle - cut);
290     LOG("\n");
291 
292     *return_period = period;
293     return cut;
294 }
295 
296 
297 #define SHIFT_TYPE uint8_t
298 #define MAX_SHIFT UINT8_MAX
299 
300 #define TABLE_SIZE_BITS 6u
301 #define TABLE_SIZE (1U << TABLE_SIZE_BITS)
302 #define TABLE_MASK (TABLE_SIZE - 1U)
303 
304 typedef struct STRINGLIB(_pre) {
305     const STRINGLIB_CHAR *needle;
306     Py_ssize_t len_needle;
307     Py_ssize_t cut;
308     Py_ssize_t period;
309     Py_ssize_t gap;
310     int is_periodic;
311     SHIFT_TYPE table[TABLE_SIZE];
312 } STRINGLIB(prework);
313 
314 
315 static void
STRINGLIB(_preprocess)316 STRINGLIB(_preprocess)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
317                        STRINGLIB(prework) *p)
318 {
319     p->needle = needle;
320     p->len_needle = len_needle;
321     p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period));
322     assert(p->period + p->cut <= len_needle);
323     p->is_periodic = (0 == memcmp(needle,
324                                   needle + p->period,
325                                   p->cut * STRINGLIB_SIZEOF_CHAR));
326     if (p->is_periodic) {
327         assert(p->cut <= len_needle/2);
328         assert(p->cut < p->period);
329         p->gap = 0; // unused
330     }
331     else {
332         // A lower bound on the period
333         p->period = Py_MAX(p->cut, len_needle - p->cut) + 1;
334         // The gap between the last character and the previous
335         // occurrence of an equivalent character (modulo TABLE_SIZE)
336         p->gap = len_needle;
337         STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK;
338         for (Py_ssize_t i = len_needle - 2; i >= 0; i--) {
339             STRINGLIB_CHAR x = needle[i] & TABLE_MASK;
340             if (x == last) {
341                 p->gap = len_needle - 1 - i;
342                 break;
343             }
344         }
345     }
346     // Fill up a compressed Boyer-Moore "Bad Character" table
347     Py_ssize_t not_found_shift = Py_MIN(len_needle, MAX_SHIFT);
348     for (Py_ssize_t i = 0; i < (Py_ssize_t)TABLE_SIZE; i++) {
349         p->table[i] = Py_SAFE_DOWNCAST(not_found_shift,
350                                        Py_ssize_t, SHIFT_TYPE);
351     }
352     for (Py_ssize_t i = len_needle - not_found_shift; i < len_needle; i++) {
353         SHIFT_TYPE shift = Py_SAFE_DOWNCAST(len_needle - 1 - i,
354                                             Py_ssize_t, SHIFT_TYPE);
355         p->table[needle[i] & TABLE_MASK] = shift;
356     }
357 }
358 
359 static Py_ssize_t
STRINGLIB(_two_way)360 STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
361                     STRINGLIB(prework) *p)
362 {
363     // Crochemore and Perrin's (1991) Two-Way algorithm.
364     // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
365     const Py_ssize_t len_needle = p->len_needle;
366     const Py_ssize_t cut = p->cut;
367     Py_ssize_t period = p->period;
368     const STRINGLIB_CHAR *const needle = p->needle;
369     const STRINGLIB_CHAR *window_last = haystack + len_needle - 1;
370     const STRINGLIB_CHAR *const haystack_end = haystack + len_haystack;
371     SHIFT_TYPE *table = p->table;
372     const STRINGLIB_CHAR *window;
373     LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack);
374 
375     if (p->is_periodic) {
376         LOG("Needle is periodic.\n");
377         Py_ssize_t memory = 0;
378       periodicwindowloop:
379         while (window_last < haystack_end) {
380             assert(memory == 0);
381             for (;;) {
382                 LOG_LINEUP();
383                 Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
384                 window_last += shift;
385                 if (shift == 0) {
386                     break;
387                 }
388                 if (window_last >= haystack_end) {
389                     return -1;
390                 }
391                 LOG("Horspool skip");
392             }
393           no_shift:
394             window = window_last - len_needle + 1;
395             assert((window[len_needle - 1] & TABLE_MASK) ==
396                    (needle[len_needle - 1] & TABLE_MASK));
397             Py_ssize_t i = Py_MAX(cut, memory);
398             for (; i < len_needle; i++) {
399                 if (needle[i] != window[i]) {
400                     LOG("Right half does not match.\n");
401                     window_last += i - cut + 1;
402                     memory = 0;
403                     goto periodicwindowloop;
404                 }
405             }
406             for (i = memory; i < cut; i++) {
407                 if (needle[i] != window[i]) {
408                     LOG("Left half does not match.\n");
409                     window_last += period;
410                     memory = len_needle - period;
411                     if (window_last >= haystack_end) {
412                         return -1;
413                     }
414                     Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
415                     if (shift) {
416                         // A mismatch has been identified to the right
417                         // of where i will next start, so we can jump
418                         // at least as far as if the mismatch occurred
419                         // on the first comparison.
420                         Py_ssize_t mem_jump = Py_MAX(cut, memory) - cut + 1;
421                         LOG("Skip with Memory.\n");
422                         memory = 0;
423                         window_last += Py_MAX(shift, mem_jump);
424                         goto periodicwindowloop;
425                     }
426                     goto no_shift;
427                 }
428             }
429             LOG("Found a match!\n");
430             return window - haystack;
431         }
432     }
433     else {
434         Py_ssize_t gap = p->gap;
435         period = Py_MAX(gap, period);
436         LOG("Needle is not periodic.\n");
437         Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap);
438       windowloop:
439         while (window_last < haystack_end) {
440             for (;;) {
441                 LOG_LINEUP();
442                 Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
443                 window_last += shift;
444                 if (shift == 0) {
445                     break;
446                 }
447                 if (window_last >= haystack_end) {
448                     return -1;
449                 }
450                 LOG("Horspool skip");
451             }
452             window = window_last - len_needle + 1;
453             assert((window[len_needle - 1] & TABLE_MASK) ==
454                    (needle[len_needle - 1] & TABLE_MASK));
455             for (Py_ssize_t i = cut; i < gap_jump_end; i++) {
456                 if (needle[i] != window[i]) {
457                     LOG("Early right half mismatch: jump by gap.\n");
458                     assert(gap >= i - cut + 1);
459                     window_last += gap;
460                     goto windowloop;
461                 }
462             }
463             for (Py_ssize_t i = gap_jump_end; i < len_needle; i++) {
464                 if (needle[i] != window[i]) {
465                     LOG("Late right half mismatch.\n");
466                     assert(i - cut + 1 > gap);
467                     window_last += i - cut + 1;
468                     goto windowloop;
469                 }
470             }
471             for (Py_ssize_t i = 0; i < cut; i++) {
472                 if (needle[i] != window[i]) {
473                     LOG("Left half does not match.\n");
474                     window_last += period;
475                     goto windowloop;
476                 }
477             }
478             LOG("Found a match!\n");
479             return window - haystack;
480         }
481     }
482     LOG("Not found. Returning -1.\n");
483     return -1;
484 }
485 
486 
487 static Py_ssize_t
STRINGLIB(_two_way_find)488 STRINGLIB(_two_way_find)(const STRINGLIB_CHAR *haystack,
489                          Py_ssize_t len_haystack,
490                          const STRINGLIB_CHAR *needle,
491                          Py_ssize_t len_needle)
492 {
493     LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack);
494     STRINGLIB(prework) p;
495     STRINGLIB(_preprocess)(needle, len_needle, &p);
496     return STRINGLIB(_two_way)(haystack, len_haystack, &p);
497 }
498 
499 
500 static Py_ssize_t
STRINGLIB(_two_way_count)501 STRINGLIB(_two_way_count)(const STRINGLIB_CHAR *haystack,
502                           Py_ssize_t len_haystack,
503                           const STRINGLIB_CHAR *needle,
504                           Py_ssize_t len_needle,
505                           Py_ssize_t maxcount)
506 {
507     LOG("###### Counting \"%s\" in \"%s\".\n", needle, haystack);
508     STRINGLIB(prework) p;
509     STRINGLIB(_preprocess)(needle, len_needle, &p);
510     Py_ssize_t index = 0, count = 0;
511     while (1) {
512         Py_ssize_t result;
513         result = STRINGLIB(_two_way)(haystack + index,
514                                      len_haystack - index, &p);
515         if (result == -1) {
516             return count;
517         }
518         count++;
519         if (count == maxcount) {
520             return maxcount;
521         }
522         index += result + len_needle;
523     }
524     return count;
525 }
526 
527 #undef SHIFT_TYPE
528 #undef NOT_FOUND
529 #undef SHIFT_OVERFLOW
530 #undef TABLE_SIZE_BITS
531 #undef TABLE_SIZE
532 #undef TABLE_MASK
533 
534 #undef LOG
535 #undef LOG_STRING
536 #undef LOG_LINEUP
537 
538 static inline Py_ssize_t
STRINGLIB(default_find)539 STRINGLIB(default_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
540                         const STRINGLIB_CHAR* p, Py_ssize_t m,
541                         Py_ssize_t maxcount, int mode)
542 {
543     const Py_ssize_t w = n - m;
544     Py_ssize_t mlast = m - 1, count = 0;
545     Py_ssize_t gap = mlast;
546     const STRINGLIB_CHAR last = p[mlast];
547     const STRINGLIB_CHAR *const ss = &s[mlast];
548 
549     unsigned long mask = 0;
550     for (Py_ssize_t i = 0; i < mlast; i++) {
551         STRINGLIB_BLOOM_ADD(mask, p[i]);
552         if (p[i] == last) {
553             gap = mlast - i - 1;
554         }
555     }
556     STRINGLIB_BLOOM_ADD(mask, last);
557 
558     for (Py_ssize_t i = 0; i <= w; i++) {
559         if (ss[i] == last) {
560             /* candidate match */
561             Py_ssize_t j;
562             for (j = 0; j < mlast; j++) {
563                 if (s[i+j] != p[j]) {
564                     break;
565                 }
566             }
567             if (j == mlast) {
568                 /* got a match! */
569                 if (mode != FAST_COUNT) {
570                     return i;
571                 }
572                 count++;
573                 if (count == maxcount) {
574                     return maxcount;
575                 }
576                 i = i + mlast;
577                 continue;
578             }
579             /* miss: check if next character is part of pattern */
580             if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
581                 i = i + m;
582             }
583             else {
584                 i = i + gap;
585             }
586         }
587         else {
588             /* skip: check if next character is part of pattern */
589             if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
590                 i = i + m;
591             }
592         }
593     }
594     return mode == FAST_COUNT ? count : -1;
595 }
596 
597 
598 static Py_ssize_t
STRINGLIB(adaptive_find)599 STRINGLIB(adaptive_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
600                          const STRINGLIB_CHAR* p, Py_ssize_t m,
601                          Py_ssize_t maxcount, int mode)
602 {
603     const Py_ssize_t w = n - m;
604     Py_ssize_t mlast = m - 1, count = 0;
605     Py_ssize_t gap = mlast;
606     Py_ssize_t hits = 0, res;
607     const STRINGLIB_CHAR last = p[mlast];
608     const STRINGLIB_CHAR *const ss = &s[mlast];
609 
610     unsigned long mask = 0;
611     for (Py_ssize_t i = 0; i < mlast; i++) {
612         STRINGLIB_BLOOM_ADD(mask, p[i]);
613         if (p[i] == last) {
614             gap = mlast - i - 1;
615         }
616     }
617     STRINGLIB_BLOOM_ADD(mask, last);
618 
619     for (Py_ssize_t i = 0; i <= w; i++) {
620         if (ss[i] == last) {
621             /* candidate match */
622             Py_ssize_t j;
623             for (j = 0; j < mlast; j++) {
624                 if (s[i+j] != p[j]) {
625                     break;
626                 }
627             }
628             if (j == mlast) {
629                 /* got a match! */
630                 if (mode != FAST_COUNT) {
631                     return i;
632                 }
633                 count++;
634                 if (count == maxcount) {
635                     return maxcount;
636                 }
637                 i = i + mlast;
638                 continue;
639             }
640             hits += j + 1;
641             if (hits > m / 4 && w - i > 2000) {
642                 if (mode == FAST_SEARCH) {
643                     res = STRINGLIB(_two_way_find)(s + i, n - i, p, m);
644                     return res == -1 ? -1 : res + i;
645                 }
646                 else {
647                     res = STRINGLIB(_two_way_count)(s + i, n - i, p, m,
648                                                     maxcount - count);
649                     return res + count;
650                 }
651             }
652             /* miss: check if next character is part of pattern */
653             if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
654                 i = i + m;
655             }
656             else {
657                 i = i + gap;
658             }
659         }
660         else {
661             /* skip: check if next character is part of pattern */
662             if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
663                 i = i + m;
664             }
665         }
666     }
667     return mode == FAST_COUNT ? count : -1;
668 }
669 
670 
671 static Py_ssize_t
STRINGLIB(default_rfind)672 STRINGLIB(default_rfind)(const STRINGLIB_CHAR* s, Py_ssize_t n,
673                          const STRINGLIB_CHAR* p, Py_ssize_t m,
674                          Py_ssize_t maxcount, int mode)
675 {
676     /* create compressed boyer-moore delta 1 table */
677     unsigned long mask = 0;
678     Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m;
679 
680     /* process pattern[0] outside the loop */
681     STRINGLIB_BLOOM_ADD(mask, p[0]);
682     /* process pattern[:0:-1] */
683     for (i = mlast; i > 0; i--) {
684         STRINGLIB_BLOOM_ADD(mask, p[i]);
685         if (p[i] == p[0]) {
686             skip = i - 1;
687         }
688     }
689 
690     for (i = w; i >= 0; i--) {
691         if (s[i] == p[0]) {
692             /* candidate match */
693             for (j = mlast; j > 0; j--) {
694                 if (s[i+j] != p[j]) {
695                     break;
696                 }
697             }
698             if (j == 0) {
699                 /* got a match! */
700                 return i;
701             }
702             /* miss: check if previous character is part of pattern */
703             if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
704                 i = i - m;
705             }
706             else {
707                 i = i - skip;
708             }
709         }
710         else {
711             /* skip: check if previous character is part of pattern */
712             if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
713                 i = i - m;
714             }
715         }
716     }
717     return -1;
718 }
719 
720 
721 static inline Py_ssize_t
STRINGLIB(count_char)722 STRINGLIB(count_char)(const STRINGLIB_CHAR *s, Py_ssize_t n,
723                       const STRINGLIB_CHAR p0, Py_ssize_t maxcount)
724 {
725     Py_ssize_t i, count = 0;
726     for (i = 0; i < n; i++) {
727         if (s[i] == p0) {
728             count++;
729             if (count == maxcount) {
730                 return maxcount;
731             }
732         }
733     }
734     return count;
735 }
736 
737 
738 Py_LOCAL_INLINE(Py_ssize_t)
FASTSEARCH(const STRINGLIB_CHAR * s,Py_ssize_t n,const STRINGLIB_CHAR * p,Py_ssize_t m,Py_ssize_t maxcount,int mode)739 FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
740            const STRINGLIB_CHAR* p, Py_ssize_t m,
741            Py_ssize_t maxcount, int mode)
742 {
743     if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
744         return -1;
745     }
746 
747     /* look for special cases */
748     if (m <= 1) {
749         if (m <= 0) {
750             return -1;
751         }
752         /* use special case for 1-character strings */
753         if (mode == FAST_SEARCH)
754             return STRINGLIB(find_char)(s, n, p[0]);
755         else if (mode == FAST_RSEARCH)
756             return STRINGLIB(rfind_char)(s, n, p[0]);
757         else {
758             return STRINGLIB(count_char)(s, n, p[0], maxcount);
759         }
760     }
761 
762     if (mode != FAST_RSEARCH) {
763         if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
764             return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
765         }
766         else if ((m >> 2) * 3 < (n >> 2)) {
767             /* 33% threshold, but don't overflow. */
768             /* For larger problems where the needle isn't a huge
769                percentage of the size of the haystack, the relatively
770                expensive O(m) startup cost of the two-way algorithm
771                will surely pay off. */
772             if (mode == FAST_SEARCH) {
773                 return STRINGLIB(_two_way_find)(s, n, p, m);
774             }
775             else {
776                 return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
777             }
778         }
779         else {
780             /* To ensure that we have good worst-case behavior,
781                here's an adaptive version of the algorithm, where if
782                we match O(m) characters without any matches of the
783                entire needle, then we predict that the startup cost of
784                the two-way algorithm will probably be worth it. */
785             return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
786         }
787     }
788     else {
789         /* FAST_RSEARCH */
790         return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
791     }
792 }
793 
794