xref: /aosp_15_r20/external/pcre/src/pcre2_fuzzsupport.c (revision 22dc650d8ae982c6770746019a6f94af92b0f024)
1 /***************************************************************************
2 Fuzzer driver for PCRE2. Given an arbitrary string of bytes and a length, it
3 tries to compile and match it, deriving options from the string itself. If
4 STANDALONE is defined, a main program that calls the driver with the contents
5 of specified files is compiled, and commentary on what is happening is output.
6 If an argument starts with '=' the rest of it is taken as a literal string
7 rather than a file name. This allows easy testing of short strings.
8 
9 Written by Philip Hazel, October 2016
10 Updated February 2024 (Addison Crump added 16-bit/32-bit and JIT support)
11 Further updates March/April/May 2024 by PH
12 ***************************************************************************/
13 
14 #include <errno.h>
15 #include <stdarg.h>
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <string.h>
19 #include <unistd.h>
20 
21 /* stack size adjustment */
22 #include <sys/time.h>
23 #include <sys/resource.h>
24 
25 #define STACK_SIZE_MB 256
26 #define JIT_SIZE_LIMIT (200 * 1024)
27 
28 #ifndef PCRE2_CODE_UNIT_WIDTH
29 #define PCRE2_CODE_UNIT_WIDTH 8
30 #endif
31 
32 #include "config.h"
33 #include "pcre2.h"
34 #include "pcre2_internal.h"
35 
36 #define MAX_MATCH_SIZE 1000
37 
38 #define DFA_WORKSPACE_COUNT 100
39 
40 /* When adding new compile or match options, remember to update the functions
41 below that output them. */
42 
43 #define ALLOWED_COMPILE_OPTIONS \
44   (PCRE2_ANCHORED|PCRE2_ALLOW_EMPTY_CLASS|PCRE2_ALT_BSUX|PCRE2_ALT_CIRCUMFLEX| \
45    PCRE2_ALT_VERBNAMES|PCRE2_AUTO_CALLOUT|PCRE2_CASELESS|PCRE2_DOLLAR_ENDONLY| \
46    PCRE2_DOTALL|PCRE2_DUPNAMES|PCRE2_ENDANCHORED|PCRE2_EXTENDED| \
47    PCRE2_EXTENDED_MORE|PCRE2_FIRSTLINE| \
48    PCRE2_MATCH_UNSET_BACKREF|PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C| \
49    PCRE2_NO_AUTO_CAPTURE| \
50    PCRE2_NO_AUTO_POSSESS|PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_NO_START_OPTIMIZE| \
51    PCRE2_UCP|PCRE2_UNGREEDY|PCRE2_USE_OFFSET_LIMIT| \
52    PCRE2_UTF)
53 
54 #define ALLOWED_MATCH_OPTIONS \
55   (PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
56    PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_HARD| \
57    PCRE2_PARTIAL_SOFT)
58 
59 #define BASE_MATCH_OPTIONS \
60   (PCRE2_NO_JIT|PCRE2_DISABLE_RECURSELOOP_CHECK)
61 
62 
63 #if defined(SUPPORT_DIFF_FUZZ) || defined(STANDALONE)
print_compile_options(FILE * stream,uint32_t compile_options)64 static void print_compile_options(FILE *stream, uint32_t compile_options)
65 {
66 fprintf(stream, "Compile options %s%.8x =",
67   (compile_options == PCRE2_NEVER_BACKSLASH_C)? "(base) " : "",
68   compile_options);
69 
70 fprintf(stream, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
71   ((compile_options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "",
72   ((compile_options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "",
73   ((compile_options & PCRE2_ALT_VERBNAMES) != 0)? " alt_verbnames" : "",
74   ((compile_options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? " allow_empty_class" : "",
75   ((compile_options & PCRE2_ANCHORED) != 0)? " anchored" : "",
76   ((compile_options & PCRE2_AUTO_CALLOUT) != 0)? " auto_callout" : "",
77   ((compile_options & PCRE2_CASELESS) != 0)? " caseless" : "",
78   ((compile_options & PCRE2_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
79   ((compile_options & PCRE2_DOTALL) != 0)? " dotall" : "",
80   ((compile_options & PCRE2_DUPNAMES) != 0)? " dupnames" : "",
81   ((compile_options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
82   ((compile_options & PCRE2_EXTENDED) != 0)? " extended" : "",
83   ((compile_options & PCRE2_EXTENDED_MORE) != 0)? " extended_more" : "",
84   ((compile_options & PCRE2_FIRSTLINE) != 0)? " firstline" : "",
85   ((compile_options & PCRE2_MATCH_UNSET_BACKREF) != 0)? " match_unset_backref" : "",
86   ((compile_options & PCRE2_MULTILINE) != 0)? " multiline" : "",
87   ((compile_options & PCRE2_NEVER_BACKSLASH_C) != 0)? " never_backslash_c" : "",
88   ((compile_options & PCRE2_NEVER_UCP) != 0)? " never_ucp" : "",
89   ((compile_options & PCRE2_NEVER_UTF) != 0)? " never_utf" : "",
90   ((compile_options & PCRE2_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
91   ((compile_options & PCRE2_NO_AUTO_POSSESS) != 0)? " no_auto_possess" : "",
92   ((compile_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? " no_dotstar_anchor" : "",
93   ((compile_options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
94   ((compile_options & PCRE2_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
95   ((compile_options & PCRE2_UCP) != 0)? " ucp" : "",
96   ((compile_options & PCRE2_UNGREEDY) != 0)? " ungreedy" : "",
97   ((compile_options & PCRE2_USE_OFFSET_LIMIT) != 0)? " use_offset_limit" : "",
98   ((compile_options & PCRE2_UTF) != 0)? " utf" : "");
99 }
100 
print_match_options(FILE * stream,uint32_t match_options)101 static void print_match_options(FILE *stream, uint32_t match_options)
102 {
103 fprintf(stream, "Match options %s%.8x =",
104   (match_options == BASE_MATCH_OPTIONS)? "(base) " : "", match_options);
105 
106 fprintf(stream, "%s%s%s%s%s%s%s%s%s%s%s\n",
107   ((match_options & PCRE2_ANCHORED) != 0)? " anchored" : "",
108   ((match_options & PCRE2_DISABLE_RECURSELOOP_CHECK) != 0)? " disable_recurseloop_check" : "",
109   ((match_options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
110   ((match_options & PCRE2_NO_JIT) != 0)? " no_jit" : "",
111   ((match_options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
112   ((match_options & PCRE2_NOTBOL) != 0)? " notbol" : "",
113   ((match_options & PCRE2_NOTEMPTY) != 0)? " notempty" : "",
114   ((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? " notempty_atstart" : "",
115   ((match_options & PCRE2_NOTEOL) != 0)? " noteol" : "",
116   ((match_options & PCRE2_PARTIAL_HARD) != 0)? " partial_hard" : "",
117   ((match_options & PCRE2_PARTIAL_SOFT) != 0)? " partial_soft" : "");
118 }
119 
120 
121 /* This function can print an error message at all code unit widths. */
122 
print_error(FILE * f,int errorcode,const char * text,...)123 static void print_error(FILE *f, int errorcode, const char *text, ...)
124 {
125 PCRE2_UCHAR buffer[256];
126 PCRE2_UCHAR *p = buffer;
127 va_list ap;
128 va_start(ap, text);
129 vfprintf(f, text, ap);
130 va_end(ap);
131 pcre2_get_error_message(errorcode, buffer, 256);
132 while (*p != 0) fprintf(f, "%c", *p++);
133 printf("\n");
134 }
135 #endif /* defined(SUPPORT_DIFF_FUZZ || defined(STANDALONE) */
136 
137 
138 #ifdef SUPPORT_JIT
139 #ifdef SUPPORT_DIFF_FUZZ
/* Write the first "count" captured substrings held in "match_data" to
"stream", one per line, as hex-encoded code units. A substring that cannot be
extracted is reported with the corresponding PCRE2 error message instead.

Arguments:
  stream      where to write
  count       number of substrings (group numbers 0 .. count-1) to dump
  match_data  the match data block to read them from
*/

static void dump_matches(FILE *stream, int count, pcre2_match_data *match_data)
{
for (int index = 0; index < count; index++)
  {
  PCRE2_UCHAR *bufferptr = NULL;
  PCRE2_SIZE bufflen = 0;
  int errorcode = pcre2_substring_get_bynumber(match_data, (uint32_t)index,
    &bufferptr, &bufflen);

  if (errorcode >= 0)
    {
    fprintf(stream, "Match %d (hex encoded): ", index);
    for (PCRE2_SIZE i = 0; i < bufflen; i++)
      {
      fprintf(stream, "%02x", (unsigned int)bufferptr[i]);
      }
    fprintf(stream, "\n");

    /* The substring was handed back in newly obtained memory; release it so
    that dumping does not leak one buffer per capture. */

    pcre2_substring_free(bufferptr);
    }
  else
    {
    print_error(stream, errorcode, "Match %d failed: ", index);
    }
  }
}
167 
168 /* This function describes the current test case being evaluated, then aborts */
169 
describe_failure(const char * task,const unsigned char * data,size_t size,uint32_t compile_options,uint32_t match_options,int errorcode,int errorcode_jit,int matches,int matches_jit,pcre2_match_data * match_data,pcre2_match_data * match_data_jit)170 static void describe_failure(
171   const char *task,
172   const unsigned char *data,
173   size_t size,
174   uint32_t compile_options,
175   uint32_t match_options,
176   int errorcode,
177   int errorcode_jit,
178   int matches,
179   int matches_jit,
180   pcre2_match_data *match_data,
181   pcre2_match_data *match_data_jit
182 ) {
183 
184 fprintf(stderr, "Encountered failure while performing %s; context:\n", task);
185 
186 fprintf(stderr, "Pattern/sample string (hex encoded): ");
187 for (size_t i = 0; i < size; i++)
188   {
189   fprintf(stderr, "%02x", data[i]);
190   }
191 fprintf(stderr, "\n");
192 
193 print_compile_options(stderr, compile_options);
194 print_match_options(stderr, match_options);
195 
196 if (errorcode < 0)
197   {
198   print_error(stderr, errorcode, "Non-JIT'd operation emitted an error: ");
199   }
200 
201 if (matches >= 0)
202   {
203   fprintf(stderr, "Non-JIT'd operation did not emit an error.\n");
204   if (match_data != NULL)
205     {
206     fprintf(stderr, "%d matches discovered by non-JIT'd regex:\n", matches);
207     dump_matches(stderr, matches, match_data);
208     fprintf(stderr, "\n");
209     }
210   }
211 
212 if (errorcode_jit < 0)
213   {
214   print_error(stderr, errorcode_jit, "JIT'd operation emitted error %d:",
215     errorcode_jit);
216   }
217 
218 if (matches_jit >= 0)
219   {
220   fprintf(stderr, "JIT'd operation did not emit an error.\n");
221   if (match_data_jit != NULL)
222     {
223     fprintf(stderr, "%d matches discovered by JIT'd regex:\n", matches_jit);
224     dump_matches(stderr, matches_jit, match_data_jit);
225     fprintf(stderr, "\n");
226     }
227   }
228 
229 abort();
230 }
231 #endif  /* SUPPORT_DIFF_FUZZ */
232 #endif  /* SUPPORT_JIT */
233 
234 /* This is the callout function. Its only purpose is to halt matching if there
235 are more than 100 callouts, as one way of stopping too much time being spent on
236 fruitless matches. The callout data is a pointer to the counter. */
237 
callout_function(pcre2_callout_block * cb,void * callout_data)238 static int callout_function(pcre2_callout_block *cb, void *callout_data)
239 {
240 (void)cb;  /* Avoid unused parameter warning */
241 *((uint32_t *)callout_data) += 1;
242 return (*((uint32_t *)callout_data) > 100)? PCRE2_ERROR_CALLOUT : 0;
243 }
244 
245 /* Putting in this apparently unnecessary prototype prevents gcc from giving a
246 "no previous prototype" warning when compiling at high warning level. */
247 
248 int LLVMFuzzerInitialize(int *, char ***);
249 
250 int LLVMFuzzerTestOneInput(unsigned char *, size_t);
251 
LLVMFuzzerInitialize(int * argc,char *** argv)252 int LLVMFuzzerInitialize(int *argc, char ***argv)
253 {
254 int rc;
255 struct rlimit rlim;
256 getrlimit(RLIMIT_STACK, &rlim);
257 rlim.rlim_cur = STACK_SIZE_MB * 1024 * 1024;
258 if (rlim.rlim_cur > rlim.rlim_max)
259   {
260   fprintf(stderr, "Hard stack size limit is too small (needed 8MiB)!\n");
261   _exit(1);
262   }
263 rc = setrlimit(RLIMIT_STACK, &rlim);
264 if (rc != 0)
265   {
266   fprintf(stderr, "Failed to expand stack size\n");
267   _exit(1);
268   }
269 
270 (void)argc;  /* Avoid "unused parameter" warnings */
271 (void)argv;
272 return 0;
273 }
274 
275 /* Here's the driving function. */
276 
LLVMFuzzerTestOneInput(unsigned char * data,size_t size)277 int LLVMFuzzerTestOneInput(unsigned char *data, size_t size)
278 {
279 PCRE2_UCHAR *wdata;
280 PCRE2_UCHAR *newwdata = NULL;
281 uint32_t compile_options;
282 uint32_t match_options;
283 uint64_t random_options;
284 pcre2_match_data *match_data = NULL;
285 #ifdef SUPPORT_JIT
286 pcre2_match_data *match_data_jit = NULL;
287 #endif
288 pcre2_compile_context *compile_context = NULL;
289 pcre2_match_context *match_context = NULL;
290 size_t match_size;
291 int dfa_workspace[DFA_WORKSPACE_COUNT];
292 
293 if (size < sizeof(random_options)) return -1;
294 
295 random_options = *(uint64_t *)(data);
296 data += sizeof(random_options);
297 wdata = (PCRE2_UCHAR *)data;
298 size -= sizeof(random_options);
299 size /= PCRE2_CODE_UNIT_WIDTH / 8;
300 
301 /* PCRE2 compiles quantified groups by replicating them. In certain cases of
302 very large quantifiers this can lead to unacceptably long JIT compile times. To
303 get around this, we scan the data string for large quantifiers that follow a
304 closing parenthesis, and reduce the value of the quantifier to 10, assuming
305 that this will make minimal difference to the detection of bugs.
306 
307 Do the same for quantifiers that follow a closing square bracket, because
308 classes that contain a number of non-ascii characters can take a lot of time
309 when matching.
310 
311 We have to make a copy of the input because oss-fuzz complains if we overwrite
312 the original. Start the scan at the second character so there can be a
313 lookbehind for a backslash, and end it before the end so that the next
314 character can be checked for an opening brace. */
315 
316 if (size > 3)
317   {
318   newwdata = malloc(size * sizeof(PCRE2_UCHAR));
319   memcpy(newwdata, wdata, size * sizeof(PCRE2_UCHAR));
320   wdata = newwdata;
321 
322   for (size_t i = 1; i < size - 2; i++)
323     {
324     size_t j;
325 
326     if ((wdata[i] != ')' && wdata[i] != ']') || wdata[i-1] == '\\' ||
327          wdata[i+1] != '{')
328       continue;
329     i++;  /* Points to '{' */
330 
331     /* Loop for two values a quantifier. Offset i points to brace or comma at the
332     start of the loop.*/
333 
334     for (int ii = 0; ii < 2; ii++)
335       {
336       int q = 0;
337 
338       if (i >= size - 1) goto END_QSCAN;  /* Can happen for , */
339 
340       /* Ignore leading spaces */
341 
342       while (wdata[i+1] == ' ' || wdata[i+1] == '\t')
343         {
344         i++;
345         if (i >= size - 1) goto END_QSCAN;
346         }
347 
348       /* Scan for a number ending in brace or comma in the first iteration,
349       optionally preceded by space. */
350 
351       for (j = i + 1; j < size && j < i + 7; j++)
352         {
353         if (wdata[j] == ' ' || wdata[j] == '\t')
354           {
355           j++;
356           while (j < size && (wdata[j] == ' ' || wdata[j] == '\t')) j++;
357           if (j >= size) goto OUTERLOOP;
358           if (wdata[j] != '}' && wdata[j] != ',') goto OUTERLOOP;
359           }
360         if (wdata[j] == '}' || (ii == 0 && wdata[j] == ',')) break;
361         if (wdata[j] < '0' || wdata[j] > '9')
362           {
363           j--;               /* Ensure this character is checked next. The */
364           goto OUTERLOOP;    /* string might be (e.g.) "){9){234}" */
365           }
366         q = q * 10 + wdata[j] - '0';
367         }
368 
369       if (j >= size) goto END_QSCAN;  /* End of data */
370 
371       /* Hit ',' or '}' or read 6 digits. Six digits is a number > 65536 which is
372       the maximum quantifier. Leave such numbers alone. */
373 
374       if (j >= i + 7 || q > 65535) goto OUTERLOOP;
375 
376       /* Limit the quantifier size to 10 */
377 
378       if (q > 10)
379         {
380 #ifdef STANDALONE
381         printf("Reduced quantifier value %d to 10.\n", q);
382 #endif
383         for (size_t k = i + 1; k < j; k++) wdata[k] = '0';
384         wdata[j - 2] = '1';
385         }
386 
387       /* Advance to end of number and break if reached closing brace (continue
388       after comma, which is only valid in the first time round this loop). */
389 
390       i = j;
391       if (wdata[i] == '}') break;
392       }
393 
394     /* Continue along the data string */
395 
396     OUTERLOOP:
397     i = j;
398     continue;
399     }
400   }
401 END_QSCAN:
402 
403 /* Limiting the length of the subject for matching stops fruitless searches
404 in large trees taking too much time. */
405 
406 match_size = (size > MAX_MATCH_SIZE)? MAX_MATCH_SIZE : size;
407 
408 /* Create a compile context, and set a limit on the size of the compiled
409 pattern. This stops the fuzzer using vast amounts of memory. */
410 
411 compile_context = pcre2_compile_context_create(NULL);
412 if (compile_context == NULL)
413   {
414 #ifdef STANDALONE
415   fprintf(stderr, "** Failed to create compile context block\n");
416 #endif
417   abort();
418   }
419 pcre2_set_max_pattern_compiled_length(compile_context, 10*1024*1024);
420 
421 /* Ensure that all undefined option bits are zero (waste of time trying them)
422 and also that PCRE2_NO_UTF_CHECK is unset, as there is no guarantee that the
423 input is valid UTF. Also unset PCRE2_NEVER_UTF and PCRE2_NEVER_UCP as there is
424 no reason to disallow UTF and UCP. Force PCRE2_NEVER_BACKSLASH_C to be set
425 because \C in random patterns is highly likely to cause a crash. */
426 
427 compile_options = ((random_options >> 32) & ALLOWED_COMPILE_OPTIONS) |
428   PCRE2_NEVER_BACKSLASH_C;
429 match_options = (((uint32_t)random_options) & ALLOWED_MATCH_OPTIONS) |
430   BASE_MATCH_OPTIONS;
431 
432 /* Discard partial matching if PCRE2_ENDANCHORED is set, because they are not
433 allowed together and just give an immediate error return. */
434 
435 if (((compile_options|match_options) & PCRE2_ENDANCHORED) != 0)
436   match_options &= ~(PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT);
437 
438 /* Do the compile with and without the options, and after a successful compile,
439 likewise do the match with and without the options. */
440 
441 for (int i = 0; i < 2; i++)
442   {
443   uint32_t callout_count;
444   int errorcode;
445 #ifdef SUPPORT_JIT
446   int errorcode_jit;
447 #ifdef SUPPORT_DIFF_FUZZ
448   int matches = 0;
449   int matches_jit = 0;
450 #endif
451 #endif
452   PCRE2_SIZE erroroffset;
453   pcre2_code *code;
454 
455 #ifdef STANDALONE
456   printf("\n");
457   print_compile_options(stdout, compile_options);
458 #endif
459 
460   code = pcre2_compile((PCRE2_SPTR)wdata, (PCRE2_SIZE)size, compile_options,
461     &errorcode, &erroroffset, compile_context);
462 
463   /* Compilation succeeded */
464 
465   if (code != NULL)
466     {
467     int j;
468     uint32_t save_match_options = match_options;
469 
470     /* Call JIT compile only if the compiled pattern is not too big. */
471 
472 #ifdef SUPPORT_JIT
473     int jit_ret = -1;
474     if (((struct pcre2_real_code *)code)->blocksize <= JIT_SIZE_LIMIT)
475       {
476 #ifdef STANDALONE
477       printf("Compile succeeded; calling JIT compile\n");
478 #endif
479       jit_ret = pcre2_jit_compile(code, PCRE2_JIT_COMPLETE);
480 #ifdef STANDALONE
481       if (jit_ret < 0) printf("JIT compile error %d\n", jit_ret);
482 #endif
483       }
484     else
485       {
486 #ifdef STANDALONE
487       printf("Not calling JIT: compiled pattern is too long "
488         "(%ld bytes; limit=%d)\n",
489         ((struct pcre2_real_code *)code)->blocksize, JIT_SIZE_LIMIT);
490 #endif
491       }
492 #endif  /* SUPPORT_JIT */
493 
494     /* Create match data and context blocks only when we first need them. Set
495     low match and depth limits to avoid wasting too much searching large
496     pattern trees. Almost all matches are going to fail. */
497 
498     if (match_data == NULL)
499       {
500       match_data = pcre2_match_data_create(32, NULL);
501 #ifdef SUPPORT_JIT
502       match_data_jit = pcre2_match_data_create(32, NULL);
503       if (match_data == NULL || match_data_jit == NULL)
504 #else
505       if (match_data == NULL)
506 #endif
507         {
508 #ifdef STANDALONE
509         fprintf(stderr, "** Failed to create match data block\n");
510 #endif
511         abort();
512         }
513       }
514 
515     if (match_context == NULL)
516       {
517       match_context = pcre2_match_context_create(NULL);
518       if (match_context == NULL)
519         {
520 #ifdef STANDALONE
521         fprintf(stderr, "** Failed to create match context block\n");
522 #endif
523         abort();
524         }
525       (void)pcre2_set_match_limit(match_context, 100);
526       (void)pcre2_set_depth_limit(match_context, 100);
527       (void)pcre2_set_callout(match_context, callout_function, &callout_count);
528       }
529 
530     /* Match twice, with and without options. */
531 
532 #ifdef STANDALONE
533     printf("\n");
534 #endif
535     for (j = 0; j < 2; j++)
536       {
537 #ifdef STANDALONE
538       print_match_options(stdout, match_options);
539 #endif
540 
541       callout_count = 0;
542       errorcode = pcre2_match(code, (PCRE2_SPTR)wdata, (PCRE2_SIZE)match_size, 0,
543         match_options, match_data, match_context);
544 
545 #ifdef STANDALONE
546       if (errorcode >= 0) printf("Match returned %d\n", errorcode); else
547         print_error(stdout, errorcode, "Match failed: error %d: ", errorcode);
548 #endif
549 
550 /* If JIT is enabled, do a JIT match and, if appropriately compiled, compare
551 with the interpreter. */
552 
553 #ifdef SUPPORT_JIT
554       if (jit_ret >= 0)
555         {
556 #ifdef STANDALONE
557         printf("Matching with JIT\n");
558 #endif
559         callout_count = 0;
560         errorcode_jit = pcre2_match(code, (PCRE2_SPTR)wdata, (PCRE2_SIZE)match_size, 0,
561           match_options & ~PCRE2_NO_JIT, match_data_jit, match_context);
562 
563 #ifdef STANDALONE
564         if (errorcode_jit >= 0)
565           printf("Match returned %d\n", errorcode_jit);
566         else
567           print_error(stdout, errorcode_jit, "JIT match failed: error %d: ",
568             errorcode_jit);
569 #else
570         (void)errorcode_jit;   /* Avoid compiler warning */
571 #endif  /* STANDALONE */
572 
573 /* With differential matching enabled, compare with interpreter. */
574 
575 #ifdef SUPPORT_DIFF_FUZZ
576         matches = errorcode;
577         matches_jit = errorcode_jit;
578 
579         if (errorcode_jit != errorcode)
580           {
581           if (!(errorcode < 0 && errorcode_jit < 0) &&
582                 errorcode != PCRE2_ERROR_MATCHLIMIT && errorcode != PCRE2_ERROR_CALLOUT &&
583                 errorcode_jit != PCRE2_ERROR_MATCHLIMIT && errorcode_jit != PCRE2_ERROR_JIT_STACKLIMIT && errorcode_jit != PCRE2_ERROR_CALLOUT)
584             {
585             describe_failure("match errorcode comparison", wdata, size, compile_options, match_options, errorcode, errorcode_jit, matches, matches_jit, match_data, match_data_jit);
586             }
587           }
588         else
589           {
590           for (int index = 0; index < errorcode; index++)
591             {
592             PCRE2_UCHAR *bufferptr, *bufferptr_jit;
593             PCRE2_SIZE bufflen, bufflen_jit;
594 
595             bufferptr = bufferptr_jit = NULL;
596             bufflen = bufflen_jit = 0;
597 
598             errorcode = pcre2_substring_get_bynumber(match_data, (uint32_t) index, &bufferptr, &bufflen);
599             errorcode_jit = pcre2_substring_get_bynumber(match_data_jit, (uint32_t) index, &bufferptr_jit, &bufflen_jit);
600 
601             if (errorcode != errorcode_jit)
602               {
603               describe_failure("match entry errorcode comparison", wdata, size,
604                 compile_options, match_options, errorcode, errorcode_jit,
605                 matches, matches_jit, match_data, match_data_jit);
606               }
607 
608             if (errorcode >= 0)
609               {
610               if (bufflen != bufflen_jit)
611                 {
612                 describe_failure("match entry length comparison", wdata, size,
613                   compile_options, match_options, errorcode, errorcode_jit,
614                   matches, matches_jit, match_data, match_data_jit);
615                 }
616 
617               if (memcmp(bufferptr, bufferptr_jit, bufflen) != 0)
618                 {
619                 describe_failure("match entry content comparison", wdata, size,
620                   compile_options, match_options, errorcode, errorcode_jit,
621                   matches, matches_jit, match_data, match_data_jit);
622                 }
623               }
624 
625               pcre2_substring_free(bufferptr);
626               pcre2_substring_free(bufferptr_jit);
627             }
628           }
629 #endif  /* SUPPORT_DIFF_FUZZ */
630         }
631 #endif  /* SUPPORT_JIT */
632 
633       if (match_options == BASE_MATCH_OPTIONS) break;  /* Don't do same twice */
634       match_options = BASE_MATCH_OPTIONS;              /* For second time */
635       }
636 
637     /* Match with DFA twice, with and without options, but remove options that
638     are not allowed with DFA. */
639 
640     match_options = save_match_options & ~BASE_MATCH_OPTIONS;
641 
642 #ifdef STANDALONE
643     printf("\n");
644 #endif
645 
646     for (j = 0; j < 2; j++)
647       {
648 #ifdef STANDALONE
649       printf("DFA match options %.8x =", match_options);
650       printf("%s%s%s%s%s%s%s%s%s\n",
651         ((match_options & PCRE2_ANCHORED) != 0)? " anchored" : "",
652         ((match_options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
653         ((match_options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
654         ((match_options & PCRE2_NOTBOL) != 0)? " notbol" : "",
655         ((match_options & PCRE2_NOTEMPTY) != 0)? " notempty" : "",
656         ((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? " notempty_atstart" : "",
657         ((match_options & PCRE2_NOTEOL) != 0)? " noteol" : "",
658         ((match_options & PCRE2_PARTIAL_HARD) != 0)? " partial_hard" : "",
659         ((match_options & PCRE2_PARTIAL_SOFT) != 0)? " partial_soft" : "");
660 #endif
661 
662       callout_count = 0;
663       errorcode = pcre2_dfa_match(code, (PCRE2_SPTR)wdata,
664         (PCRE2_SIZE)match_size, 0, match_options, match_data,
665         match_context, dfa_workspace, DFA_WORKSPACE_COUNT);
666 
667 #ifdef STANDALONE
668       if (errorcode >= 0)
669         printf("Match returned %d\n", errorcode);
670       else
671         print_error(stdout, errorcode, "DFA match failed: error %d: ", errorcode);
672 #endif
673 
674       if (match_options == 0) break;  /* No point doing same twice */
675       match_options = 0;              /* For second time */
676       }
677 
678     match_options = save_match_options;  /* Reset for the second compile */
679     pcre2_code_free(code);
680     }
681 
682   /* Compilation failed */
683 
684   else
685     {
686 #ifdef STANDALONE
687     print_error(stdout, errorcode, "Error %d at offset %lu: ", errorcode,
688       erroroffset);
689 #else
690     if (errorcode == PCRE2_ERROR_INTERNAL) abort();
691 #endif
692     }
693 
694   if (compile_options == PCRE2_NEVER_BACKSLASH_C) break;  /* Avoid same twice */
695   compile_options = PCRE2_NEVER_BACKSLASH_C;              /* For second time */
696   }
697 
698 /* Tidy up before exiting */
699 
700 if (match_data != NULL) pcre2_match_data_free(match_data);
701 #ifdef SUPPORT_JIT
702 if (match_data_jit != NULL) pcre2_match_data_free(match_data_jit);
703 free(newwdata);
704 #endif
705 if (match_context != NULL) pcre2_match_context_free(match_context);
706 if (compile_context != NULL) pcre2_compile_context_free(compile_context);
707 return 0;
708 }
709 
710 
711 /* Optional main program.  */
712 
713 #ifdef STANDALONE
/* Standalone entry point. Each argument is either the name of a file whose
contents are passed to the fuzzer driver, or, if it starts with '=', a literal
string (the text after the '=') that is passed directly. Progress and problems
are reported on stdout; a problem with one argument does not stop the others
from being processed.

Returns:  0 always
*/

int main(int argc, char **argv)
{
LLVMFuzzerInitialize(&argc, &argv);

if (argc < 2)
  {
  printf("** No arguments given\n");
  return 0;
  }

for (int i = 1; i < argc; i++)
  {
  size_t filelen;
  size_t readsize;
  long endpos;
  unsigned char *buffer;
  FILE *f;

  /* Handle a literal string. Copy to an exact size buffer so that checks for
  overrunning work. At least one byte is requested because malloc(0) is
  allowed to return NULL, which would be mistaken for a failure. */

  if (argv[i][0] == '=')
    {
    readsize = strlen(argv[i]) - 1;
    printf("------ <Literal> ------\n");
    printf("Length = %lu\n", (unsigned long)readsize);
    printf("%.*s\n", (int)readsize, argv[i]+1);
    buffer = (unsigned char *)malloc((readsize == 0)? 1 : readsize);
    if (buffer == NULL)
      printf("** Failed to allocate %lu bytes of memory\n",
        (unsigned long)readsize);
    else
      {
      memcpy(buffer, argv[i]+1, readsize);
      LLVMFuzzerTestOneInput(buffer, readsize);
      free(buffer);
      }
    continue;
    }

  /* Handle a string given in a file */

  f = fopen(argv[i], "rb");
  if (f == NULL)
    {
    printf("** Failed to open %s: %s\n", argv[i], strerror(errno));
    continue;
    }

  printf("------ %s ------\n", argv[i]);

  /* Find the file size by seeking to the end. ftell() returns -1 on failure,
  which must not be turned into a huge unsigned length. */

  if (fseek(f, 0, SEEK_END) != 0 || (endpos = ftell(f)) < 0 ||
      fseek(f, 0, SEEK_SET) != 0)
    {
    printf("** Failed to determine the size of %s: %s\n", argv[i],
      strerror(errno));
    fclose(f);
    continue;
    }
  filelen = (size_t)endpos;

  buffer = (unsigned char *)malloc((filelen == 0)? 1 : filelen);
  if (buffer == NULL)
    {
    printf("** Failed to allocate %lu bytes of memory\n",
      (unsigned long)filelen);
    fclose(f);
    continue;
    }

  readsize = fread(buffer, 1, filelen, f);
  fclose(f);

  if (readsize != filelen)
    printf("** File size is %lu but fread() returned %lu\n",
      (unsigned long)filelen, (unsigned long)readsize);
  else
    {
    printf("Length = %lu\n", (unsigned long)filelen);
    LLVMFuzzerTestOneInput(buffer, filelen);
    }
  free(buffer);
  }

return 0;
}
790 #endif  /* STANDALONE */
791 
792 /* End */
793