1 /***************************************************************************
2 Fuzzer driver for PCRE2. Given an arbitrary string of bytes and a length, it
3 tries to compile and match it, deriving options from the string itself. If
4 STANDALONE is defined, a main program that calls the driver with the contents
5 of specified files is compiled, and commentary on what is happening is output.
If an argument starts with '=' the rest of it is taken as a literal string
7 rather than a file name. This allows easy testing of short strings.
8
9 Written by Philip Hazel, October 2016
10 Updated February 2024 (Addison Crump added 16-bit/32-bit and JIT support)
11 Further updates March/April/May 2024 by PH
12 ***************************************************************************/
13
14 #include <errno.h>
15 #include <stdarg.h>
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <string.h>
19 #include <unistd.h>
20
21 /* stack size adjustment */
22 #include <sys/time.h>
23 #include <sys/resource.h>
24
/* Soft stack limit, in MiB, that LLVMFuzzerInitialize() requests through
setrlimit(RLIMIT_STACK). */
#define STACK_SIZE_MB 256
/* A compiled pattern whose block size exceeds this many bytes is not handed to
pcre2_jit_compile(). */
#define JIT_SIZE_LIMIT (200 * 1024)

/* Build for the 8-bit library unless the build system selected another code
unit width. */
#ifndef PCRE2_CODE_UNIT_WIDTH
#define PCRE2_CODE_UNIT_WIDTH 8
#endif
31
32 #include "config.h"
33 #include "pcre2.h"
34 #include "pcre2_internal.h"
35
/* Upper bound, in code units, on the subject length passed to the matchers. */
#define MAX_MATCH_SIZE 1000

/* Number of ints in the workspace vector given to pcre2_dfa_match(). */
#define DFA_WORKSPACE_COUNT 100

/* When adding new compile or match options, remember to update the functions
below that output them. */

/* Compile option bits that may be taken from the fuzzer-supplied option word;
all other bits are masked off before compiling. */
#define ALLOWED_COMPILE_OPTIONS \
(PCRE2_ANCHORED|PCRE2_ALLOW_EMPTY_CLASS|PCRE2_ALT_BSUX|PCRE2_ALT_CIRCUMFLEX| \
PCRE2_ALT_VERBNAMES|PCRE2_AUTO_CALLOUT|PCRE2_CASELESS|PCRE2_DOLLAR_ENDONLY| \
PCRE2_DOTALL|PCRE2_DUPNAMES|PCRE2_ENDANCHORED|PCRE2_EXTENDED| \
PCRE2_EXTENDED_MORE|PCRE2_FIRSTLINE| \
PCRE2_MATCH_UNSET_BACKREF|PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C| \
PCRE2_NO_AUTO_CAPTURE| \
PCRE2_NO_AUTO_POSSESS|PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_NO_START_OPTIMIZE| \
PCRE2_UCP|PCRE2_UNGREEDY|PCRE2_USE_OFFSET_LIMIT| \
PCRE2_UTF)

/* Match option bits that may be taken from the fuzzer-supplied option word. */
#define ALLOWED_MATCH_OPTIONS \
(PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_HARD| \
PCRE2_PARTIAL_SOFT)

/* Match options that are always set for the interpreter match; the JIT match
clears PCRE2_NO_JIT from this set before calling pcre2_match(). */
#define BASE_MATCH_OPTIONS \
(PCRE2_NO_JIT|PCRE2_DISABLE_RECURSELOOP_CHECK)
61
62
63 #if defined(SUPPORT_DIFF_FUZZ) || defined(STANDALONE)
print_compile_options(FILE * stream,uint32_t compile_options)64 static void print_compile_options(FILE *stream, uint32_t compile_options)
65 {
66 fprintf(stream, "Compile options %s%.8x =",
67 (compile_options == PCRE2_NEVER_BACKSLASH_C)? "(base) " : "",
68 compile_options);
69
70 fprintf(stream, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
71 ((compile_options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "",
72 ((compile_options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "",
73 ((compile_options & PCRE2_ALT_VERBNAMES) != 0)? " alt_verbnames" : "",
74 ((compile_options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? " allow_empty_class" : "",
75 ((compile_options & PCRE2_ANCHORED) != 0)? " anchored" : "",
76 ((compile_options & PCRE2_AUTO_CALLOUT) != 0)? " auto_callout" : "",
77 ((compile_options & PCRE2_CASELESS) != 0)? " caseless" : "",
78 ((compile_options & PCRE2_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
79 ((compile_options & PCRE2_DOTALL) != 0)? " dotall" : "",
80 ((compile_options & PCRE2_DUPNAMES) != 0)? " dupnames" : "",
81 ((compile_options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
82 ((compile_options & PCRE2_EXTENDED) != 0)? " extended" : "",
83 ((compile_options & PCRE2_EXTENDED_MORE) != 0)? " extended_more" : "",
84 ((compile_options & PCRE2_FIRSTLINE) != 0)? " firstline" : "",
85 ((compile_options & PCRE2_MATCH_UNSET_BACKREF) != 0)? " match_unset_backref" : "",
86 ((compile_options & PCRE2_MULTILINE) != 0)? " multiline" : "",
87 ((compile_options & PCRE2_NEVER_BACKSLASH_C) != 0)? " never_backslash_c" : "",
88 ((compile_options & PCRE2_NEVER_UCP) != 0)? " never_ucp" : "",
89 ((compile_options & PCRE2_NEVER_UTF) != 0)? " never_utf" : "",
90 ((compile_options & PCRE2_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
91 ((compile_options & PCRE2_NO_AUTO_POSSESS) != 0)? " no_auto_possess" : "",
92 ((compile_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? " no_dotstar_anchor" : "",
93 ((compile_options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
94 ((compile_options & PCRE2_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
95 ((compile_options & PCRE2_UCP) != 0)? " ucp" : "",
96 ((compile_options & PCRE2_UNGREEDY) != 0)? " ungreedy" : "",
97 ((compile_options & PCRE2_USE_OFFSET_LIMIT) != 0)? " use_offset_limit" : "",
98 ((compile_options & PCRE2_UTF) != 0)? " utf" : "");
99 }
100
print_match_options(FILE * stream,uint32_t match_options)101 static void print_match_options(FILE *stream, uint32_t match_options)
102 {
103 fprintf(stream, "Match options %s%.8x =",
104 (match_options == BASE_MATCH_OPTIONS)? "(base) " : "", match_options);
105
106 fprintf(stream, "%s%s%s%s%s%s%s%s%s%s%s\n",
107 ((match_options & PCRE2_ANCHORED) != 0)? " anchored" : "",
108 ((match_options & PCRE2_DISABLE_RECURSELOOP_CHECK) != 0)? " disable_recurseloop_check" : "",
109 ((match_options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
110 ((match_options & PCRE2_NO_JIT) != 0)? " no_jit" : "",
111 ((match_options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
112 ((match_options & PCRE2_NOTBOL) != 0)? " notbol" : "",
113 ((match_options & PCRE2_NOTEMPTY) != 0)? " notempty" : "",
114 ((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? " notempty_atstart" : "",
115 ((match_options & PCRE2_NOTEOL) != 0)? " noteol" : "",
116 ((match_options & PCRE2_PARTIAL_HARD) != 0)? " partial_hard" : "",
117 ((match_options & PCRE2_PARTIAL_SOFT) != 0)? " partial_soft" : "");
118 }
119
120
121 /* This function can print an error message at all code unit widths. */
122
print_error(FILE * f,int errorcode,const char * text,...)123 static void print_error(FILE *f, int errorcode, const char *text, ...)
124 {
125 PCRE2_UCHAR buffer[256];
126 PCRE2_UCHAR *p = buffer;
127 va_list ap;
128 va_start(ap, text);
129 vfprintf(f, text, ap);
130 va_end(ap);
131 pcre2_get_error_message(errorcode, buffer, 256);
132 while (*p != 0) fprintf(f, "%c", *p++);
133 printf("\n");
134 }
#endif /* defined(SUPPORT_DIFF_FUZZ) || defined(STANDALONE) */
136
137
138 #ifdef SUPPORT_JIT
139 #ifdef SUPPORT_DIFF_FUZZ
/* Print, hex encoded, each of the first "count" captured substrings held in a
match data block. A substring that cannot be extracted is reported with its
PCRE2 error message instead.

Arguments:
  stream      where to write
  count       number of substrings to dump (indices 0 .. count-1)
  match_data  match data block to read the substrings from
*/
static void dump_matches(FILE *stream, int count, pcre2_match_data *match_data)
{
  for (int index = 0; index < count; index++)
  {
    PCRE2_UCHAR *bufferptr = NULL;
    PCRE2_SIZE bufflen = 0;
    int errorcode = pcre2_substring_get_bynumber(match_data, (uint32_t)index,
      &bufferptr, &bufflen);

    if (errorcode < 0)
    {
      print_error(stream, errorcode, "Match %d failed: ", index);
      continue;
    }

    fprintf(stream, "Match %d (hex encoded): ", index);
    for (PCRE2_SIZE i = 0; i < bufflen; i++)
    {
      fprintf(stream, "%02x", (unsigned int)bufferptr[i]);
    }
    fprintf(stream, "\n");

    /* The substring is newly allocated by pcre2_substring_get_bynumber() and
    must be released by the caller. */
    pcre2_substring_free(bufferptr);
  }
}
167
168 /* This function describes the current test case being evaluated, then aborts */
169
/* Report, on stderr, everything known about a mismatch between the interpreter
and JIT results, and then abort. This function does not return.

Arguments:
  task             short description of the step that detected the problem
  data, size       the pattern/subject data, dumped as hex
  compile_options  compile options in force
  match_options    match options in force
  errorcode        result code of the non-JIT operation
  errorcode_jit    result code of the JIT operation
  matches          non-JIT match count (negative if it failed)
  matches_jit      JIT match count (negative if it failed)
  match_data       non-JIT match data, or NULL
  match_data_jit   JIT match data, or NULL
*/
static void describe_failure(
  const char *task,
  const unsigned char *data,
  size_t size,
  uint32_t compile_options,
  uint32_t match_options,
  int errorcode,
  int errorcode_jit,
  int matches,
  int matches_jit,
  pcre2_match_data *match_data,
  pcre2_match_data *match_data_jit
  )
{
  const unsigned char *byte = data;
  const unsigned char *end = data + size;

  fprintf(stderr, "Encountered failure while performing %s; context:\n", task);

  /* Hex dump of the input */

  fputs("Pattern/sample string (hex encoded): ", stderr);
  while (byte < end) fprintf(stderr, "%02x", *byte++);
  fputc('\n', stderr);

  print_compile_options(stderr, compile_options);
  print_match_options(stderr, match_options);

  /* Interpreter outcome */

  if (errorcode < 0)
    print_error(stderr, errorcode, "Non-JIT'd operation emitted an error: ");

  if (matches >= 0)
  {
    fputs("Non-JIT'd operation did not emit an error.\n", stderr);
    if (match_data != NULL)
    {
      fprintf(stderr, "%d matches discovered by non-JIT'd regex:\n", matches);
      dump_matches(stderr, matches, match_data);
      fputc('\n', stderr);
    }
  }

  /* JIT outcome */

  if (errorcode_jit < 0)
    print_error(stderr, errorcode_jit, "JIT'd operation emitted error %d:",
      errorcode_jit);

  if (matches_jit >= 0)
  {
    fputs("JIT'd operation did not emit an error.\n", stderr);
    if (match_data_jit != NULL)
    {
      fprintf(stderr, "%d matches discovered by JIT'd regex:\n", matches_jit);
      dump_matches(stderr, matches_jit, match_data_jit);
      fputc('\n', stderr);
    }
  }

  abort();
}
231 #endif /* SUPPORT_DIFF_FUZZ */
232 #endif /* SUPPORT_JIT */
233
234 /* This is the callout function. Its only purpose is to halt matching if there
235 are more than 100 callouts, as one way of stopping too much time being spent on
236 fruitless matches. The callout data is a pointer to the counter. */
237
/* Callout handler: bump the counter that callout_data points to and ask the
matcher to stop with PCRE2_ERROR_CALLOUT once it has gone past 100.

Arguments:
  cb            callout block (not used)
  callout_data  pointer to a uint32_t callout counter

Returns:        0 to continue matching, or PCRE2_ERROR_CALLOUT
*/
static int callout_function(pcre2_callout_block *cb, void *callout_data)
{
  uint32_t *counter = (uint32_t *)callout_data;

  (void)cb;   /* Avoid unused parameter warning */

  *counter += 1;
  if (*counter > 100) return PCRE2_ERROR_CALLOUT;
  return 0;
}
244
245 /* Putting in this apparently unnecessary prototype prevents gcc from giving a
246 "no previous prototype" warning when compiling at high warning level. */
247
248 int LLVMFuzzerInitialize(int *, char ***);
249
250 int LLVMFuzzerTestOneInput(unsigned char *, size_t);
251
LLVMFuzzerInitialize(int * argc,char *** argv)252 int LLVMFuzzerInitialize(int *argc, char ***argv)
253 {
254 int rc;
255 struct rlimit rlim;
256 getrlimit(RLIMIT_STACK, &rlim);
257 rlim.rlim_cur = STACK_SIZE_MB * 1024 * 1024;
258 if (rlim.rlim_cur > rlim.rlim_max)
259 {
260 fprintf(stderr, "Hard stack size limit is too small (needed 8MiB)!\n");
261 _exit(1);
262 }
263 rc = setrlimit(RLIMIT_STACK, &rlim);
264 if (rc != 0)
265 {
266 fprintf(stderr, "Failed to expand stack size\n");
267 _exit(1);
268 }
269
270 (void)argc; /* Avoid "unused parameter" warnings */
271 (void)argv;
272 return 0;
273 }
274
275 /* Here's the driving function. */
276
LLVMFuzzerTestOneInput(unsigned char * data,size_t size)277 int LLVMFuzzerTestOneInput(unsigned char *data, size_t size)
278 {
279 PCRE2_UCHAR *wdata;
280 PCRE2_UCHAR *newwdata = NULL;
281 uint32_t compile_options;
282 uint32_t match_options;
283 uint64_t random_options;
284 pcre2_match_data *match_data = NULL;
285 #ifdef SUPPORT_JIT
286 pcre2_match_data *match_data_jit = NULL;
287 #endif
288 pcre2_compile_context *compile_context = NULL;
289 pcre2_match_context *match_context = NULL;
290 size_t match_size;
291 int dfa_workspace[DFA_WORKSPACE_COUNT];
292
293 if (size < sizeof(random_options)) return -1;
294
295 random_options = *(uint64_t *)(data);
296 data += sizeof(random_options);
297 wdata = (PCRE2_UCHAR *)data;
298 size -= sizeof(random_options);
299 size /= PCRE2_CODE_UNIT_WIDTH / 8;
300
301 /* PCRE2 compiles quantified groups by replicating them. In certain cases of
302 very large quantifiers this can lead to unacceptably long JIT compile times. To
303 get around this, we scan the data string for large quantifiers that follow a
304 closing parenthesis, and reduce the value of the quantifier to 10, assuming
305 that this will make minimal difference to the detection of bugs.
306
307 Do the same for quantifiers that follow a closing square bracket, because
308 classes that contain a number of non-ascii characters can take a lot of time
309 when matching.
310
311 We have to make a copy of the input because oss-fuzz complains if we overwrite
312 the original. Start the scan at the second character so there can be a
313 lookbehind for a backslash, and end it before the end so that the next
314 character can be checked for an opening brace. */
315
316 if (size > 3)
317 {
318 newwdata = malloc(size * sizeof(PCRE2_UCHAR));
319 memcpy(newwdata, wdata, size * sizeof(PCRE2_UCHAR));
320 wdata = newwdata;
321
322 for (size_t i = 1; i < size - 2; i++)
323 {
324 size_t j;
325
326 if ((wdata[i] != ')' && wdata[i] != ']') || wdata[i-1] == '\\' ||
327 wdata[i+1] != '{')
328 continue;
329 i++; /* Points to '{' */
330
331 /* Loop for two values a quantifier. Offset i points to brace or comma at the
332 start of the loop.*/
333
334 for (int ii = 0; ii < 2; ii++)
335 {
336 int q = 0;
337
338 if (i >= size - 1) goto END_QSCAN; /* Can happen for , */
339
340 /* Ignore leading spaces */
341
342 while (wdata[i+1] == ' ' || wdata[i+1] == '\t')
343 {
344 i++;
345 if (i >= size - 1) goto END_QSCAN;
346 }
347
348 /* Scan for a number ending in brace or comma in the first iteration,
349 optionally preceded by space. */
350
351 for (j = i + 1; j < size && j < i + 7; j++)
352 {
353 if (wdata[j] == ' ' || wdata[j] == '\t')
354 {
355 j++;
356 while (j < size && (wdata[j] == ' ' || wdata[j] == '\t')) j++;
357 if (j >= size) goto OUTERLOOP;
358 if (wdata[j] != '}' && wdata[j] != ',') goto OUTERLOOP;
359 }
360 if (wdata[j] == '}' || (ii == 0 && wdata[j] == ',')) break;
361 if (wdata[j] < '0' || wdata[j] > '9')
362 {
363 j--; /* Ensure this character is checked next. The */
364 goto OUTERLOOP; /* string might be (e.g.) "){9){234}" */
365 }
366 q = q * 10 + wdata[j] - '0';
367 }
368
369 if (j >= size) goto END_QSCAN; /* End of data */
370
371 /* Hit ',' or '}' or read 6 digits. Six digits is a number > 65536 which is
372 the maximum quantifier. Leave such numbers alone. */
373
374 if (j >= i + 7 || q > 65535) goto OUTERLOOP;
375
376 /* Limit the quantifier size to 10 */
377
378 if (q > 10)
379 {
380 #ifdef STANDALONE
381 printf("Reduced quantifier value %d to 10.\n", q);
382 #endif
383 for (size_t k = i + 1; k < j; k++) wdata[k] = '0';
384 wdata[j - 2] = '1';
385 }
386
387 /* Advance to end of number and break if reached closing brace (continue
388 after comma, which is only valid in the first time round this loop). */
389
390 i = j;
391 if (wdata[i] == '}') break;
392 }
393
394 /* Continue along the data string */
395
396 OUTERLOOP:
397 i = j;
398 continue;
399 }
400 }
401 END_QSCAN:
402
403 /* Limiting the length of the subject for matching stops fruitless searches
404 in large trees taking too much time. */
405
406 match_size = (size > MAX_MATCH_SIZE)? MAX_MATCH_SIZE : size;
407
408 /* Create a compile context, and set a limit on the size of the compiled
409 pattern. This stops the fuzzer using vast amounts of memory. */
410
411 compile_context = pcre2_compile_context_create(NULL);
412 if (compile_context == NULL)
413 {
414 #ifdef STANDALONE
415 fprintf(stderr, "** Failed to create compile context block\n");
416 #endif
417 abort();
418 }
419 pcre2_set_max_pattern_compiled_length(compile_context, 10*1024*1024);
420
421 /* Ensure that all undefined option bits are zero (waste of time trying them)
422 and also that PCRE2_NO_UTF_CHECK is unset, as there is no guarantee that the
423 input is valid UTF. Also unset PCRE2_NEVER_UTF and PCRE2_NEVER_UCP as there is
424 no reason to disallow UTF and UCP. Force PCRE2_NEVER_BACKSLASH_C to be set
425 because \C in random patterns is highly likely to cause a crash. */
426
427 compile_options = ((random_options >> 32) & ALLOWED_COMPILE_OPTIONS) |
428 PCRE2_NEVER_BACKSLASH_C;
429 match_options = (((uint32_t)random_options) & ALLOWED_MATCH_OPTIONS) |
430 BASE_MATCH_OPTIONS;
431
432 /* Discard partial matching if PCRE2_ENDANCHORED is set, because they are not
433 allowed together and just give an immediate error return. */
434
435 if (((compile_options|match_options) & PCRE2_ENDANCHORED) != 0)
436 match_options &= ~(PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT);
437
438 /* Do the compile with and without the options, and after a successful compile,
439 likewise do the match with and without the options. */
440
441 for (int i = 0; i < 2; i++)
442 {
443 uint32_t callout_count;
444 int errorcode;
445 #ifdef SUPPORT_JIT
446 int errorcode_jit;
447 #ifdef SUPPORT_DIFF_FUZZ
448 int matches = 0;
449 int matches_jit = 0;
450 #endif
451 #endif
452 PCRE2_SIZE erroroffset;
453 pcre2_code *code;
454
455 #ifdef STANDALONE
456 printf("\n");
457 print_compile_options(stdout, compile_options);
458 #endif
459
460 code = pcre2_compile((PCRE2_SPTR)wdata, (PCRE2_SIZE)size, compile_options,
461 &errorcode, &erroroffset, compile_context);
462
463 /* Compilation succeeded */
464
465 if (code != NULL)
466 {
467 int j;
468 uint32_t save_match_options = match_options;
469
470 /* Call JIT compile only if the compiled pattern is not too big. */
471
472 #ifdef SUPPORT_JIT
473 int jit_ret = -1;
474 if (((struct pcre2_real_code *)code)->blocksize <= JIT_SIZE_LIMIT)
475 {
476 #ifdef STANDALONE
477 printf("Compile succeeded; calling JIT compile\n");
478 #endif
479 jit_ret = pcre2_jit_compile(code, PCRE2_JIT_COMPLETE);
480 #ifdef STANDALONE
481 if (jit_ret < 0) printf("JIT compile error %d\n", jit_ret);
482 #endif
483 }
484 else
485 {
486 #ifdef STANDALONE
487 printf("Not calling JIT: compiled pattern is too long "
488 "(%ld bytes; limit=%d)\n",
489 ((struct pcre2_real_code *)code)->blocksize, JIT_SIZE_LIMIT);
490 #endif
491 }
492 #endif /* SUPPORT_JIT */
493
494 /* Create match data and context blocks only when we first need them. Set
495 low match and depth limits to avoid wasting too much searching large
496 pattern trees. Almost all matches are going to fail. */
497
498 if (match_data == NULL)
499 {
500 match_data = pcre2_match_data_create(32, NULL);
501 #ifdef SUPPORT_JIT
502 match_data_jit = pcre2_match_data_create(32, NULL);
503 if (match_data == NULL || match_data_jit == NULL)
504 #else
505 if (match_data == NULL)
506 #endif
507 {
508 #ifdef STANDALONE
509 fprintf(stderr, "** Failed to create match data block\n");
510 #endif
511 abort();
512 }
513 }
514
515 if (match_context == NULL)
516 {
517 match_context = pcre2_match_context_create(NULL);
518 if (match_context == NULL)
519 {
520 #ifdef STANDALONE
521 fprintf(stderr, "** Failed to create match context block\n");
522 #endif
523 abort();
524 }
525 (void)pcre2_set_match_limit(match_context, 100);
526 (void)pcre2_set_depth_limit(match_context, 100);
527 (void)pcre2_set_callout(match_context, callout_function, &callout_count);
528 }
529
530 /* Match twice, with and without options. */
531
532 #ifdef STANDALONE
533 printf("\n");
534 #endif
535 for (j = 0; j < 2; j++)
536 {
537 #ifdef STANDALONE
538 print_match_options(stdout, match_options);
539 #endif
540
541 callout_count = 0;
542 errorcode = pcre2_match(code, (PCRE2_SPTR)wdata, (PCRE2_SIZE)match_size, 0,
543 match_options, match_data, match_context);
544
545 #ifdef STANDALONE
546 if (errorcode >= 0) printf("Match returned %d\n", errorcode); else
547 print_error(stdout, errorcode, "Match failed: error %d: ", errorcode);
548 #endif
549
550 /* If JIT is enabled, do a JIT match and, if appropriately compiled, compare
551 with the interpreter. */
552
553 #ifdef SUPPORT_JIT
554 if (jit_ret >= 0)
555 {
556 #ifdef STANDALONE
557 printf("Matching with JIT\n");
558 #endif
559 callout_count = 0;
560 errorcode_jit = pcre2_match(code, (PCRE2_SPTR)wdata, (PCRE2_SIZE)match_size, 0,
561 match_options & ~PCRE2_NO_JIT, match_data_jit, match_context);
562
563 #ifdef STANDALONE
564 if (errorcode_jit >= 0)
565 printf("Match returned %d\n", errorcode_jit);
566 else
567 print_error(stdout, errorcode_jit, "JIT match failed: error %d: ",
568 errorcode_jit);
569 #else
570 (void)errorcode_jit; /* Avoid compiler warning */
571 #endif /* STANDALONE */
572
573 /* With differential matching enabled, compare with interpreter. */
574
575 #ifdef SUPPORT_DIFF_FUZZ
576 matches = errorcode;
577 matches_jit = errorcode_jit;
578
579 if (errorcode_jit != errorcode)
580 {
581 if (!(errorcode < 0 && errorcode_jit < 0) &&
582 errorcode != PCRE2_ERROR_MATCHLIMIT && errorcode != PCRE2_ERROR_CALLOUT &&
583 errorcode_jit != PCRE2_ERROR_MATCHLIMIT && errorcode_jit != PCRE2_ERROR_JIT_STACKLIMIT && errorcode_jit != PCRE2_ERROR_CALLOUT)
584 {
585 describe_failure("match errorcode comparison", wdata, size, compile_options, match_options, errorcode, errorcode_jit, matches, matches_jit, match_data, match_data_jit);
586 }
587 }
588 else
589 {
590 for (int index = 0; index < errorcode; index++)
591 {
592 PCRE2_UCHAR *bufferptr, *bufferptr_jit;
593 PCRE2_SIZE bufflen, bufflen_jit;
594
595 bufferptr = bufferptr_jit = NULL;
596 bufflen = bufflen_jit = 0;
597
598 errorcode = pcre2_substring_get_bynumber(match_data, (uint32_t) index, &bufferptr, &bufflen);
599 errorcode_jit = pcre2_substring_get_bynumber(match_data_jit, (uint32_t) index, &bufferptr_jit, &bufflen_jit);
600
601 if (errorcode != errorcode_jit)
602 {
603 describe_failure("match entry errorcode comparison", wdata, size,
604 compile_options, match_options, errorcode, errorcode_jit,
605 matches, matches_jit, match_data, match_data_jit);
606 }
607
608 if (errorcode >= 0)
609 {
610 if (bufflen != bufflen_jit)
611 {
612 describe_failure("match entry length comparison", wdata, size,
613 compile_options, match_options, errorcode, errorcode_jit,
614 matches, matches_jit, match_data, match_data_jit);
615 }
616
617 if (memcmp(bufferptr, bufferptr_jit, bufflen) != 0)
618 {
619 describe_failure("match entry content comparison", wdata, size,
620 compile_options, match_options, errorcode, errorcode_jit,
621 matches, matches_jit, match_data, match_data_jit);
622 }
623 }
624
625 pcre2_substring_free(bufferptr);
626 pcre2_substring_free(bufferptr_jit);
627 }
628 }
629 #endif /* SUPPORT_DIFF_FUZZ */
630 }
631 #endif /* SUPPORT_JIT */
632
633 if (match_options == BASE_MATCH_OPTIONS) break; /* Don't do same twice */
634 match_options = BASE_MATCH_OPTIONS; /* For second time */
635 }
636
637 /* Match with DFA twice, with and without options, but remove options that
638 are not allowed with DFA. */
639
640 match_options = save_match_options & ~BASE_MATCH_OPTIONS;
641
642 #ifdef STANDALONE
643 printf("\n");
644 #endif
645
646 for (j = 0; j < 2; j++)
647 {
648 #ifdef STANDALONE
649 printf("DFA match options %.8x =", match_options);
650 printf("%s%s%s%s%s%s%s%s%s\n",
651 ((match_options & PCRE2_ANCHORED) != 0)? " anchored" : "",
652 ((match_options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
653 ((match_options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
654 ((match_options & PCRE2_NOTBOL) != 0)? " notbol" : "",
655 ((match_options & PCRE2_NOTEMPTY) != 0)? " notempty" : "",
656 ((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? " notempty_atstart" : "",
657 ((match_options & PCRE2_NOTEOL) != 0)? " noteol" : "",
658 ((match_options & PCRE2_PARTIAL_HARD) != 0)? " partial_hard" : "",
659 ((match_options & PCRE2_PARTIAL_SOFT) != 0)? " partial_soft" : "");
660 #endif
661
662 callout_count = 0;
663 errorcode = pcre2_dfa_match(code, (PCRE2_SPTR)wdata,
664 (PCRE2_SIZE)match_size, 0, match_options, match_data,
665 match_context, dfa_workspace, DFA_WORKSPACE_COUNT);
666
667 #ifdef STANDALONE
668 if (errorcode >= 0)
669 printf("Match returned %d\n", errorcode);
670 else
671 print_error(stdout, errorcode, "DFA match failed: error %d: ", errorcode);
672 #endif
673
674 if (match_options == 0) break; /* No point doing same twice */
675 match_options = 0; /* For second time */
676 }
677
678 match_options = save_match_options; /* Reset for the second compile */
679 pcre2_code_free(code);
680 }
681
682 /* Compilation failed */
683
684 else
685 {
686 #ifdef STANDALONE
687 print_error(stdout, errorcode, "Error %d at offset %lu: ", errorcode,
688 erroroffset);
689 #else
690 if (errorcode == PCRE2_ERROR_INTERNAL) abort();
691 #endif
692 }
693
694 if (compile_options == PCRE2_NEVER_BACKSLASH_C) break; /* Avoid same twice */
695 compile_options = PCRE2_NEVER_BACKSLASH_C; /* For second time */
696 }
697
698 /* Tidy up before exiting */
699
700 if (match_data != NULL) pcre2_match_data_free(match_data);
701 #ifdef SUPPORT_JIT
702 if (match_data_jit != NULL) pcre2_match_data_free(match_data_jit);
703 free(newwdata);
704 #endif
705 if (match_context != NULL) pcre2_match_context_free(match_context);
706 if (compile_context != NULL) pcre2_compile_context_free(compile_context);
707 return 0;
708 }
709
710
711 /* Optional main program. */
712
713 #ifdef STANDALONE
/* Standalone driver: run the fuzzer entry point once for each command line
argument. An argument starting with '=' supplies the input literally (the text
after the '='); any other argument names a file whose contents are the input.
Problems with one argument are reported on stdout and the remaining arguments
are still processed.

Returns: 0
*/
int main(int argc, char **argv)
{
  LLVMFuzzerInitialize(&argc, &argv);

  if (argc < 2)
  {
    printf("** No arguments given\n");
    return 0;
  }

  for (int i = 1; i < argc; i++)
  {
    size_t filelen;
    size_t readsize;
    long endpos;
    unsigned char *buffer;
    FILE *f;

    /* Handle a literal string. Copy to an exact size buffer so that checks for
    overrunning work. A zero-length request is rounded up to one byte because
    malloc(0) is allowed to return NULL, which is not a failure. */

    if (argv[i][0] == '=')
    {
      readsize = strlen(argv[i]) - 1;
      printf("------ <Literal> ------\n");
      printf("Length = %zu\n", readsize);
      printf("%.*s\n", (int)readsize, argv[i]+1);
      buffer = (unsigned char *)malloc((readsize > 0)? readsize : 1);
      if (buffer == NULL)
        printf("** Failed to allocate %zu bytes of memory\n", readsize);
      else
      {
        memcpy(buffer, argv[i]+1, readsize);
        LLVMFuzzerTestOneInput(buffer, readsize);
        free(buffer);
      }
      continue;
    }

    /* Handle a string given in a file */

    f = fopen(argv[i], "rb");
    if (f == NULL)
    {
      printf("** Failed to open %s: %s\n", argv[i], strerror(errno));
      continue;
    }

    printf("------ %s ------\n", argv[i]);

    /* Find the file size. ftell() returns -1 on failure, which must not be
    converted into a huge unsigned length. */

    if (fseek(f, 0, SEEK_END) != 0 || (endpos = ftell(f)) < 0 ||
        fseek(f, 0, SEEK_SET) != 0)
    {
      printf("** Failed to determine the size of %s: %s\n", argv[i],
        strerror(errno));
      fclose(f);
      continue;
    }
    filelen = (size_t)endpos;

    buffer = (unsigned char *)malloc((filelen > 0)? filelen : 1);
    if (buffer == NULL)
    {
      printf("** Failed to allocate %zu bytes of memory\n", filelen);
      fclose(f);
      continue;
    }

    readsize = fread(buffer, 1, filelen, f);
    fclose(f);

    if (readsize != filelen)
      printf("** File size is %zu but fread() returned %zu\n", filelen, readsize);
    else
    {
      printf("Length = %zu\n", filelen);
      LLVMFuzzerTestOneInput(buffer, filelen);
    }
    free(buffer);
  }

  return 0;
}
790 #endif /* STANDALONE */
791
792 /* End */
793