xref: /aosp_15_r20/external/pdfium/testing/fuzzers/pdf_xfa_raw_fuzzer.cc (revision 3ac0a46f773bac49fa9476ec2b1cf3f8da5ec3a4)
1 // Copyright 2021 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include <fuzzer/FuzzedDataProvider.h>
6 
7 #include <cctype>
8 #include <string>
9 
10 #include "public/fpdf_formfill.h"
11 #include "testing/fuzzers/pdf_fuzzer_templates.h"
12 #include "testing/fuzzers/pdfium_fuzzer_helper.h"
13 
14 class PDFiumXFAFuzzer : public PDFiumFuzzerHelper {
15  public:
16   PDFiumXFAFuzzer() = default;
17   ~PDFiumXFAFuzzer() override = default;
18 
GetFormCallbackVersion() const19   int GetFormCallbackVersion() const override { return 2; }
20 
21   // Return false if XFA doesn't load as otherwise we're duplicating the work
22   // done by the non-xfa fuzzer.
OnFormFillEnvLoaded(FPDF_DOCUMENT doc)23   bool OnFormFillEnvLoaded(FPDF_DOCUMENT doc) override {
24     int form_type = FPDF_GetFormType(doc);
25     if (form_type != FORMTYPE_XFA_FULL && form_type != FORMTYPE_XFA_FOREGROUND)
26       return false;
27     return FPDF_LoadXFA(doc);
28   }
29 };
30 
// Decides whether the raw fuzzer input is worth wrapping into an XFA stream.
//
// Returns true only when all of the following hold:
//  - |size| is at most 2048 bytes;
//  - every byte is a whitespace or printable character;
//  - the byte sequence "script" does not occur anywhere in the input;
//  - angle brackets are balanced and never nested: each '<' is closed by a
//    '>' before the next '<', every tag has at least one character between
//    the brackets, and no tag is left open at the end of the input.
//
// |data| must point to at least |size| readable bytes.
bool IsValidForFuzzing(const uint8_t* data, size_t size) {
  constexpr size_t kMaxInputSize = 2048;
  constexpr char kScript[] = "script";
  constexpr size_t kScriptLen = sizeof(kScript) - 1;

  if (size > kMaxInputSize) {
    return false;
  }

  const char* ptr = reinterpret_cast<const char*>(data);
  bool is_open = false;
  size_t tag_size = 0;
  for (size_t i = 0; i < size; i++) {
    // Classify the unsigned byte: passing a negative plain char to the
    // <cctype> functions is undefined behavior.
    const unsigned char byte = data[i];
    if (!std::isspace(byte) && !std::isprint(byte)) {
      return false;
    }

    // We do not want any script tags, because this fuzzer should avoid
    // exploring v8 code. Rejecting every input that contains "script" is an
    // over-approximation: some such inputs would still be valid fuzz cases.
    // It is used deliberately to enforce strict constraints and to avoid
    // cases where whitespace or other tags (e.g. "Javascript") would still
    // trigger large explorations of v8 code. The alternative considered was
    // matching only "<script".
    //
    // `size - i` cannot underflow because i < size; using >= also catches a
    // match that ends exactly at the last byte of the input.
    if (size - i >= kScriptLen && memcmp(ptr + i, kScript, kScriptLen) == 0) {
      return false;
    }

    if (ptr[i] == '<') {
      // Nested or repeated opening brackets are rejected.
      if (is_open) {
        return false;
      }
      is_open = true;
      tag_size = 0;
    } else if (ptr[i] == '>') {
      // A closing bracket needs a matching, non-empty opening tag.
      if (!is_open || tag_size == 0) {
        return false;
      }
      is_open = false;
    } else if (is_open) {
      tag_size++;
    }
  }
  // The last bracket must have been closed.
  return !is_open;
}
74 
LLVMFuzzerTestOneInput(const uint8_t * data,size_t size)75 extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
76   // Filter the string to reduce the state space exploration.
77   if (!IsValidForFuzzing(data, size)) {
78     return 0;
79   }
80   std::string xfa_string = "<xdp xmlns=\"http://ns.adobe.com/xdp/\">";
81   xfa_string += std::string(reinterpret_cast<const char*>(data), size);
82   xfa_string += "</xdp>";
83 
84   // Add 1 for newline before endstream.
85   std::string xfa_stream_len = std::to_string(xfa_string.size() + 1);
86 
87   // Compose the fuzzer
88   std::string xfa_final_str = std::string(kSimplePdfTemplate);
89   xfa_final_str.replace(xfa_final_str.find("$1"), 2, xfa_stream_len);
90   xfa_final_str.replace(xfa_final_str.find("$2"), 2, xfa_string);
91 
92 #ifdef PDFIUM_FUZZER_DUMP
93   for (size_t i = 0; i < xfa_final_str.size(); i++) {
94     putc(xfa_final_str[i], stdout);
95   }
96 #endif
97 
98   PDFiumXFAFuzzer fuzzer;
99   fuzzer.RenderPdf(xfa_final_str.c_str(), xfa_final_str.size());
100   return 0;
101 }
102