xref: /aosp_15_r20/external/libxml2/fuzz/html.c (revision 7c5688314b92172186c154356a6374bf7684c3ca)
1 /*
2  * html.c: a libFuzzer target to test several HTML parser interfaces.
3  *
4  * See Copyright for the status of this software.
5  */
6 
7 #include <libxml/HTMLparser.h>
8 #include <libxml/HTMLtree.h>
9 #include <libxml/catalog.h>
10 #include "fuzz.h"
11 
12 int
LLVMFuzzerInitialize(int * argc ATTRIBUTE_UNUSED,char *** argv ATTRIBUTE_UNUSED)13 LLVMFuzzerInitialize(int *argc ATTRIBUTE_UNUSED,
14                      char ***argv ATTRIBUTE_UNUSED) {
15     xmlFuzzMemSetup();
16     xmlInitParser();
17 #ifdef LIBXML_CATALOG_ENABLED
18     xmlInitializeCatalog();
19     xmlCatalogSetDefaults(XML_CATA_ALLOW_NONE);
20 #endif
21 
22     return 0;
23 }
24 
25 int
LLVMFuzzerTestOneInput(const char * data,size_t size)26 LLVMFuzzerTestOneInput(const char *data, size_t size) {
27     xmlParserCtxtPtr ctxt;
28     htmlDocPtr doc;
29     const char *docBuffer;
30     size_t maxAlloc, docSize;
31     int opts;
32 
33     xmlFuzzDataInit(data, size);
34     opts = (int) xmlFuzzReadInt(4);
35     maxAlloc = xmlFuzzReadInt(4) % (size + 100);
36 
37     docBuffer = xmlFuzzReadRemaining(&docSize);
38     if (docBuffer == NULL) {
39         xmlFuzzDataCleanup();
40         return(0);
41     }
42 
43     /* Pull parser */
44 
45     xmlFuzzMemSetLimit(maxAlloc);
46     ctxt = htmlNewParserCtxt();
47     if (ctxt != NULL) {
48         xmlCtxtSetErrorHandler(ctxt, xmlFuzzSErrorFunc, NULL);
49         doc = htmlCtxtReadMemory(ctxt, docBuffer, docSize, NULL, NULL, opts);
50         xmlFuzzCheckMallocFailure("htmlCtxtReadMemory",
51                                   ctxt->errNo == XML_ERR_NO_MEMORY);
52 
53         if (doc != NULL) {
54             xmlDocPtr copy;
55 
56 #ifdef LIBXML_OUTPUT_ENABLED
57             xmlOutputBufferPtr out;
58             const xmlChar *content;
59 
60             /*
61              * Also test the serializer. Call htmlDocContentDumpOutput with our
62              * own buffer to avoid encoding the output. The HTML encoding is
63              * excruciatingly slow (see htmlEntityValueLookup).
64              */
65             out = xmlAllocOutputBuffer(NULL);
66             htmlDocContentDumpOutput(out, doc, NULL);
67             content = xmlOutputBufferGetContent(out);
68             xmlOutputBufferClose(out);
69             xmlFuzzCheckMallocFailure("htmlDocContentDumpOutput",
70                                       content == NULL);
71 #endif
72 
73             copy = xmlCopyDoc(doc, 1);
74             xmlFuzzCheckMallocFailure("xmlCopyNode", copy == NULL);
75             xmlFreeDoc(copy);
76 
77             xmlFreeDoc(doc);
78         }
79 
80         htmlFreeParserCtxt(ctxt);
81     }
82 
83 
84     /* Push parser */
85 
86 #ifdef LIBXML_PUSH_ENABLED
87     {
88         static const size_t maxChunkSize = 128;
89         size_t consumed, chunkSize;
90 
91         xmlFuzzMemSetLimit(maxAlloc);
92         ctxt = htmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL,
93                                         XML_CHAR_ENCODING_NONE);
94 
95         if (ctxt != NULL) {
96             xmlCtxtSetErrorHandler(ctxt, xmlFuzzSErrorFunc, NULL);
97             htmlCtxtUseOptions(ctxt, opts);
98 
99             for (consumed = 0; consumed < docSize; consumed += chunkSize) {
100                 chunkSize = docSize - consumed;
101                 if (chunkSize > maxChunkSize)
102                     chunkSize = maxChunkSize;
103                 htmlParseChunk(ctxt, docBuffer + consumed, chunkSize, 0);
104             }
105 
106             htmlParseChunk(ctxt, NULL, 0, 1);
107             xmlFuzzCheckMallocFailure("htmlParseChunk",
108                                       ctxt->errNo == XML_ERR_NO_MEMORY);
109             xmlFreeDoc(ctxt->myDoc);
110             htmlFreeParserCtxt(ctxt);
111         }
112     }
113 #endif
114 
115     /* Cleanup */
116 
117     xmlFuzzMemSetLimit(0);
118     xmlFuzzDataCleanup();
119     xmlResetLastError();
120 
121     return(0);
122 }
123 
124