xref: /aosp_15_r20/external/pigweed/pw_tokenizer/detokenize_fuzzer.cc (revision 61c4878ac05f98d0ceed94b57d316916de578985)
1*61c4878aSAndroid Build Coastguard Worker // Copyright 2020 The Pigweed Authors
2*61c4878aSAndroid Build Coastguard Worker //
3*61c4878aSAndroid Build Coastguard Worker // Licensed under the Apache License, Version 2.0 (the "License"); you may not
4*61c4878aSAndroid Build Coastguard Worker // use this file except in compliance with the License. You may obtain a copy of
5*61c4878aSAndroid Build Coastguard Worker // the License at
6*61c4878aSAndroid Build Coastguard Worker //
7*61c4878aSAndroid Build Coastguard Worker //     https://www.apache.org/licenses/LICENSE-2.0
8*61c4878aSAndroid Build Coastguard Worker //
9*61c4878aSAndroid Build Coastguard Worker // Unless required by applicable law or agreed to in writing, software
10*61c4878aSAndroid Build Coastguard Worker // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11*61c4878aSAndroid Build Coastguard Worker // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12*61c4878aSAndroid Build Coastguard Worker // License for the specific language governing permissions and limitations under
13*61c4878aSAndroid Build Coastguard Worker // the License.
14*61c4878aSAndroid Build Coastguard Worker 
15*61c4878aSAndroid Build Coastguard Worker // This file implements a basic fuzz test for the Detokenizer.
16*61c4878aSAndroid Build Coastguard Worker // An instance of the Detokenizer is created from a minimal, nearly-empty token
17*61c4878aSAndroid Build Coastguard Worker // database. Fuzz data is fed to the detokenizer in various supported input
18*61c4878aSAndroid Build Coastguard Worker // argument formats at random, which then decodes this data and tries to match
19*61c4878aSAndroid Build Coastguard Worker // it to tokens in the database.
20*61c4878aSAndroid Build Coastguard Worker 
21*61c4878aSAndroid Build Coastguard Worker #include <cstddef>
22*61c4878aSAndroid Build Coastguard Worker #include <cstdint>
23*61c4878aSAndroid Build Coastguard Worker #include <cstring>
24*61c4878aSAndroid Build Coastguard Worker #include <vector>
25*61c4878aSAndroid Build Coastguard Worker 
26*61c4878aSAndroid Build Coastguard Worker #include "pw_fuzzer/fuzzed_data_provider.h"
27*61c4878aSAndroid Build Coastguard Worker #include "pw_preprocessor/util.h"
28*61c4878aSAndroid Build Coastguard Worker #include "pw_tokenizer/detokenize.h"
29*61c4878aSAndroid Build Coastguard Worker 
30*61c4878aSAndroid Build Coastguard Worker namespace pw::tokenizer {
31*61c4878aSAndroid Build Coastguard Worker namespace {
32*61c4878aSAndroid Build Coastguard Worker 
// Lower and upper bounds handed to ConsumeIntegralInRange() when the fuzzer
// chooses how many bytes to request for a buffer-based Detokenize() call.
constexpr size_t kFuzzRangeMin{0};
constexpr size_t kFuzzRangeMax{10000};
35*61c4878aSAndroid Build Coastguard Worker 
// Identifies which Detokenize() argument form a round of fuzzing exercises.
// A value is drawn from the fuzz data with FuzzedDataProvider::ConsumeEnum,
// which relies on the kMaxValue alias naming the largest valid enumerator.
enum DetokenizeBufferArgumentType : uint8_t {
  kSpan = 0,          // Input is wrapped in a span.
  kStringView = 1,    // Input is passed as a string.
  kPtrAndLength = 2,  // Input is passed as a pointer plus a byte count.
  kMaxValue = kPtrAndLength,
};
42*61c4878aSAndroid Build Coastguard Worker 
// Fuzzing against an empty token database would exercise very little of the
// detokenizer, so a minimal database with 4 entries is built from the string
// literal below. The literal follows the binary token database format; see
// token_database.h for the authoritative description of that format.
constexpr char kBasicData[] =
    // First 8 bytes: the text "TOKENS" followed by two zero bytes.
    "TOKENS\0\0"
    // Next 4 bytes: the value 4, matching the number of entries below
    // (presumably the little-endian entry count -- confirm against
    // token_database.h).
    "\x04\x00\x00\x00"
    // Next 4 bytes: all zero.
    "\0\0\0\0"
    // Four 8-byte records. Each is 4 bytes that differ per record followed by
    // the 4-byte filler "----" (presumably token then removal date -- confirm
    // against token_database.h).
    "\x01\x00\x00\x00----"
    "\x05\x00\x00\x00----"
    "\xFF\x00\x00\x00----"
    "\xFF\xEE\xEE\xDD----"
    // Four null-terminated strings, one per record. The last one is terminated
    // by the string literal's implicit trailing null.
    "One\0"
    "TWO\0"
    "333\0"
    "FOUR";
59*61c4878aSAndroid Build Coastguard Worker 
60*61c4878aSAndroid Build Coastguard Worker }  // namespace
61*61c4878aSAndroid Build Coastguard Worker 
LLVMFuzzerTestOneInput(const uint8_t * data,size_t size)62*61c4878aSAndroid Build Coastguard Worker extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
63*61c4878aSAndroid Build Coastguard Worker   static Detokenizer detokenizer(TokenDatabase::Create<kBasicData>());
64*61c4878aSAndroid Build Coastguard Worker 
65*61c4878aSAndroid Build Coastguard Worker   FuzzedDataProvider provider(data, size);
66*61c4878aSAndroid Build Coastguard Worker 
67*61c4878aSAndroid Build Coastguard Worker   while (provider.remaining_bytes() != 0) {
68*61c4878aSAndroid Build Coastguard Worker     // Map the first word of the remaining fuzz data to a buffer argument
69*61c4878aSAndroid Build Coastguard Worker     // type, and feed the Detokenizer with a random length buffer to be
70*61c4878aSAndroid Build Coastguard Worker     // detokenized in the relevant format. The detokenized string returned
71*61c4878aSAndroid Build Coastguard Worker     // is itself of little consequence to this test.
72*61c4878aSAndroid Build Coastguard Worker     switch (provider.ConsumeEnum<DetokenizeBufferArgumentType>()) {
73*61c4878aSAndroid Build Coastguard Worker       case kSpan: {
74*61c4878aSAndroid Build Coastguard Worker         size_t consumed_size = provider.ConsumeIntegralInRange<size_t>(
75*61c4878aSAndroid Build Coastguard Worker             kFuzzRangeMin, kFuzzRangeMax);
76*61c4878aSAndroid Build Coastguard Worker         std::vector<uint8_t> buffer =
77*61c4878aSAndroid Build Coastguard Worker             provider.ConsumeBytes<uint8_t>(consumed_size);
78*61c4878aSAndroid Build Coastguard Worker         if (buffer.empty()) {
79*61c4878aSAndroid Build Coastguard Worker           return -1;
80*61c4878aSAndroid Build Coastguard Worker         }
81*61c4878aSAndroid Build Coastguard Worker         auto detokenized_string =
82*61c4878aSAndroid Build Coastguard Worker             detokenizer.Detokenize(span(&buffer[0], buffer.size()));
83*61c4878aSAndroid Build Coastguard Worker         static_cast<void>(detokenized_string);
84*61c4878aSAndroid Build Coastguard Worker         break;
85*61c4878aSAndroid Build Coastguard Worker       }
86*61c4878aSAndroid Build Coastguard Worker 
87*61c4878aSAndroid Build Coastguard Worker       case kStringView: {
88*61c4878aSAndroid Build Coastguard Worker         std::string str =
89*61c4878aSAndroid Build Coastguard Worker             provider.ConsumeRandomLengthString(provider.remaining_bytes());
90*61c4878aSAndroid Build Coastguard Worker         auto detokenized_string = detokenizer.Detokenize(str);
91*61c4878aSAndroid Build Coastguard Worker         static_cast<void>(detokenized_string);
92*61c4878aSAndroid Build Coastguard Worker         break;
93*61c4878aSAndroid Build Coastguard Worker       }
94*61c4878aSAndroid Build Coastguard Worker 
95*61c4878aSAndroid Build Coastguard Worker       case kPtrAndLength: {
96*61c4878aSAndroid Build Coastguard Worker         size_t consumed_size = provider.ConsumeIntegralInRange<size_t>(
97*61c4878aSAndroid Build Coastguard Worker             kFuzzRangeMin, kFuzzRangeMax);
98*61c4878aSAndroid Build Coastguard Worker         std::vector<uint8_t> buffer =
99*61c4878aSAndroid Build Coastguard Worker             provider.ConsumeBytes<uint8_t>(consumed_size);
100*61c4878aSAndroid Build Coastguard Worker         auto detokenized_string =
101*61c4878aSAndroid Build Coastguard Worker             detokenizer.Detokenize(buffer.data(), buffer.size());
102*61c4878aSAndroid Build Coastguard Worker         static_cast<void>(detokenized_string);
103*61c4878aSAndroid Build Coastguard Worker         break;
104*61c4878aSAndroid Build Coastguard Worker       }
105*61c4878aSAndroid Build Coastguard Worker     }
106*61c4878aSAndroid Build Coastguard Worker   }
107*61c4878aSAndroid Build Coastguard Worker 
108*61c4878aSAndroid Build Coastguard Worker   return 0;
109*61c4878aSAndroid Build Coastguard Worker }
110*61c4878aSAndroid Build Coastguard Worker 
111*61c4878aSAndroid Build Coastguard Worker }  // namespace pw::tokenizer
112