xref: /aosp_15_r20/external/pigweed/pw_tokenizer/token_database_fuzzer.cc (revision 61c4878ac05f98d0ceed94b57d316916de578985)
1*61c4878aSAndroid Build Coastguard Worker // Copyright 2020 The Pigweed Authors
2*61c4878aSAndroid Build Coastguard Worker //
3*61c4878aSAndroid Build Coastguard Worker // Licensed under the Apache License, Version 2.0 (the "License"); you may not
4*61c4878aSAndroid Build Coastguard Worker // use this file except in compliance with the License. You may obtain a copy of
5*61c4878aSAndroid Build Coastguard Worker // the License at
6*61c4878aSAndroid Build Coastguard Worker //
7*61c4878aSAndroid Build Coastguard Worker //     https://www.apache.org/licenses/LICENSE-2.0
8*61c4878aSAndroid Build Coastguard Worker //
9*61c4878aSAndroid Build Coastguard Worker // Unless required by applicable law or agreed to in writing, software
10*61c4878aSAndroid Build Coastguard Worker // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11*61c4878aSAndroid Build Coastguard Worker // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12*61c4878aSAndroid Build Coastguard Worker // License for the specific language governing permissions and limitations under
13*61c4878aSAndroid Build Coastguard Worker // the License.
14*61c4878aSAndroid Build Coastguard Worker 
// This file implements a basic fuzz test for the TokenDatabase class.
// A database is created from fuzz data, and a random entry count (also
// derived from the fuzz data) is set. We then run iteration and 'find'
// operations on this database.
19*61c4878aSAndroid Build Coastguard Worker 
20*61c4878aSAndroid Build Coastguard Worker #include <cstring>
21*61c4878aSAndroid Build Coastguard Worker 
22*61c4878aSAndroid Build Coastguard Worker #include "pw_fuzzer/asan_interface.h"
23*61c4878aSAndroid Build Coastguard Worker #include "pw_fuzzer/fuzzed_data_provider.h"
24*61c4878aSAndroid Build Coastguard Worker #include "pw_preprocessor/util.h"
25*61c4878aSAndroid Build Coastguard Worker #include "pw_span/span.h"
26*61c4878aSAndroid Build Coastguard Worker #include "pw_tokenizer/token_database.h"
27*61c4878aSAndroid Build Coastguard Worker 
28*61c4878aSAndroid Build Coastguard Worker namespace pw::tokenizer {
29*61c4878aSAndroid Build Coastguard Worker namespace {
30*61c4878aSAndroid Build Coastguard Worker 
// Selects how the token header at the start of the fuzz buffer is filled in.
enum FuzzTestType : uint8_t {
  // Start from the well-formed default header.
  kValidHeader = 0,
  // Fill the header with bytes taken from the fuzz input.
  kRandomHeader = 1,
  // Largest enumerator; alias of kRandomHeader, not a distinct test type.
  kMaxValue = kRandomHeader,
};
36*61c4878aSAndroid Build Coastguard Worker 
// Number of bytes reserved for the token header at the start of the buffer.
constexpr size_t kTokenHeaderSize = 16;

// Default upper bound, in bytes, on the fuzz input handled per run. Keep this
// in sync with the fuzzer executable's '-max_len' argument if one is passed.
constexpr size_t kFuzzDataSizeMax = 4 * 1024;

// Byte offset and width of the 'EntryCount' field within the token header.
constexpr size_t kEntryCountOffset = 8;
constexpr size_t kEntryCountSize = sizeof(uint32_t);
47*61c4878aSAndroid Build Coastguard Worker 
SetTokenEntryCountInBuffer(uint8_t * buffer,uint32_t count)48*61c4878aSAndroid Build Coastguard Worker void SetTokenEntryCountInBuffer(uint8_t* buffer, uint32_t count) {
49*61c4878aSAndroid Build Coastguard Worker   memcpy(buffer + kEntryCountOffset, &count, kEntryCountSize);
50*61c4878aSAndroid Build Coastguard Worker }
51*61c4878aSAndroid Build Coastguard Worker 
IterateOverDatabase(TokenDatabase * const database)52*61c4878aSAndroid Build Coastguard Worker void IterateOverDatabase(TokenDatabase* const database) {
53*61c4878aSAndroid Build Coastguard Worker   for (TokenDatabase::Entry entry : *database) {
54*61c4878aSAndroid Build Coastguard Worker     // Since we don't "use" the contents of the entry, we exercise
55*61c4878aSAndroid Build Coastguard Worker     // the entry by extracting its contents into volatile variables
56*61c4878aSAndroid Build Coastguard Worker     // to prevent it from being optimized out during compilation.
57*61c4878aSAndroid Build Coastguard Worker     [[maybe_unused]] volatile const char* entry_string = entry.string;
58*61c4878aSAndroid Build Coastguard Worker     [[maybe_unused]] volatile uint32_t entry_token = entry.token;
59*61c4878aSAndroid Build Coastguard Worker   }
60*61c4878aSAndroid Build Coastguard Worker }
61*61c4878aSAndroid Build Coastguard Worker 
62*61c4878aSAndroid Build Coastguard Worker }  // namespace
63*61c4878aSAndroid Build Coastguard Worker 
LLVMFuzzerTestOneInput(const uint8_t * data,size_t size)64*61c4878aSAndroid Build Coastguard Worker extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
65*61c4878aSAndroid Build Coastguard Worker   constexpr size_t kBufferSizeMax = kFuzzDataSizeMax + kTokenHeaderSize;
66*61c4878aSAndroid Build Coastguard Worker   constexpr char kDefaultHeader[] = "TOKENS\0\0\0\0\0\0\0\0\0";
67*61c4878aSAndroid Build Coastguard Worker   static uint8_t buffer[kBufferSizeMax];
68*61c4878aSAndroid Build Coastguard Worker 
69*61c4878aSAndroid Build Coastguard Worker   if (!data || size == 0 || size > kFuzzDataSizeMax) {
70*61c4878aSAndroid Build Coastguard Worker     return 0;
71*61c4878aSAndroid Build Coastguard Worker   }
72*61c4878aSAndroid Build Coastguard Worker 
73*61c4878aSAndroid Build Coastguard Worker   FuzzedDataProvider provider(data, size);
74*61c4878aSAndroid Build Coastguard Worker 
75*61c4878aSAndroid Build Coastguard Worker   // Initialize the token header with either a valid or invalid header
76*61c4878aSAndroid Build Coastguard Worker   // based on a random enum consumed from the fuzz data.
77*61c4878aSAndroid Build Coastguard Worker   switch (provider.ConsumeEnum<FuzzTestType>()) {
78*61c4878aSAndroid Build Coastguard Worker     case kValidHeader:
79*61c4878aSAndroid Build Coastguard Worker       memcpy(buffer, kDefaultHeader, kTokenHeaderSize);
80*61c4878aSAndroid Build Coastguard Worker       break;
81*61c4878aSAndroid Build Coastguard Worker 
82*61c4878aSAndroid Build Coastguard Worker     case kRandomHeader: {
83*61c4878aSAndroid Build Coastguard Worker       std::vector<uint8_t> random_header =
84*61c4878aSAndroid Build Coastguard Worker           provider.ConsumeBytes<uint8_t>(kTokenHeaderSize);
85*61c4878aSAndroid Build Coastguard Worker       random_header.resize(kTokenHeaderSize);
86*61c4878aSAndroid Build Coastguard Worker       memcpy(buffer, &random_header[0], kTokenHeaderSize);
87*61c4878aSAndroid Build Coastguard Worker       break;
88*61c4878aSAndroid Build Coastguard Worker     }
89*61c4878aSAndroid Build Coastguard Worker   }
90*61c4878aSAndroid Build Coastguard Worker 
91*61c4878aSAndroid Build Coastguard Worker   // Consume a 'test token' integer to look up later in the database.
92*61c4878aSAndroid Build Coastguard Worker   uint32_t random_token = provider.ConsumeIntegral<uint32_t>();
93*61c4878aSAndroid Build Coastguard Worker 
94*61c4878aSAndroid Build Coastguard Worker   // Consume a 'token count' integer to set as our database entry count.
95*61c4878aSAndroid Build Coastguard Worker   uint32_t random_token_count =
96*61c4878aSAndroid Build Coastguard Worker       provider.ConsumeIntegralInRange<uint32_t>(0, kFuzzDataSizeMax);
97*61c4878aSAndroid Build Coastguard Worker 
98*61c4878aSAndroid Build Coastguard Worker   // Consume the remaining data. Note that the data corresponding to the
99*61c4878aSAndroid Build Coastguard Worker   // string entries in the database are not explicitly null-terminated.
100*61c4878aSAndroid Build Coastguard Worker   // TODO(karthikmb): Once OSS-Fuzz updates to Clang11.0, switch to
101*61c4878aSAndroid Build Coastguard Worker   // provider.ConsumeData() to avoid extra memory and the memcpy call.
102*61c4878aSAndroid Build Coastguard Worker   auto consumed_bytes =
103*61c4878aSAndroid Build Coastguard Worker       provider.ConsumeBytes<uint8_t>(provider.remaining_bytes());
104*61c4878aSAndroid Build Coastguard Worker   memcpy(buffer + kTokenHeaderSize, &consumed_bytes[0], consumed_bytes.size());
105*61c4878aSAndroid Build Coastguard Worker 
106*61c4878aSAndroid Build Coastguard Worker   SetTokenEntryCountInBuffer(buffer, random_token_count);
107*61c4878aSAndroid Build Coastguard Worker 
108*61c4878aSAndroid Build Coastguard Worker   // Poison the unused buffer space for this run of the fuzzer to
109*61c4878aSAndroid Build Coastguard Worker   // prevent the token database creator from reading too far in.
110*61c4878aSAndroid Build Coastguard Worker   size_t data_size = kTokenHeaderSize + consumed_bytes.size();
111*61c4878aSAndroid Build Coastguard Worker   size_t poisoned_length = kBufferSizeMax - data_size;
112*61c4878aSAndroid Build Coastguard Worker   void* poisoned = &buffer[data_size];
113*61c4878aSAndroid Build Coastguard Worker 
114*61c4878aSAndroid Build Coastguard Worker   ASAN_POISON_MEMORY_REGION(poisoned, poisoned_length);
115*61c4878aSAndroid Build Coastguard Worker 
116*61c4878aSAndroid Build Coastguard Worker   // We create a database from a span of the buffer since the string
117*61c4878aSAndroid Build Coastguard Worker   // entries might not be null terminated, and the creation of a database
118*61c4878aSAndroid Build Coastguard Worker   // from a raw buffer has an explicit null terminated string requirement
119*61c4878aSAndroid Build Coastguard Worker   // specified in the API.
120*61c4878aSAndroid Build Coastguard Worker   span<uint8_t> data_span(buffer, data_size);
121*61c4878aSAndroid Build Coastguard Worker   auto token_database = TokenDatabase::Create<span<uint8_t>>(data_span);
122*61c4878aSAndroid Build Coastguard Worker   [[maybe_unused]] volatile auto match = token_database.Find(random_token);
123*61c4878aSAndroid Build Coastguard Worker 
124*61c4878aSAndroid Build Coastguard Worker   IterateOverDatabase(&token_database);
125*61c4878aSAndroid Build Coastguard Worker 
126*61c4878aSAndroid Build Coastguard Worker   // Un-poison for the next iteration.
127*61c4878aSAndroid Build Coastguard Worker   ASAN_UNPOISON_MEMORY_REGION(poisoned, poisoned_length);
128*61c4878aSAndroid Build Coastguard Worker 
129*61c4878aSAndroid Build Coastguard Worker   return 0;
130*61c4878aSAndroid Build Coastguard Worker }
131*61c4878aSAndroid Build Coastguard Worker 
132*61c4878aSAndroid Build Coastguard Worker }  // namespace pw::tokenizer
133