xref: /aosp_15_r20/external/libwebm/webm_parser/src/byte_parser.h (revision 103e46e4cd4b6efcf6001f23fa8665fb110abf8d)
1*103e46e4SHarish Mahendrakar // Copyright (c) 2016 The WebM project authors. All Rights Reserved.
2*103e46e4SHarish Mahendrakar //
3*103e46e4SHarish Mahendrakar // Use of this source code is governed by a BSD-style license
4*103e46e4SHarish Mahendrakar // that can be found in the LICENSE file in the root of the source
5*103e46e4SHarish Mahendrakar // tree. An additional intellectual property rights grant can be found
6*103e46e4SHarish Mahendrakar // in the file PATENTS.  All contributing project authors may
7*103e46e4SHarish Mahendrakar // be found in the AUTHORS file in the root of the source tree.
8*103e46e4SHarish Mahendrakar #ifndef SRC_BYTE_PARSER_H_
9*103e46e4SHarish Mahendrakar #define SRC_BYTE_PARSER_H_
10*103e46e4SHarish Mahendrakar 
#include <cassert>
#include <cstdint>
#include <limits>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>

#include "src/element_parser.h"
#include "webm/callback.h"
#include "webm/element.h"
#include "webm/reader.h"
#include "webm/status.h"
22*103e46e4SHarish Mahendrakar 
23*103e46e4SHarish Mahendrakar namespace webm {
24*103e46e4SHarish Mahendrakar 
25*103e46e4SHarish Mahendrakar // Parses an EBML string (UTF-8 and ASCII) or binary element from a byte stream.
26*103e46e4SHarish Mahendrakar // Spec reference for string/binary elements:
27*103e46e4SHarish Mahendrakar // http://matroska.org/technical/specs/index.html#EBML_ex
28*103e46e4SHarish Mahendrakar // https://github.com/Matroska-Org/ebml-specification/blob/master/specification.markdown#ebml-element-types
29*103e46e4SHarish Mahendrakar template <typename T>
30*103e46e4SHarish Mahendrakar class ByteParser : public ElementParser {
31*103e46e4SHarish Mahendrakar  public:
32*103e46e4SHarish Mahendrakar   static_assert(std::is_same<T, std::vector<std::uint8_t>>::value ||
33*103e46e4SHarish Mahendrakar                     std::is_same<T, std::string>::value,
34*103e46e4SHarish Mahendrakar                 "T must be std::vector<std::uint8_t> or std::string");
35*103e46e4SHarish Mahendrakar 
36*103e46e4SHarish Mahendrakar   // Constructs a new parser which will use the given default_value as the
37*103e46e4SHarish Mahendrakar   // value for the element if its size is zero. Defaults to the empty string
38*103e46e4SHarish Mahendrakar   // or empty binary element (as the EBML spec indicates).
39*103e46e4SHarish Mahendrakar   explicit ByteParser(T default_value = {})
default_value_(std::move (default_value))40*103e46e4SHarish Mahendrakar       : default_value_(std::move(default_value)) {}
41*103e46e4SHarish Mahendrakar 
42*103e46e4SHarish Mahendrakar   ByteParser(ByteParser&&) = default;
43*103e46e4SHarish Mahendrakar   ByteParser& operator=(ByteParser&&) = default;
44*103e46e4SHarish Mahendrakar 
45*103e46e4SHarish Mahendrakar   ByteParser(const ByteParser&) = delete;
46*103e46e4SHarish Mahendrakar   ByteParser& operator=(const ByteParser&) = delete;
47*103e46e4SHarish Mahendrakar 
Init(const ElementMetadata & metadata,std::uint64_t max_size)48*103e46e4SHarish Mahendrakar   Status Init(const ElementMetadata& metadata,
49*103e46e4SHarish Mahendrakar               std::uint64_t max_size) override {
50*103e46e4SHarish Mahendrakar     assert(metadata.size == kUnknownElementSize || metadata.size <= max_size);
51*103e46e4SHarish Mahendrakar 
52*103e46e4SHarish Mahendrakar     if (metadata.size == kUnknownElementSize) {
53*103e46e4SHarish Mahendrakar       return Status(Status::kInvalidElementSize);
54*103e46e4SHarish Mahendrakar     }
55*103e46e4SHarish Mahendrakar 
56*103e46e4SHarish Mahendrakar     if (metadata.size > std::numeric_limits<std::size_t>::max() ||
57*103e46e4SHarish Mahendrakar         metadata.size > value_.max_size()) {
58*103e46e4SHarish Mahendrakar       return Status(Status::kNotEnoughMemory);
59*103e46e4SHarish Mahendrakar     }
60*103e46e4SHarish Mahendrakar 
61*103e46e4SHarish Mahendrakar #if WEBM_FUZZER_BYTE_ELEMENT_SIZE_LIMIT
62*103e46e4SHarish Mahendrakar     // AFL and ASan just kill the process if too much memory is allocated, so
63*103e46e4SHarish Mahendrakar     // let's cap the maximum size of the element. It's too easy for the fuzzer
64*103e46e4SHarish Mahendrakar     // to make an element with a ridiculously huge size, and that just creates
65*103e46e4SHarish Mahendrakar     // uninteresting false positives.
66*103e46e4SHarish Mahendrakar     if (metadata.size > WEBM_FUZZER_BYTE_ELEMENT_SIZE_LIMIT) {
67*103e46e4SHarish Mahendrakar       return Status(Status::kNotEnoughMemory);
68*103e46e4SHarish Mahendrakar     }
69*103e46e4SHarish Mahendrakar #endif
70*103e46e4SHarish Mahendrakar 
71*103e46e4SHarish Mahendrakar     if (metadata.size == 0) {
72*103e46e4SHarish Mahendrakar       value_ = default_value_;
73*103e46e4SHarish Mahendrakar       total_read_ = default_value_.size();
74*103e46e4SHarish Mahendrakar     } else {
75*103e46e4SHarish Mahendrakar       value_.resize(static_cast<std::size_t>(metadata.size));
76*103e46e4SHarish Mahendrakar       total_read_ = 0;
77*103e46e4SHarish Mahendrakar     }
78*103e46e4SHarish Mahendrakar 
79*103e46e4SHarish Mahendrakar     return Status(Status::kOkCompleted);
80*103e46e4SHarish Mahendrakar   }
81*103e46e4SHarish Mahendrakar 
Feed(Callback * callback,Reader * reader,std::uint64_t * num_bytes_read)82*103e46e4SHarish Mahendrakar   Status Feed(Callback* callback, Reader* reader,
83*103e46e4SHarish Mahendrakar               std::uint64_t* num_bytes_read) override {
84*103e46e4SHarish Mahendrakar     assert(callback != nullptr);
85*103e46e4SHarish Mahendrakar     assert(reader != nullptr);
86*103e46e4SHarish Mahendrakar     assert(num_bytes_read != nullptr);
87*103e46e4SHarish Mahendrakar 
88*103e46e4SHarish Mahendrakar     *num_bytes_read = 0;
89*103e46e4SHarish Mahendrakar 
90*103e46e4SHarish Mahendrakar     if (total_read_ == value_.size()) {
91*103e46e4SHarish Mahendrakar       return Status(Status::kOkCompleted);
92*103e46e4SHarish Mahendrakar     }
93*103e46e4SHarish Mahendrakar 
94*103e46e4SHarish Mahendrakar     Status status;
95*103e46e4SHarish Mahendrakar     do {
96*103e46e4SHarish Mahendrakar       std::uint64_t local_num_bytes_read = 0;
97*103e46e4SHarish Mahendrakar       std::uint8_t* buffer =
98*103e46e4SHarish Mahendrakar           reinterpret_cast<std::uint8_t*>(&value_.front()) + total_read_;
99*103e46e4SHarish Mahendrakar       std::size_t buffer_size = value_.size() - total_read_;
100*103e46e4SHarish Mahendrakar       status = reader->Read(buffer_size, buffer, &local_num_bytes_read);
101*103e46e4SHarish Mahendrakar       assert((status.completed_ok() && local_num_bytes_read == buffer_size) ||
102*103e46e4SHarish Mahendrakar              (status.ok() && local_num_bytes_read < buffer_size) ||
103*103e46e4SHarish Mahendrakar              (!status.ok() && local_num_bytes_read == 0));
104*103e46e4SHarish Mahendrakar       *num_bytes_read += local_num_bytes_read;
105*103e46e4SHarish Mahendrakar       total_read_ += static_cast<std::size_t>(local_num_bytes_read);
106*103e46e4SHarish Mahendrakar     } while (status.code == Status::kOkPartial);
107*103e46e4SHarish Mahendrakar 
108*103e46e4SHarish Mahendrakar     // UTF-8 and ASCII string elements can be padded with NUL characters at the
109*103e46e4SHarish Mahendrakar     // end, which should be ignored.
110*103e46e4SHarish Mahendrakar     if (std::is_same<T, std::string>::value && status.completed_ok()) {
111*103e46e4SHarish Mahendrakar       while (!value_.empty() && value_.back() == '\0') {
112*103e46e4SHarish Mahendrakar         value_.pop_back();
113*103e46e4SHarish Mahendrakar       }
114*103e46e4SHarish Mahendrakar     }
115*103e46e4SHarish Mahendrakar 
116*103e46e4SHarish Mahendrakar     return status;
117*103e46e4SHarish Mahendrakar   }
118*103e46e4SHarish Mahendrakar 
119*103e46e4SHarish Mahendrakar   // Gets the parsed value. This must not be called until the parse has been
120*103e46e4SHarish Mahendrakar   // successfully completed.
value()121*103e46e4SHarish Mahendrakar   const T& value() const {
122*103e46e4SHarish Mahendrakar     assert(total_read_ >= value_.size());
123*103e46e4SHarish Mahendrakar     return value_;
124*103e46e4SHarish Mahendrakar   }
125*103e46e4SHarish Mahendrakar 
126*103e46e4SHarish Mahendrakar   // Gets the parsed value. This must not be called until the parse has been
127*103e46e4SHarish Mahendrakar   // successfully completed.
mutable_value()128*103e46e4SHarish Mahendrakar   T* mutable_value() {
129*103e46e4SHarish Mahendrakar     assert(total_read_ >= value_.size());
130*103e46e4SHarish Mahendrakar     return &value_;
131*103e46e4SHarish Mahendrakar   }
132*103e46e4SHarish Mahendrakar 
133*103e46e4SHarish Mahendrakar  private:
134*103e46e4SHarish Mahendrakar   T value_;
135*103e46e4SHarish Mahendrakar   T default_value_;
136*103e46e4SHarish Mahendrakar   std::size_t total_read_;
137*103e46e4SHarish Mahendrakar };
138*103e46e4SHarish Mahendrakar 
139*103e46e4SHarish Mahendrakar using StringParser = ByteParser<std::string>;
140*103e46e4SHarish Mahendrakar using BinaryParser = ByteParser<std::vector<std::uint8_t>>;
141*103e46e4SHarish Mahendrakar 
142*103e46e4SHarish Mahendrakar }  // namespace webm
143*103e46e4SHarish Mahendrakar 
144*103e46e4SHarish Mahendrakar #endif  // SRC_BYTE_PARSER_H_
145