1*103e46e4SHarish Mahendrakar // Copyright (c) 2016 The WebM project authors. All Rights Reserved. 2*103e46e4SHarish Mahendrakar // 3*103e46e4SHarish Mahendrakar // Use of this source code is governed by a BSD-style license 4*103e46e4SHarish Mahendrakar // that can be found in the LICENSE file in the root of the source 5*103e46e4SHarish Mahendrakar // tree. An additional intellectual property rights grant can be found 6*103e46e4SHarish Mahendrakar // in the file PATENTS. All contributing project authors may 7*103e46e4SHarish Mahendrakar // be found in the AUTHORS file in the root of the source tree. 8*103e46e4SHarish Mahendrakar #ifndef SRC_BYTE_PARSER_H_ 9*103e46e4SHarish Mahendrakar #define SRC_BYTE_PARSER_H_ 10*103e46e4SHarish Mahendrakar 11*103e46e4SHarish Mahendrakar #include <cassert> 12*103e46e4SHarish Mahendrakar #include <cstdint> 13*103e46e4SHarish Mahendrakar #include <string> 14*103e46e4SHarish Mahendrakar #include <utility> 15*103e46e4SHarish Mahendrakar #include <vector> 16*103e46e4SHarish Mahendrakar 17*103e46e4SHarish Mahendrakar #include "src/element_parser.h" 18*103e46e4SHarish Mahendrakar #include "webm/callback.h" 19*103e46e4SHarish Mahendrakar #include "webm/element.h" 20*103e46e4SHarish Mahendrakar #include "webm/reader.h" 21*103e46e4SHarish Mahendrakar #include "webm/status.h" 22*103e46e4SHarish Mahendrakar 23*103e46e4SHarish Mahendrakar namespace webm { 24*103e46e4SHarish Mahendrakar 25*103e46e4SHarish Mahendrakar // Parses an EBML string (UTF-8 and ASCII) or binary element from a byte stream. 26*103e46e4SHarish Mahendrakar // Spec reference for string/binary elements: 27*103e46e4SHarish Mahendrakar // http://matroska.org/technical/specs/index.html#EBML_ex 28*103e46e4SHarish Mahendrakar // https://github.com/Matroska-Org/ebml-specification/blob/master/specification.markdown#ebml-element-types 29*103e46e4SHarish Mahendrakar template <typename T> 30*103e46e4SHarish Mahendrakar class ByteParser : public ElementParser { 31*103e46e4SHarish Mahendrakar public: 32*103e46e4SHarish Mahendrakar static_assert(std::is_same<T, std::vector<std::uint8_t>>::value || 33*103e46e4SHarish Mahendrakar std::is_same<T, std::string>::value, 34*103e46e4SHarish Mahendrakar "T must be std::vector<std::uint8_t> or std::string"); 35*103e46e4SHarish Mahendrakar 36*103e46e4SHarish Mahendrakar // Constructs a new parser which will use the given default_value as the 37*103e46e4SHarish Mahendrakar // value for the element if its size is zero. Defaults to the empty string 38*103e46e4SHarish Mahendrakar // or empty binary element (as the EBML spec indicates). 39*103e46e4SHarish Mahendrakar explicit ByteParser(T default_value = {}) default_value_(std::move (default_value))40*103e46e4SHarish Mahendrakar : default_value_(std::move(default_value)) {} 41*103e46e4SHarish Mahendrakar 42*103e46e4SHarish Mahendrakar ByteParser(ByteParser&&) = default; 43*103e46e4SHarish Mahendrakar ByteParser& operator=(ByteParser&&) = default; 44*103e46e4SHarish Mahendrakar 45*103e46e4SHarish Mahendrakar ByteParser(const ByteParser&) = delete; 46*103e46e4SHarish Mahendrakar ByteParser& operator=(const ByteParser&) = delete; 47*103e46e4SHarish Mahendrakar Init(const ElementMetadata & metadata,std::uint64_t max_size)48*103e46e4SHarish Mahendrakar Status Init(const ElementMetadata& metadata, 49*103e46e4SHarish Mahendrakar std::uint64_t max_size) override { 50*103e46e4SHarish Mahendrakar assert(metadata.size == kUnknownElementSize || metadata.size <= max_size); 51*103e46e4SHarish Mahendrakar 52*103e46e4SHarish Mahendrakar if (metadata.size == kUnknownElementSize) { 53*103e46e4SHarish Mahendrakar return Status(Status::kInvalidElementSize); 54*103e46e4SHarish Mahendrakar } 55*103e46e4SHarish Mahendrakar 56*103e46e4SHarish Mahendrakar if (metadata.size > std::numeric_limits<std::size_t>::max() || 57*103e46e4SHarish Mahendrakar metadata.size > value_.max_size()) { 58*103e46e4SHarish Mahendrakar return Status(Status::kNotEnoughMemory); 59*103e46e4SHarish Mahendrakar } 60*103e46e4SHarish Mahendrakar 61*103e46e4SHarish Mahendrakar #if WEBM_FUZZER_BYTE_ELEMENT_SIZE_LIMIT 62*103e46e4SHarish Mahendrakar // AFL and ASan just kill the process if too much memory is allocated, so 63*103e46e4SHarish Mahendrakar // let's cap the maximum size of the element. It's too easy for the fuzzer 64*103e46e4SHarish Mahendrakar // to make an element with a ridiculously huge size, and that just creates 65*103e46e4SHarish Mahendrakar // uninteresting false positives. 66*103e46e4SHarish Mahendrakar if (metadata.size > WEBM_FUZZER_BYTE_ELEMENT_SIZE_LIMIT) { 67*103e46e4SHarish Mahendrakar return Status(Status::kNotEnoughMemory); 68*103e46e4SHarish Mahendrakar } 69*103e46e4SHarish Mahendrakar #endif 70*103e46e4SHarish Mahendrakar 71*103e46e4SHarish Mahendrakar if (metadata.size == 0) { 72*103e46e4SHarish Mahendrakar value_ = default_value_; 73*103e46e4SHarish Mahendrakar total_read_ = default_value_.size(); 74*103e46e4SHarish Mahendrakar } else { 75*103e46e4SHarish Mahendrakar value_.resize(static_cast<std::size_t>(metadata.size)); 76*103e46e4SHarish Mahendrakar total_read_ = 0; 77*103e46e4SHarish Mahendrakar } 78*103e46e4SHarish Mahendrakar 79*103e46e4SHarish Mahendrakar return Status(Status::kOkCompleted); 80*103e46e4SHarish Mahendrakar } 81*103e46e4SHarish Mahendrakar Feed(Callback * callback,Reader * reader,std::uint64_t * num_bytes_read)82*103e46e4SHarish Mahendrakar Status Feed(Callback* callback, Reader* reader, 83*103e46e4SHarish Mahendrakar std::uint64_t* num_bytes_read) override { 84*103e46e4SHarish Mahendrakar assert(callback != nullptr); 85*103e46e4SHarish Mahendrakar assert(reader != nullptr); 86*103e46e4SHarish Mahendrakar assert(num_bytes_read != nullptr); 87*103e46e4SHarish Mahendrakar 88*103e46e4SHarish Mahendrakar *num_bytes_read = 0; 89*103e46e4SHarish Mahendrakar 90*103e46e4SHarish Mahendrakar if (total_read_ == value_.size()) { 91*103e46e4SHarish Mahendrakar return Status(Status::kOkCompleted); 92*103e46e4SHarish Mahendrakar } 93*103e46e4SHarish Mahendrakar 94*103e46e4SHarish Mahendrakar Status status; 95*103e46e4SHarish Mahendrakar do { 96*103e46e4SHarish Mahendrakar std::uint64_t local_num_bytes_read = 0; 97*103e46e4SHarish Mahendrakar std::uint8_t* buffer = 98*103e46e4SHarish Mahendrakar reinterpret_cast<std::uint8_t*>(&value_.front()) + total_read_; 99*103e46e4SHarish Mahendrakar std::size_t buffer_size = value_.size() - total_read_; 100*103e46e4SHarish Mahendrakar status = reader->Read(buffer_size, buffer, &local_num_bytes_read); 101*103e46e4SHarish Mahendrakar assert((status.completed_ok() && local_num_bytes_read == buffer_size) || 102*103e46e4SHarish Mahendrakar (status.ok() && local_num_bytes_read < buffer_size) || 103*103e46e4SHarish Mahendrakar (!status.ok() && local_num_bytes_read == 0)); 104*103e46e4SHarish Mahendrakar *num_bytes_read += local_num_bytes_read; 105*103e46e4SHarish Mahendrakar total_read_ += static_cast<std::size_t>(local_num_bytes_read); 106*103e46e4SHarish Mahendrakar } while (status.code == Status::kOkPartial); 107*103e46e4SHarish Mahendrakar 108*103e46e4SHarish Mahendrakar // UTF-8 and ASCII string elements can be padded with NUL characters at the 109*103e46e4SHarish Mahendrakar // end, which should be ignored. 110*103e46e4SHarish Mahendrakar if (std::is_same<T, std::string>::value && status.completed_ok()) { 111*103e46e4SHarish Mahendrakar while (!value_.empty() && value_.back() == '\0') { 112*103e46e4SHarish Mahendrakar value_.pop_back(); 113*103e46e4SHarish Mahendrakar } 114*103e46e4SHarish Mahendrakar } 115*103e46e4SHarish Mahendrakar 116*103e46e4SHarish Mahendrakar return status; 117*103e46e4SHarish Mahendrakar } 118*103e46e4SHarish Mahendrakar 119*103e46e4SHarish Mahendrakar // Gets the parsed value. This must not be called until the parse has been 120*103e46e4SHarish Mahendrakar // successfully completed. value()121*103e46e4SHarish Mahendrakar const T& value() const { 122*103e46e4SHarish Mahendrakar assert(total_read_ >= value_.size()); 123*103e46e4SHarish Mahendrakar return value_; 124*103e46e4SHarish Mahendrakar } 125*103e46e4SHarish Mahendrakar 126*103e46e4SHarish Mahendrakar // Gets the parsed value. This must not be called until the parse has been 127*103e46e4SHarish Mahendrakar // successfully completed. mutable_value()128*103e46e4SHarish Mahendrakar T* mutable_value() { 129*103e46e4SHarish Mahendrakar assert(total_read_ >= value_.size()); 130*103e46e4SHarish Mahendrakar return &value_; 131*103e46e4SHarish Mahendrakar } 132*103e46e4SHarish Mahendrakar 133*103e46e4SHarish Mahendrakar private: 134*103e46e4SHarish Mahendrakar T value_; 135*103e46e4SHarish Mahendrakar T default_value_; 136*103e46e4SHarish Mahendrakar std::size_t total_read_; 137*103e46e4SHarish Mahendrakar }; 138*103e46e4SHarish Mahendrakar 139*103e46e4SHarish Mahendrakar using StringParser = ByteParser<std::string>; 140*103e46e4SHarish Mahendrakar using BinaryParser = ByteParser<std::vector<std::uint8_t>>; 141*103e46e4SHarish Mahendrakar 142*103e46e4SHarish Mahendrakar } // namespace webm 143*103e46e4SHarish Mahendrakar 144*103e46e4SHarish Mahendrakar #endif // SRC_BYTE_PARSER_H_ 145