1 // Copyright 2022 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef QUICHE_BALSA_BALSA_FRAME_H_ 6 #define QUICHE_BALSA_BALSA_FRAME_H_ 7 8 #include <cstddef> 9 #include <cstdint> 10 #include <memory> 11 #include <utility> 12 #include <vector> 13 14 #include "absl/container/flat_hash_map.h" 15 #include "quiche/balsa/balsa_enums.h" 16 #include "quiche/balsa/balsa_headers.h" 17 #include "quiche/balsa/balsa_visitor_interface.h" 18 #include "quiche/balsa/framer_interface.h" 19 #include "quiche/balsa/http_validation_policy.h" 20 #include "quiche/balsa/noop_balsa_visitor.h" 21 #include "quiche/common/platform/api/quiche_export.h" 22 #include "quiche/common/platform/api/quiche_flag_utils.h" 23 #include "quiche/common/platform/api/quiche_logging.h" 24 25 namespace quiche { 26 27 namespace test { 28 class BalsaFrameTestPeer; 29 } // namespace test 30 31 // BalsaFrame is a lightweight HTTP framer. 32 class QUICHE_EXPORT BalsaFrame : public FramerInterface { 33 public: 34 typedef std::vector<std::pair<size_t, size_t> > Lines; 35 36 typedef BalsaHeaders::HeaderLineDescription HeaderLineDescription; 37 typedef BalsaHeaders::HeaderLines HeaderLines; 38 typedef BalsaHeaders::HeaderTokenList HeaderTokenList; 39 40 enum class InvalidCharsLevel { kOff, kWarning, kError }; 41 42 static constexpr int32_t kValidTerm1 = '\n' << 16 | '\r' << 8 | '\n'; 43 static constexpr int32_t kValidTerm1Mask = 0xFF << 16 | 0xFF << 8 | 0xFF; 44 static constexpr int32_t kValidTerm2 = '\n' << 8 | '\n'; 45 static constexpr int32_t kValidTerm2Mask = 0xFF << 8 | 0xFF; BalsaFrame()46 BalsaFrame() 47 : last_char_was_slash_r_(false), 48 saw_non_newline_char_(false), 49 start_was_space_(true), 50 chunk_length_character_extracted_(false), 51 is_request_(true), 52 allow_reading_until_close_for_request_(false), 53 request_was_head_(false), 54 max_header_length_(16 * 1024), 55 visitor_(&do_nothing_visitor_), 56 chunk_length_remaining_(0), 57 content_length_remaining_(0), 58 last_slash_n_idx_(0), 59 term_chars_(0), 60 parse_state_(BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE), 61 last_error_(BalsaFrameEnums::BALSA_NO_ERROR), 62 continue_headers_(nullptr), 63 headers_(nullptr), 64 start_of_trailer_line_(0), 65 trailer_length_(0), 66 invalid_chars_level_(InvalidCharsLevel::kOff), 67 use_interim_headers_callback_(false) {} 68 ~BalsaFrame()69 ~BalsaFrame() override {} 70 71 // Reset reinitializes all the member variables of the framer and clears the 72 // attached header object (but doesn't change the pointer value headers_). 73 void Reset(); 74 75 // The method set_balsa_headers clears the headers provided and attaches them 76 // to the framer. This is a required step before the framer will process any 77 // input message data. 78 // To detach the header object from the framer, use 79 // set_balsa_headers(nullptr). set_balsa_headers(BalsaHeaders * headers)80 void set_balsa_headers(BalsaHeaders* headers) { 81 if (headers_ != headers) { 82 headers_ = headers; 83 } 84 if (headers_ != nullptr) { 85 // Clear the headers if they are non-null, even if the new headers are 86 // the same as the old. 87 headers_->Clear(); 88 } 89 } 90 91 // If set to non-null, allow 100 Continue headers before the main headers. 92 // This method is a no-op if set_use_interim_headers_callback(true) is called. set_continue_headers(BalsaHeaders * continue_headers)93 void set_continue_headers(BalsaHeaders* continue_headers) { 94 if (continue_headers_ != continue_headers) { 95 continue_headers_ = continue_headers; 96 } 97 if (continue_headers_ != nullptr) { 98 // Clear the headers if they are non-null, even if the new headers are 99 // the same as the old. 100 continue_headers_->Clear(); 101 } 102 } 103 104 // Enables the framer to process trailers and deliver them in 105 // `BalsaVisitorInterface::OnTrailers()`. If this method is not called and 106 // trailers are received, only minimal trailers parsing will be performed 107 // (just enough to advance past trailers). EnableTrailers()108 void EnableTrailers() { 109 if (is_request()) { 110 QUICHE_CODE_COUNT(balsa_trailer_in_request); 111 } 112 if (trailers_ == nullptr) { 113 trailers_ = std::make_unique<BalsaHeaders>(); 114 } 115 } 116 set_balsa_visitor(BalsaVisitorInterface * visitor)117 void set_balsa_visitor(BalsaVisitorInterface* visitor) { 118 visitor_ = visitor; 119 if (visitor_ == nullptr) { 120 visitor_ = &do_nothing_visitor_; 121 } 122 } 123 set_invalid_chars_level(InvalidCharsLevel v)124 void set_invalid_chars_level(InvalidCharsLevel v) { 125 invalid_chars_level_ = v; 126 } 127 track_invalid_chars()128 bool track_invalid_chars() { 129 return invalid_chars_level_ != InvalidCharsLevel::kOff; 130 } 131 invalid_chars_error_enabled()132 bool invalid_chars_error_enabled() { 133 return invalid_chars_level_ == InvalidCharsLevel::kError; 134 } 135 set_http_validation_policy(const HttpValidationPolicy & policy)136 void set_http_validation_policy(const HttpValidationPolicy& policy) { 137 http_validation_policy_ = policy; 138 } http_validation_policy()139 const HttpValidationPolicy& http_validation_policy() const { 140 return http_validation_policy_; 141 } 142 set_is_request(bool is_request)143 void set_is_request(bool is_request) { is_request_ = is_request; } 144 is_request()145 bool is_request() const { return is_request_; } 146 set_request_was_head(bool request_was_head)147 void set_request_was_head(bool request_was_head) { 148 request_was_head_ = request_was_head; 149 } 150 set_max_header_length(size_t max_header_length)151 void set_max_header_length(size_t max_header_length) { 152 max_header_length_ = max_header_length; 153 } 154 max_header_length()155 size_t max_header_length() const { return max_header_length_; } 156 MessageFullyRead()157 bool MessageFullyRead() const { 158 return parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ; 159 } 160 ParseState()161 BalsaFrameEnums::ParseState ParseState() const { return parse_state_; } 162 Error()163 bool Error() const { return parse_state_ == BalsaFrameEnums::ERROR; } 164 ErrorCode()165 BalsaFrameEnums::ErrorCode ErrorCode() const { return last_error_; } 166 get_invalid_chars()167 const absl::flat_hash_map<char, int>& get_invalid_chars() const { 168 return invalid_chars_; 169 } 170 headers()171 const BalsaHeaders* headers() const { return headers_; } mutable_headers()172 BalsaHeaders* mutable_headers() { return headers_; } 173 174 size_t BytesSafeToSplice() const; 175 void BytesSpliced(size_t bytes_spliced); 176 177 size_t ProcessInput(const char* input, size_t size) override; 178 set_allow_reading_until_close_for_request(bool set)179 void set_allow_reading_until_close_for_request(bool set) { 180 allow_reading_until_close_for_request_ = set; 181 } 182 183 // For websockets and possibly other uses, we suspend the usual expectations 184 // about when a message has a body and how long it should be. AllowArbitraryBody()185 void AllowArbitraryBody() { 186 parse_state_ = BalsaFrameEnums::READING_UNTIL_CLOSE; 187 } 188 189 // If enabled, calls BalsaVisitorInterface::OnInterimHeaders() when parsing 190 // interim headers. For 100 Continue, this callback will be invoked instead of 191 // ContinueHeaderDone(), even when set_continue_headers() is called. set_use_interim_headers_callback(bool set)192 void set_use_interim_headers_callback(bool set) { 193 use_interim_headers_callback_ = set; 194 } 195 196 // If enabled, parse the available portion of headers even on a 197 // HEADERS_TOO_LONG error, so that that portion of headers is available to the 198 // error handler. Generally results in the last header being truncated. set_parse_truncated_headers_even_when_headers_too_long(bool set)199 void set_parse_truncated_headers_even_when_headers_too_long(bool set) { 200 parse_truncated_headers_even_when_headers_too_long_ = set; 201 } 202 203 protected: 204 inline BalsaHeadersEnums::ContentLengthStatus ProcessContentLengthLine( 205 size_t line_idx, size_t* length); 206 207 inline void ProcessTransferEncodingLine(size_t line_idx); 208 209 void ProcessFirstLine(const char* begin, const char* end); 210 211 void CleanUpKeyValueWhitespace(const char* stream_begin, 212 const char* line_begin, const char* current, 213 const char* line_end, 214 HeaderLineDescription* current_header_line); 215 216 void ProcessHeaderLines(const Lines& lines, bool is_trailer, 217 BalsaHeaders* headers); 218 219 // Returns true if there are invalid characters, false otherwise. 220 // Will also update counts per invalid character in invalid_chars_. 221 bool CheckHeaderLinesForInvalidChars(const Lines& lines, 222 const BalsaHeaders* headers); 223 224 inline size_t ProcessHeaders(const char* message_start, 225 size_t message_length); 226 227 void AssignParseStateAfterHeadersHaveBeenParsed(); 228 LineFramingFound(char current_char)229 inline bool LineFramingFound(char current_char) { 230 return current_char == '\n'; 231 } 232 233 // Return header framing pattern. Non-zero return value indicates found, 234 // which has two possible outcomes: kValidTerm1, which means \n\r\n 235 // or kValidTerm2, which means \n\n. Zero return value means not found. HeaderFramingFound(char current_char)236 inline int32_t HeaderFramingFound(char current_char) { 237 // Note that the 'if (current_char == '\n' ...)' test exists to ensure that 238 // the HeaderFramingMayBeFound test works properly. In benchmarking done on 239 // 2/13/2008, the 'if' actually speeds up performance of the function 240 // anyway.. 241 if (current_char == '\n' || current_char == '\r') { 242 term_chars_ <<= 8; 243 // This is necessary IFF architecture has > 8 bit char. Alas, I'm 244 // paranoid. 245 term_chars_ |= current_char & 0xFF; 246 247 if ((term_chars_ & kValidTerm1Mask) == kValidTerm1) { 248 term_chars_ = 0; 249 return kValidTerm1; 250 } 251 if ((term_chars_ & kValidTerm2Mask) == kValidTerm2) { 252 term_chars_ = 0; 253 return kValidTerm2; 254 } 255 } else { 256 term_chars_ = 0; 257 } 258 return 0; 259 } 260 HeaderFramingMayBeFound()261 inline bool HeaderFramingMayBeFound() const { return term_chars_ != 0; } 262 263 private: 264 friend class test::BalsaFrameTestPeer; 265 266 // Calls HandleError() and returns false on error. 267 bool FindColonsAndParseIntoKeyValue(const Lines& lines, bool is_trailer, 268 BalsaHeaders* headers); 269 270 void HandleError(BalsaFrameEnums::ErrorCode error_code); 271 void HandleWarning(BalsaFrameEnums::ErrorCode error_code); 272 273 void HandleHeadersTooLongError(); 274 275 bool last_char_was_slash_r_; 276 bool saw_non_newline_char_; 277 bool start_was_space_; 278 bool chunk_length_character_extracted_; 279 bool is_request_; // This is not reset in Reset() 280 // Generally, requests are not allowed to frame with connection: close. For 281 // protocols which do their own protocol-specific chunking, such as streamed 282 // stubby, we allow connection close semantics for requests. 283 bool allow_reading_until_close_for_request_; 284 bool request_was_head_; // This is not reset in Reset() 285 size_t max_header_length_; // This is not reset in Reset() 286 BalsaVisitorInterface* visitor_; 287 size_t chunk_length_remaining_; 288 size_t content_length_remaining_; 289 size_t last_slash_n_idx_; 290 uint32_t term_chars_; 291 BalsaFrameEnums::ParseState parse_state_; 292 BalsaFrameEnums::ErrorCode last_error_; 293 absl::flat_hash_map<char, int> invalid_chars_; 294 295 Lines lines_; 296 297 BalsaHeaders* continue_headers_; // This is not reset to nullptr in Reset(). 298 BalsaHeaders* headers_; // This is not reset to nullptr in Reset(). 299 NoOpBalsaVisitor do_nothing_visitor_; 300 301 Lines trailer_lines_; 302 size_t start_of_trailer_line_; 303 size_t trailer_length_; 304 305 // Cleared but not reset to nullptr in Reset(). 306 std::unique_ptr<BalsaHeaders> trailers_; 307 308 InvalidCharsLevel invalid_chars_level_; // This is not reset in Reset(). 309 310 HttpValidationPolicy http_validation_policy_; 311 312 // This is not reset in Reset(). 313 // TODO(b/68801833): Default-enable and then deprecate this field, along with 314 // set_continue_headers(). 315 bool use_interim_headers_callback_; 316 317 // This is not reset in Reset(). 318 bool parse_truncated_headers_even_when_headers_too_long_ = false; 319 }; 320 321 } // namespace quiche 322 323 #endif // QUICHE_BALSA_BALSA_FRAME_H_ 324