xref: /aosp_15_r20/external/cronet/net/third_party/quiche/src/quiche/balsa/balsa_frame.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright 2022 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "quiche/balsa/balsa_frame.h"
6 
7 #include <algorithm>
8 #include <cstddef>
9 #include <cstdint>
10 #include <cstring>
11 #include <limits>
12 #include <memory>
13 #include <string>
14 #include <utility>
15 
16 #include "absl/strings/match.h"
17 #include "absl/strings/numbers.h"
18 #include "absl/strings/string_view.h"
19 #include "quiche/balsa/balsa_enums.h"
20 #include "quiche/balsa/balsa_headers.h"
21 #include "quiche/balsa/balsa_visitor_interface.h"
22 #include "quiche/balsa/header_properties.h"
23 #include "quiche/common/platform/api/quiche_logging.h"
24 
// When comparing characters (other than == and !=), cast to unsigned char
// to make sure values above 127 rank as expected, even on platforms where char
// is signed and thus such values are represented as negative numbers before the
// cast.
#define CHAR_LT(a, b) \
  (static_cast<unsigned char>(a) < static_cast<unsigned char>(b))
#define CHAR_LE(a, b) \
  (static_cast<unsigned char>(a) <= static_cast<unsigned char>(b))
#define CHAR_GT(a, b) \
  (static_cast<unsigned char>(a) > static_cast<unsigned char>(b))
#define CHAR_GE(a, b) \
  (static_cast<unsigned char>(a) >= static_cast<unsigned char>(b))
// Debug-check form of CHAR_GE: both operands get the same unsigned char cast
// before being handed to QUICHE_DCHECK_GE.
#define QUICHE_DCHECK_CHAR_GE(a, b) \
  QUICHE_DCHECK_GE(static_cast<unsigned char>(a), static_cast<unsigned char>(b))
39 
40 namespace quiche {
41 
42 namespace {
43 
// Numeric values of the HTTP "100 Continue" and "101 Switching Protocols"
// status codes.
constexpr size_t kContinueStatusCode = 100;
constexpr size_t kSwitchingProtocolsStatusCode = 101;

// Lower-case spellings of the header names and Transfer-Encoding values that
// this file matches against; the comparisons in this file are done with
// absl::EqualsIgnoreCase, so the case of the incoming text does not matter.
constexpr absl::string_view kChunked = "chunked";
constexpr absl::string_view kContentLength = "content-length";
constexpr absl::string_view kIdentity = "identity";
constexpr absl::string_view kTransferEncoding = "transfer-encoding";
51 
// Returns true when |response_code| lies in the 1xx range, i.e. in [100, 200).
bool IsInterimResponse(size_t response_code) {
  // For an unsigned value, "hundreds digit is exactly one" is the same test as
  // 100 <= response_code < 200.
  return response_code / 100 == 1;
}
55 
56 }  // namespace
57 
Reset()58 void BalsaFrame::Reset() {
59   last_char_was_slash_r_ = false;
60   saw_non_newline_char_ = false;
61   start_was_space_ = true;
62   chunk_length_character_extracted_ = false;
63   // is_request_ = true;               // not reset between messages.
64   allow_reading_until_close_for_request_ = false;
65   // request_was_head_ = false;        // not reset between messages.
66   // max_header_length_ = 16 * 1024;   // not reset between messages.
67   // visitor_ = &do_nothing_visitor_;  // not reset between messages.
68   chunk_length_remaining_ = 0;
69   content_length_remaining_ = 0;
70   last_slash_n_idx_ = 0;
71   term_chars_ = 0;
72   parse_state_ = BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE;
73   last_error_ = BalsaFrameEnums::BALSA_NO_ERROR;
74   invalid_chars_.clear();
75   lines_.clear();
76   if (continue_headers_ != nullptr) {
77     continue_headers_->Clear();
78   }
79   if (headers_ != nullptr) {
80     headers_->Clear();
81   }
82   trailer_lines_.clear();
83   start_of_trailer_line_ = 0;
84   trailer_length_ = 0;
85   if (trailers_ != nullptr) {
86     trailers_->Clear();
87   }
88 }
89 
90 namespace {
91 
// Scans one "island" of the line bounded by [current, end): a (possibly
// empty) run of whitespace followed by a (possibly empty) run of
// non-whitespace.  A character counts as whitespace when its value, taken as
// an unsigned char, is <= ' '.
//
// Outputs, all expressed as offsets from |begin|:
//   *first_whitespace - where the whitespace run starts (the initial value of
//                       |current|).
//   *first_nonwhite   - where the non-whitespace run starts.
//
// Returns a pointer to the first whitespace character after the island, or
// |end| if the island runs to the end of the line; that is,
// returnvalue == end || *returnvalue <= ' '.  The non-whitespace run therefore
// occupies the offsets [*first_nonwhite, returnvalue - begin).
inline const char* ParseOneIsland(const char* current, const char* begin,
                                  const char* end, size_t* first_whitespace,
                                  size_t* first_nonwhite) {
  // Compare as unsigned char so that bytes above 127 are never mistaken for
  // whitespace on platforms where char is signed.
  const auto is_whitespace = [](char c) {
    return static_cast<unsigned char>(c) <= static_cast<unsigned char>(' ');
  };

  *first_whitespace = current - begin;
  for (; current < end && is_whitespace(*current); ++current) {
  }

  *first_nonwhite = current - begin;
  for (; current < end && !is_whitespace(*current); ++current) {
  }

  return current;
}
117 
118 }  // namespace
119 
120 // Summary:
121 //     Parses the first line of either a request or response.
122 //     Note that in the case of a detected warning, error_code will be set
123 //   but the function will not return false.
124 //     Exactly zero or one warning or error (but not both) may be detected
125 //   by this function.
126 //     Note that this function will not write the data of the first-line
127 //   into the header's buffer (that should already have been done elsewhere).
128 //
129 // Pre-conditions:
130 //     begin != end
131 //     *begin should be a character which is > ' '. This implies that there
132 //   is at least one non-whitespace characters between [begin, end).
133 //   headers is a valid pointer to a BalsaHeaders class.
134 //     error_code is a valid pointer to a BalsaFrameEnums::ErrorCode value.
135 //     Entire first line must exist between [begin, end)
136 //     Exactly zero or one newlines -may- exist between [begin, end)
137 //     [begin, end) should exist in the header's buffer.
138 //
139 // Side-effects:
140 //   headers will be modified
141 //   error_code may be modified if either a warning or error is detected
142 //
143 // Returns:
144 //   True if no error (as opposed to warning) is detected.
145 //   False if an error (as opposed to warning) is detected.
146 
147 //
148 // If there is indeed non-whitespace in the line, then the following
149 // will take care of this for you:
150 //  while (*begin <= ' ') ++begin;
151 //  ProcessFirstLine(begin, end, is_request, &headers, &error_code);
152 //
153 
ParseHTTPFirstLine(const char * begin,const char * end,bool is_request,BalsaHeaders * headers,BalsaFrameEnums::ErrorCode * error_code)154 bool ParseHTTPFirstLine(const char* begin, const char* end, bool is_request,
155                         BalsaHeaders* headers,
156                         BalsaFrameEnums::ErrorCode* error_code) {
157   while (begin < end && (end[-1] == '\n' || end[-1] == '\r')) {
158     --end;
159   }
160 
161   const char* current =
162       ParseOneIsland(begin, begin, end, &headers->whitespace_1_idx_,
163                      &headers->non_whitespace_1_idx_);
164   current = ParseOneIsland(current, begin, end, &headers->whitespace_2_idx_,
165                            &headers->non_whitespace_2_idx_);
166   current = ParseOneIsland(current, begin, end, &headers->whitespace_3_idx_,
167                            &headers->non_whitespace_3_idx_);
168 
169   // Clean up any trailing whitespace that comes after the third island
170   const char* last = end;
171   while (current <= last && CHAR_LE(*last, ' ')) {
172     --last;
173   }
174   headers->whitespace_4_idx_ = last - begin + 1;
175 
176   // Either the passed-in line is empty, or it starts with a non-whitespace
177   // character.
178   QUICHE_DCHECK(begin == end || static_cast<unsigned char>(*begin) > ' ');
179 
180   QUICHE_DCHECK_EQ(0u, headers->whitespace_1_idx_);
181   QUICHE_DCHECK_EQ(0u, headers->non_whitespace_1_idx_);
182 
183   // If the line isn't empty, it has at least one non-whitespace character (see
184   // first QUICHE_DCHECK), which will have been identified as a non-empty
185   // [non_whitespace_1_idx_, whitespace_2_idx_).
186   QUICHE_DCHECK(begin == end ||
187                 headers->non_whitespace_1_idx_ < headers->whitespace_2_idx_);
188 
189   if (headers->non_whitespace_2_idx_ == headers->whitespace_3_idx_) {
190     // This error may be triggered if the second token is empty, OR there's no
191     // WS after the first token; we don't bother to distinguish exactly which.
192     // (I'm not sure why we distinguish different kinds of parse error at all,
193     // actually.)
194     // FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD   for request
195     // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION for response
196     *error_code = static_cast<BalsaFrameEnums::ErrorCode>(
197         BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION +
198         static_cast<int>(is_request));
199     if (!is_request) {  // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION
200       return false;
201     }
202   }
203   if (headers->whitespace_3_idx_ == headers->non_whitespace_3_idx_) {
204     if (*error_code == BalsaFrameEnums::BALSA_NO_ERROR) {
205       // FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD   for request
206       // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION for response
207       *error_code = static_cast<BalsaFrameEnums::ErrorCode>(
208           BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE +
209           static_cast<int>(is_request));
210     }
211   }
212 
213   if (!is_request) {
214     headers->parsed_response_code_ = 0;
215     // If the response code is non-empty:
216     if (headers->non_whitespace_2_idx_ < headers->whitespace_3_idx_) {
217       if (!absl::SimpleAtoi(
218               absl::string_view(begin + headers->non_whitespace_2_idx_,
219                                 headers->non_whitespace_3_idx_ -
220                                     headers->non_whitespace_2_idx_),
221               &headers->parsed_response_code_)) {
222         *error_code = BalsaFrameEnums::FAILED_CONVERTING_STATUS_CODE_TO_INT;
223         return false;
224       }
225     }
226   }
227 
228   return true;
229 }
230 
231 // begin - beginning of the firstline
232 // end - end of the firstline
233 //
234 // A precondition for this function is that there is non-whitespace between
235 // [begin, end). If this precondition is not met, the function will not perform
236 // as expected (and bad things may happen, and it will eat your first, second,
237 // and third unborn children!).
238 //
239 // Another precondition for this function is that [begin, end) includes
240 // at most one newline, which must be at the end of the line.
ProcessFirstLine(const char * begin,const char * end)241 void BalsaFrame::ProcessFirstLine(const char* begin, const char* end) {
242   BalsaFrameEnums::ErrorCode previous_error = last_error_;
243   if (!ParseHTTPFirstLine(begin, end, is_request_, headers_, &last_error_)) {
244     parse_state_ = BalsaFrameEnums::ERROR;
245     HandleError(last_error_);
246     return;
247   }
248   if (previous_error != last_error_) {
249     HandleWarning(last_error_);
250   }
251 
252   const absl::string_view line_input(
253       begin + headers_->non_whitespace_1_idx_,
254       headers_->whitespace_4_idx_ - headers_->non_whitespace_1_idx_);
255   const absl::string_view part1(
256       begin + headers_->non_whitespace_1_idx_,
257       headers_->whitespace_2_idx_ - headers_->non_whitespace_1_idx_);
258   const absl::string_view part2(
259       begin + headers_->non_whitespace_2_idx_,
260       headers_->whitespace_3_idx_ - headers_->non_whitespace_2_idx_);
261   const absl::string_view part3(
262       begin + headers_->non_whitespace_3_idx_,
263       headers_->whitespace_4_idx_ - headers_->non_whitespace_3_idx_);
264 
265   if (is_request_) {
266     visitor_->OnRequestFirstLineInput(line_input, part1, part2, part3);
267     if (part3.empty()) {
268       parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
269     }
270     return;
271   }
272 
273   visitor_->OnResponseFirstLineInput(line_input, part1, part2, part3);
274 }
275 
276 // 'stream_begin' points to the first character of the headers buffer.
277 // 'line_begin' points to the first character of the line.
278 // 'current' points to a char which is ':'.
279 // 'line_end' points to the position of '\n' + 1.
280 // 'line_begin' points to the position of first character of line.
CleanUpKeyValueWhitespace(const char * stream_begin,const char * line_begin,const char * current,const char * line_end,HeaderLineDescription * current_header_line)281 void BalsaFrame::CleanUpKeyValueWhitespace(
282     const char* stream_begin, const char* line_begin, const char* current,
283     const char* line_end, HeaderLineDescription* current_header_line) {
284   const char* colon_loc = current;
285   QUICHE_DCHECK_LT(colon_loc, line_end);
286   QUICHE_DCHECK_EQ(':', *colon_loc);
287   QUICHE_DCHECK_EQ(':', *current);
288   QUICHE_DCHECK_CHAR_GE(' ', *line_end)
289       << "\"" << std::string(line_begin, line_end) << "\"";
290 
291   --current;
292   while (current > line_begin && CHAR_LE(*current, ' ')) {
293     --current;
294   }
295   current += static_cast<int>(current != colon_loc);
296   current_header_line->key_end_idx = current - stream_begin;
297 
298   current = colon_loc;
299   QUICHE_DCHECK_EQ(':', *current);
300   ++current;
301   while (current < line_end && CHAR_LE(*current, ' ')) {
302     ++current;
303   }
304   current_header_line->value_begin_idx = current - stream_begin;
305 
306   QUICHE_DCHECK_GE(current_header_line->key_end_idx,
307                    current_header_line->first_char_idx);
308   QUICHE_DCHECK_GE(current_header_line->value_begin_idx,
309                    current_header_line->key_end_idx);
310   QUICHE_DCHECK_GE(current_header_line->last_char_idx,
311                    current_header_line->value_begin_idx);
312 }
313 
// Splits every header (or trailer) line described by |lines| into a key and a
// value, appending one HeaderLineDescription per logical line to
// headers->header_lines_.
//
// |lines| holds the [first, second) offsets of each physical line relative to
// headers->OriginalHeaderStreamBegin().  The last entry is skipped, and for
// headers (is_trailer == false) entry 0, the first line of the message, is
// skipped as well.  Physical lines that start with a space or a tab are
// continuation lines and are folded into the description of the line they
// continue.
//
// Returns false, after reporting through HandleError(), when
//   - a continuation line is found and the validation policy disallows them,
//     or a line starts with a character <= ' ' other than space or tab,
//   - a character that is invalid in a header name precedes the colon, or
//   - a line has no colon and the validation policy requires one (without
//     that policy a missing colon is only reported through HandleWarning()).
// Returns true otherwise.
bool BalsaFrame::FindColonsAndParseIntoKeyValue(const Lines& lines,
                                                bool is_trailer,
                                                BalsaHeaders* headers) {
  QUICHE_DCHECK(!lines.empty());
  const char* stream_begin = headers->OriginalHeaderStreamBegin();
  // The last line is always just a newline (and is uninteresting).
  const Lines::size_type lines_size_m1 = lines.size() - 1;
  // For a trailer, there is no first line, so lines[0] is the first header.
  // For real headers, the first line takes lines[0], so real header starts
  // at index 1.
  int first_header_idx = (is_trailer ? 0 : 1);
  const char* current = stream_begin + lines[first_header_idx].first;
  // This code is a bit more subtle than it may appear at first glance.
  // This code looks for a colon in the current line... but it also looks
  // beyond the current line. If there is no colon in the current line, then
  // for each subsequent line (until the colon which -has- been found is
  // associated with a line), no searching for a colon will be performed. In
  // this way, we minimize the amount of bytes we have scanned for a colon.
  //
  // Note that the loop header has no increment: |i| is advanced by the inner
  // continuation-line loop below.
  for (Lines::size_type i = first_header_idx; i < lines_size_m1;) {
    const char* line_begin = stream_begin + lines[i].first;

    // Here we handle possible continuations.  Note that we do not replace
    // the '\n' in the line before a continuation (at least, as of now),
    // which implies that any code which looks for a value must deal with
    // "\r\n", etc -within- the line (and not just at the end of it).
    for (++i; i < lines_size_m1; ++i) {
      const char c = *(stream_begin + lines[i].first);
      if (CHAR_GT(c, ' ')) {
        // Not a continuation, so stop.  Note that if the 'original' i = 1,
        // and the next line is not a continuation, we'll end up with i = 2
        // when we break. This handles the incrementing of i for the outer
        // loop.
        break;
      }

      // Space and tab are valid starts to continuation lines.
      // https://tools.ietf.org/html/rfc7230#section-3.2.4 says that a proxy
      // can choose to reject or normalize continuation lines.
      if ((c != ' ' && c != '\t') ||
          http_validation_policy().disallow_header_continuation_lines) {
        HandleError(is_trailer ? BalsaFrameEnums::INVALID_TRAILER_FORMAT
                               : BalsaFrameEnums::INVALID_HEADER_FORMAT);
        return false;
      }

      // If disallow_header_continuation_lines() is false, we neither reject nor
      // normalize continuation lines, in violation of RFC7230.
    }
    // lines[i - 1] is the last physical line of this logical line (the line
    // itself, or its last continuation).
    const char* line_end = stream_begin + lines[i - 1].second;
    QUICHE_DCHECK_LT(line_begin - stream_begin, line_end - stream_begin);

    // We cleanup the whitespace at the end of the line before doing anything
    // else of interest as it allows us to do nothing when irregularly formatted
    // headers are parsed (e.g. those with only keys, only values, or no colon).
    //
    // After the trimming below, line_end points one past the last character
    // that is > ' ' (or one past line_begin if the loop ran all the way back
    // to the start of the line).
    --line_end;
    QUICHE_DCHECK_EQ('\n', *line_end)
        << "\"" << std::string(line_begin, line_end) << "\"";
    while (CHAR_LE(*line_end, ' ') && line_end > line_begin) {
      --line_end;
    }
    ++line_end;
    QUICHE_DCHECK_CHAR_GE(' ', *line_end);
    QUICHE_DCHECK_LT(line_begin, line_end);

    // We use '0' for the block idx, because we're always writing to the first
    // block from the framer (we do this because the framer requires that the
    // entire header sequence be in a contiguous buffer).
    //
    // The key end, value begin and last char offsets all start out at
    // line_end, i.e. the whole line is provisionally a key with no value.
    headers->header_lines_.push_back(HeaderLineDescription(
        line_begin - stream_begin, line_end - stream_begin,
        line_end - stream_begin, line_end - stream_begin, 0));
    if (current >= line_end) {
      if (http_validation_policy().require_header_colon) {
        HandleError(is_trailer ? BalsaFrameEnums::TRAILER_MISSING_COLON
                               : BalsaFrameEnums::HEADER_MISSING_COLON);
        return false;
      }
      HandleWarning(is_trailer ? BalsaFrameEnums::TRAILER_MISSING_COLON
                               : BalsaFrameEnums::HEADER_MISSING_COLON);
      // Then the next colon will not be found within this header line-- time
      // to try again with another header-line.
      continue;
    }
    if (current < line_begin) {
      // When this condition is true, the last detected colon was part of a
      // previous line.  We reset to the beginning of the line as we don't care
      // about the presence of any colon before the beginning of the current
      // line.
      current = line_begin;
    }
    // Scan for the colon, validating every character that precedes it as a
    // header-name character.
    for (; current < line_end; ++current) {
      if (*current == ':') {
        break;
      }

      // Generally invalid characters were found earlier.
      if (http_validation_policy().disallow_double_quote_in_header_name) {
        if (header_properties::IsInvalidHeaderKeyChar(*current)) {
          HandleError(is_trailer
                          ? BalsaFrameEnums::INVALID_TRAILER_NAME_CHARACTER
                          : BalsaFrameEnums::INVALID_HEADER_NAME_CHARACTER);
          return false;
        }
      } else if (header_properties::IsInvalidHeaderKeyCharAllowDoubleQuote(
                     *current)) {
        HandleError(is_trailer
                        ? BalsaFrameEnums::INVALID_TRAILER_NAME_CHARACTER
                        : BalsaFrameEnums::INVALID_HEADER_NAME_CHARACTER);
        return false;
      }
    }

    if (current == line_end) {
      // There was no colon in the line. The arguments we passed into the
      // construction for the HeaderLineDescription object should be OK-- it
      // assumes that the entire content is 'key' by default (which is true, as
      // there was no colon, there can be no value). Note that this is a
      // construct which is technically not allowed by the spec.

      // In strict mode, we do treat this invalid value-less key as an error.
      if (http_validation_policy().require_header_colon) {
        HandleError(is_trailer ? BalsaFrameEnums::TRAILER_MISSING_COLON
                               : BalsaFrameEnums::HEADER_MISSING_COLON);
        return false;
      }
      HandleWarning(is_trailer ? BalsaFrameEnums::TRAILER_MISSING_COLON
                               : BalsaFrameEnums::HEADER_MISSING_COLON);
      continue;
    }

    QUICHE_DCHECK_EQ(*current, ':');
    QUICHE_DCHECK_LE(current - stream_begin, line_end - stream_begin);
    QUICHE_DCHECK_LE(stream_begin - stream_begin, current - stream_begin);

    // Record the colon position as the provisional key end / value begin, then
    // let CleanUpKeyValueWhitespace() overwrite both with the trimmed offsets.
    HeaderLineDescription& current_header_line = headers->header_lines_.back();
    current_header_line.key_end_idx = current - stream_begin;
    current_header_line.value_begin_idx = current_header_line.key_end_idx;
    if (current < line_end) {
      ++current_header_line.key_end_idx;

      CleanUpKeyValueWhitespace(stream_begin, line_begin, current, line_end,
                                &current_header_line);
    }
  }

  return true;
}
462 
HandleWarning(BalsaFrameEnums::ErrorCode error_code)463 void BalsaFrame::HandleWarning(BalsaFrameEnums::ErrorCode error_code) {
464   last_error_ = error_code;
465   visitor_->HandleWarning(last_error_);
466 }
467 
HandleError(BalsaFrameEnums::ErrorCode error_code)468 void BalsaFrame::HandleError(BalsaFrameEnums::ErrorCode error_code) {
469   last_error_ = error_code;
470   parse_state_ = BalsaFrameEnums::ERROR;
471   visitor_->HandleError(last_error_);
472 }
473 
ProcessContentLengthLine(HeaderLines::size_type line_idx,size_t * length)474 BalsaHeadersEnums::ContentLengthStatus BalsaFrame::ProcessContentLengthLine(
475     HeaderLines::size_type line_idx, size_t* length) {
476   const HeaderLineDescription& header_line = headers_->header_lines_[line_idx];
477   const char* stream_begin = headers_->OriginalHeaderStreamBegin();
478   const char* line_end = stream_begin + header_line.last_char_idx;
479   const char* value_begin = (stream_begin + header_line.value_begin_idx);
480 
481   if (value_begin >= line_end) {
482     // There is no non-whitespace value data.
483     QUICHE_DVLOG(1) << "invalid content-length -- no non-whitespace value data";
484     return BalsaHeadersEnums::INVALID_CONTENT_LENGTH;
485   }
486 
487   *length = 0;
488   while (value_begin < line_end) {
489     if (*value_begin < '0' || *value_begin > '9') {
490       // bad! content-length found, and couldn't parse all of it!
491       QUICHE_DVLOG(1)
492           << "invalid content-length - non numeric character detected";
493       return BalsaHeadersEnums::INVALID_CONTENT_LENGTH;
494     }
495     const size_t kMaxDiv10 = std::numeric_limits<size_t>::max() / 10;
496     size_t length_x_10 = *length * 10;
497     const size_t c = *value_begin - '0';
498     if (*length > kMaxDiv10 ||
499         (std::numeric_limits<size_t>::max() - length_x_10) < c) {
500       QUICHE_DVLOG(1) << "content-length overflow";
501       return BalsaHeadersEnums::CONTENT_LENGTH_OVERFLOW;
502     }
503     *length = length_x_10 + c;
504     ++value_begin;
505   }
506   QUICHE_DVLOG(1) << "content_length parsed: " << *length;
507   return BalsaHeadersEnums::VALID_CONTENT_LENGTH;
508 }
509 
ProcessTransferEncodingLine(HeaderLines::size_type line_idx)510 void BalsaFrame::ProcessTransferEncodingLine(HeaderLines::size_type line_idx) {
511   const HeaderLineDescription& header_line = headers_->header_lines_[line_idx];
512   const char* stream_begin = headers_->OriginalHeaderStreamBegin();
513   const absl::string_view transfer_encoding(
514       stream_begin + header_line.value_begin_idx,
515       header_line.last_char_idx - header_line.value_begin_idx);
516 
517   if (absl::EqualsIgnoreCase(transfer_encoding, kChunked)) {
518     headers_->transfer_encoding_is_chunked_ = true;
519     return;
520   }
521 
522   if (absl::EqualsIgnoreCase(transfer_encoding, kIdentity)) {
523     headers_->transfer_encoding_is_chunked_ = false;
524     return;
525   }
526 
527   if (http_validation_policy().validate_transfer_encoding) {
528     HandleError(BalsaFrameEnums::UNKNOWN_TRANSFER_ENCODING);
529   }
530 }
531 
CheckHeaderLinesForInvalidChars(const Lines & lines,const BalsaHeaders * headers)532 bool BalsaFrame::CheckHeaderLinesForInvalidChars(const Lines& lines,
533                                                  const BalsaHeaders* headers) {
534   // Read from the beginning of the first line to the end of the last line.
535   // Note we need to add the first line's offset as in the case of a trailer
536   // it's non-zero.
537   const char* stream_begin =
538       headers->OriginalHeaderStreamBegin() + lines.front().first;
539   const char* stream_end =
540       headers->OriginalHeaderStreamBegin() + lines.back().second;
541   bool found_invalid = false;
542 
543   for (const char* c = stream_begin; c < stream_end; c++) {
544     if (header_properties::IsInvalidHeaderChar(*c)) {
545       found_invalid = true;
546       invalid_chars_[*c]++;
547     }
548     if (*c == '\r' &&
549         http_validation_policy().disallow_lone_cr_in_request_headers &&
550         c + 1 < stream_end && *(c + 1) != '\n') {
551       found_invalid = true;
552       invalid_chars_[*c]++;
553     }
554   }
555 
556   return found_invalid;
557 }
558 
// Processes the header (or, when |is_trailer| is true, trailer) lines in
// |lines| and stores the result in |headers|:
//   1. Optionally checks all lines for invalid characters.
//   2. Splits each line into key and value via
//      FindColonsAndParseIntoKeyValue().
//   3. Rejects lines whose key is empty or starts with a space.
//   4. For headers only, interprets Content-Length and Transfer-Encoding,
//      which determine how the message body is framed.
// Problems are reported through HandleError() / HandleWarning(); after an
// error the function returns immediately.
void BalsaFrame::ProcessHeaderLines(const Lines& lines, bool is_trailer,
                                    BalsaHeaders* headers) {
  QUICHE_DCHECK(!lines.empty());
  QUICHE_DVLOG(1) << "******@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@**********\n";

  // Invalid-character checking always applies to requests; for responses it
  // applies only when the validation policy asks for it.  In both cases
  // track_invalid_chars() must be on.
  if ((is_request() || http_validation_policy()
                           .disallow_invalid_header_characters_in_response) &&
      track_invalid_chars()) {
    if (CheckHeaderLinesForInvalidChars(lines, headers)) {
      if (invalid_chars_error_enabled()) {
        HandleError(BalsaFrameEnums::INVALID_HEADER_CHARACTER);
        return;
      }

      HandleWarning(BalsaFrameEnums::INVALID_HEADER_CHARACTER);
    }
  }

  // There is no need to attempt to process headers (resp. trailers)
  // if no header (resp. trailer) lines exist.
  //
  // The last line of the message, which is an empty line, is never a header
  // (resp. trailer) line.  Furthermore, the first line of the message is not
  // a header line.  Therefore there are at least two (resp. one) lines in the
  // message which are not header (resp. trailer) lines.
  //
  // Thus, we test to see if we have more than two (resp. one) lines total
  // before attempting to parse any header (resp. trailer) lines.
  if (lines.size() <= (is_trailer ? 1 : 2)) {
    return;
  }

  // Both indices are stored as (index into header_lines_) + 1, so that 0 can
  // mean "not seen".
  HeaderLines::size_type content_length_idx = 0;
  HeaderLines::size_type transfer_encoding_idx = 0;
  const char* stream_begin = headers->OriginalHeaderStreamBegin();
  // Parse the rest of the header or trailer data into key-value pairs.
  if (!FindColonsAndParseIntoKeyValue(lines, is_trailer, headers)) {
    return;
  }
  // At this point, we've parsed all of the headers/trailers.  Time to look
  // for those headers which we require for framing or for format errors.
  const HeaderLines::size_type lines_size = headers->header_lines_.size();
  for (HeaderLines::size_type i = 0; i < lines_size; ++i) {
    const HeaderLineDescription& line = headers->header_lines_[i];
    const absl::string_view key(stream_begin + line.first_char_idx,
                                line.key_end_idx - line.first_char_idx);
    QUICHE_DVLOG(2) << "[" << i << "]: " << key << " key_len: " << key.length();

    // Content-Length and Transfer-Encoding are special, as they change the
    // way that the message is framed, and so the framer is required to search
    // for them.  However, first check for a formatting error, and skip
    // special header treatment on trailer lines (when is_trailer is true).
    if (key.empty() || key[0] == ' ') {
      parse_state_ = BalsaFrameEnums::ERROR;
      HandleError(is_trailer ? BalsaFrameEnums::INVALID_TRAILER_FORMAT
                             : BalsaFrameEnums::INVALID_HEADER_FORMAT);
      return;
    }
    if (is_trailer) {
      continue;
    }
    if (absl::EqualsIgnoreCase(key, kContentLength)) {
      size_t length = 0;
      BalsaHeadersEnums::ContentLengthStatus content_length_status =
          ProcessContentLengthLine(i, &length);
      if (content_length_idx == 0) {
        // First Content-Length header: record its status and value.
        content_length_idx = i + 1;
        headers->content_length_status_ = content_length_status;
        headers->content_length_ = length;
        content_length_remaining_ = length;
        continue;
      }
      // A repeated Content-Length header is an error if its parse status
      // differs from the first one's, or if both are valid and either the
      // policy disallows repeats outright or the values differ.
      if ((headers->content_length_status_ != content_length_status) ||
          ((headers->content_length_status_ ==
            BalsaHeadersEnums::VALID_CONTENT_LENGTH) &&
           (http_validation_policy().disallow_multiple_content_length ||
            length != headers->content_length_))) {
        HandleError(BalsaFrameEnums::MULTIPLE_CONTENT_LENGTH_KEYS);
        return;
      }
      continue;
    }
    if (absl::EqualsIgnoreCase(key, kTransferEncoding)) {
      if (http_validation_policy().validate_transfer_encoding &&
          transfer_encoding_idx != 0) {
        HandleError(BalsaFrameEnums::MULTIPLE_TRANSFER_ENCODING_KEYS);
        return;
      }
      // Without validation, the last Transfer-Encoding header seen wins.
      transfer_encoding_idx = i + 1;
    }
  }

  if (!is_trailer) {
    if (http_validation_policy().validate_transfer_encoding &&
        http_validation_policy()
            .disallow_transfer_encoding_with_content_length &&
        content_length_idx != 0 && transfer_encoding_idx != 0) {
      HandleError(BalsaFrameEnums::BOTH_TRANSFER_ENCODING_AND_CONTENT_LENGTH);
      return;
    }
    // NOTE(review): transfer_encoding_is_chunked_ is tested here before
    // ProcessTransferEncodingLine() below has had a chance to set it for this
    // message, so this branch can only fire if the flag was already set when
    // this function was entered.  Confirm whether the ordering is intended.
    if (headers->transfer_encoding_is_chunked_) {
      headers->content_length_ = 0;
      headers->content_length_status_ = BalsaHeadersEnums::NO_CONTENT_LENGTH;
      content_length_remaining_ = 0;
    }
    if (transfer_encoding_idx != 0) {
      ProcessTransferEncodingLine(transfer_encoding_idx - 1);
    }
  }
}
671 
AssignParseStateAfterHeadersHaveBeenParsed()672 void BalsaFrame::AssignParseStateAfterHeadersHaveBeenParsed() {
673   // For responses, can't have a body if the request was a HEAD, or if it is
674   // one of these response-codes.  rfc2616 section 4.3
675   parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
676   int response_code = headers_->parsed_response_code_;
677   if (!is_request_ && (request_was_head_ ||
678                        !BalsaHeaders::ResponseCanHaveBody(response_code))) {
679     // There is no body.
680     return;
681   }
682 
683   if (headers_->transfer_encoding_is_chunked_) {
684     // Note that
685     // if ( Transfer-Encoding: chunked &&  Content-length: )
686     // then Transfer-Encoding: chunked trumps.
687     // This is as specified in the spec.
688     // rfc2616 section 4.4.3
689     parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH;
690     return;
691   }
692 
693   // Errors parsing content-length definitely can cause
694   // protocol errors/warnings
695   switch (headers_->content_length_status_) {
696     // If we have a content-length, and it is parsed
697     // properly, there are two options.
698     // 1) zero content, in which case the message is done, and
699     // 2) nonzero content, in which case we have to
700     //    consume the body.
701     case BalsaHeadersEnums::VALID_CONTENT_LENGTH:
702       if (headers_->content_length_ == 0) {
703         parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
704       } else {
705         parse_state_ = BalsaFrameEnums::READING_CONTENT;
706       }
707       break;
708     case BalsaHeadersEnums::CONTENT_LENGTH_OVERFLOW:
709     case BalsaHeadersEnums::INVALID_CONTENT_LENGTH:
710       // If there were characters left-over after parsing the
711       // content length, we should flag an error and stop.
712       HandleError(BalsaFrameEnums::UNPARSABLE_CONTENT_LENGTH);
713       break;
714       // We can have: no transfer-encoding, no content length, and no
715       // connection: close...
716       // Unfortunately, this case doesn't seem to be covered in the spec.
717       // We'll assume that the safest thing to do here is what the google
718       // binaries before 2008 already do, which is to assume that
719       // everything until the connection is closed is body.
720     case BalsaHeadersEnums::NO_CONTENT_LENGTH:
721       if (is_request_) {
722         const absl::string_view method = headers_->request_method();
723         // POSTs and PUTs should have a detectable body length.  If they
724         // do not we consider it an error.
725         if ((method != "POST" && method != "PUT") ||
726             !http_validation_policy().require_content_length_if_body_required) {
727           parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
728           break;
729         } else if (!allow_reading_until_close_for_request_) {
730           HandleError(BalsaFrameEnums::REQUIRED_BODY_BUT_NO_CONTENT_LENGTH);
731           break;
732         }
733       }
734       parse_state_ = BalsaFrameEnums::READING_UNTIL_CLOSE;
735       HandleWarning(BalsaFrameEnums::MAYBE_BODY_BUT_NO_CONTENT_LENGTH);
736       break;
737       // The COV_NF_... statements here provide hints to the apparatus
738       // which computes coverage reports/ratios that this code is never
739       // intended to be executed, and should technically be impossible.
740       // COV_NF_START
741     default:
742       QUICHE_LOG(FATAL) << "Saw a content_length_status: "
743                         << headers_->content_length_status_
744                         << " which is unknown.";
745       // COV_NF_END
746   }
747 }
748 
// Scans `message_length` bytes starting at `message_start` for the end of the
// header block, copies the consumed bytes into headers_ and appends one entry
// per line to lines_.
//
// '\r' and '\n' bytes that precede the first line are skipped without being
// copied. If the first other byte is <= ' ' (compared as unsigned char) the
// framer reports NO_REQUEST_LINE_IN_REQUEST. The first complete line is passed
// to ProcessFirstLine(). Once the blank line that ends the headers is found,
// the header lines are processed, the visitor is notified and the follow-up
// parse state is assigned. Interim responses (when
// use_interim_headers_callback_ is set, 101 excluded) and 100 Continue
// responses (when continue_headers_ is non-null) are handed over, the framer
// is Reset(), and scanning resumes with the bytes that follow.
//
// Returns the number of input bytes consumed.
ProcessHeaders(const char * message_start,size_t message_length)749 size_t BalsaFrame::ProcessHeaders(const char* message_start,
750                                   size_t message_length) {
751   const char* const original_message_start = message_start;
752   const char* const message_end = message_start + message_length;
753   const char* message_current = message_start;
      // First input byte that has not yet been copied into headers_.
754   const char* checkpoint = message_start;
755 
756   if (message_length == 0) {
757     return message_current - original_message_start;
758   }
759 
760   while (message_current < message_end) {
761     size_t base_idx = headers_->GetReadableBytesFromHeaderStream();
762 
763     // Yes, we could use strchr (assuming null termination), or
764     // memchr, but as it turns out that is slower than this tight loop
765     // for the input that we see.
        // Skip '\r'/'\n' bytes ahead of the first line. message_start and
        // checkpoint are moved past them below, so they are never written to
        // headers_.
766     if (!saw_non_newline_char_) {
767       do {
768         const char c = *message_current;
769         if (c != '\r' && c != '\n') {
770           if (CHAR_LE(c, ' ')) {
771             HandleError(BalsaFrameEnums::NO_REQUEST_LINE_IN_REQUEST);
772             return message_current - original_message_start;
773           }
774           break;
775         }
776         ++message_current;
777         if (message_current == message_end) {
778           return message_current - original_message_start;
779         }
780       } while (true);
781       saw_non_newline_char_ = true;
782       message_start = message_current;
783       checkpoint = message_current;
784     }
785     while (message_current < message_end) {
786       if (*message_current != '\n') {
787         ++message_current;
788         continue;
789       }
          // At a '\n'. message_current_idx is base_idx plus the offset one
          // past this '\n' (relative to message_start).
790       const size_t relative_idx = message_current - message_start;
791       const size_t message_current_idx = 1 + base_idx + relative_idx;
792       lines_.push_back(std::make_pair(last_slash_n_idx_, message_current_idx));
          // The first recorded line is flushed to headers_ immediately and
          // handed to ProcessFirstLine().
793       if (lines_.size() == 1) {
794         headers_->WriteFromFramer(checkpoint, 1 + message_current - checkpoint);
795         checkpoint = message_current + 1;
796         const char* begin = headers_->OriginalHeaderStreamBegin();
797 
798         QUICHE_DVLOG(1) << "First line "
799                         << std::string(begin, lines_[0].second);
800         QUICHE_DVLOG(1) << "is_request_: " << is_request_;
801         ProcessFirstLine(begin, begin + lines_[0].second);
802         if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ) {
803           break;
804         }
805 
806         if (parse_state_ == BalsaFrameEnums::ERROR) {
807           return message_current - original_message_start;
808         }
809       }
810       const size_t chars_since_last_slash_n =
811           (message_current_idx - last_slash_n_idx_);
812       last_slash_n_idx_ = message_current_idx;
          // A blank line is at most "\r\n" (2 bytes) after the previous '\n';
          // anything longer is an ordinary line.
813       if (chars_since_last_slash_n > 2) {
814         // false positive.
815         ++message_current;
816         continue;
817       }
          // Stop scanning when this '\n' directly follows the previous one, or
          // when the byte before it in this buffer is '\r', or when
          // last_char_was_slash_r_ is set (the previous call ended on '\r').
818       if ((chars_since_last_slash_n == 1) ||
819           (((message_current > message_start) &&
820             (*(message_current - 1) == '\r')) ||
821            (last_char_was_slash_r_))) {
822         break;
823       }
824       ++message_current;
825     }
826 
        // Input ran out before the scan stopped. `continue` re-tests the outer
        // loop condition, which now fails, so control moves to the code after
        // the loop.
827     if (message_current == message_end) {
828       continue;
829     }
830 
        // message_current is on the '\n' where the scan stopped; step past it.
831     ++message_current;
832     QUICHE_DCHECK(message_current >= message_start);
833     if (message_current > message_start) {
834       headers_->WriteFromFramer(checkpoint, message_current - checkpoint);
835     }
836 
837     // Check if we have exceeded maximum headers length
838     // Although we check for this limit before and after we call this function
839     // we check it here as well to make sure that in case the visitor changed
840     // the max_header_length_ (for example after processing the first line)
841     // we handle it gracefully.
842     if (headers_->GetReadableBytesFromHeaderStream() > max_header_length_) {
843       HandleHeadersTooLongError();
844       return message_current - original_message_start;
845     }
846 
847     // Since we know that we won't be writing any more bytes of the header,
848     // we tell that to the headers object. The headers object may make
849     // more efficient allocation decisions when this is signaled.
850     headers_->DoneWritingFromFramer();
851     visitor_->OnHeaderInput(headers_->GetReadablePtrFromHeaderStream());
852 
853     // Ok, now that we've written everything into our header buffer, it is
854     // time to process the header lines (extract proper values for headers
855     // which are important for framing).
856     ProcessHeaderLines(lines_, false /*is_trailer*/, headers_);
857     if (parse_state_ == BalsaFrameEnums::ERROR) {
858       return message_current - original_message_start;
859     }
860 
861     if (use_interim_headers_callback_ &&
862         IsInterimResponse(headers_->parsed_response_code()) &&
863         headers_->parsed_response_code() != kSwitchingProtocolsStatusCode) {
864       // Deliver headers from this interim response but reset everything else to
865       // prepare for the next set of headers. Skip 101 Switching Protocols
866       // because these are considered final headers for the current protocol.
867       visitor_->OnInterimHeaders(
868           std::make_unique<BalsaHeaders>(std::move(*headers_)));
869       Reset();
          // Restart the scan window at the first byte after this header block.
870       checkpoint = message_start = message_current;
871       continue;
872     }
873     if (continue_headers_ != nullptr &&
874         headers_->parsed_response_code_ == kContinueStatusCode) {
875       // Save the headers from this 100 Continue response but reset everything
876       // else to prepare for the next set of headers.
877       BalsaHeaders saved_continue_headers = std::move(*headers_);
878       Reset();
879       *continue_headers_ = std::move(saved_continue_headers);
880       visitor_->ContinueHeaderDone();
881       checkpoint = message_start = message_current;
882       continue;
883     }
884     AssignParseStateAfterHeadersHaveBeenParsed();
885     if (parse_state_ == BalsaFrameEnums::ERROR) {
886       return message_current - original_message_start;
887     }
888     visitor_->ProcessHeaders(*headers_);
889     visitor_->HeaderDone();
890     if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ) {
891       visitor_->MessageDone();
892     }
893     return message_current - original_message_start;
894   }
895   // If we've gotten to here, it means that we've consumed all of the
896   // available input. We need to record whether or not the last character we
897   // saw was a '\r' so that a subsequent call to ProcessInput correctly finds
898   // a header framing that is split across the two calls.
899   last_char_was_slash_r_ = (*(message_end - 1) == '\r');
900   QUICHE_DCHECK(message_current >= message_start);
      // Buffer whatever has been scanned but not yet copied into headers_.
901   if (message_current > message_start) {
902     headers_->WriteFromFramer(checkpoint, message_current - checkpoint);
903   }
904   return message_current - original_message_start;
905 }
906 
BytesSafeToSplice() const907 size_t BalsaFrame::BytesSafeToSplice() const {
908   switch (parse_state_) {
909     case BalsaFrameEnums::READING_CHUNK_DATA:
910       return chunk_length_remaining_;
911     case BalsaFrameEnums::READING_UNTIL_CLOSE:
912       return std::numeric_limits<size_t>::max();
913     case BalsaFrameEnums::READING_CONTENT:
914       return content_length_remaining_;
915     default:
916       return 0;
917   }
918 }
919 
BytesSpliced(size_t bytes_spliced)920 void BalsaFrame::BytesSpliced(size_t bytes_spliced) {
921   switch (parse_state_) {
922     case BalsaFrameEnums::READING_CHUNK_DATA:
923       if (chunk_length_remaining_ < bytes_spliced) {
924         HandleError(BalsaFrameEnums::
925                         CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT);
926         return;
927       }
928       chunk_length_remaining_ -= bytes_spliced;
929       if (chunk_length_remaining_ == 0) {
930         parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM;
931       }
932       return;
933 
934     case BalsaFrameEnums::READING_UNTIL_CLOSE:
935       return;
936 
937     case BalsaFrameEnums::READING_CONTENT:
938       if (content_length_remaining_ < bytes_spliced) {
939         HandleError(BalsaFrameEnums::
940                         CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT);
941         return;
942       }
943       content_length_remaining_ -= bytes_spliced;
944       if (content_length_remaining_ == 0) {
945         parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
946         visitor_->MessageDone();
947       }
948       return;
949 
950     default:
951       HandleError(BalsaFrameEnums::CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO);
952       return;
953   }
954 }
955 
// Feeds `size` bytes at `input` to the framer and returns how many of them
// were consumed.
//
// While in READING_HEADER_AND_FIRSTLINE the bytes go to ProcessHeaders(),
// capped so that the buffered header cannot grow past max_header_length_;
// the call returns right after that step, even if more input is left. In
// MESSAGE_FULLY_READ or ERROR nothing is consumed. Otherwise the body state
// machine below runs until the input is exhausted, the message is complete,
// or an error is reported.
//
// `on_entry` marks the start of the input range that has not yet been
// reported through OnRawBodyInput() / OnTrailerInput().
ProcessInput(const char * input,size_t size)956 size_t BalsaFrame::ProcessInput(const char* input, size_t size) {
957   const char* current = input;
958   const char* on_entry = current;
959   const char* end = current + size;
960 
961   QUICHE_DCHECK(headers_ != nullptr);
962   if (headers_ == nullptr) {
963     return 0;
964   }
965 
966   if (parse_state_ == BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE) {
967     const size_t header_length = headers_->GetReadableBytesFromHeaderStream();
968     // Yes, we still have to check this here as the user can change the
969     // max_header_length amount!
970     // Also it is possible that we have reached the maximum allowed header size,
971     // and we have more to consume (remember we are still inside
972     // READING_HEADER_AND_FIRSTLINE) in which case we directly declare an error.
973     if (header_length > max_header_length_ ||
974         (header_length == max_header_length_ && size > 0)) {
975       HandleHeadersTooLongError();
976       return current - input;
977     }
        // Never hand ProcessHeaders() more than the room left under the limit.
978     const size_t bytes_to_process =
979         std::min(max_header_length_ - header_length, size);
980     current += ProcessHeaders(input, bytes_to_process);
981     // If we are still reading headers check if we have crossed the headers
982     // limit. Note that we check for >= as opposed to >. This is because if
983     // header_length_after equals max_header_length_ and we are still in the
984     // parse_state_  BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE we know for
985     // sure that the headers limit will be crossed later on
986     if (parse_state_ == BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE) {
987       // Note that headers_ is valid only if we are still reading headers.
988       const size_t header_length_after =
989           headers_->GetReadableBytesFromHeaderStream();
990       if (header_length_after >= max_header_length_) {
991         HandleHeadersTooLongError();
992       }
993     }
994     return current - input;
995   }
996 
997   if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ ||
998       parse_state_ == BalsaFrameEnums::ERROR) {
999     // Can do nothing more 'till we're reset.
1000     return current - input;
1001   }
1002 
1003   QUICHE_DCHECK_LE(current, end);
1004   if (current == end) {
1005     return current - input;
1006   }
1007 
       // Body state machine. Each case either returns or sets parse_state_ and
       // `continue`s to dispatch on the new state.
1008   while (true) {
1009     switch (parse_state_) {
1010       case BalsaFrameEnums::READING_CHUNK_LENGTH:
1011         // In this state we read the chunk length.
1012         // Note that once we hit a character which is not in:
1013         // [0-9;A-Fa-f\n], we transition to a different state.
1014         //
1015         QUICHE_DCHECK_LE(current, end);
1016         while (true) {
1017           if (current == end) {
1018             visitor_->OnRawBodyInput(
1019                 absl::string_view(on_entry, current - on_entry));
1020             return current - input;
1021           }
1022 
1023           const char c = *current;
1024           ++current;
1025 
1026           static const signed char kBad = -1;
1027           static const signed char kDelimiter = -2;
1028 
1029           // valid cases:
1030           //  "09123\n"                      // -> 09123
1031           //  "09123\r\n"                    // -> 09123
1032           //  "09123  \n"                    // -> 09123
1033           //  "09123  \r\n"                  // -> 09123
1034           //  "09123  12312\n"               // -> 09123
1035           //  "09123  12312\r\n"             // -> 09123
1036           //  "09123; foo=bar\n"             // -> 09123
1037           //  "09123; foo=bar\r\n"           // -> 09123
1038           //  "FFFFFFFFFFFFFFFF\r\n"         // -> FFFFFFFFFFFFFFFF
1039           //  "FFFFFFFFFFFFFFFF 22\r\n"      // -> FFFFFFFFFFFFFFFF
1040           // invalid cases:
1041           // "[ \t]+[^\n]*\n"
1042           // "FFFFFFFFFFFFFFFFF\r\n"  (would overflow)
1043           // "\r\n"
1044           // "\n"
1045           signed char addition = kBad;
1046           // clang-format off
1047           switch (c) {
1048             case '0': addition = 0; break;
1049             case '1': addition = 1; break;
1050             case '2': addition = 2; break;
1051             case '3': addition = 3; break;
1052             case '4': addition = 4; break;
1053             case '5': addition = 5; break;
1054             case '6': addition = 6; break;
1055             case '7': addition = 7; break;
1056             case '8': addition = 8; break;
1057             case '9': addition = 9; break;
1058             case 'a': addition = 0xA; break;
1059             case 'b': addition = 0xB; break;
1060             case 'c': addition = 0xC; break;
1061             case 'd': addition = 0xD; break;
1062             case 'e': addition = 0xE; break;
1063             case 'f': addition = 0xF; break;
1064             case 'A': addition = 0xA; break;
1065             case 'B': addition = 0xB; break;
1066             case 'C': addition = 0xC; break;
1067             case 'D': addition = 0xD; break;
1068             case 'E': addition = 0xE; break;
1069             case 'F': addition = 0xF; break;
1070             case '\t':
1071             case '\n':
1072             case '\r':
1073             case ' ':
1074             case ';':
1075               addition = kDelimiter;
1076               break;
1077             default:
1078               // Leave addition == kBad
1079               break;
1080           }
1081           // clang-format on
               // A hex digit: fold it into the running length as
               // length * 16 + digit, rejecting values that do not fit in
               // size_t.
1082           if (addition >= 0) {
1083             chunk_length_character_extracted_ = true;
1084             size_t length_x_16 = chunk_length_remaining_ * 16;
1085             const size_t kMaxDiv16 = std::numeric_limits<size_t>::max() / 16;
1086             if ((chunk_length_remaining_ > kMaxDiv16) ||
1087                 (std::numeric_limits<size_t>::max() - length_x_16) <
1088                     static_cast<size_t>(addition)) {
1089               // overflow -- asked for a chunk-length greater than 2^64 - 1!!
1090               visitor_->OnRawBodyInput(
1091                   absl::string_view(on_entry, current - on_entry));
1092               HandleError(BalsaFrameEnums::CHUNK_LENGTH_OVERFLOW);
1093               return current - input;
1094             }
1095             chunk_length_remaining_ = length_x_16 + addition;
1096             continue;
1097           }
1098 
1099           if (!chunk_length_character_extracted_ || addition == kBad) {
1100             // ^[0-9;A-Fa-f][ \t\n] -- was not matched, either because no
1101             // characters were converted, or an unexpected character was
1102             // seen.
1103             visitor_->OnRawBodyInput(
1104                 absl::string_view(on_entry, current - on_entry));
1105             HandleError(BalsaFrameEnums::INVALID_CHUNK_LENGTH);
1106             return current - input;
1107           }
1108 
1109           break;
1110         }
1111 
             // Step back so that the delimiter which ended the length is the
             // first byte examined in READING_CHUNK_EXTENSION.
1112         --current;
1113         parse_state_ = BalsaFrameEnums::READING_CHUNK_EXTENSION;
1114         visitor_->OnChunkLength(chunk_length_remaining_);
1115         continue;
1116 
1117       case BalsaFrameEnums::READING_CHUNK_EXTENSION: {
1118         // TODO(phython): Convert this scanning to be 16 bytes at a time if
1119         // there is data to be read.
1120         const char* extensions_start = current;
1121         size_t extensions_length = 0;
1122         QUICHE_DCHECK_LE(current, end);
1123         while (true) {
1124           if (current == end) {
1125             visitor_->OnChunkExtensionInput(
1126                 absl::string_view(extensions_start, extensions_length));
1127             visitor_->OnRawBodyInput(
1128                 absl::string_view(on_entry, current - on_entry));
1129             return current - input;
1130           }
1131           const char c = *current;
               // NOTE(review): a '\r' that is the last byte of this buffer
               // (current + 1 == end) is also reported as a lone CR, although
               // the matching '\n' could still arrive in the next call --
               // confirm this is intended.
1132           if (http_validation_policy_.disallow_lone_cr_in_chunk_extension &&
1133               c == '\r' && (current + 1 == end || *(current + 1) != '\n')) {
1134             // We have a lone carriage return.
1135             HandleError(BalsaFrameEnums::INVALID_CHUNK_EXTENSION);
1136             return current - input;
1137           }
               // On every '\r' or '\n' the reported length is recomputed as
               // one less than the distance from extensions_start (0 when the
               // line terminator is the very first byte).
1138           if (c == '\r' || c == '\n') {
1139             extensions_length = (extensions_start == current)
1140                                     ? 0
1141                                     : current - extensions_start - 1;
1142           }
1143 
1144           ++current;
1145           if (c == '\n') {
1146             break;
1147           }
1148         }
1149 
1150         chunk_length_character_extracted_ = false;
1151         visitor_->OnChunkExtensionInput(
1152             absl::string_view(extensions_start, extensions_length));
1153 
1154         if (chunk_length_remaining_ != 0) {
1155           parse_state_ = BalsaFrameEnums::READING_CHUNK_DATA;
1156           continue;
1157         }
1158 
             // Zero-length chunk: this was the last chunk. Feed the '\n' that
             // ended its line to HeaderFramingFound() before looking for the
             // terminating blank line (presumably to prime the framing
             // detector -- its implementation is not visible here).
1159         HeaderFramingFound('\n');
1160         parse_state_ = BalsaFrameEnums::READING_LAST_CHUNK_TERM;
1161         continue;
1162       }
1163 
1164       case BalsaFrameEnums::READING_CHUNK_DATA:
1165         while (current < end) {
1166           if (chunk_length_remaining_ == 0) {
1167             break;
1168           }
1169           // read in the chunk
1170           size_t bytes_remaining = end - current;
1171           size_t consumed_bytes = (chunk_length_remaining_ < bytes_remaining)
1172                                       ? chunk_length_remaining_
1173                                       : bytes_remaining;
1174           const char* tmp_current = current + consumed_bytes;
1175           visitor_->OnRawBodyInput(
1176               absl::string_view(on_entry, tmp_current - on_entry));
1177           visitor_->OnBodyChunkInput(
1178               absl::string_view(current, consumed_bytes));
               // Everything up to tmp_current has been reported as raw input;
               // restart the unreported range there.
1179           on_entry = current = tmp_current;
1180           chunk_length_remaining_ -= consumed_bytes;
1181         }
1182 
1183         if (chunk_length_remaining_ == 0) {
1184           parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM;
1185           continue;
1186         }
1187 
1188         visitor_->OnRawBodyInput(
1189             absl::string_view(on_entry, current - on_entry));
1190         return current - input;
1191 
           // Discards bytes up to and including the next '\n' after the chunk
           // data, then goes back to reading a chunk length.
1192       case BalsaFrameEnums::READING_CHUNK_TERM:
1193         QUICHE_DCHECK_LE(current, end);
1194         while (true) {
1195           if (current == end) {
1196             visitor_->OnRawBodyInput(
1197                 absl::string_view(on_entry, current - on_entry));
1198             return current - input;
1199           }
1200 
1201           const char c = *current;
1202           ++current;
1203 
1204           if (c == '\n') {
1205             break;
1206           }
1207         }
1208         parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH;
1209         continue;
1210 
1211       case BalsaFrameEnums::READING_LAST_CHUNK_TERM:
1212         QUICHE_DCHECK_LE(current, end);
1213         while (true) {
1214           if (current == end) {
1215             visitor_->OnRawBodyInput(
1216                 absl::string_view(on_entry, current - on_entry));
1217             return current - input;
1218           }
1219 
1220           const char c = *current;
1221           if (HeaderFramingFound(c) != 0) {
1222             // If we've found a "\r\n\r\n", then the message
1223             // is done.
1224             ++current;
1225             parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
1226             visitor_->OnRawBodyInput(
1227                 absl::string_view(on_entry, current - on_entry));
1228             visitor_->MessageDone();
1229             return current - input;
1230           }
1231 
1232           // If not, however, since the spec only suggests that the
1233           // client SHOULD indicate the presence of trailers, we get to
1234           // *test* that they did or didn't.
1235           // If all of the bytes we've seen since:
1236           //   OPTIONAL_WS 0 OPTIONAL_STUFF CRLF
1237           // are either '\r', or '\n', then we can assume that we don't yet
1238           // know if we need to parse headers, or if the next byte will make
1239           // the HeaderFramingFound condition (above) true.
1240           if (!HeaderFramingMayBeFound()) {
1241             break;
1242           }
1243 
1244           // If HeaderFramingMayBeFound(), then we have seen only characters
1245           // '\r' or '\n'.
1246           ++current;
1247 
1248           // Lets try again! There is no state change here.
1249         }
1250 
1251         // If (!HeaderFramingMayBeFound()), then we know that we must be
1252         // reading the first non CRLF character of a trailer.
1253         parse_state_ = BalsaFrameEnums::READING_TRAILER;
1254         visitor_->OnRawBodyInput(
1255             absl::string_view(on_entry, current - on_entry));
             // The trailer starts at `current` (not advanced past the byte
             // just examined); it is reported via OnTrailerInput() instead.
1256         on_entry = current;
1257         continue;
1258 
1259       // TODO(yongfa): No leading whitespace is allowed before field-name per
1260       // RFC2616. Leading whitespace will cause header parsing error too.
1261       case BalsaFrameEnums::READING_TRAILER:
1262         while (current < end) {
1263           const char c = *current;
1264           ++current;
1265           ++trailer_length_;
               // Length limit and per-line bookkeeping only apply when
               // trailers_ is set.
1266           if (trailers_ != nullptr) {
1267             // Reuse the header length limit for trailer, which is just a bunch
1268             // of headers.
1269             if (trailer_length_ > max_header_length_) {
                   // Un-consume the byte that pushed the trailer over the
                   // limit.
1270               --current;
1271               HandleError(BalsaFrameEnums::TRAILER_TOO_LONG);
1272               return current - input;
1273             }
1274             if (LineFramingFound(c)) {
1275               trailer_lines_.push_back(
1276                   std::make_pair(start_of_trailer_line_, trailer_length_));
1277               start_of_trailer_line_ = trailer_length_;
1278             }
1279           }
1280           if (HeaderFramingFound(c) != 0) {
1281             parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
1282             if (trailers_ != nullptr) {
1283               trailers_->WriteFromFramer(on_entry, current - on_entry);
1284               trailers_->DoneWritingFromFramer();
1285               ProcessHeaderLines(trailer_lines_, true /*is_trailer*/,
1286                                  trailers_.get());
1287               if (parse_state_ == BalsaFrameEnums::ERROR) {
1288                 return current - input;
1289               }
1290               visitor_->OnTrailers(std::move(trailers_));
1291 
1292               // Allows trailers to be delivered without another call to
1293               // EnableTrailers() in case the framer is Reset().
1294               trailers_ = std::make_unique<BalsaHeaders>();
1295             }
1296             visitor_->OnTrailerInput(
1297                 absl::string_view(on_entry, current - on_entry));
1298             visitor_->MessageDone();
1299             return current - input;
1300           }
1301         }
             // Input exhausted mid-trailer: buffer what was seen and report
             // it.
1302         if (trailers_ != nullptr) {
1303           trailers_->WriteFromFramer(on_entry, current - on_entry);
1304         }
1305         visitor_->OnTrailerInput(
1306             absl::string_view(on_entry, current - on_entry));
1307         return current - input;
1308 
           // No length information: all remaining input is body.
1309       case BalsaFrameEnums::READING_UNTIL_CLOSE: {
1310         const size_t bytes_remaining = end - current;
1311         if (bytes_remaining > 0) {
1312           visitor_->OnRawBodyInput(absl::string_view(current, bytes_remaining));
1313           visitor_->OnBodyChunkInput(
1314               absl::string_view(current, bytes_remaining));
1315           current += bytes_remaining;
1316         }
1317         return current - input;
1318       }
1319 
1320       case BalsaFrameEnums::READING_CONTENT:
1321         while ((content_length_remaining_ != 0u) && current < end) {
1322           // read in the content
1323           const size_t bytes_remaining = end - current;
1324           const size_t consumed_bytes =
1325               (content_length_remaining_ < bytes_remaining)
1326                   ? content_length_remaining_
1327                   : bytes_remaining;
1328           visitor_->OnRawBodyInput(absl::string_view(current, consumed_bytes));
1329           visitor_->OnBodyChunkInput(
1330               absl::string_view(current, consumed_bytes));
1331           current += consumed_bytes;
1332           content_length_remaining_ -= consumed_bytes;
1333         }
1334         if (content_length_remaining_ == 0) {
1335           parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
1336           visitor_->MessageDone();
1337         }
1338         return current - input;
1339 
1340       default:
1341         // The state-machine should never be in a state that isn't handled
1342         // above.  This is a glaring logic error, and we should do something
1343         // drastic to ensure that this gets looked-at and fixed.
1344         QUICHE_LOG(FATAL) << "Unknown state: " << parse_state_  // COV_NF_LINE
1345                           << " memory corruption?!";            // COV_NF_LINE
1346     }
1347   }
1348 }
1349 
HandleHeadersTooLongError()1350 void BalsaFrame::HandleHeadersTooLongError() {
1351   if (parse_truncated_headers_even_when_headers_too_long_) {
1352     const size_t len = headers_->GetReadableBytesFromHeaderStream();
1353     const char* stream_begin = headers_->OriginalHeaderStreamBegin();
1354 
1355     if (last_slash_n_idx_ < len && stream_begin[last_slash_n_idx_] != '\r') {
1356       // We write an end to the truncated line, and a blank line to end the
1357       // headers, to end up with something that will parse.
1358       static const absl::string_view kTwoLineEnds = "\r\n\r\n";
1359       headers_->WriteFromFramer(kTwoLineEnds.data(), kTwoLineEnds.size());
1360 
1361       // This is the last, truncated line.
1362       lines_.push_back(std::make_pair(last_slash_n_idx_, len + 2));
1363       // A blank line to end the headers.
1364       lines_.push_back(std::make_pair(len + 2, len + 4));
1365     }
1366 
1367     ProcessHeaderLines(lines_, /*is_trailer=*/false, headers_);
1368   }
1369 
1370   HandleError(BalsaFrameEnums::HEADERS_TOO_LONG);
1371 }
1372 
// Namespace-scope definitions of BalsaFrame's static constants. No
// initializers appear here, so the values presumably come from the in-class
// declarations, which are not visible in this part of the file.
1373 const int32_t BalsaFrame::kValidTerm1;
1374 const int32_t BalsaFrame::kValidTerm1Mask;
1375 const int32_t BalsaFrame::kValidTerm2;
1376 const int32_t BalsaFrame::kValidTerm2Mask;
1377 
1378 }  // namespace quiche
1379 
1380 #undef CHAR_LT
1381 #undef CHAR_LE
1382 #undef CHAR_GT
1383 #undef CHAR_GE
1384 #undef QUICHE_DCHECK_CHAR_GE
1385