1 // Copyright 2022 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "quiche/balsa/balsa_frame.h"
6
7 #include <algorithm>
8 #include <cstddef>
9 #include <cstdint>
10 #include <cstring>
11 #include <limits>
12 #include <memory>
13 #include <string>
14 #include <utility>
15
16 #include "absl/strings/match.h"
17 #include "absl/strings/numbers.h"
18 #include "absl/strings/string_view.h"
19 #include "quiche/balsa/balsa_enums.h"
20 #include "quiche/balsa/balsa_headers.h"
21 #include "quiche/balsa/balsa_visitor_interface.h"
22 #include "quiche/balsa/header_properties.h"
23 #include "quiche/common/platform/api/quiche_logging.h"
24
// When comparing characters (other than == and !=), cast to unsigned char
// to make sure values above 127 rank as expected, even on platforms where char
// is signed and thus such values are represented as negative numbers before the
// cast.
//
// CHAR_LT / CHAR_LE / CHAR_GT / CHAR_GE each evaluate to the result of the
// corresponding relational operator applied to |a| and |b| after both have
// been converted to unsigned char.
#define CHAR_LT(a, b) \
  (static_cast<unsigned char>(a) < static_cast<unsigned char>(b))
#define CHAR_LE(a, b) \
  (static_cast<unsigned char>(a) <= static_cast<unsigned char>(b))
#define CHAR_GT(a, b) \
  (static_cast<unsigned char>(a) > static_cast<unsigned char>(b))
#define CHAR_GE(a, b) \
  (static_cast<unsigned char>(a) >= static_cast<unsigned char>(b))
// Debug-check counterpart of CHAR_GE: forwards both operands, converted to
// unsigned char, to QUICHE_DCHECK_GE. Callers in this file stream extra
// diagnostic text onto the result with <<.
#define QUICHE_DCHECK_CHAR_GE(a, b) \
  QUICHE_DCHECK_GE(static_cast<unsigned char>(a), static_cast<unsigned char>(b))
39
40 namespace quiche {
41
42 namespace {
43
// Named HTTP status codes. Their uses are not visible in this part of the
// file.
constexpr size_t kContinueStatusCode = 100;
constexpr size_t kSwitchingProtocolsStatusCode = 101;

// Lower-case tokens that are matched case-insensitively (via
// absl::EqualsIgnoreCase): kContentLength and kTransferEncoding against header
// names in ProcessHeaderLines(), kChunked and kIdentity against the
// Transfer-Encoding value in ProcessTransferEncodingLine().
constexpr absl::string_view kChunked = "chunked";
constexpr absl::string_view kContentLength = "content-length";
constexpr absl::string_view kIdentity = "identity";
constexpr absl::string_view kTransferEncoding = "transfer-encoding";
51
// Returns true when |response_code| lies in the half-open range [100, 200),
// i.e. it is a 1xx status code.
bool IsInterimResponse(size_t response_code) {
  constexpr size_t kFirstInterimCode = 100;
  constexpr size_t kFirstNonInterimCode = 200;
  if (response_code < kFirstInterimCode) {
    return false;
  }
  return response_code < kFirstNonInterimCode;
}
55
56 } // namespace
57
Reset()58 void BalsaFrame::Reset() {
59 last_char_was_slash_r_ = false;
60 saw_non_newline_char_ = false;
61 start_was_space_ = true;
62 chunk_length_character_extracted_ = false;
63 // is_request_ = true; // not reset between messages.
64 allow_reading_until_close_for_request_ = false;
65 // request_was_head_ = false; // not reset between messages.
66 // max_header_length_ = 16 * 1024; // not reset between messages.
67 // visitor_ = &do_nothing_visitor_; // not reset between messages.
68 chunk_length_remaining_ = 0;
69 content_length_remaining_ = 0;
70 last_slash_n_idx_ = 0;
71 term_chars_ = 0;
72 parse_state_ = BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE;
73 last_error_ = BalsaFrameEnums::BALSA_NO_ERROR;
74 invalid_chars_.clear();
75 lines_.clear();
76 if (continue_headers_ != nullptr) {
77 continue_headers_->Clear();
78 }
79 if (headers_ != nullptr) {
80 headers_->Clear();
81 }
82 trailer_lines_.clear();
83 start_of_trailer_line_ = 0;
84 trailer_length_ = 0;
85 if (trailers_ != nullptr) {
86 trailers_->Clear();
87 }
88 }
89
90 namespace {
91
// Scans one "island" inside the line [begin, end), starting at |current|: a
// (possibly empty) run of whitespace followed by a (possibly empty) run of
// non-whitespace. A character counts as whitespace when its value, taken as
// an unsigned char, is <= ' '.
//
// Outputs (both are offsets relative to |begin|):
//   *first_whitespace - where the whitespace run starts (i.e. |current|).
//   *first_nonwhite   - where the non-whitespace run starts.
//
// Returns a pointer just past the non-whitespace run: either |end|, or the
// next whitespace character (returnvalue == end || *returnvalue <= ' ').
// The non-whitespace run therefore occupies offsets
// [*first_nonwhite, returnvalue - begin).
inline const char* ParseOneIsland(const char* current, const char* begin,
                                  const char* end, size_t* first_whitespace,
                                  size_t* first_nonwhite) {
  // Compare as unsigned char so bytes above 127 rank above ' ' even where
  // plain char is signed.
  const auto is_blank = [](char ch) {
    return static_cast<unsigned char>(ch) <= static_cast<unsigned char>(' ');
  };

  const char* cursor = current;
  *first_whitespace = cursor - begin;
  for (; cursor < end && is_blank(*cursor); ++cursor) {
  }
  *first_nonwhite = cursor - begin;
  for (; cursor < end && !is_blank(*cursor); ++cursor) {
  }
  return cursor;
}
117
118 } // namespace
119
120 // Summary:
121 // Parses the first line of either a request or response.
122 // Note that in the case of a detected warning, error_code will be set
123 // but the function will not return false.
124 // Exactly zero or one warning or error (but not both) may be detected
125 // by this function.
126 // Note that this function will not write the data of the first-line
127 // into the header's buffer (that should already have been done elsewhere).
128 //
129 // Pre-conditions:
130 // begin != end
131 // *begin should be a character which is > ' '. This implies that there
132 // is at least one non-whitespace characters between [begin, end).
133 // headers is a valid pointer to a BalsaHeaders class.
134 // error_code is a valid pointer to a BalsaFrameEnums::ErrorCode value.
135 // Entire first line must exist between [begin, end)
136 // Exactly zero or one newlines -may- exist between [begin, end)
137 // [begin, end) should exist in the header's buffer.
138 //
139 // Side-effects:
140 // headers will be modified
141 // error_code may be modified if either a warning or error is detected
142 //
143 // Returns:
144 // True if no error (as opposed to warning) is detected.
145 // False if an error (as opposed to warning) is detected.
146
147 //
148 // If there is indeed non-whitespace in the line, then the following
149 // will take care of this for you:
150 // while (*begin <= ' ') ++begin;
151 // ProcessFirstLine(begin, end, is_request, &headers, &error_code);
152 //
153
ParseHTTPFirstLine(const char * begin,const char * end,bool is_request,BalsaHeaders * headers,BalsaFrameEnums::ErrorCode * error_code)154 bool ParseHTTPFirstLine(const char* begin, const char* end, bool is_request,
155 BalsaHeaders* headers,
156 BalsaFrameEnums::ErrorCode* error_code) {
157 while (begin < end && (end[-1] == '\n' || end[-1] == '\r')) {
158 --end;
159 }
160
161 const char* current =
162 ParseOneIsland(begin, begin, end, &headers->whitespace_1_idx_,
163 &headers->non_whitespace_1_idx_);
164 current = ParseOneIsland(current, begin, end, &headers->whitespace_2_idx_,
165 &headers->non_whitespace_2_idx_);
166 current = ParseOneIsland(current, begin, end, &headers->whitespace_3_idx_,
167 &headers->non_whitespace_3_idx_);
168
169 // Clean up any trailing whitespace that comes after the third island
170 const char* last = end;
171 while (current <= last && CHAR_LE(*last, ' ')) {
172 --last;
173 }
174 headers->whitespace_4_idx_ = last - begin + 1;
175
176 // Either the passed-in line is empty, or it starts with a non-whitespace
177 // character.
178 QUICHE_DCHECK(begin == end || static_cast<unsigned char>(*begin) > ' ');
179
180 QUICHE_DCHECK_EQ(0u, headers->whitespace_1_idx_);
181 QUICHE_DCHECK_EQ(0u, headers->non_whitespace_1_idx_);
182
183 // If the line isn't empty, it has at least one non-whitespace character (see
184 // first QUICHE_DCHECK), which will have been identified as a non-empty
185 // [non_whitespace_1_idx_, whitespace_2_idx_).
186 QUICHE_DCHECK(begin == end ||
187 headers->non_whitespace_1_idx_ < headers->whitespace_2_idx_);
188
189 if (headers->non_whitespace_2_idx_ == headers->whitespace_3_idx_) {
190 // This error may be triggered if the second token is empty, OR there's no
191 // WS after the first token; we don't bother to distinguish exactly which.
192 // (I'm not sure why we distinguish different kinds of parse error at all,
193 // actually.)
194 // FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD for request
195 // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION for response
196 *error_code = static_cast<BalsaFrameEnums::ErrorCode>(
197 BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION +
198 static_cast<int>(is_request));
199 if (!is_request) { // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION
200 return false;
201 }
202 }
203 if (headers->whitespace_3_idx_ == headers->non_whitespace_3_idx_) {
204 if (*error_code == BalsaFrameEnums::BALSA_NO_ERROR) {
205 // FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD for request
206 // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION for response
207 *error_code = static_cast<BalsaFrameEnums::ErrorCode>(
208 BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE +
209 static_cast<int>(is_request));
210 }
211 }
212
213 if (!is_request) {
214 headers->parsed_response_code_ = 0;
215 // If the response code is non-empty:
216 if (headers->non_whitespace_2_idx_ < headers->whitespace_3_idx_) {
217 if (!absl::SimpleAtoi(
218 absl::string_view(begin + headers->non_whitespace_2_idx_,
219 headers->non_whitespace_3_idx_ -
220 headers->non_whitespace_2_idx_),
221 &headers->parsed_response_code_)) {
222 *error_code = BalsaFrameEnums::FAILED_CONVERTING_STATUS_CODE_TO_INT;
223 return false;
224 }
225 }
226 }
227
228 return true;
229 }
230
231 // begin - beginning of the firstline
232 // end - end of the firstline
233 //
234 // A precondition for this function is that there is non-whitespace between
235 // [begin, end). If this precondition is not met, the function will not perform
236 // as expected (and bad things may happen, and it will eat your first, second,
237 // and third unborn children!).
238 //
239 // Another precondition for this function is that [begin, end) includes
240 // at most one newline, which must be at the end of the line.
ProcessFirstLine(const char * begin,const char * end)241 void BalsaFrame::ProcessFirstLine(const char* begin, const char* end) {
242 BalsaFrameEnums::ErrorCode previous_error = last_error_;
243 if (!ParseHTTPFirstLine(begin, end, is_request_, headers_, &last_error_)) {
244 parse_state_ = BalsaFrameEnums::ERROR;
245 HandleError(last_error_);
246 return;
247 }
248 if (previous_error != last_error_) {
249 HandleWarning(last_error_);
250 }
251
252 const absl::string_view line_input(
253 begin + headers_->non_whitespace_1_idx_,
254 headers_->whitespace_4_idx_ - headers_->non_whitespace_1_idx_);
255 const absl::string_view part1(
256 begin + headers_->non_whitespace_1_idx_,
257 headers_->whitespace_2_idx_ - headers_->non_whitespace_1_idx_);
258 const absl::string_view part2(
259 begin + headers_->non_whitespace_2_idx_,
260 headers_->whitespace_3_idx_ - headers_->non_whitespace_2_idx_);
261 const absl::string_view part3(
262 begin + headers_->non_whitespace_3_idx_,
263 headers_->whitespace_4_idx_ - headers_->non_whitespace_3_idx_);
264
265 if (is_request_) {
266 visitor_->OnRequestFirstLineInput(line_input, part1, part2, part3);
267 if (part3.empty()) {
268 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
269 }
270 return;
271 }
272
273 visitor_->OnResponseFirstLineInput(line_input, part1, part2, part3);
274 }
275
276 // 'stream_begin' points to the first character of the headers buffer.
277 // 'line_begin' points to the first character of the line.
278 // 'current' points to a char which is ':'.
279 // 'line_end' points to the position of '\n' + 1.
280 // 'line_begin' points to the position of first character of line.
CleanUpKeyValueWhitespace(const char * stream_begin,const char * line_begin,const char * current,const char * line_end,HeaderLineDescription * current_header_line)281 void BalsaFrame::CleanUpKeyValueWhitespace(
282 const char* stream_begin, const char* line_begin, const char* current,
283 const char* line_end, HeaderLineDescription* current_header_line) {
284 const char* colon_loc = current;
285 QUICHE_DCHECK_LT(colon_loc, line_end);
286 QUICHE_DCHECK_EQ(':', *colon_loc);
287 QUICHE_DCHECK_EQ(':', *current);
288 QUICHE_DCHECK_CHAR_GE(' ', *line_end)
289 << "\"" << std::string(line_begin, line_end) << "\"";
290
291 --current;
292 while (current > line_begin && CHAR_LE(*current, ' ')) {
293 --current;
294 }
295 current += static_cast<int>(current != colon_loc);
296 current_header_line->key_end_idx = current - stream_begin;
297
298 current = colon_loc;
299 QUICHE_DCHECK_EQ(':', *current);
300 ++current;
301 while (current < line_end && CHAR_LE(*current, ' ')) {
302 ++current;
303 }
304 current_header_line->value_begin_idx = current - stream_begin;
305
306 QUICHE_DCHECK_GE(current_header_line->key_end_idx,
307 current_header_line->first_char_idx);
308 QUICHE_DCHECK_GE(current_header_line->value_begin_idx,
309 current_header_line->key_end_idx);
310 QUICHE_DCHECK_GE(current_header_line->last_char_idx,
311 current_header_line->value_begin_idx);
312 }
313
FindColonsAndParseIntoKeyValue(const Lines & lines,bool is_trailer,BalsaHeaders * headers)314 bool BalsaFrame::FindColonsAndParseIntoKeyValue(const Lines& lines,
315 bool is_trailer,
316 BalsaHeaders* headers) {
317 QUICHE_DCHECK(!lines.empty());
318 const char* stream_begin = headers->OriginalHeaderStreamBegin();
319 // The last line is always just a newline (and is uninteresting).
320 const Lines::size_type lines_size_m1 = lines.size() - 1;
321 // For a trailer, there is no first line, so lines[0] is the first header.
322 // For real headers, the first line takes lines[0], so real header starts
323 // at index 1.
324 int first_header_idx = (is_trailer ? 0 : 1);
325 const char* current = stream_begin + lines[first_header_idx].first;
326 // This code is a bit more subtle than it may appear at first glance.
327 // This code looks for a colon in the current line... but it also looks
328 // beyond the current line. If there is no colon in the current line, then
329 // for each subsequent line (until the colon which -has- been found is
330 // associated with a line), no searching for a colon will be performed. In
331 // this way, we minimize the amount of bytes we have scanned for a colon.
332 for (Lines::size_type i = first_header_idx; i < lines_size_m1;) {
333 const char* line_begin = stream_begin + lines[i].first;
334
335 // Here we handle possible continuations. Note that we do not replace
336 // the '\n' in the line before a continuation (at least, as of now),
337 // which implies that any code which looks for a value must deal with
338 // "\r\n", etc -within- the line (and not just at the end of it).
339 for (++i; i < lines_size_m1; ++i) {
340 const char c = *(stream_begin + lines[i].first);
341 if (CHAR_GT(c, ' ')) {
342 // Not a continuation, so stop. Note that if the 'original' i = 1,
343 // and the next line is not a continuation, we'll end up with i = 2
344 // when we break. This handles the incrementing of i for the outer
345 // loop.
346 break;
347 }
348
349 // Space and tab are valid starts to continuation lines.
350 // https://tools.ietf.org/html/rfc7230#section-3.2.4 says that a proxy
351 // can choose to reject or normalize continuation lines.
352 if ((c != ' ' && c != '\t') ||
353 http_validation_policy().disallow_header_continuation_lines) {
354 HandleError(is_trailer ? BalsaFrameEnums::INVALID_TRAILER_FORMAT
355 : BalsaFrameEnums::INVALID_HEADER_FORMAT);
356 return false;
357 }
358
359 // If disallow_header_continuation_lines() is false, we neither reject nor
360 // normalize continuation lines, in violation of RFC7230.
361 }
362 const char* line_end = stream_begin + lines[i - 1].second;
363 QUICHE_DCHECK_LT(line_begin - stream_begin, line_end - stream_begin);
364
365 // We cleanup the whitespace at the end of the line before doing anything
366 // else of interest as it allows us to do nothing when irregularly formatted
367 // headers are parsed (e.g. those with only keys, only values, or no colon).
368 //
369 // We're guaranteed to have *line_end > ' ' while line_end >= line_begin.
370 --line_end;
371 QUICHE_DCHECK_EQ('\n', *line_end)
372 << "\"" << std::string(line_begin, line_end) << "\"";
373 while (CHAR_LE(*line_end, ' ') && line_end > line_begin) {
374 --line_end;
375 }
376 ++line_end;
377 QUICHE_DCHECK_CHAR_GE(' ', *line_end);
378 QUICHE_DCHECK_LT(line_begin, line_end);
379
380 // We use '0' for the block idx, because we're always writing to the first
381 // block from the framer (we do this because the framer requires that the
382 // entire header sequence be in a contiguous buffer).
383 headers->header_lines_.push_back(HeaderLineDescription(
384 line_begin - stream_begin, line_end - stream_begin,
385 line_end - stream_begin, line_end - stream_begin, 0));
386 if (current >= line_end) {
387 if (http_validation_policy().require_header_colon) {
388 HandleError(is_trailer ? BalsaFrameEnums::TRAILER_MISSING_COLON
389 : BalsaFrameEnums::HEADER_MISSING_COLON);
390 return false;
391 }
392 HandleWarning(is_trailer ? BalsaFrameEnums::TRAILER_MISSING_COLON
393 : BalsaFrameEnums::HEADER_MISSING_COLON);
394 // Then the next colon will not be found within this header line-- time
395 // to try again with another header-line.
396 continue;
397 }
398 if (current < line_begin) {
399 // When this condition is true, the last detected colon was part of a
400 // previous line. We reset to the beginning of the line as we don't care
401 // about the presence of any colon before the beginning of the current
402 // line.
403 current = line_begin;
404 }
405 for (; current < line_end; ++current) {
406 if (*current == ':') {
407 break;
408 }
409
410 // Generally invalid characters were found earlier.
411 if (http_validation_policy().disallow_double_quote_in_header_name) {
412 if (header_properties::IsInvalidHeaderKeyChar(*current)) {
413 HandleError(is_trailer
414 ? BalsaFrameEnums::INVALID_TRAILER_NAME_CHARACTER
415 : BalsaFrameEnums::INVALID_HEADER_NAME_CHARACTER);
416 return false;
417 }
418 } else if (header_properties::IsInvalidHeaderKeyCharAllowDoubleQuote(
419 *current)) {
420 HandleError(is_trailer
421 ? BalsaFrameEnums::INVALID_TRAILER_NAME_CHARACTER
422 : BalsaFrameEnums::INVALID_HEADER_NAME_CHARACTER);
423 return false;
424 }
425 }
426
427 if (current == line_end) {
428 // There was no colon in the line. The arguments we passed into the
429 // construction for the HeaderLineDescription object should be OK-- it
430 // assumes that the entire content is 'key' by default (which is true, as
431 // there was no colon, there can be no value). Note that this is a
432 // construct which is technically not allowed by the spec.
433
434 // In strict mode, we do treat this invalid value-less key as an error.
435 if (http_validation_policy().require_header_colon) {
436 HandleError(is_trailer ? BalsaFrameEnums::TRAILER_MISSING_COLON
437 : BalsaFrameEnums::HEADER_MISSING_COLON);
438 return false;
439 }
440 HandleWarning(is_trailer ? BalsaFrameEnums::TRAILER_MISSING_COLON
441 : BalsaFrameEnums::HEADER_MISSING_COLON);
442 continue;
443 }
444
445 QUICHE_DCHECK_EQ(*current, ':');
446 QUICHE_DCHECK_LE(current - stream_begin, line_end - stream_begin);
447 QUICHE_DCHECK_LE(stream_begin - stream_begin, current - stream_begin);
448
449 HeaderLineDescription& current_header_line = headers->header_lines_.back();
450 current_header_line.key_end_idx = current - stream_begin;
451 current_header_line.value_begin_idx = current_header_line.key_end_idx;
452 if (current < line_end) {
453 ++current_header_line.key_end_idx;
454
455 CleanUpKeyValueWhitespace(stream_begin, line_begin, current, line_end,
456 ¤t_header_line);
457 }
458 }
459
460 return true;
461 }
462
// Records |error_code| in last_error_ and then reports it to the visitor as a
// warning. Unlike HandleError(), parse_state_ is not modified.
void BalsaFrame::HandleWarning(BalsaFrameEnums::ErrorCode error_code) {
  // Store first so that last_error_ already holds the new code when the
  // visitor callback runs.
  last_error_ = error_code;
  visitor_->HandleWarning(last_error_);
}
467
HandleError(BalsaFrameEnums::ErrorCode error_code)468 void BalsaFrame::HandleError(BalsaFrameEnums::ErrorCode error_code) {
469 last_error_ = error_code;
470 parse_state_ = BalsaFrameEnums::ERROR;
471 visitor_->HandleError(last_error_);
472 }
473
ProcessContentLengthLine(HeaderLines::size_type line_idx,size_t * length)474 BalsaHeadersEnums::ContentLengthStatus BalsaFrame::ProcessContentLengthLine(
475 HeaderLines::size_type line_idx, size_t* length) {
476 const HeaderLineDescription& header_line = headers_->header_lines_[line_idx];
477 const char* stream_begin = headers_->OriginalHeaderStreamBegin();
478 const char* line_end = stream_begin + header_line.last_char_idx;
479 const char* value_begin = (stream_begin + header_line.value_begin_idx);
480
481 if (value_begin >= line_end) {
482 // There is no non-whitespace value data.
483 QUICHE_DVLOG(1) << "invalid content-length -- no non-whitespace value data";
484 return BalsaHeadersEnums::INVALID_CONTENT_LENGTH;
485 }
486
487 *length = 0;
488 while (value_begin < line_end) {
489 if (*value_begin < '0' || *value_begin > '9') {
490 // bad! content-length found, and couldn't parse all of it!
491 QUICHE_DVLOG(1)
492 << "invalid content-length - non numeric character detected";
493 return BalsaHeadersEnums::INVALID_CONTENT_LENGTH;
494 }
495 const size_t kMaxDiv10 = std::numeric_limits<size_t>::max() / 10;
496 size_t length_x_10 = *length * 10;
497 const size_t c = *value_begin - '0';
498 if (*length > kMaxDiv10 ||
499 (std::numeric_limits<size_t>::max() - length_x_10) < c) {
500 QUICHE_DVLOG(1) << "content-length overflow";
501 return BalsaHeadersEnums::CONTENT_LENGTH_OVERFLOW;
502 }
503 *length = length_x_10 + c;
504 ++value_begin;
505 }
506 QUICHE_DVLOG(1) << "content_length parsed: " << *length;
507 return BalsaHeadersEnums::VALID_CONTENT_LENGTH;
508 }
509
ProcessTransferEncodingLine(HeaderLines::size_type line_idx)510 void BalsaFrame::ProcessTransferEncodingLine(HeaderLines::size_type line_idx) {
511 const HeaderLineDescription& header_line = headers_->header_lines_[line_idx];
512 const char* stream_begin = headers_->OriginalHeaderStreamBegin();
513 const absl::string_view transfer_encoding(
514 stream_begin + header_line.value_begin_idx,
515 header_line.last_char_idx - header_line.value_begin_idx);
516
517 if (absl::EqualsIgnoreCase(transfer_encoding, kChunked)) {
518 headers_->transfer_encoding_is_chunked_ = true;
519 return;
520 }
521
522 if (absl::EqualsIgnoreCase(transfer_encoding, kIdentity)) {
523 headers_->transfer_encoding_is_chunked_ = false;
524 return;
525 }
526
527 if (http_validation_policy().validate_transfer_encoding) {
528 HandleError(BalsaFrameEnums::UNKNOWN_TRANSFER_ENCODING);
529 }
530 }
531
CheckHeaderLinesForInvalidChars(const Lines & lines,const BalsaHeaders * headers)532 bool BalsaFrame::CheckHeaderLinesForInvalidChars(const Lines& lines,
533 const BalsaHeaders* headers) {
534 // Read from the beginning of the first line to the end of the last line.
535 // Note we need to add the first line's offset as in the case of a trailer
536 // it's non-zero.
537 const char* stream_begin =
538 headers->OriginalHeaderStreamBegin() + lines.front().first;
539 const char* stream_end =
540 headers->OriginalHeaderStreamBegin() + lines.back().second;
541 bool found_invalid = false;
542
543 for (const char* c = stream_begin; c < stream_end; c++) {
544 if (header_properties::IsInvalidHeaderChar(*c)) {
545 found_invalid = true;
546 invalid_chars_[*c]++;
547 }
548 if (*c == '\r' &&
549 http_validation_policy().disallow_lone_cr_in_request_headers &&
550 c + 1 < stream_end && *(c + 1) != '\n') {
551 found_invalid = true;
552 invalid_chars_[*c]++;
553 }
554 }
555
556 return found_invalid;
557 }
558
ProcessHeaderLines(const Lines & lines,bool is_trailer,BalsaHeaders * headers)559 void BalsaFrame::ProcessHeaderLines(const Lines& lines, bool is_trailer,
560 BalsaHeaders* headers) {
561 QUICHE_DCHECK(!lines.empty());
562 QUICHE_DVLOG(1) << "******@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@**********\n";
563
564 if ((is_request() || http_validation_policy()
565 .disallow_invalid_header_characters_in_response) &&
566 track_invalid_chars()) {
567 if (CheckHeaderLinesForInvalidChars(lines, headers)) {
568 if (invalid_chars_error_enabled()) {
569 HandleError(BalsaFrameEnums::INVALID_HEADER_CHARACTER);
570 return;
571 }
572
573 HandleWarning(BalsaFrameEnums::INVALID_HEADER_CHARACTER);
574 }
575 }
576
577 // There is no need to attempt to process headers (resp. trailers)
578 // if no header (resp. trailer) lines exist.
579 //
580 // The last line of the message, which is an empty line, is never a header
581 // (resp. trailer) line. Furthermore, the first line of the message is not
582 // a header line. Therefore there are at least two (resp. one) lines in the
583 // message which are not header (resp. trailer) lines.
584 //
585 // Thus, we test to see if we have more than two (resp. one) lines total
586 // before attempting to parse any header (resp. trailer) lines.
587 if (lines.size() <= (is_trailer ? 1 : 2)) {
588 return;
589 }
590
591 HeaderLines::size_type content_length_idx = 0;
592 HeaderLines::size_type transfer_encoding_idx = 0;
593 const char* stream_begin = headers->OriginalHeaderStreamBegin();
594 // Parse the rest of the header or trailer data into key-value pairs.
595 if (!FindColonsAndParseIntoKeyValue(lines, is_trailer, headers)) {
596 return;
597 }
598 // At this point, we've parsed all of the headers/trailers. Time to look
599 // for those headers which we require for framing or for format errors.
600 const HeaderLines::size_type lines_size = headers->header_lines_.size();
601 for (HeaderLines::size_type i = 0; i < lines_size; ++i) {
602 const HeaderLineDescription& line = headers->header_lines_[i];
603 const absl::string_view key(stream_begin + line.first_char_idx,
604 line.key_end_idx - line.first_char_idx);
605 QUICHE_DVLOG(2) << "[" << i << "]: " << key << " key_len: " << key.length();
606
607 // If a header begins with either lowercase or uppercase 'c' or 't', then
608 // the header may be one of content-length, connection, content-encoding
609 // or transfer-encoding. These headers are special, as they change the way
610 // that the message is framed, and so the framer is required to search
611 // for them. However, first check for a formatting error, and skip
612 // special header treatment on trailer lines (when is_trailer is true).
613 if (key.empty() || key[0] == ' ') {
614 parse_state_ = BalsaFrameEnums::ERROR;
615 HandleError(is_trailer ? BalsaFrameEnums::INVALID_TRAILER_FORMAT
616 : BalsaFrameEnums::INVALID_HEADER_FORMAT);
617 return;
618 }
619 if (is_trailer) {
620 continue;
621 }
622 if (absl::EqualsIgnoreCase(key, kContentLength)) {
623 size_t length = 0;
624 BalsaHeadersEnums::ContentLengthStatus content_length_status =
625 ProcessContentLengthLine(i, &length);
626 if (content_length_idx == 0) {
627 content_length_idx = i + 1;
628 headers->content_length_status_ = content_length_status;
629 headers->content_length_ = length;
630 content_length_remaining_ = length;
631 continue;
632 }
633 if ((headers->content_length_status_ != content_length_status) ||
634 ((headers->content_length_status_ ==
635 BalsaHeadersEnums::VALID_CONTENT_LENGTH) &&
636 (http_validation_policy().disallow_multiple_content_length ||
637 length != headers->content_length_))) {
638 HandleError(BalsaFrameEnums::MULTIPLE_CONTENT_LENGTH_KEYS);
639 return;
640 }
641 continue;
642 }
643 if (absl::EqualsIgnoreCase(key, kTransferEncoding)) {
644 if (http_validation_policy().validate_transfer_encoding &&
645 transfer_encoding_idx != 0) {
646 HandleError(BalsaFrameEnums::MULTIPLE_TRANSFER_ENCODING_KEYS);
647 return;
648 }
649 transfer_encoding_idx = i + 1;
650 }
651 }
652
653 if (!is_trailer) {
654 if (http_validation_policy().validate_transfer_encoding &&
655 http_validation_policy()
656 .disallow_transfer_encoding_with_content_length &&
657 content_length_idx != 0 && transfer_encoding_idx != 0) {
658 HandleError(BalsaFrameEnums::BOTH_TRANSFER_ENCODING_AND_CONTENT_LENGTH);
659 return;
660 }
661 if (headers->transfer_encoding_is_chunked_) {
662 headers->content_length_ = 0;
663 headers->content_length_status_ = BalsaHeadersEnums::NO_CONTENT_LENGTH;
664 content_length_remaining_ = 0;
665 }
666 if (transfer_encoding_idx != 0) {
667 ProcessTransferEncodingLine(transfer_encoding_idx - 1);
668 }
669 }
670 }
671
AssignParseStateAfterHeadersHaveBeenParsed()672 void BalsaFrame::AssignParseStateAfterHeadersHaveBeenParsed() {
673 // For responses, can't have a body if the request was a HEAD, or if it is
674 // one of these response-codes. rfc2616 section 4.3
675 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
676 int response_code = headers_->parsed_response_code_;
677 if (!is_request_ && (request_was_head_ ||
678 !BalsaHeaders::ResponseCanHaveBody(response_code))) {
679 // There is no body.
680 return;
681 }
682
683 if (headers_->transfer_encoding_is_chunked_) {
684 // Note that
685 // if ( Transfer-Encoding: chunked && Content-length: )
686 // then Transfer-Encoding: chunked trumps.
687 // This is as specified in the spec.
688 // rfc2616 section 4.4.3
689 parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH;
690 return;
691 }
692
693 // Errors parsing content-length definitely can cause
694 // protocol errors/warnings
695 switch (headers_->content_length_status_) {
696 // If we have a content-length, and it is parsed
697 // properly, there are two options.
698 // 1) zero content, in which case the message is done, and
699 // 2) nonzero content, in which case we have to
700 // consume the body.
701 case BalsaHeadersEnums::VALID_CONTENT_LENGTH:
702 if (headers_->content_length_ == 0) {
703 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
704 } else {
705 parse_state_ = BalsaFrameEnums::READING_CONTENT;
706 }
707 break;
708 case BalsaHeadersEnums::CONTENT_LENGTH_OVERFLOW:
709 case BalsaHeadersEnums::INVALID_CONTENT_LENGTH:
710 // If there were characters left-over after parsing the
711 // content length, we should flag an error and stop.
712 HandleError(BalsaFrameEnums::UNPARSABLE_CONTENT_LENGTH);
713 break;
714 // We can have: no transfer-encoding, no content length, and no
715 // connection: close...
716 // Unfortunately, this case doesn't seem to be covered in the spec.
717 // We'll assume that the safest thing to do here is what the google
718 // binaries before 2008 already do, which is to assume that
719 // everything until the connection is closed is body.
720 case BalsaHeadersEnums::NO_CONTENT_LENGTH:
721 if (is_request_) {
722 const absl::string_view method = headers_->request_method();
723 // POSTs and PUTs should have a detectable body length. If they
724 // do not we consider it an error.
725 if ((method != "POST" && method != "PUT") ||
726 !http_validation_policy().require_content_length_if_body_required) {
727 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
728 break;
729 } else if (!allow_reading_until_close_for_request_) {
730 HandleError(BalsaFrameEnums::REQUIRED_BODY_BUT_NO_CONTENT_LENGTH);
731 break;
732 }
733 }
734 parse_state_ = BalsaFrameEnums::READING_UNTIL_CLOSE;
735 HandleWarning(BalsaFrameEnums::MAYBE_BODY_BUT_NO_CONTENT_LENGTH);
736 break;
737 // The COV_NF_... statements here provide hints to the apparatus
738 // which computes coverage reports/ratios that this code is never
739 // intended to be executed, and should technically be impossible.
740 // COV_NF_START
741 default:
742 QUICHE_LOG(FATAL) << "Saw a content_length_status: "
743 << headers_->content_length_status_
744 << " which is unknown.";
745 // COV_NF_END
746 }
747 }
748
ProcessHeaders(const char * message_start,size_t message_length)749 size_t BalsaFrame::ProcessHeaders(const char* message_start,
750 size_t message_length) {
751 const char* const original_message_start = message_start;
752 const char* const message_end = message_start + message_length;
753 const char* message_current = message_start;
754 const char* checkpoint = message_start;
755
756 if (message_length == 0) {
757 return message_current - original_message_start;
758 }
759
760 while (message_current < message_end) {
761 size_t base_idx = headers_->GetReadableBytesFromHeaderStream();
762
763 // Yes, we could use strchr (assuming null termination), or
764 // memchr, but as it turns out that is slower than this tight loop
765 // for the input that we see.
766 if (!saw_non_newline_char_) {
767 do {
768 const char c = *message_current;
769 if (c != '\r' && c != '\n') {
770 if (CHAR_LE(c, ' ')) {
771 HandleError(BalsaFrameEnums::NO_REQUEST_LINE_IN_REQUEST);
772 return message_current - original_message_start;
773 }
774 break;
775 }
776 ++message_current;
777 if (message_current == message_end) {
778 return message_current - original_message_start;
779 }
780 } while (true);
781 saw_non_newline_char_ = true;
782 message_start = message_current;
783 checkpoint = message_current;
784 }
785 while (message_current < message_end) {
786 if (*message_current != '\n') {
787 ++message_current;
788 continue;
789 }
790 const size_t relative_idx = message_current - message_start;
791 const size_t message_current_idx = 1 + base_idx + relative_idx;
792 lines_.push_back(std::make_pair(last_slash_n_idx_, message_current_idx));
793 if (lines_.size() == 1) {
794 headers_->WriteFromFramer(checkpoint, 1 + message_current - checkpoint);
795 checkpoint = message_current + 1;
796 const char* begin = headers_->OriginalHeaderStreamBegin();
797
798 QUICHE_DVLOG(1) << "First line "
799 << std::string(begin, lines_[0].second);
800 QUICHE_DVLOG(1) << "is_request_: " << is_request_;
801 ProcessFirstLine(begin, begin + lines_[0].second);
802 if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ) {
803 break;
804 }
805
806 if (parse_state_ == BalsaFrameEnums::ERROR) {
807 return message_current - original_message_start;
808 }
809 }
810 const size_t chars_since_last_slash_n =
811 (message_current_idx - last_slash_n_idx_);
812 last_slash_n_idx_ = message_current_idx;
813 if (chars_since_last_slash_n > 2) {
814 // false positive.
815 ++message_current;
816 continue;
817 }
818 if ((chars_since_last_slash_n == 1) ||
819 (((message_current > message_start) &&
820 (*(message_current - 1) == '\r')) ||
821 (last_char_was_slash_r_))) {
822 break;
823 }
824 ++message_current;
825 }
826
827 if (message_current == message_end) {
828 continue;
829 }
830
831 ++message_current;
832 QUICHE_DCHECK(message_current >= message_start);
833 if (message_current > message_start) {
834 headers_->WriteFromFramer(checkpoint, message_current - checkpoint);
835 }
836
837 // Check if we have exceeded maximum headers length
838 // Although we check for this limit before and after we call this function
839 // we check it here as well to make sure that in case the visitor changed
840 // the max_header_length_ (for example after processing the first line)
841 // we handle it gracefully.
842 if (headers_->GetReadableBytesFromHeaderStream() > max_header_length_) {
843 HandleHeadersTooLongError();
844 return message_current - original_message_start;
845 }
846
847 // Since we know that we won't be writing any more bytes of the header,
848 // we tell that to the headers object. The headers object may make
849 // more efficient allocation decisions when this is signaled.
850 headers_->DoneWritingFromFramer();
851 visitor_->OnHeaderInput(headers_->GetReadablePtrFromHeaderStream());
852
853 // Ok, now that we've written everything into our header buffer, it is
854 // time to process the header lines (extract proper values for headers
855 // which are important for framing).
856 ProcessHeaderLines(lines_, false /*is_trailer*/, headers_);
857 if (parse_state_ == BalsaFrameEnums::ERROR) {
858 return message_current - original_message_start;
859 }
860
861 if (use_interim_headers_callback_ &&
862 IsInterimResponse(headers_->parsed_response_code()) &&
863 headers_->parsed_response_code() != kSwitchingProtocolsStatusCode) {
864 // Deliver headers from this interim response but reset everything else to
865 // prepare for the next set of headers. Skip 101 Switching Protocols
866 // because these are considered final headers for the current protocol.
867 visitor_->OnInterimHeaders(
868 std::make_unique<BalsaHeaders>(std::move(*headers_)));
869 Reset();
870 checkpoint = message_start = message_current;
871 continue;
872 }
873 if (continue_headers_ != nullptr &&
874 headers_->parsed_response_code_ == kContinueStatusCode) {
875 // Save the headers from this 100 Continue response but reset everything
876 // else to prepare for the next set of headers.
877 BalsaHeaders saved_continue_headers = std::move(*headers_);
878 Reset();
879 *continue_headers_ = std::move(saved_continue_headers);
880 visitor_->ContinueHeaderDone();
881 checkpoint = message_start = message_current;
882 continue;
883 }
884 AssignParseStateAfterHeadersHaveBeenParsed();
885 if (parse_state_ == BalsaFrameEnums::ERROR) {
886 return message_current - original_message_start;
887 }
888 visitor_->ProcessHeaders(*headers_);
889 visitor_->HeaderDone();
890 if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ) {
891 visitor_->MessageDone();
892 }
893 return message_current - original_message_start;
894 }
895 // If we've gotten to here, it means that we've consumed all of the
896 // available input. We need to record whether or not the last character we
897 // saw was a '\r' so that a subsequent call to ProcessInput correctly finds
898 // a header framing that is split across the two calls.
899 last_char_was_slash_r_ = (*(message_end - 1) == '\r');
900 QUICHE_DCHECK(message_current >= message_start);
901 if (message_current > message_start) {
902 headers_->WriteFromFramer(checkpoint, message_current - checkpoint);
903 }
904 return message_current - original_message_start;
905 }
906
BytesSafeToSplice() const907 size_t BalsaFrame::BytesSafeToSplice() const {
908 switch (parse_state_) {
909 case BalsaFrameEnums::READING_CHUNK_DATA:
910 return chunk_length_remaining_;
911 case BalsaFrameEnums::READING_UNTIL_CLOSE:
912 return std::numeric_limits<size_t>::max();
913 case BalsaFrameEnums::READING_CONTENT:
914 return content_length_remaining_;
915 default:
916 return 0;
917 }
918 }
919
BytesSpliced(size_t bytes_spliced)920 void BalsaFrame::BytesSpliced(size_t bytes_spliced) {
921 switch (parse_state_) {
922 case BalsaFrameEnums::READING_CHUNK_DATA:
923 if (chunk_length_remaining_ < bytes_spliced) {
924 HandleError(BalsaFrameEnums::
925 CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT);
926 return;
927 }
928 chunk_length_remaining_ -= bytes_spliced;
929 if (chunk_length_remaining_ == 0) {
930 parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM;
931 }
932 return;
933
934 case BalsaFrameEnums::READING_UNTIL_CLOSE:
935 return;
936
937 case BalsaFrameEnums::READING_CONTENT:
938 if (content_length_remaining_ < bytes_spliced) {
939 HandleError(BalsaFrameEnums::
940 CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT);
941 return;
942 }
943 content_length_remaining_ -= bytes_spliced;
944 if (content_length_remaining_ == 0) {
945 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
946 visitor_->MessageDone();
947 }
948 return;
949
950 default:
951 HandleError(BalsaFrameEnums::CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO);
952 return;
953 }
954 }
955
ProcessInput(const char * input,size_t size)956 size_t BalsaFrame::ProcessInput(const char* input, size_t size) {
957 const char* current = input;
958 const char* on_entry = current;
959 const char* end = current + size;
960
961 QUICHE_DCHECK(headers_ != nullptr);
962 if (headers_ == nullptr) {
963 return 0;
964 }
965
966 if (parse_state_ == BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE) {
967 const size_t header_length = headers_->GetReadableBytesFromHeaderStream();
968 // Yes, we still have to check this here as the user can change the
969 // max_header_length amount!
970 // Also it is possible that we have reached the maximum allowed header size,
971 // and we have more to consume (remember we are still inside
972 // READING_HEADER_AND_FIRSTLINE) in which case we directly declare an error.
973 if (header_length > max_header_length_ ||
974 (header_length == max_header_length_ && size > 0)) {
975 HandleHeadersTooLongError();
976 return current - input;
977 }
978 const size_t bytes_to_process =
979 std::min(max_header_length_ - header_length, size);
980 current += ProcessHeaders(input, bytes_to_process);
981 // If we are still reading headers check if we have crossed the headers
982 // limit. Note that we check for >= as opposed to >. This is because if
983 // header_length_after equals max_header_length_ and we are still in the
984 // parse_state_ BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE we know for
985 // sure that the headers limit will be crossed later on
986 if (parse_state_ == BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE) {
987 // Note that headers_ is valid only if we are still reading headers.
988 const size_t header_length_after =
989 headers_->GetReadableBytesFromHeaderStream();
990 if (header_length_after >= max_header_length_) {
991 HandleHeadersTooLongError();
992 }
993 }
994 return current - input;
995 }
996
997 if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ ||
998 parse_state_ == BalsaFrameEnums::ERROR) {
999 // Can do nothing more 'till we're reset.
1000 return current - input;
1001 }
1002
1003 QUICHE_DCHECK_LE(current, end);
1004 if (current == end) {
1005 return current - input;
1006 }
1007
1008 while (true) {
1009 switch (parse_state_) {
1010 case BalsaFrameEnums::READING_CHUNK_LENGTH:
1011 // In this state we read the chunk length.
1012 // Note that once we hit a character which is not in:
1013 // [0-9;A-Fa-f\n], we transition to a different state.
1014 //
1015 QUICHE_DCHECK_LE(current, end);
1016 while (true) {
1017 if (current == end) {
1018 visitor_->OnRawBodyInput(
1019 absl::string_view(on_entry, current - on_entry));
1020 return current - input;
1021 }
1022
1023 const char c = *current;
1024 ++current;
1025
1026 static const signed char kBad = -1;
1027 static const signed char kDelimiter = -2;
1028
1029 // valid cases:
1030 // "09123\n" // -> 09123
1031 // "09123\r\n" // -> 09123
1032 // "09123 \n" // -> 09123
1033 // "09123 \r\n" // -> 09123
1034 // "09123 12312\n" // -> 09123
1035 // "09123 12312\r\n" // -> 09123
1036 // "09123; foo=bar\n" // -> 09123
1037 // "09123; foo=bar\r\n" // -> 09123
1038 // "FFFFFFFFFFFFFFFF\r\n" // -> FFFFFFFFFFFFFFFF
1039 // "FFFFFFFFFFFFFFFF 22\r\n" // -> FFFFFFFFFFFFFFFF
1040 // invalid cases:
1041 // "[ \t]+[^\n]*\n"
1042 // "FFFFFFFFFFFFFFFFF\r\n" (would overflow)
1043 // "\r\n"
1044 // "\n"
1045 signed char addition = kBad;
1046 // clang-format off
1047 switch (c) {
1048 case '0': addition = 0; break;
1049 case '1': addition = 1; break;
1050 case '2': addition = 2; break;
1051 case '3': addition = 3; break;
1052 case '4': addition = 4; break;
1053 case '5': addition = 5; break;
1054 case '6': addition = 6; break;
1055 case '7': addition = 7; break;
1056 case '8': addition = 8; break;
1057 case '9': addition = 9; break;
1058 case 'a': addition = 0xA; break;
1059 case 'b': addition = 0xB; break;
1060 case 'c': addition = 0xC; break;
1061 case 'd': addition = 0xD; break;
1062 case 'e': addition = 0xE; break;
1063 case 'f': addition = 0xF; break;
1064 case 'A': addition = 0xA; break;
1065 case 'B': addition = 0xB; break;
1066 case 'C': addition = 0xC; break;
1067 case 'D': addition = 0xD; break;
1068 case 'E': addition = 0xE; break;
1069 case 'F': addition = 0xF; break;
1070 case '\t':
1071 case '\n':
1072 case '\r':
1073 case ' ':
1074 case ';':
1075 addition = kDelimiter;
1076 break;
1077 default:
1078 // Leave addition == kBad
1079 break;
1080 }
1081 // clang-format on
1082 if (addition >= 0) {
1083 chunk_length_character_extracted_ = true;
1084 size_t length_x_16 = chunk_length_remaining_ * 16;
1085 const size_t kMaxDiv16 = std::numeric_limits<size_t>::max() / 16;
1086 if ((chunk_length_remaining_ > kMaxDiv16) ||
1087 (std::numeric_limits<size_t>::max() - length_x_16) <
1088 static_cast<size_t>(addition)) {
1089 // overflow -- asked for a chunk-length greater than 2^64 - 1!!
1090 visitor_->OnRawBodyInput(
1091 absl::string_view(on_entry, current - on_entry));
1092 HandleError(BalsaFrameEnums::CHUNK_LENGTH_OVERFLOW);
1093 return current - input;
1094 }
1095 chunk_length_remaining_ = length_x_16 + addition;
1096 continue;
1097 }
1098
1099 if (!chunk_length_character_extracted_ || addition == kBad) {
1100 // ^[0-9;A-Fa-f][ \t\n] -- was not matched, either because no
1101 // characters were converted, or an unexpected character was
1102 // seen.
1103 visitor_->OnRawBodyInput(
1104 absl::string_view(on_entry, current - on_entry));
1105 HandleError(BalsaFrameEnums::INVALID_CHUNK_LENGTH);
1106 return current - input;
1107 }
1108
1109 break;
1110 }
1111
1112 --current;
1113 parse_state_ = BalsaFrameEnums::READING_CHUNK_EXTENSION;
1114 visitor_->OnChunkLength(chunk_length_remaining_);
1115 continue;
1116
1117 case BalsaFrameEnums::READING_CHUNK_EXTENSION: {
1118 // TODO(phython): Convert this scanning to be 16 bytes at a time if
1119 // there is data to be read.
1120 const char* extensions_start = current;
1121 size_t extensions_length = 0;
1122 QUICHE_DCHECK_LE(current, end);
1123 while (true) {
1124 if (current == end) {
1125 visitor_->OnChunkExtensionInput(
1126 absl::string_view(extensions_start, extensions_length));
1127 visitor_->OnRawBodyInput(
1128 absl::string_view(on_entry, current - on_entry));
1129 return current - input;
1130 }
1131 const char c = *current;
1132 if (http_validation_policy_.disallow_lone_cr_in_chunk_extension &&
1133 c == '\r' && (current + 1 == end || *(current + 1) != '\n')) {
1134 // We have a lone carriage return.
1135 HandleError(BalsaFrameEnums::INVALID_CHUNK_EXTENSION);
1136 return current - input;
1137 }
1138 if (c == '\r' || c == '\n') {
1139 extensions_length = (extensions_start == current)
1140 ? 0
1141 : current - extensions_start - 1;
1142 }
1143
1144 ++current;
1145 if (c == '\n') {
1146 break;
1147 }
1148 }
1149
1150 chunk_length_character_extracted_ = false;
1151 visitor_->OnChunkExtensionInput(
1152 absl::string_view(extensions_start, extensions_length));
1153
1154 if (chunk_length_remaining_ != 0) {
1155 parse_state_ = BalsaFrameEnums::READING_CHUNK_DATA;
1156 continue;
1157 }
1158
1159 HeaderFramingFound('\n');
1160 parse_state_ = BalsaFrameEnums::READING_LAST_CHUNK_TERM;
1161 continue;
1162 }
1163
1164 case BalsaFrameEnums::READING_CHUNK_DATA:
1165 while (current < end) {
1166 if (chunk_length_remaining_ == 0) {
1167 break;
1168 }
1169 // read in the chunk
1170 size_t bytes_remaining = end - current;
1171 size_t consumed_bytes = (chunk_length_remaining_ < bytes_remaining)
1172 ? chunk_length_remaining_
1173 : bytes_remaining;
1174 const char* tmp_current = current + consumed_bytes;
1175 visitor_->OnRawBodyInput(
1176 absl::string_view(on_entry, tmp_current - on_entry));
1177 visitor_->OnBodyChunkInput(
1178 absl::string_view(current, consumed_bytes));
1179 on_entry = current = tmp_current;
1180 chunk_length_remaining_ -= consumed_bytes;
1181 }
1182
1183 if (chunk_length_remaining_ == 0) {
1184 parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM;
1185 continue;
1186 }
1187
1188 visitor_->OnRawBodyInput(
1189 absl::string_view(on_entry, current - on_entry));
1190 return current - input;
1191
1192 case BalsaFrameEnums::READING_CHUNK_TERM:
1193 QUICHE_DCHECK_LE(current, end);
1194 while (true) {
1195 if (current == end) {
1196 visitor_->OnRawBodyInput(
1197 absl::string_view(on_entry, current - on_entry));
1198 return current - input;
1199 }
1200
1201 const char c = *current;
1202 ++current;
1203
1204 if (c == '\n') {
1205 break;
1206 }
1207 }
1208 parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH;
1209 continue;
1210
1211 case BalsaFrameEnums::READING_LAST_CHUNK_TERM:
1212 QUICHE_DCHECK_LE(current, end);
1213 while (true) {
1214 if (current == end) {
1215 visitor_->OnRawBodyInput(
1216 absl::string_view(on_entry, current - on_entry));
1217 return current - input;
1218 }
1219
1220 const char c = *current;
1221 if (HeaderFramingFound(c) != 0) {
1222 // If we've found a "\r\n\r\n", then the message
1223 // is done.
1224 ++current;
1225 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
1226 visitor_->OnRawBodyInput(
1227 absl::string_view(on_entry, current - on_entry));
1228 visitor_->MessageDone();
1229 return current - input;
1230 }
1231
1232 // If not, however, since the spec only suggests that the
1233 // client SHOULD indicate the presence of trailers, we get to
1234 // *test* that they did or didn't.
1235 // If all of the bytes we've seen since:
1236 // OPTIONAL_WS 0 OPTIONAL_STUFF CRLF
1237 // are either '\r', or '\n', then we can assume that we don't yet
1238 // know if we need to parse headers, or if the next byte will make
1239 // the HeaderFramingFound condition (above) true.
1240 if (!HeaderFramingMayBeFound()) {
1241 break;
1242 }
1243
1244 // If HeaderFramingMayBeFound(), then we have seen only characters
1245 // '\r' or '\n'.
1246 ++current;
1247
1248 // Lets try again! There is no state change here.
1249 }
1250
1251 // If (!HeaderFramingMayBeFound()), then we know that we must be
1252 // reading the first non CRLF character of a trailer.
1253 parse_state_ = BalsaFrameEnums::READING_TRAILER;
1254 visitor_->OnRawBodyInput(
1255 absl::string_view(on_entry, current - on_entry));
1256 on_entry = current;
1257 continue;
1258
1259 // TODO(yongfa): No leading whitespace is allowed before field-name per
1260 // RFC2616. Leading whitespace will cause header parsing error too.
1261 case BalsaFrameEnums::READING_TRAILER:
1262 while (current < end) {
1263 const char c = *current;
1264 ++current;
1265 ++trailer_length_;
1266 if (trailers_ != nullptr) {
1267 // Reuse the header length limit for trailer, which is just a bunch
1268 // of headers.
1269 if (trailer_length_ > max_header_length_) {
1270 --current;
1271 HandleError(BalsaFrameEnums::TRAILER_TOO_LONG);
1272 return current - input;
1273 }
1274 if (LineFramingFound(c)) {
1275 trailer_lines_.push_back(
1276 std::make_pair(start_of_trailer_line_, trailer_length_));
1277 start_of_trailer_line_ = trailer_length_;
1278 }
1279 }
1280 if (HeaderFramingFound(c) != 0) {
1281 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
1282 if (trailers_ != nullptr) {
1283 trailers_->WriteFromFramer(on_entry, current - on_entry);
1284 trailers_->DoneWritingFromFramer();
1285 ProcessHeaderLines(trailer_lines_, true /*is_trailer*/,
1286 trailers_.get());
1287 if (parse_state_ == BalsaFrameEnums::ERROR) {
1288 return current - input;
1289 }
1290 visitor_->OnTrailers(std::move(trailers_));
1291
1292 // Allows trailers to be delivered without another call to
1293 // EnableTrailers() in case the framer is Reset().
1294 trailers_ = std::make_unique<BalsaHeaders>();
1295 }
1296 visitor_->OnTrailerInput(
1297 absl::string_view(on_entry, current - on_entry));
1298 visitor_->MessageDone();
1299 return current - input;
1300 }
1301 }
1302 if (trailers_ != nullptr) {
1303 trailers_->WriteFromFramer(on_entry, current - on_entry);
1304 }
1305 visitor_->OnTrailerInput(
1306 absl::string_view(on_entry, current - on_entry));
1307 return current - input;
1308
1309 case BalsaFrameEnums::READING_UNTIL_CLOSE: {
1310 const size_t bytes_remaining = end - current;
1311 if (bytes_remaining > 0) {
1312 visitor_->OnRawBodyInput(absl::string_view(current, bytes_remaining));
1313 visitor_->OnBodyChunkInput(
1314 absl::string_view(current, bytes_remaining));
1315 current += bytes_remaining;
1316 }
1317 return current - input;
1318 }
1319
1320 case BalsaFrameEnums::READING_CONTENT:
1321 while ((content_length_remaining_ != 0u) && current < end) {
1322 // read in the content
1323 const size_t bytes_remaining = end - current;
1324 const size_t consumed_bytes =
1325 (content_length_remaining_ < bytes_remaining)
1326 ? content_length_remaining_
1327 : bytes_remaining;
1328 visitor_->OnRawBodyInput(absl::string_view(current, consumed_bytes));
1329 visitor_->OnBodyChunkInput(
1330 absl::string_view(current, consumed_bytes));
1331 current += consumed_bytes;
1332 content_length_remaining_ -= consumed_bytes;
1333 }
1334 if (content_length_remaining_ == 0) {
1335 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
1336 visitor_->MessageDone();
1337 }
1338 return current - input;
1339
1340 default:
1341 // The state-machine should never be in a state that isn't handled
1342 // above. This is a glaring logic error, and we should do something
1343 // drastic to ensure that this gets looked-at and fixed.
1344 QUICHE_LOG(FATAL) << "Unknown state: " << parse_state_ // COV_NF_LINE
1345 << " memory corruption?!"; // COV_NF_LINE
1346 }
1347 }
1348 }
1349
HandleHeadersTooLongError()1350 void BalsaFrame::HandleHeadersTooLongError() {
1351 if (parse_truncated_headers_even_when_headers_too_long_) {
1352 const size_t len = headers_->GetReadableBytesFromHeaderStream();
1353 const char* stream_begin = headers_->OriginalHeaderStreamBegin();
1354
1355 if (last_slash_n_idx_ < len && stream_begin[last_slash_n_idx_] != '\r') {
1356 // We write an end to the truncated line, and a blank line to end the
1357 // headers, to end up with something that will parse.
1358 static const absl::string_view kTwoLineEnds = "\r\n\r\n";
1359 headers_->WriteFromFramer(kTwoLineEnds.data(), kTwoLineEnds.size());
1360
1361 // This is the last, truncated line.
1362 lines_.push_back(std::make_pair(last_slash_n_idx_, len + 2));
1363 // A blank line to end the headers.
1364 lines_.push_back(std::make_pair(len + 2, len + 4));
1365 }
1366
1367 ProcessHeaderLines(lines_, /*is_trailer=*/false, headers_);
1368 }
1369
1370 HandleError(BalsaFrameEnums::HEADERS_TOO_LONG);
1371 }
1372
// Namespace-scope definitions of BalsaFrame's static const int32_t members.
// No initializers are given here, so the values are presumably supplied where
// the members are declared inside the class (the header is not visible from
// this file -- TODO confirm).
const int32_t BalsaFrame::kValidTerm1;
const int32_t BalsaFrame::kValidTerm1Mask;
const int32_t BalsaFrame::kValidTerm2;
const int32_t BalsaFrame::kValidTerm2Mask;
1377
1378 } // namespace quiche
1379
1380 #undef CHAR_LT
1381 #undef CHAR_LE
1382 #undef CHAR_GT
1383 #undef CHAR_GE
1384 #undef QUICHE_DCHECK_CHAR_GE
1385