1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// The wire protocol for HTTP's "chunked" Transfer-Encoding.
6
7// Package internal contains HTTP internals shared by net/http and
8// net/http/httputil.
9package internal
10
11import (
12	"bufio"
13	"bytes"
14	"errors"
15	"fmt"
16	"io"
17)
18
// maxLineLength bounds the length of a chunk header line: readChunkLine
// rejects any line whose length (including its terminator) is greater than
// or equal to this value.
const maxLineLength = 4096 // assumed <= bufio.defaultBufSize

// ErrLineTooLong is returned by readChunkLine when a chunk header line does
// not fit in the bufio.Reader's buffer or is at least maxLineLength bytes long.
var ErrLineTooLong = errors.New("header line too long")
22
23// NewChunkedReader returns a new chunkedReader that translates the data read from r
24// out of HTTP "chunked" format before returning it.
25// The chunkedReader returns [io.EOF] when the final 0-length chunk is read.
26//
27// NewChunkedReader is not needed by normal applications. The http package
28// automatically decodes chunking when reading response bodies.
29func NewChunkedReader(r io.Reader) io.Reader {
30	br, ok := r.(*bufio.Reader)
31	if !ok {
32		br = bufio.NewReader(r)
33	}
34	return &chunkedReader{r: br}
35}
36
// chunkedReader is the io.Reader returned by NewChunkedReader. It decodes the
// HTTP "chunked" transfer encoding read from r.
type chunkedReader struct {
	// r is the buffered source of chunk-encoded bytes.
	r        *bufio.Reader
	n        uint64 // unread bytes in chunk
	// err is sticky: Read only makes progress while it is nil and returns it
	// on every call. beginChunk sets it to io.EOF on a 0-length chunk.
	err      error
	// buf is scratch space for the two bytes read after each chunk's data.
	buf      [2]byte
	checkEnd bool  // whether need to check for \r\n chunk footer
	excess   int64 // "excessive" chunk overhead, for malicious sender detection
}
45
46func (cr *chunkedReader) beginChunk() {
47	// chunk-size CRLF
48	var line []byte
49	line, cr.err = readChunkLine(cr.r)
50	if cr.err != nil {
51		return
52	}
53	cr.excess += int64(len(line)) + 2 // header, plus \r\n after the chunk data
54	line = trimTrailingWhitespace(line)
55	line, cr.err = removeChunkExtension(line)
56	if cr.err != nil {
57		return
58	}
59	cr.n, cr.err = parseHexUint(line)
60	if cr.err != nil {
61		return
62	}
63	// A sender who sends one byte per chunk will send 5 bytes of overhead
64	// for every byte of data. ("1\r\nX\r\n" to send "X".)
65	// We want to allow this, since streaming a byte at a time can be legitimate.
66	//
67	// A sender can use chunk extensions to add arbitrary amounts of additional
68	// data per byte read. ("1;very long extension\r\nX\r\n" to send "X".)
69	// We don't want to disallow extensions (although we discard them),
70	// but we also don't want to allow a sender to reduce the signal/noise ratio
71	// arbitrarily.
72	//
73	// We track the amount of excess overhead read,
74	// and produce an error if it grows too large.
75	//
76	// Currently, we say that we're willing to accept 16 bytes of overhead per chunk,
77	// plus twice the amount of real data in the chunk.
78	cr.excess -= 16 + (2 * int64(cr.n))
79	cr.excess = max(cr.excess, 0)
80	if cr.excess > 16*1024 {
81		cr.err = errors.New("chunked encoding contains too much non-data")
82	}
83	if cr.n == 0 {
84		cr.err = io.EOF
85	}
86}
87
88func (cr *chunkedReader) chunkHeaderAvailable() bool {
89	n := cr.r.Buffered()
90	if n > 0 {
91		peek, _ := cr.r.Peek(n)
92		return bytes.IndexByte(peek, '\n') >= 0
93	}
94	return false
95}
96
// Read implements io.Reader. It copies decoded chunk data into b, parsing
// chunk headers with beginChunk and checking that each chunk's data is
// followed by "\r\n". A single call may span several chunks, but once some
// data has been copied it does not start reading a chunk footer or a new
// chunk header unless the needed bytes are already buffered.
//
// The error is sticky: it is kept in cr.err, the loop only runs while cr.err
// is nil, and every call returns cr.err alongside the byte count. An io.EOF
// from the underlying reader is reported as io.ErrUnexpectedEOF.
func (cr *chunkedReader) Read(b []uint8) (n int, err error) {
	for cr.err == nil {
		if cr.checkEnd {
			if n > 0 && cr.r.Buffered() < 2 {
				// We have some data. Return early (per the io.Reader
				// contract) instead of potentially blocking while
				// reading more.
				break
			}
			if _, cr.err = io.ReadFull(cr.r, cr.buf[:2]); cr.err == nil {
				if string(cr.buf[:]) != "\r\n" {
					cr.err = errors.New("malformed chunked encoding")
					break
				}
			} else {
				// The stream ended where the chunk footer should be.
				if cr.err == io.EOF {
					cr.err = io.ErrUnexpectedEOF
				}
				break
			}
			cr.checkEnd = false
		}
		if cr.n == 0 {
			if n > 0 && !cr.chunkHeaderAvailable() {
				// We've read enough. Don't potentially block
				// reading a new chunk header.
				break
			}
			// beginChunk sets cr.n or cr.err; re-evaluate the loop condition.
			cr.beginChunk()
			continue
		}
		if len(b) == 0 {
			break
		}
		// Never read past the end of the current chunk.
		rbuf := b
		if uint64(len(rbuf)) > cr.n {
			rbuf = rbuf[:cr.n]
		}
		var n0 int
		n0, cr.err = cr.r.Read(rbuf)
		n += n0
		b = b[n0:]
		cr.n -= uint64(n0)
		// If we're at the end of a chunk, read the next two
		// bytes to verify they are "\r\n".
		if cr.n == 0 && cr.err == nil {
			cr.checkEnd = true
		} else if cr.err == io.EOF {
			cr.err = io.ErrUnexpectedEOF
		}
	}
	return n, cr.err
}
150
151// Read a line of bytes (up to \n) from b.
152// Give up if the line exceeds maxLineLength.
153// The returned bytes are owned by the bufio.Reader
154// so they are only valid until the next bufio read.
155func readChunkLine(b *bufio.Reader) ([]byte, error) {
156	p, err := b.ReadSlice('\n')
157	if err != nil {
158		// We always know when EOF is coming.
159		// If the caller asked for a line, there should be a line.
160		if err == io.EOF {
161			err = io.ErrUnexpectedEOF
162		} else if err == bufio.ErrBufferFull {
163			err = ErrLineTooLong
164		}
165		return nil, err
166	}
167	if len(p) >= maxLineLength {
168		return nil, ErrLineTooLong
169	}
170	return p, nil
171}
172
173func trimTrailingWhitespace(b []byte) []byte {
174	for len(b) > 0 && isASCIISpace(b[len(b)-1]) {
175		b = b[:len(b)-1]
176	}
177	return b
178}
179
// isASCIISpace reports whether b is a space, horizontal tab, line feed, or
// carriage return.
func isASCIISpace(b byte) bool {
	switch b {
	case ' ', '\t', '\n', '\r':
		return true
	}
	return false
}
183
// semi is the separator that introduces a chunk extension.
var semi = []byte(";")

// removeChunkExtension returns p truncated just before its first ';',
// dropping any chunk-extension. If p contains no ';' it is returned unchanged.
// For example,
//
//	"0" => "0"
//	"0;token" => "0"
//	"0;token=val" => "0"
//	`0;token="quoted string"` => "0"
//
// The error result is currently always nil.
func removeChunkExtension(p []byte) ([]byte, error) {
	// TODO: care about exact syntax of chunk extensions? They are ignored
	// and stripped anyway, so for now no input is treated as an error.
	if cut := bytes.Index(p, semi); cut >= 0 {
		p = p[:cut]
	}
	return p, nil
}
200
201// NewChunkedWriter returns a new chunkedWriter that translates writes into HTTP
202// "chunked" format before writing them to w. Closing the returned chunkedWriter
203// sends the final 0-length chunk that marks the end of the stream but does
204// not send the final CRLF that appears after trailers; trailers and the last
205// CRLF must be written separately.
206//
207// NewChunkedWriter is not needed by normal applications. The http
208// package adds chunking automatically if handlers don't set a
209// Content-Length header. Using newChunkedWriter inside a handler
210// would result in double chunking or chunking with a Content-Length
211// length, both of which are wrong.
212func NewChunkedWriter(w io.Writer) io.WriteCloser {
213	return &chunkedWriter{w}
214}
215
// chunkedWriter is the io.WriteCloser returned by NewChunkedWriter.
// Writing to chunkedWriter translates to writing in HTTP chunked Transfer
// Encoding wire format to the underlying Wire chunkedWriter.
type chunkedWriter struct {
	// Wire is the underlying writer that receives the encoded chunks.
	Wire io.Writer
}
221
222// Write the contents of data as one chunk to Wire.
223// NOTE: Note that the corresponding chunk-writing procedure in Conn.Write has
224// a bug since it does not check for success of [io.WriteString]
225func (cw *chunkedWriter) Write(data []byte) (n int, err error) {
226
227	// Don't send 0-length data. It looks like EOF for chunked encoding.
228	if len(data) == 0 {
229		return 0, nil
230	}
231
232	if _, err = fmt.Fprintf(cw.Wire, "%x\r\n", len(data)); err != nil {
233		return 0, err
234	}
235	if n, err = cw.Wire.Write(data); err != nil {
236		return
237	}
238	if n != len(data) {
239		err = io.ErrShortWrite
240		return
241	}
242	if _, err = io.WriteString(cw.Wire, "\r\n"); err != nil {
243		return
244	}
245	if bw, ok := cw.Wire.(*FlushAfterChunkWriter); ok {
246		err = bw.Flush()
247	}
248	return
249}
250
251func (cw *chunkedWriter) Close() error {
252	_, err := io.WriteString(cw.Wire, "0\r\n")
253	return err
254}
255
// FlushAfterChunkWriter signals from the caller of [NewChunkedWriter]
// that each chunk should be followed by a flush. It is used by the
// [net/http.Transport] code to keep the buffering behavior for headers and
// trailers, but flush out chunks aggressively in the middle for
// request bodies which may be generated slowly. See Issue 6574.
//
// The signal is the dynamic type itself: chunkedWriter.Write checks whether
// its Wire is a *FlushAfterChunkWriter and, if so, calls Flush after writing
// each chunk.
type FlushAfterChunkWriter struct {
	*bufio.Writer
}
264
// parseHexUint parses v as an unsigned hexadecimal number with no prefix, sign,
// or surrounding whitespace. Upper- and lower-case digits are both accepted.
//
// It returns an error if v is empty, contains a byte that is not a hex digit,
// or is longer than 16 digits (more than fits in a uint64). On error n is 0.
func parseHexUint(v []byte) (n uint64, err error) {
	if len(v) == 0 {
		return 0, errors.New("empty hex number for chunk length")
	}
	for pos, c := range v {
		var digit byte
		switch {
		case c >= '0' && c <= '9':
			digit = c - '0'
		case c >= 'a' && c <= 'f':
			digit = c - 'a' + 10
		case c >= 'A' && c <= 'F':
			digit = c - 'A' + 10
		default:
			return 0, errors.New("invalid byte in chunk length")
		}
		// A 17th digit would shift the top nibble out of a uint64.
		if pos == 16 {
			return 0, errors.New("http chunk length too large")
		}
		n = n<<4 | uint64(digit)
	}
	return n, nil
}
288