1// Copyright 2010 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4// 5 6/* 7Package multipart implements MIME multipart parsing, as defined in RFC 82046. 9 10The implementation is sufficient for HTTP (RFC 2388) and the multipart 11bodies generated by popular browsers. 12 13# Limits 14 15To protect against malicious inputs, this package sets limits on the size 16of the MIME data it processes. 17 18[Reader.NextPart] and [Reader.NextRawPart] limit the number of headers in a 19part to 10000 and [Reader.ReadForm] limits the total number of headers in all 20FileHeaders to 10000. 21These limits may be adjusted with the GODEBUG=multipartmaxheaders=<values> 22setting. 23 24Reader.ReadForm further limits the number of parts in a form to 1000. 25This limit may be adjusted with the GODEBUG=multipartmaxparts=<value> 26setting. 27*/ 28package multipart 29 30import ( 31 "bufio" 32 "bytes" 33 "fmt" 34 "internal/godebug" 35 "io" 36 "mime" 37 "mime/quotedprintable" 38 "net/textproto" 39 "path/filepath" 40 "strconv" 41 "strings" 42) 43 44var emptyParams = make(map[string]string) 45 46// This constant needs to be at least 76 for this package to work correctly. 47// This is because \r\n--separator_of_len_70- would fill the buffer and it 48// wouldn't be safe to consume a single byte from it. 49const peekBufferSize = 4096 50 51// A Part represents a single part in a multipart body. 52type Part struct { 53 // The headers of the body, if any, with the keys canonicalized 54 // in the same fashion that the Go http.Request headers are. 55 // For example, "foo-bar" changes case to "Foo-Bar" 56 Header textproto.MIMEHeader 57 58 mr *Reader 59 60 disposition string 61 dispositionParams map[string]string 62 63 // r is either a reader directly reading from mr, or it's a 64 // wrapper around such a reader, decoding the 65 // Content-Transfer-Encoding 66 r io.Reader 67 68 n int // known data bytes waiting in mr.bufReader 69 total int64 // total data bytes read already 70 err error // error to return when n == 0 71 readErr error // read error observed from mr.bufReader 72} 73 74// FormName returns the name parameter if p has a Content-Disposition 75// of type "form-data". Otherwise it returns the empty string. 76func (p *Part) FormName() string { 77 // See https://tools.ietf.org/html/rfc2183 section 2 for EBNF 78 // of Content-Disposition value format. 79 if p.dispositionParams == nil { 80 p.parseContentDisposition() 81 } 82 if p.disposition != "form-data" { 83 return "" 84 } 85 return p.dispositionParams["name"] 86} 87 88// FileName returns the filename parameter of the [Part]'s Content-Disposition 89// header. If not empty, the filename is passed through filepath.Base (which is 90// platform dependent) before being returned. 91func (p *Part) FileName() string { 92 if p.dispositionParams == nil { 93 p.parseContentDisposition() 94 } 95 filename := p.dispositionParams["filename"] 96 if filename == "" { 97 return "" 98 } 99 // RFC 7578, Section 4.2 requires that if a filename is provided, the 100 // directory path information must not be used. 101 return filepath.Base(filename) 102} 103 104func (p *Part) parseContentDisposition() { 105 v := p.Header.Get("Content-Disposition") 106 var err error 107 p.disposition, p.dispositionParams, err = mime.ParseMediaType(v) 108 if err != nil { 109 p.dispositionParams = emptyParams 110 } 111} 112 113// NewReader creates a new multipart [Reader] reading from r using the 114// given MIME boundary. 115// 116// The boundary is usually obtained from the "boundary" parameter of 117// the message's "Content-Type" header. Use [mime.ParseMediaType] to 118// parse such headers. 119func NewReader(r io.Reader, boundary string) *Reader { 120 b := []byte("\r\n--" + boundary + "--") 121 return &Reader{ 122 bufReader: bufio.NewReaderSize(&stickyErrorReader{r: r}, peekBufferSize), 123 nl: b[:2], 124 nlDashBoundary: b[:len(b)-2], 125 dashBoundaryDash: b[2:], 126 dashBoundary: b[2 : len(b)-2], 127 } 128} 129 130// stickyErrorReader is an io.Reader which never calls Read on its 131// underlying Reader once an error has been seen. (the io.Reader 132// interface's contract promises nothing about the return values of 133// Read calls after an error, yet this package does do multiple Reads 134// after error) 135type stickyErrorReader struct { 136 r io.Reader 137 err error 138} 139 140func (r *stickyErrorReader) Read(p []byte) (n int, _ error) { 141 if r.err != nil { 142 return 0, r.err 143 } 144 n, r.err = r.r.Read(p) 145 return n, r.err 146} 147 148func newPart(mr *Reader, rawPart bool, maxMIMEHeaderSize, maxMIMEHeaders int64) (*Part, error) { 149 bp := &Part{ 150 Header: make(map[string][]string), 151 mr: mr, 152 } 153 if err := bp.populateHeaders(maxMIMEHeaderSize, maxMIMEHeaders); err != nil { 154 return nil, err 155 } 156 bp.r = partReader{bp} 157 158 // rawPart is used to switch between Part.NextPart and Part.NextRawPart. 159 if !rawPart { 160 const cte = "Content-Transfer-Encoding" 161 if strings.EqualFold(bp.Header.Get(cte), "quoted-printable") { 162 bp.Header.Del(cte) 163 bp.r = quotedprintable.NewReader(bp.r) 164 } 165 } 166 return bp, nil 167} 168 169func (p *Part) populateHeaders(maxMIMEHeaderSize, maxMIMEHeaders int64) error { 170 r := textproto.NewReader(p.mr.bufReader) 171 header, err := readMIMEHeader(r, maxMIMEHeaderSize, maxMIMEHeaders) 172 if err == nil { 173 p.Header = header 174 } 175 // TODO: Add a distinguishable error to net/textproto. 176 if err != nil && err.Error() == "message too large" { 177 err = ErrMessageTooLarge 178 } 179 return err 180} 181 182// Read reads the body of a part, after its headers and before the 183// next part (if any) begins. 184func (p *Part) Read(d []byte) (n int, err error) { 185 return p.r.Read(d) 186} 187 188// partReader implements io.Reader by reading raw bytes directly from the 189// wrapped *Part, without doing any Transfer-Encoding decoding. 190type partReader struct { 191 p *Part 192} 193 194func (pr partReader) Read(d []byte) (int, error) { 195 p := pr.p 196 br := p.mr.bufReader 197 198 // Read into buffer until we identify some data to return, 199 // or we find a reason to stop (boundary or read error). 200 for p.n == 0 && p.err == nil { 201 peek, _ := br.Peek(br.Buffered()) 202 p.n, p.err = scanUntilBoundary(peek, p.mr.dashBoundary, p.mr.nlDashBoundary, p.total, p.readErr) 203 if p.n == 0 && p.err == nil { 204 // Force buffered I/O to read more into buffer. 205 _, p.readErr = br.Peek(len(peek) + 1) 206 if p.readErr == io.EOF { 207 p.readErr = io.ErrUnexpectedEOF 208 } 209 } 210 } 211 212 // Read out from "data to return" part of buffer. 213 if p.n == 0 { 214 return 0, p.err 215 } 216 n := len(d) 217 if n > p.n { 218 n = p.n 219 } 220 n, _ = br.Read(d[:n]) 221 p.total += int64(n) 222 p.n -= n 223 if p.n == 0 { 224 return n, p.err 225 } 226 return n, nil 227} 228 229// scanUntilBoundary scans buf to identify how much of it can be safely 230// returned as part of the Part body. 231// dashBoundary is "--boundary". 232// nlDashBoundary is "\r\n--boundary" or "\n--boundary", depending on what mode we are in. 233// The comments below (and the name) assume "\n--boundary", but either is accepted. 234// total is the number of bytes read out so far. If total == 0, then a leading "--boundary" is recognized. 235// readErr is the read error, if any, that followed reading the bytes in buf. 236// scanUntilBoundary returns the number of data bytes from buf that can be 237// returned as part of the Part body and also the error to return (if any) 238// once those data bytes are done. 239func scanUntilBoundary(buf, dashBoundary, nlDashBoundary []byte, total int64, readErr error) (int, error) { 240 if total == 0 { 241 // At beginning of body, allow dashBoundary. 242 if bytes.HasPrefix(buf, dashBoundary) { 243 switch matchAfterPrefix(buf, dashBoundary, readErr) { 244 case -1: 245 return len(dashBoundary), nil 246 case 0: 247 return 0, nil 248 case +1: 249 return 0, io.EOF 250 } 251 } 252 if bytes.HasPrefix(dashBoundary, buf) { 253 return 0, readErr 254 } 255 } 256 257 // Search for "\n--boundary". 258 if i := bytes.Index(buf, nlDashBoundary); i >= 0 { 259 switch matchAfterPrefix(buf[i:], nlDashBoundary, readErr) { 260 case -1: 261 return i + len(nlDashBoundary), nil 262 case 0: 263 return i, nil 264 case +1: 265 return i, io.EOF 266 } 267 } 268 if bytes.HasPrefix(nlDashBoundary, buf) { 269 return 0, readErr 270 } 271 272 // Otherwise, anything up to the final \n is not part of the boundary 273 // and so must be part of the body. 274 // Also if the section from the final \n onward is not a prefix of the boundary, 275 // it too must be part of the body. 276 i := bytes.LastIndexByte(buf, nlDashBoundary[0]) 277 if i >= 0 && bytes.HasPrefix(nlDashBoundary, buf[i:]) { 278 return i, nil 279 } 280 return len(buf), readErr 281} 282 283// matchAfterPrefix checks whether buf should be considered to match the boundary. 284// The prefix is "--boundary" or "\r\n--boundary" or "\n--boundary", 285// and the caller has verified already that bytes.HasPrefix(buf, prefix) is true. 286// 287// matchAfterPrefix returns +1 if the buffer does match the boundary, 288// meaning the prefix is followed by a double dash, space, tab, cr, nl, 289// or end of input. 290// It returns -1 if the buffer definitely does NOT match the boundary, 291// meaning the prefix is followed by some other character. 292// For example, "--foobar" does not match "--foo". 293// It returns 0 more input needs to be read to make the decision, 294// meaning that len(buf) == len(prefix) and readErr == nil. 295func matchAfterPrefix(buf, prefix []byte, readErr error) int { 296 if len(buf) == len(prefix) { 297 if readErr != nil { 298 return +1 299 } 300 return 0 301 } 302 c := buf[len(prefix)] 303 304 if c == ' ' || c == '\t' || c == '\r' || c == '\n' { 305 return +1 306 } 307 308 // Try to detect boundaryDash 309 if c == '-' { 310 if len(buf) == len(prefix)+1 { 311 if readErr != nil { 312 // Prefix + "-" does not match 313 return -1 314 } 315 return 0 316 } 317 if buf[len(prefix)+1] == '-' { 318 return +1 319 } 320 } 321 322 return -1 323} 324 325func (p *Part) Close() error { 326 io.Copy(io.Discard, p) 327 return nil 328} 329 330// Reader is an iterator over parts in a MIME multipart body. 331// Reader's underlying parser consumes its input as needed. Seeking 332// isn't supported. 333type Reader struct { 334 bufReader *bufio.Reader 335 tempDir string // used in tests 336 337 currentPart *Part 338 partsRead int 339 340 nl []byte // "\r\n" or "\n" (set after seeing first boundary line) 341 nlDashBoundary []byte // nl + "--boundary" 342 dashBoundaryDash []byte // "--boundary--" 343 dashBoundary []byte // "--boundary" 344} 345 346// maxMIMEHeaderSize is the maximum size of a MIME header we will parse, 347// including header keys, values, and map overhead. 348const maxMIMEHeaderSize = 10 << 20 349 350// multipartmaxheaders is the maximum number of header entries NextPart will return, 351// as well as the maximum combined total of header entries Reader.ReadForm will return 352// in FileHeaders. 353var multipartmaxheaders = godebug.New("multipartmaxheaders") 354 355func maxMIMEHeaders() int64 { 356 if s := multipartmaxheaders.Value(); s != "" { 357 if v, err := strconv.ParseInt(s, 10, 64); err == nil && v >= 0 { 358 multipartmaxheaders.IncNonDefault() 359 return v 360 } 361 } 362 return 10000 363} 364 365// NextPart returns the next part in the multipart or an error. 366// When there are no more parts, the error [io.EOF] is returned. 367// 368// As a special case, if the "Content-Transfer-Encoding" header 369// has a value of "quoted-printable", that header is instead 370// hidden and the body is transparently decoded during Read calls. 371func (r *Reader) NextPart() (*Part, error) { 372 return r.nextPart(false, maxMIMEHeaderSize, maxMIMEHeaders()) 373} 374 375// NextRawPart returns the next part in the multipart or an error. 376// When there are no more parts, the error [io.EOF] is returned. 377// 378// Unlike [Reader.NextPart], it does not have special handling for 379// "Content-Transfer-Encoding: quoted-printable". 380func (r *Reader) NextRawPart() (*Part, error) { 381 return r.nextPart(true, maxMIMEHeaderSize, maxMIMEHeaders()) 382} 383 384func (r *Reader) nextPart(rawPart bool, maxMIMEHeaderSize, maxMIMEHeaders int64) (*Part, error) { 385 if r.currentPart != nil { 386 r.currentPart.Close() 387 } 388 if string(r.dashBoundary) == "--" { 389 return nil, fmt.Errorf("multipart: boundary is empty") 390 } 391 expectNewPart := false 392 for { 393 line, err := r.bufReader.ReadSlice('\n') 394 395 if err == io.EOF && r.isFinalBoundary(line) { 396 // If the buffer ends in "--boundary--" without the 397 // trailing "\r\n", ReadSlice will return an error 398 // (since it's missing the '\n'), but this is a valid 399 // multipart EOF so we need to return io.EOF instead of 400 // a fmt-wrapped one. 401 return nil, io.EOF 402 } 403 if err != nil { 404 return nil, fmt.Errorf("multipart: NextPart: %w", err) 405 } 406 407 if r.isBoundaryDelimiterLine(line) { 408 r.partsRead++ 409 bp, err := newPart(r, rawPart, maxMIMEHeaderSize, maxMIMEHeaders) 410 if err != nil { 411 return nil, err 412 } 413 r.currentPart = bp 414 return bp, nil 415 } 416 417 if r.isFinalBoundary(line) { 418 // Expected EOF 419 return nil, io.EOF 420 } 421 422 if expectNewPart { 423 return nil, fmt.Errorf("multipart: expecting a new Part; got line %q", string(line)) 424 } 425 426 if r.partsRead == 0 { 427 // skip line 428 continue 429 } 430 431 // Consume the "\n" or "\r\n" separator between the 432 // body of the previous part and the boundary line we 433 // now expect will follow. (either a new part or the 434 // end boundary) 435 if bytes.Equal(line, r.nl) { 436 expectNewPart = true 437 continue 438 } 439 440 return nil, fmt.Errorf("multipart: unexpected line in Next(): %q", line) 441 } 442} 443 444// isFinalBoundary reports whether line is the final boundary line 445// indicating that all parts are over. 446// It matches `^--boundary--[ \t]*(\r\n)?$` 447func (r *Reader) isFinalBoundary(line []byte) bool { 448 if !bytes.HasPrefix(line, r.dashBoundaryDash) { 449 return false 450 } 451 rest := line[len(r.dashBoundaryDash):] 452 rest = skipLWSPChar(rest) 453 return len(rest) == 0 || bytes.Equal(rest, r.nl) 454} 455 456func (r *Reader) isBoundaryDelimiterLine(line []byte) (ret bool) { 457 // https://tools.ietf.org/html/rfc2046#section-5.1 458 // The boundary delimiter line is then defined as a line 459 // consisting entirely of two hyphen characters ("-", 460 // decimal value 45) followed by the boundary parameter 461 // value from the Content-Type header field, optional linear 462 // whitespace, and a terminating CRLF. 463 if !bytes.HasPrefix(line, r.dashBoundary) { 464 return false 465 } 466 rest := line[len(r.dashBoundary):] 467 rest = skipLWSPChar(rest) 468 469 // On the first part, see our lines are ending in \n instead of \r\n 470 // and switch into that mode if so. This is a violation of the spec, 471 // but occurs in practice. 472 if r.partsRead == 0 && len(rest) == 1 && rest[0] == '\n' { 473 r.nl = r.nl[1:] 474 r.nlDashBoundary = r.nlDashBoundary[1:] 475 } 476 return bytes.Equal(rest, r.nl) 477} 478 479// skipLWSPChar returns b with leading spaces and tabs removed. 480// RFC 822 defines: 481// 482// LWSP-char = SPACE / HTAB 483func skipLWSPChar(b []byte) []byte { 484 for len(b) > 0 && (b[0] == ' ' || b[0] == '\t') { 485 b = b[1:] 486 } 487 return b 488} 489