1// Copyright 2012 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package build 6 7import ( 8 "bufio" 9 "bytes" 10 "errors" 11 "fmt" 12 "go/ast" 13 "go/parser" 14 "go/scanner" 15 "go/token" 16 "io" 17 "strconv" 18 "strings" 19 "unicode" 20 "unicode/utf8" 21 _ "unsafe" // for linkname 22) 23 24type importReader struct { 25 b *bufio.Reader 26 buf []byte 27 peek byte 28 err error 29 eof bool 30 nerr int 31 pos token.Position 32} 33 34var bom = []byte{0xef, 0xbb, 0xbf} 35 36func newImportReader(name string, r io.Reader) *importReader { 37 b := bufio.NewReader(r) 38 // Remove leading UTF-8 BOM. 39 // Per https://golang.org/ref/spec#Source_code_representation: 40 // a compiler may ignore a UTF-8-encoded byte order mark (U+FEFF) 41 // if it is the first Unicode code point in the source text. 42 if leadingBytes, err := b.Peek(3); err == nil && bytes.Equal(leadingBytes, bom) { 43 b.Discard(3) 44 } 45 return &importReader{ 46 b: b, 47 pos: token.Position{ 48 Filename: name, 49 Line: 1, 50 Column: 1, 51 }, 52 } 53} 54 55func isIdent(c byte) bool { 56 return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' || c == '_' || c >= utf8.RuneSelf 57} 58 59var ( 60 errSyntax = errors.New("syntax error") 61 errNUL = errors.New("unexpected NUL in input") 62) 63 64// syntaxError records a syntax error, but only if an I/O error has not already been recorded. 65func (r *importReader) syntaxError() { 66 if r.err == nil { 67 r.err = errSyntax 68 } 69} 70 71// readByte reads the next byte from the input, saves it in buf, and returns it. 72// If an error occurs, readByte records the error in r.err and returns 0. 73func (r *importReader) readByte() byte { 74 c, err := r.b.ReadByte() 75 if err == nil { 76 r.buf = append(r.buf, c) 77 if c == 0 { 78 err = errNUL 79 } 80 } 81 if err != nil { 82 if err == io.EOF { 83 r.eof = true 84 } else if r.err == nil { 85 r.err = err 86 } 87 c = 0 88 } 89 return c 90} 91 92// readByteNoBuf is like readByte but doesn't buffer the byte. 93// It exhausts r.buf before reading from r.b. 94func (r *importReader) readByteNoBuf() byte { 95 var c byte 96 var err error 97 if len(r.buf) > 0 { 98 c = r.buf[0] 99 r.buf = r.buf[1:] 100 } else { 101 c, err = r.b.ReadByte() 102 if err == nil && c == 0 { 103 err = errNUL 104 } 105 } 106 107 if err != nil { 108 if err == io.EOF { 109 r.eof = true 110 } else if r.err == nil { 111 r.err = err 112 } 113 return 0 114 } 115 r.pos.Offset++ 116 if c == '\n' { 117 r.pos.Line++ 118 r.pos.Column = 1 119 } else { 120 r.pos.Column++ 121 } 122 return c 123} 124 125// peekByte returns the next byte from the input reader but does not advance beyond it. 126// If skipSpace is set, peekByte skips leading spaces and comments. 127func (r *importReader) peekByte(skipSpace bool) byte { 128 if r.err != nil { 129 if r.nerr++; r.nerr > 10000 { 130 panic("go/build: import reader looping") 131 } 132 return 0 133 } 134 135 // Use r.peek as first input byte. 136 // Don't just return r.peek here: it might have been left by peekByte(false) 137 // and this might be peekByte(true). 138 c := r.peek 139 if c == 0 { 140 c = r.readByte() 141 } 142 for r.err == nil && !r.eof { 143 if skipSpace { 144 // For the purposes of this reader, semicolons are never necessary to 145 // understand the input and are treated as spaces. 146 switch c { 147 case ' ', '\f', '\t', '\r', '\n', ';': 148 c = r.readByte() 149 continue 150 151 case '/': 152 c = r.readByte() 153 if c == '/' { 154 for c != '\n' && r.err == nil && !r.eof { 155 c = r.readByte() 156 } 157 } else if c == '*' { 158 var c1 byte 159 for (c != '*' || c1 != '/') && r.err == nil { 160 if r.eof { 161 r.syntaxError() 162 } 163 c, c1 = c1, r.readByte() 164 } 165 } else { 166 r.syntaxError() 167 } 168 c = r.readByte() 169 continue 170 } 171 } 172 break 173 } 174 r.peek = c 175 return r.peek 176} 177 178// nextByte is like peekByte but advances beyond the returned byte. 179func (r *importReader) nextByte(skipSpace bool) byte { 180 c := r.peekByte(skipSpace) 181 r.peek = 0 182 return c 183} 184 185var goEmbed = []byte("go:embed") 186 187// findEmbed advances the input reader to the next //go:embed comment. 188// It reports whether it found a comment. 189// (Otherwise it found an error or EOF.) 190func (r *importReader) findEmbed(first bool) bool { 191 // The import block scan stopped after a non-space character, 192 // so the reader is not at the start of a line on the first call. 193 // After that, each //go:embed extraction leaves the reader 194 // at the end of a line. 195 startLine := !first 196 var c byte 197 for r.err == nil && !r.eof { 198 c = r.readByteNoBuf() 199 Reswitch: 200 switch c { 201 default: 202 startLine = false 203 204 case '\n': 205 startLine = true 206 207 case ' ', '\t': 208 // leave startLine alone 209 210 case '"': 211 startLine = false 212 for r.err == nil { 213 if r.eof { 214 r.syntaxError() 215 } 216 c = r.readByteNoBuf() 217 if c == '\\' { 218 r.readByteNoBuf() 219 if r.err != nil { 220 r.syntaxError() 221 return false 222 } 223 continue 224 } 225 if c == '"' { 226 c = r.readByteNoBuf() 227 goto Reswitch 228 } 229 } 230 goto Reswitch 231 232 case '`': 233 startLine = false 234 for r.err == nil { 235 if r.eof { 236 r.syntaxError() 237 } 238 c = r.readByteNoBuf() 239 if c == '`' { 240 c = r.readByteNoBuf() 241 goto Reswitch 242 } 243 } 244 245 case '\'': 246 startLine = false 247 for r.err == nil { 248 if r.eof { 249 r.syntaxError() 250 } 251 c = r.readByteNoBuf() 252 if c == '\\' { 253 r.readByteNoBuf() 254 if r.err != nil { 255 r.syntaxError() 256 return false 257 } 258 continue 259 } 260 if c == '\'' { 261 c = r.readByteNoBuf() 262 goto Reswitch 263 } 264 } 265 266 case '/': 267 c = r.readByteNoBuf() 268 switch c { 269 default: 270 startLine = false 271 goto Reswitch 272 273 case '*': 274 var c1 byte 275 for (c != '*' || c1 != '/') && r.err == nil { 276 if r.eof { 277 r.syntaxError() 278 } 279 c, c1 = c1, r.readByteNoBuf() 280 } 281 startLine = false 282 283 case '/': 284 if startLine { 285 // Try to read this as a //go:embed comment. 286 for i := range goEmbed { 287 c = r.readByteNoBuf() 288 if c != goEmbed[i] { 289 goto SkipSlashSlash 290 } 291 } 292 c = r.readByteNoBuf() 293 if c == ' ' || c == '\t' { 294 // Found one! 295 return true 296 } 297 } 298 SkipSlashSlash: 299 for c != '\n' && r.err == nil && !r.eof { 300 c = r.readByteNoBuf() 301 } 302 startLine = true 303 } 304 } 305 } 306 return false 307} 308 309// readKeyword reads the given keyword from the input. 310// If the keyword is not present, readKeyword records a syntax error. 311func (r *importReader) readKeyword(kw string) { 312 r.peekByte(true) 313 for i := 0; i < len(kw); i++ { 314 if r.nextByte(false) != kw[i] { 315 r.syntaxError() 316 return 317 } 318 } 319 if isIdent(r.peekByte(false)) { 320 r.syntaxError() 321 } 322} 323 324// readIdent reads an identifier from the input. 325// If an identifier is not present, readIdent records a syntax error. 326func (r *importReader) readIdent() { 327 c := r.peekByte(true) 328 if !isIdent(c) { 329 r.syntaxError() 330 return 331 } 332 for isIdent(r.peekByte(false)) { 333 r.peek = 0 334 } 335} 336 337// readString reads a quoted string literal from the input. 338// If an identifier is not present, readString records a syntax error. 339func (r *importReader) readString() { 340 switch r.nextByte(true) { 341 case '`': 342 for r.err == nil { 343 if r.nextByte(false) == '`' { 344 break 345 } 346 if r.eof { 347 r.syntaxError() 348 } 349 } 350 case '"': 351 for r.err == nil { 352 c := r.nextByte(false) 353 if c == '"' { 354 break 355 } 356 if r.eof || c == '\n' { 357 r.syntaxError() 358 } 359 if c == '\\' { 360 r.nextByte(false) 361 } 362 } 363 default: 364 r.syntaxError() 365 } 366} 367 368// readImport reads an import clause - optional identifier followed by quoted string - 369// from the input. 370func (r *importReader) readImport() { 371 c := r.peekByte(true) 372 if c == '.' { 373 r.peek = 0 374 } else if isIdent(c) { 375 r.readIdent() 376 } 377 r.readString() 378} 379 380// readComments is like io.ReadAll, except that it only reads the leading 381// block of comments in the file. 382// 383// readComments should be an internal detail, 384// but widely used packages access it using linkname. 385// Notable members of the hall of shame include: 386// - github.com/bazelbuild/bazel-gazelle 387// 388// Do not remove or change the type signature. 389// See go.dev/issue/67401. 390// 391//go:linkname readComments 392func readComments(f io.Reader) ([]byte, error) { 393 r := newImportReader("", f) 394 r.peekByte(true) 395 if r.err == nil && !r.eof { 396 // Didn't reach EOF, so must have found a non-space byte. Remove it. 397 r.buf = r.buf[:len(r.buf)-1] 398 } 399 return r.buf, r.err 400} 401 402// readGoInfo expects a Go file as input and reads the file up to and including the import section. 403// It records what it learned in *info. 404// If info.fset is non-nil, readGoInfo parses the file and sets info.parsed, info.parseErr, 405// info.imports and info.embeds. 406// 407// It only returns an error if there are problems reading the file, 408// not for syntax errors in the file itself. 409func readGoInfo(f io.Reader, info *fileInfo) error { 410 r := newImportReader(info.name, f) 411 412 r.readKeyword("package") 413 r.readIdent() 414 for r.peekByte(true) == 'i' { 415 r.readKeyword("import") 416 if r.peekByte(true) == '(' { 417 r.nextByte(false) 418 for r.peekByte(true) != ')' && r.err == nil { 419 r.readImport() 420 } 421 r.nextByte(false) 422 } else { 423 r.readImport() 424 } 425 } 426 427 info.header = r.buf 428 429 // If we stopped successfully before EOF, we read a byte that told us we were done. 430 // Return all but that last byte, which would cause a syntax error if we let it through. 431 if r.err == nil && !r.eof { 432 info.header = r.buf[:len(r.buf)-1] 433 } 434 435 // If we stopped for a syntax error, consume the whole file so that 436 // we are sure we don't change the errors that go/parser returns. 437 if r.err == errSyntax { 438 r.err = nil 439 for r.err == nil && !r.eof { 440 r.readByte() 441 } 442 info.header = r.buf 443 } 444 if r.err != nil { 445 return r.err 446 } 447 448 if info.fset == nil { 449 return nil 450 } 451 452 // Parse file header & record imports. 453 info.parsed, info.parseErr = parser.ParseFile(info.fset, info.name, info.header, parser.ImportsOnly|parser.ParseComments) 454 if info.parseErr != nil { 455 return nil 456 } 457 458 hasEmbed := false 459 for _, decl := range info.parsed.Decls { 460 d, ok := decl.(*ast.GenDecl) 461 if !ok { 462 continue 463 } 464 for _, dspec := range d.Specs { 465 spec, ok := dspec.(*ast.ImportSpec) 466 if !ok { 467 continue 468 } 469 quoted := spec.Path.Value 470 path, err := strconv.Unquote(quoted) 471 if err != nil { 472 return fmt.Errorf("parser returned invalid quoted string: <%s>", quoted) 473 } 474 if !isValidImport(path) { 475 // The parser used to return a parse error for invalid import paths, but 476 // no longer does, so check for and create the error here instead. 477 info.parseErr = scanner.Error{Pos: info.fset.Position(spec.Pos()), Msg: "invalid import path: " + path} 478 info.imports = nil 479 return nil 480 } 481 if path == "embed" { 482 hasEmbed = true 483 } 484 485 doc := spec.Doc 486 if doc == nil && len(d.Specs) == 1 { 487 doc = d.Doc 488 } 489 info.imports = append(info.imports, fileImport{path, spec.Pos(), doc}) 490 } 491 } 492 493 // Extract directives. 494 for _, group := range info.parsed.Comments { 495 if group.Pos() >= info.parsed.Package { 496 break 497 } 498 for _, c := range group.List { 499 if strings.HasPrefix(c.Text, "//go:") { 500 info.directives = append(info.directives, Directive{c.Text, info.fset.Position(c.Slash)}) 501 } 502 } 503 } 504 505 // If the file imports "embed", 506 // we have to look for //go:embed comments 507 // in the remainder of the file. 508 // The compiler will enforce the mapping of comments to 509 // declared variables. We just need to know the patterns. 510 // If there were //go:embed comments earlier in the file 511 // (near the package statement or imports), the compiler 512 // will reject them. They can be (and have already been) ignored. 513 if hasEmbed { 514 var line []byte 515 for first := true; r.findEmbed(first); first = false { 516 line = line[:0] 517 pos := r.pos 518 for { 519 c := r.readByteNoBuf() 520 if c == '\n' || r.err != nil || r.eof { 521 break 522 } 523 line = append(line, c) 524 } 525 // Add args if line is well-formed. 526 // Ignore badly-formed lines - the compiler will report them when it finds them, 527 // and we can pretend they are not there to help go list succeed with what it knows. 528 embs, err := parseGoEmbed(string(line), pos) 529 if err == nil { 530 info.embeds = append(info.embeds, embs...) 531 } 532 } 533 } 534 535 return nil 536} 537 538// isValidImport checks if the import is a valid import using the more strict 539// checks allowed by the implementation restriction in https://go.dev/ref/spec#Import_declarations. 540// It was ported from the function of the same name that was removed from the 541// parser in CL 424855, when the parser stopped doing these checks. 542func isValidImport(s string) bool { 543 const illegalChars = `!"#$%&'()*,:;<=>?[\]^{|}` + "`\uFFFD" 544 for _, r := range s { 545 if !unicode.IsGraphic(r) || unicode.IsSpace(r) || strings.ContainsRune(illegalChars, r) { 546 return false 547 } 548 } 549 return s != "" 550} 551 552// parseGoEmbed parses the text following "//go:embed" to extract the glob patterns. 553// It accepts unquoted space-separated patterns as well as double-quoted and back-quoted Go strings. 554// This is based on a similar function in cmd/compile/internal/gc/noder.go; 555// this version calculates position information as well. 556func parseGoEmbed(args string, pos token.Position) ([]fileEmbed, error) { 557 trimBytes := func(n int) { 558 pos.Offset += n 559 pos.Column += utf8.RuneCountInString(args[:n]) 560 args = args[n:] 561 } 562 trimSpace := func() { 563 trim := strings.TrimLeftFunc(args, unicode.IsSpace) 564 trimBytes(len(args) - len(trim)) 565 } 566 567 var list []fileEmbed 568 for trimSpace(); args != ""; trimSpace() { 569 var path string 570 pathPos := pos 571 Switch: 572 switch args[0] { 573 default: 574 i := len(args) 575 for j, c := range args { 576 if unicode.IsSpace(c) { 577 i = j 578 break 579 } 580 } 581 path = args[:i] 582 trimBytes(i) 583 584 case '`': 585 var ok bool 586 path, _, ok = strings.Cut(args[1:], "`") 587 if !ok { 588 return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args) 589 } 590 trimBytes(1 + len(path) + 1) 591 592 case '"': 593 i := 1 594 for ; i < len(args); i++ { 595 if args[i] == '\\' { 596 i++ 597 continue 598 } 599 if args[i] == '"' { 600 q, err := strconv.Unquote(args[:i+1]) 601 if err != nil { 602 return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args[:i+1]) 603 } 604 path = q 605 trimBytes(i + 1) 606 break Switch 607 } 608 } 609 if i >= len(args) { 610 return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args) 611 } 612 } 613 614 if args != "" { 615 r, _ := utf8.DecodeRuneInString(args) 616 if !unicode.IsSpace(r) { 617 return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args) 618 } 619 } 620 list = append(list, fileEmbed{path, pathPos}) 621 } 622 return list, nil 623} 624