1// Copyright 2012 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5// This file was adapted from Go src/go/build/read.go at commit 8634a234df2a 6// on 2021-01-26. It's used to extract metadata from .go files without requiring 7// them to be in the same directory. 8 9package main 10 11import ( 12 "bufio" 13 "errors" 14 "fmt" 15 "go/ast" 16 "go/parser" 17 "go/token" 18 "io" 19 "strconv" 20 "strings" 21 "unicode" 22 "unicode/utf8" 23) 24 25type importReader struct { 26 b *bufio.Reader 27 buf []byte 28 peek byte 29 err error 30 eof bool 31 nerr int 32 pos token.Position 33} 34 35func newImportReader(name string, r io.Reader) *importReader { 36 return &importReader{ 37 b: bufio.NewReader(r), 38 pos: token.Position{ 39 Filename: name, 40 Line: 1, 41 Column: 1, 42 }, 43 } 44} 45 46func isIdent(c byte) bool { 47 return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' || c == '_' || c >= utf8.RuneSelf 48} 49 50var ( 51 errSyntax = errors.New("syntax error") 52 errNUL = errors.New("unexpected NUL in input") 53) 54 55// syntaxError records a syntax error, but only if an I/O error has not already been recorded. 56func (r *importReader) syntaxError() { 57 if r.err == nil { 58 r.err = errSyntax 59 } 60} 61 62// readByte reads the next byte from the input, saves it in buf, and returns it. 63// If an error occurs, readByte records the error in r.err and returns 0. 64func (r *importReader) readByte() byte { 65 c, err := r.b.ReadByte() 66 if err == nil { 67 r.buf = append(r.buf, c) 68 if c == 0 { 69 err = errNUL 70 } 71 } 72 if err != nil { 73 if err == io.EOF { 74 r.eof = true 75 } else if r.err == nil { 76 r.err = err 77 } 78 c = 0 79 } 80 return c 81} 82 83// readByteNoBuf is like readByte but doesn't buffer the byte. 84// It exhausts r.buf before reading from r.b. 85func (r *importReader) readByteNoBuf() byte { 86 var c byte 87 var err error 88 if len(r.buf) > 0 { 89 c = r.buf[0] 90 r.buf = r.buf[1:] 91 } else { 92 c, err = r.b.ReadByte() 93 if err == nil && c == 0 { 94 err = errNUL 95 } 96 } 97 98 if err != nil { 99 if err == io.EOF { 100 r.eof = true 101 } else if r.err == nil { 102 r.err = err 103 } 104 return 0 105 } 106 r.pos.Offset++ 107 if c == '\n' { 108 r.pos.Line++ 109 r.pos.Column = 1 110 } else { 111 r.pos.Column++ 112 } 113 return c 114} 115 116// peekByte returns the next byte from the input reader but does not advance beyond it. 117// If skipSpace is set, peekByte skips leading spaces and comments. 118func (r *importReader) peekByte(skipSpace bool) byte { 119 if r.err != nil { 120 if r.nerr++; r.nerr > 10000 { 121 panic("go/build: import reader looping") 122 } 123 return 0 124 } 125 126 // Use r.peek as first input byte. 127 // Don't just return r.peek here: it might have been left by peekByte(false) 128 // and this might be peekByte(true). 129 c := r.peek 130 if c == 0 { 131 c = r.readByte() 132 } 133 for r.err == nil && !r.eof { 134 if skipSpace { 135 // For the purposes of this reader, semicolons are never necessary to 136 // understand the input and are treated as spaces. 137 switch c { 138 case ' ', '\f', '\t', '\r', '\n', ';': 139 c = r.readByte() 140 continue 141 142 case '/': 143 c = r.readByte() 144 if c == '/' { 145 for c != '\n' && r.err == nil && !r.eof { 146 c = r.readByte() 147 } 148 } else if c == '*' { 149 var c1 byte 150 for (c != '*' || c1 != '/') && r.err == nil { 151 if r.eof { 152 r.syntaxError() 153 } 154 c, c1 = c1, r.readByte() 155 } 156 } else { 157 r.syntaxError() 158 } 159 c = r.readByte() 160 continue 161 } 162 } 163 break 164 } 165 r.peek = c 166 return r.peek 167} 168 169// nextByte is like peekByte but advances beyond the returned byte. 170func (r *importReader) nextByte(skipSpace bool) byte { 171 c := r.peekByte(skipSpace) 172 r.peek = 0 173 return c 174} 175 176var goEmbed = []byte("go:embed") 177 178// findEmbed advances the input reader to the next //go:embed comment. 179// It reports whether it found a comment. 180// (Otherwise it found an error or EOF.) 181func (r *importReader) findEmbed(first bool) bool { 182 // The import block scan stopped after a non-space character, 183 // so the reader is not at the start of a line on the first call. 184 // After that, each //go:embed extraction leaves the reader 185 // at the end of a line. 186 startLine := !first 187 var c byte 188 for r.err == nil && !r.eof { 189 c = r.readByteNoBuf() 190 Reswitch: 191 switch c { 192 default: 193 startLine = false 194 195 case '\n': 196 startLine = true 197 198 case ' ', '\t': 199 // leave startLine alone 200 201 case '"': 202 startLine = false 203 for r.err == nil { 204 if r.eof { 205 r.syntaxError() 206 } 207 c = r.readByteNoBuf() 208 if c == '\\' { 209 r.readByteNoBuf() 210 if r.err != nil { 211 r.syntaxError() 212 return false 213 } 214 continue 215 } 216 if c == '"' { 217 c = r.readByteNoBuf() 218 goto Reswitch 219 } 220 } 221 goto Reswitch 222 223 case '`': 224 startLine = false 225 for r.err == nil { 226 if r.eof { 227 r.syntaxError() 228 } 229 c = r.readByteNoBuf() 230 if c == '`' { 231 c = r.readByteNoBuf() 232 goto Reswitch 233 } 234 } 235 236 case '/': 237 c = r.readByteNoBuf() 238 switch c { 239 default: 240 startLine = false 241 goto Reswitch 242 243 case '*': 244 var c1 byte 245 for (c != '*' || c1 != '/') && r.err == nil { 246 if r.eof { 247 r.syntaxError() 248 } 249 c, c1 = c1, r.readByteNoBuf() 250 } 251 startLine = false 252 253 case '/': 254 if startLine { 255 // Try to read this as a //go:embed comment. 256 for i := range goEmbed { 257 c = r.readByteNoBuf() 258 if c != goEmbed[i] { 259 goto SkipSlashSlash 260 } 261 } 262 c = r.readByteNoBuf() 263 if c == ' ' || c == '\t' { 264 // Found one! 265 return true 266 } 267 } 268 SkipSlashSlash: 269 for c != '\n' && r.err == nil && !r.eof { 270 c = r.readByteNoBuf() 271 } 272 startLine = true 273 } 274 } 275 } 276 return false 277} 278 279// readKeyword reads the given keyword from the input. 280// If the keyword is not present, readKeyword records a syntax error. 281func (r *importReader) readKeyword(kw string) { 282 r.peekByte(true) 283 for i := 0; i < len(kw); i++ { 284 if r.nextByte(false) != kw[i] { 285 r.syntaxError() 286 return 287 } 288 } 289 if isIdent(r.peekByte(false)) { 290 r.syntaxError() 291 } 292} 293 294// readIdent reads an identifier from the input. 295// If an identifier is not present, readIdent records a syntax error. 296func (r *importReader) readIdent() { 297 c := r.peekByte(true) 298 if !isIdent(c) { 299 r.syntaxError() 300 return 301 } 302 for isIdent(r.peekByte(false)) { 303 r.peek = 0 304 } 305} 306 307// readString reads a quoted string literal from the input. 308// If an identifier is not present, readString records a syntax error. 309func (r *importReader) readString() { 310 switch r.nextByte(true) { 311 case '`': 312 for r.err == nil { 313 if r.nextByte(false) == '`' { 314 break 315 } 316 if r.eof { 317 r.syntaxError() 318 } 319 } 320 case '"': 321 for r.err == nil { 322 c := r.nextByte(false) 323 if c == '"' { 324 break 325 } 326 if r.eof || c == '\n' { 327 r.syntaxError() 328 } 329 if c == '\\' { 330 r.nextByte(false) 331 } 332 } 333 default: 334 r.syntaxError() 335 } 336} 337 338// readImport reads an import clause - optional identifier followed by quoted string - 339// from the input. 340func (r *importReader) readImport() { 341 c := r.peekByte(true) 342 if c == '.' { 343 r.peek = 0 344 } else if isIdent(c) { 345 r.readIdent() 346 } 347 r.readString() 348} 349 350// readComments is like io.ReadAll, except that it only reads the leading 351// block of comments in the file. 352func readComments(f io.Reader) ([]byte, error) { 353 r := newImportReader("", f) 354 r.peekByte(true) 355 if r.err == nil && !r.eof { 356 // Didn't reach EOF, so must have found a non-space byte. Remove it. 357 r.buf = r.buf[:len(r.buf)-1] 358 } 359 return r.buf, r.err 360} 361 362// readGoInfo expects a Go file as input and reads the file up to and including the import section. 363// It records what it learned in *info. 364// If info.fset is non-nil, readGoInfo parses the file and sets info.parsed, info.parseErr, 365// info.imports, info.embeds, and info.embedErr. 366// 367// It only returns an error if there are problems reading the file, 368// not for syntax errors in the file itself. 369func readGoInfo(f io.Reader, info *fileInfo) error { 370 r := newImportReader(info.filename, f) 371 372 r.readKeyword("package") 373 r.readIdent() 374 for r.peekByte(true) == 'i' { 375 r.readKeyword("import") 376 if r.peekByte(true) == '(' { 377 r.nextByte(false) 378 for r.peekByte(true) != ')' && r.err == nil { 379 r.readImport() 380 } 381 r.nextByte(false) 382 } else { 383 r.readImport() 384 } 385 } 386 387 info.header = r.buf 388 389 // If we stopped successfully before EOF, we read a byte that told us we were done. 390 // Return all but that last byte, which would cause a syntax error if we let it through. 391 if r.err == nil && !r.eof { 392 info.header = r.buf[:len(r.buf)-1] 393 } 394 395 // If we stopped for a syntax error, consume the whole file so that 396 // we are sure we don't change the errors that go/parser returns. 397 if r.err == errSyntax { 398 r.err = nil 399 for r.err == nil && !r.eof { 400 r.readByte() 401 } 402 info.header = r.buf 403 } 404 if r.err != nil { 405 return r.err 406 } 407 408 if info.fset == nil { 409 return nil 410 } 411 412 // Parse file header & record imports. 413 info.parsed, info.parseErr = parser.ParseFile(info.fset, info.filename, info.header, parser.ImportsOnly|parser.ParseComments) 414 if info.parseErr != nil { 415 return nil 416 } 417 info.pkg = info.parsed.Name.Name 418 419 hasEmbed := false 420 for _, decl := range info.parsed.Decls { 421 d, ok := decl.(*ast.GenDecl) 422 if !ok { 423 continue 424 } 425 for _, dspec := range d.Specs { 426 spec, ok := dspec.(*ast.ImportSpec) 427 if !ok { 428 continue 429 } 430 quoted := spec.Path.Value 431 path, err := strconv.Unquote(quoted) 432 if err != nil { 433 return fmt.Errorf("parser returned invalid quoted string: <%s>", quoted) 434 } 435 if path == "embed" { 436 hasEmbed = true 437 } 438 439 doc := spec.Doc 440 if doc == nil && len(d.Specs) == 1 { 441 doc = d.Doc 442 } 443 info.imports = append(info.imports, fileImport{path, spec.Pos(), doc}) 444 } 445 } 446 447 // If the file imports "embed", 448 // we have to look for //go:embed comments 449 // in the remainder of the file. 450 // The compiler will enforce the mapping of comments to 451 // declared variables. We just need to know the patterns. 452 // If there were //go:embed comments earlier in the file 453 // (near the package statement or imports), the compiler 454 // will reject them. They can be (and have already been) ignored. 455 if hasEmbed { 456 var line []byte 457 for first := true; r.findEmbed(first); first = false { 458 line = line[:0] 459 pos := r.pos 460 for { 461 c := r.readByteNoBuf() 462 if c == '\n' || r.err != nil || r.eof { 463 break 464 } 465 line = append(line, c) 466 } 467 // Add args if line is well-formed. 468 // Ignore badly-formed lines - the compiler will report them when it finds them, 469 // and we can pretend they are not there to help go list succeed with what it knows. 470 embs, err := parseGoEmbed(string(line), pos) 471 if err == nil { 472 info.embeds = append(info.embeds, embs...) 473 } 474 } 475 } 476 477 return nil 478} 479 480// parseGoEmbed parses the text following "//go:embed" to extract the glob patterns. 481// It accepts unquoted space-separated patterns as well as double-quoted and back-quoted Go strings. 482// This is based on a similar function in cmd/compile/internal/gc/noder.go; 483// this version calculates position information as well. 484func parseGoEmbed(args string, pos token.Position) ([]fileEmbed, error) { 485 trimBytes := func(n int) { 486 pos.Offset += n 487 pos.Column += utf8.RuneCountInString(args[:n]) 488 args = args[n:] 489 } 490 trimSpace := func() { 491 trim := strings.TrimLeftFunc(args, unicode.IsSpace) 492 trimBytes(len(args) - len(trim)) 493 } 494 495 var list []fileEmbed 496 for trimSpace(); args != ""; trimSpace() { 497 var path string 498 pathPos := pos 499 Switch: 500 switch args[0] { 501 default: 502 i := len(args) 503 for j, c := range args { 504 if unicode.IsSpace(c) { 505 i = j 506 break 507 } 508 } 509 path = args[:i] 510 trimBytes(i) 511 512 case '`': 513 i := strings.Index(args[1:], "`") 514 if i < 0 { 515 return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args) 516 } 517 path = args[1 : 1+i] 518 trimBytes(1 + i + 1) 519 520 case '"': 521 i := 1 522 for ; i < len(args); i++ { 523 if args[i] == '\\' { 524 i++ 525 continue 526 } 527 if args[i] == '"' { 528 q, err := strconv.Unquote(args[:i+1]) 529 if err != nil { 530 return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args[:i+1]) 531 } 532 path = q 533 trimBytes(i + 1) 534 break Switch 535 } 536 } 537 if i >= len(args) { 538 return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args) 539 } 540 } 541 542 if args != "" { 543 r, _ := utf8.DecodeRuneInString(args) 544 if !unicode.IsSpace(r) { 545 return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args) 546 } 547 } 548 list = append(list, fileEmbed{path, pathPos}) 549 } 550 return list, nil 551} 552