1// Copyright 2012 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package build
6
7import (
8	"bufio"
9	"bytes"
10	"errors"
11	"fmt"
12	"go/ast"
13	"go/parser"
14	"go/scanner"
15	"go/token"
16	"io"
17	"strconv"
18	"strings"
19	"unicode"
20	"unicode/utf8"
21	_ "unsafe" // for linkname
22)
23
// importReader is a small scanner over Go source text. It is used to read
// the leading comments, the package clause and the import declarations of a
// file, and afterwards to search the remainder for //go:embed comments.
type importReader struct {
	// b is the buffered input being scanned.
	b    *bufio.Reader
	// buf holds the bytes read by readByte, in order.
	// readByteNoBuf drains it from the front before reading more from b.
	buf  []byte
	// peek is the byte most recently returned by peekByte and not yet
	// consumed; 0 means no byte is pending.
	peek byte
	// err is the first error recorded: errSyntax, errNUL, or an error
	// returned by b other than io.EOF.
	err  error
	// eof is set once b has returned io.EOF.
	eof  bool
	// nerr counts peekByte calls made after err was set.
	nerr int
	// pos is the position of the next byte readByteNoBuf will return.
	pos  token.Position
}
33
// bom is the UTF-8 encoding of the byte order mark U+FEFF.
var bom = []byte{0xef, 0xbb, 0xbf}
35
36func newImportReader(name string, r io.Reader) *importReader {
37	b := bufio.NewReader(r)
38	// Remove leading UTF-8 BOM.
39	// Per https://golang.org/ref/spec#Source_code_representation:
40	// a compiler may ignore a UTF-8-encoded byte order mark (U+FEFF)
41	// if it is the first Unicode code point in the source text.
42	if leadingBytes, err := b.Peek(3); err == nil && bytes.Equal(leadingBytes, bom) {
43		b.Discard(3)
44	}
45	return &importReader{
46		b: b,
47		pos: token.Position{
48			Filename: name,
49			Line:     1,
50			Column:   1,
51		},
52	}
53}
54
// isIdent reports whether c can be part of an identifier for the purposes of
// this reader: an ASCII letter, an ASCII digit, an underscore, or any byte
// of a multi-byte UTF-8 sequence (c >= utf8.RuneSelf).
func isIdent(c byte) bool {
	switch {
	case c >= utf8.RuneSelf:
		return true
	case c == '_':
		return true
	case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z', '0' <= c && c <= '9':
		return true
	}
	return false
}
58
var (
	// errSyntax is recorded by syntaxError when the input does not have
	// the form the reader expects.
	errSyntax = errors.New("syntax error")
	// errNUL is recorded when a NUL byte is read from the input.
	errNUL    = errors.New("unexpected NUL in input")
)
63
64// syntaxError records a syntax error, but only if an I/O error has not already been recorded.
65func (r *importReader) syntaxError() {
66	if r.err == nil {
67		r.err = errSyntax
68	}
69}
70
71// readByte reads the next byte from the input, saves it in buf, and returns it.
72// If an error occurs, readByte records the error in r.err and returns 0.
73func (r *importReader) readByte() byte {
74	c, err := r.b.ReadByte()
75	if err == nil {
76		r.buf = append(r.buf, c)
77		if c == 0 {
78			err = errNUL
79		}
80	}
81	if err != nil {
82		if err == io.EOF {
83			r.eof = true
84		} else if r.err == nil {
85			r.err = err
86		}
87		c = 0
88	}
89	return c
90}
91
92// readByteNoBuf is like readByte but doesn't buffer the byte.
93// It exhausts r.buf before reading from r.b.
94func (r *importReader) readByteNoBuf() byte {
95	var c byte
96	var err error
97	if len(r.buf) > 0 {
98		c = r.buf[0]
99		r.buf = r.buf[1:]
100	} else {
101		c, err = r.b.ReadByte()
102		if err == nil && c == 0 {
103			err = errNUL
104		}
105	}
106
107	if err != nil {
108		if err == io.EOF {
109			r.eof = true
110		} else if r.err == nil {
111			r.err = err
112		}
113		return 0
114	}
115	r.pos.Offset++
116	if c == '\n' {
117		r.pos.Line++
118		r.pos.Column = 1
119	} else {
120		r.pos.Column++
121	}
122	return c
123}
124
// peekByte returns the next byte from the input reader but does not advance beyond it.
// If skipSpace is set, peekByte skips leading spaces and comments.
//
// Once an error has been recorded in r.err, peekByte returns 0 without
// reading anything. It panics after more than 10000 such calls, on the
// assumption that the caller is looping without noticing the error.
// At end of input the returned byte is 0 as well.
func (r *importReader) peekByte(skipSpace bool) byte {
	if r.err != nil {
		if r.nerr++; r.nerr > 10000 {
			panic("go/build: import reader looping")
		}
		return 0
	}

	// Use r.peek as first input byte.
	// Don't just return r.peek here: it might have been left by peekByte(false)
	// and this might be peekByte(true).
	c := r.peek
	if c == 0 {
		c = r.readByte()
	}
	for r.err == nil && !r.eof {
		if skipSpace {
			// For the purposes of this reader, semicolons are never necessary to
			// understand the input and are treated as spaces.
			switch c {
			case ' ', '\f', '\t', '\r', '\n', ';':
				c = r.readByte()
				continue

			case '/':
				c = r.readByte()
				if c == '/' {
					// Line comment: read through the terminating newline (or EOF/error).
					for c != '\n' && r.err == nil && !r.eof {
						c = r.readByte()
					}
				} else if c == '*' {
					// Block comment: c and c1 hold the last two bytes read;
					// stop once they are "*/". Hitting EOF first is a syntax error.
					var c1 byte
					for (c != '*' || c1 != '/') && r.err == nil {
						if r.eof {
							r.syntaxError()
						}
						c, c1 = c1, r.readByte()
					}
				} else {
					// A lone '/' cannot appear where space is being skipped.
					r.syntaxError()
				}
				// Read the byte that follows the comment and keep skipping.
				c = r.readByte()
				continue
			}
		}
		break
	}
	// Remember the byte so that the next peekByte or nextByte sees it again.
	r.peek = c
	return r.peek
}
177
178// nextByte is like peekByte but advances beyond the returned byte.
179func (r *importReader) nextByte(skipSpace bool) byte {
180	c := r.peekByte(skipSpace)
181	r.peek = 0
182	return c
183}
184
// goEmbed is the text findEmbed expects immediately after "//"
// in a //go:embed comment.
var goEmbed = []byte("go:embed")
186
// findEmbed advances the input reader to the next //go:embed comment.
// It reports whether it found a comment.
// (Otherwise it found an error or EOF.)
//
// first must be true on the first call and false afterwards; it determines
// whether the reader is assumed to be at the start of a line.
// A //go:embed comment is recognized only when "//" is the first thing on
// its line other than spaces and tabs, and "go:embed" is followed by a space
// or tab. On success the reader is positioned just after that space or tab.
// String, rune and comment contents are skipped so that text inside them
// is never mistaken for a directive.
func (r *importReader) findEmbed(first bool) bool {
	// The import block scan stopped after a non-space character,
	// so the reader is not at the start of a line on the first call.
	// After that, each //go:embed extraction leaves the reader
	// at the end of a line.
	startLine := !first
	var c byte
	for r.err == nil && !r.eof {
		c = r.readByteNoBuf()
	Reswitch:
		switch c {
		default:
			startLine = false

		case '\n':
			startLine = true

		case ' ', '\t':
			// leave startLine alone

		case '"':
			// Interpreted string literal: skip to the closing quote,
			// stepping over backslash escapes. EOF inside it is a syntax error.
			startLine = false
			for r.err == nil {
				if r.eof {
					r.syntaxError()
				}
				c = r.readByteNoBuf()
				if c == '\\' {
					r.readByteNoBuf()
					if r.err != nil {
						r.syntaxError()
						return false
					}
					continue
				}
				if c == '"' {
					// Re-dispatch on the byte after the closing quote.
					c = r.readByteNoBuf()
					goto Reswitch
				}
			}
			goto Reswitch

		case '`':
			// Raw string literal: skip to the closing back quote; no escapes.
			startLine = false
			for r.err == nil {
				if r.eof {
					r.syntaxError()
				}
				c = r.readByteNoBuf()
				if c == '`' {
					c = r.readByteNoBuf()
					goto Reswitch
				}
			}

		case '\'':
			// Rune literal: skip to the closing quote, stepping over escapes.
			startLine = false
			for r.err == nil {
				if r.eof {
					r.syntaxError()
				}
				c = r.readByteNoBuf()
				if c == '\\' {
					r.readByteNoBuf()
					if r.err != nil {
						r.syntaxError()
						return false
					}
					continue
				}
				if c == '\'' {
					c = r.readByteNoBuf()
					goto Reswitch
				}
			}

		case '/':
			c = r.readByteNoBuf()
			switch c {
			default:
				// Not a comment; handle the byte after '/' normally.
				startLine = false
				goto Reswitch

			case '*':
				// Block comment: c and c1 hold the last two bytes read;
				// stop once they are "*/". EOF inside it is a syntax error.
				var c1 byte
				for (c != '*' || c1 != '/') && r.err == nil {
					if r.eof {
						r.syntaxError()
					}
					c, c1 = c1, r.readByteNoBuf()
				}
				startLine = false

			case '/':
				if startLine {
					// Try to read this as a //go:embed comment.
					for i := range goEmbed {
						c = r.readByteNoBuf()
						if c != goEmbed[i] {
							goto SkipSlashSlash
						}
					}
					c = r.readByteNoBuf()
					if c == ' ' || c == '\t' {
						// Found one!
						return true
					}
				}
			SkipSlashSlash:
				// Ordinary line comment (or a near miss): discard the rest of the line.
				for c != '\n' && r.err == nil && !r.eof {
					c = r.readByteNoBuf()
				}
				startLine = true
			}
		}
	}
	return false
}
308
309// readKeyword reads the given keyword from the input.
310// If the keyword is not present, readKeyword records a syntax error.
311func (r *importReader) readKeyword(kw string) {
312	r.peekByte(true)
313	for i := 0; i < len(kw); i++ {
314		if r.nextByte(false) != kw[i] {
315			r.syntaxError()
316			return
317		}
318	}
319	if isIdent(r.peekByte(false)) {
320		r.syntaxError()
321	}
322}
323
324// readIdent reads an identifier from the input.
325// If an identifier is not present, readIdent records a syntax error.
326func (r *importReader) readIdent() {
327	c := r.peekByte(true)
328	if !isIdent(c) {
329		r.syntaxError()
330		return
331	}
332	for isIdent(r.peekByte(false)) {
333		r.peek = 0
334	}
335}
336
337// readString reads a quoted string literal from the input.
338// If an identifier is not present, readString records a syntax error.
339func (r *importReader) readString() {
340	switch r.nextByte(true) {
341	case '`':
342		for r.err == nil {
343			if r.nextByte(false) == '`' {
344				break
345			}
346			if r.eof {
347				r.syntaxError()
348			}
349		}
350	case '"':
351		for r.err == nil {
352			c := r.nextByte(false)
353			if c == '"' {
354				break
355			}
356			if r.eof || c == '\n' {
357				r.syntaxError()
358			}
359			if c == '\\' {
360				r.nextByte(false)
361			}
362		}
363	default:
364		r.syntaxError()
365	}
366}
367
368// readImport reads an import clause - optional identifier followed by quoted string -
369// from the input.
370func (r *importReader) readImport() {
371	c := r.peekByte(true)
372	if c == '.' {
373		r.peek = 0
374	} else if isIdent(c) {
375		r.readIdent()
376	}
377	r.readString()
378}
379
380// readComments is like io.ReadAll, except that it only reads the leading
381// block of comments in the file.
382//
383// readComments should be an internal detail,
384// but widely used packages access it using linkname.
385// Notable members of the hall of shame include:
386//   - github.com/bazelbuild/bazel-gazelle
387//
388// Do not remove or change the type signature.
389// See go.dev/issue/67401.
390//
391//go:linkname readComments
392func readComments(f io.Reader) ([]byte, error) {
393	r := newImportReader("", f)
394	r.peekByte(true)
395	if r.err == nil && !r.eof {
396		// Didn't reach EOF, so must have found a non-space byte. Remove it.
397		r.buf = r.buf[:len(r.buf)-1]
398	}
399	return r.buf, r.err
400}
401
// readGoInfo expects a Go file as input and reads the file up to and including the import section.
// It records what it learned in *info.
// If info.fset is non-nil, readGoInfo parses the file and sets info.parsed, info.parseErr,
// info.imports and info.embeds.
//
// It only returns an error if there are problems reading the file,
// not for syntax errors in the file itself.
//
// info.header is always set to the bytes read: the file up to the end of the
// import section, or the whole file if the scan hit a syntax error.
// When the file parses, //go: comments that precede the package clause are
// appended to info.directives, and if the file imports "embed" the rest of
// the input is scanned for //go:embed patterns.
func readGoInfo(f io.Reader, info *fileInfo) error {
	r := newImportReader(info.name, f)

	// Scan "package name" followed by any number of import declarations.
	// The loop only checks for a leading 'i'; readKeyword verifies the rest.
	r.readKeyword("package")
	r.readIdent()
	for r.peekByte(true) == 'i' {
		r.readKeyword("import")
		if r.peekByte(true) == '(' {
			// Parenthesized group: read clauses until the closing ')'.
			r.nextByte(false)
			for r.peekByte(true) != ')' && r.err == nil {
				r.readImport()
			}
			r.nextByte(false)
		} else {
			r.readImport()
		}
	}

	info.header = r.buf

	// If we stopped successfully before EOF, we read a byte that told us we were done.
	// Return all but that last byte, which would cause a syntax error if we let it through.
	if r.err == nil && !r.eof {
		info.header = r.buf[:len(r.buf)-1]
	}

	// If we stopped for a syntax error, consume the whole file so that
	// we are sure we don't change the errors that go/parser returns.
	if r.err == errSyntax {
		r.err = nil
		for r.err == nil && !r.eof {
			r.readByte()
		}
		info.header = r.buf
	}
	// Anything still recorded here is a read problem (or a NUL byte), not a syntax error.
	if r.err != nil {
		return r.err
	}

	// Without a file set the caller only wants the header bytes.
	if info.fset == nil {
		return nil
	}

	// Parse file header & record imports.
	info.parsed, info.parseErr = parser.ParseFile(info.fset, info.name, info.header, parser.ImportsOnly|parser.ParseComments)
	if info.parseErr != nil {
		// Parse errors are reported through info.parseErr, not the return value.
		return nil
	}

	hasEmbed := false
	for _, decl := range info.parsed.Decls {
		d, ok := decl.(*ast.GenDecl)
		if !ok {
			continue
		}
		for _, dspec := range d.Specs {
			spec, ok := dspec.(*ast.ImportSpec)
			if !ok {
				continue
			}
			quoted := spec.Path.Value
			path, err := strconv.Unquote(quoted)
			if err != nil {
				return fmt.Errorf("parser returned invalid quoted string: <%s>", quoted)
			}
			if !isValidImport(path) {
				// The parser used to return a parse error for invalid import paths, but
				// no longer does, so check for and create the error here instead.
				info.parseErr = scanner.Error{Pos: info.fset.Position(spec.Pos()), Msg: "invalid import path: " + path}
				info.imports = nil
				return nil
			}
			if path == "embed" {
				hasEmbed = true
			}

			// Use the spec's own doc comment; for a declaration with a single
			// spec, fall back to the doc comment on the declaration.
			doc := spec.Doc
			if doc == nil && len(d.Specs) == 1 {
				doc = d.Doc
			}
			info.imports = append(info.imports, fileImport{path, spec.Pos(), doc})
		}
	}

	// Extract directives.
	// Only comment groups that begin before the package clause are considered.
	for _, group := range info.parsed.Comments {
		if group.Pos() >= info.parsed.Package {
			break
		}
		for _, c := range group.List {
			if strings.HasPrefix(c.Text, "//go:") {
				info.directives = append(info.directives, Directive{c.Text, info.fset.Position(c.Slash)})
			}
		}
	}

	// If the file imports "embed",
	// we have to look for //go:embed comments
	// in the remainder of the file.
	// The compiler will enforce the mapping of comments to
	// declared variables. We just need to know the patterns.
	// If there were //go:embed comments earlier in the file
	// (near the package statement or imports), the compiler
	// will reject them. They can be (and have already been) ignored.
	if hasEmbed {
		var line []byte
		for first := true; r.findEmbed(first); first = false {
			// Collect the rest of the comment line; pos is where its text begins.
			line = line[:0]
			pos := r.pos
			for {
				c := r.readByteNoBuf()
				if c == '\n' || r.err != nil || r.eof {
					break
				}
				line = append(line, c)
			}
			// Add args if line is well-formed.
			// Ignore badly-formed lines - the compiler will report them when it finds them,
			// and we can pretend they are not there to help go list succeed with what it knows.
			embs, err := parseGoEmbed(string(line), pos)
			if err == nil {
				info.embeds = append(info.embeds, embs...)
			}
		}
	}

	return nil
}
537
// isValidImport reports whether s is acceptable as an import path under the
// stricter checks permitted by the implementation restriction in
// https://go.dev/ref/spec#Import_declarations: s must be non-empty and
// consist only of graphic, non-space characters outside a fixed set of
// punctuation (and not U+FFFD).
// It was ported from the function of the same name that was removed from the
// parser in CL 424855, when the parser stopped doing these checks.
func isValidImport(s string) bool {
	if s == "" {
		return false
	}
	const illegalChars = `!"#$%&'()*,:;<=>?[\]^{|}` + "`\uFFFD"
	for _, r := range s {
		switch {
		case !unicode.IsGraphic(r),
			unicode.IsSpace(r),
			strings.ContainsRune(illegalChars, r):
			return false
		}
	}
	return true
}
551
552// parseGoEmbed parses the text following "//go:embed" to extract the glob patterns.
553// It accepts unquoted space-separated patterns as well as double-quoted and back-quoted Go strings.
554// This is based on a similar function in cmd/compile/internal/gc/noder.go;
555// this version calculates position information as well.
556func parseGoEmbed(args string, pos token.Position) ([]fileEmbed, error) {
557	trimBytes := func(n int) {
558		pos.Offset += n
559		pos.Column += utf8.RuneCountInString(args[:n])
560		args = args[n:]
561	}
562	trimSpace := func() {
563		trim := strings.TrimLeftFunc(args, unicode.IsSpace)
564		trimBytes(len(args) - len(trim))
565	}
566
567	var list []fileEmbed
568	for trimSpace(); args != ""; trimSpace() {
569		var path string
570		pathPos := pos
571	Switch:
572		switch args[0] {
573		default:
574			i := len(args)
575			for j, c := range args {
576				if unicode.IsSpace(c) {
577					i = j
578					break
579				}
580			}
581			path = args[:i]
582			trimBytes(i)
583
584		case '`':
585			var ok bool
586			path, _, ok = strings.Cut(args[1:], "`")
587			if !ok {
588				return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
589			}
590			trimBytes(1 + len(path) + 1)
591
592		case '"':
593			i := 1
594			for ; i < len(args); i++ {
595				if args[i] == '\\' {
596					i++
597					continue
598				}
599				if args[i] == '"' {
600					q, err := strconv.Unquote(args[:i+1])
601					if err != nil {
602						return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args[:i+1])
603					}
604					path = q
605					trimBytes(i + 1)
606					break Switch
607				}
608			}
609			if i >= len(args) {
610				return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
611			}
612		}
613
614		if args != "" {
615			r, _ := utf8.DecodeRuneInString(args)
616			if !unicode.IsSpace(r) {
617				return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
618			}
619		}
620		list = append(list, fileEmbed{path, pathPos})
621	}
622	return list, nil
623}
624