xref: /aosp_15_r20/external/bazelbuild-rules_go/go/tools/builders/read.go (revision 9bb1b549b6a84214c53be0924760be030e66b93a)
1// Copyright 2012 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// This file was adapted from Go src/go/build/read.go at commit 8634a234df2a
6// on 2021-01-26. It's used to extract metadata from .go files without requiring
7// them to be in the same directory.
8
9package main
10
11import (
12	"bufio"
13	"errors"
14	"fmt"
15	"go/ast"
16	"go/parser"
17	"go/token"
18	"io"
19	"strconv"
20	"strings"
21	"unicode"
22	"unicode/utf8"
23)
24
// importReader scans the leading portion of a Go source file one byte at a
// time. It remembers every byte it has read through readByte so that the
// scanned prefix can later be handed to the parser or replayed through
// readByteNoBuf.
type importReader struct {
	// b is the buffered input the bytes are read from.
	b *bufio.Reader

	// buf holds the bytes appended by readByte; readByteNoBuf drains it
	// from the front before touching b again.
	buf []byte

	// peek is the byte most recently returned by peekByte and not yet
	// consumed. Zero means no byte is pending.
	peek byte

	// err is the first error recorded (I/O error, NUL byte, or syntax error).
	err error

	// eof is set once a read from b has returned io.EOF.
	eof bool

	// nerr counts peekByte calls made after err was set; peekByte panics
	// when it grows past 10000.
	nerr int

	// pos is the position advanced by readByteNoBuf.
	pos token.Position
}
34
35func newImportReader(name string, r io.Reader) *importReader {
36	return &importReader{
37		b: bufio.NewReader(r),
38		pos: token.Position{
39			Filename: name,
40			Line:     1,
41			Column:   1,
42		},
43	}
44}
45
// isIdent reports whether c can be part of an identifier for the purposes of
// this reader: an ASCII letter, an ASCII digit, an underscore, or any byte
// that belongs to a multi-byte UTF-8 sequence (c >= utf8.RuneSelf).
func isIdent(c byte) bool {
	switch {
	case c == '_':
		return true
	case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z':
		return true
	case '0' <= c && c <= '9':
		return true
	}
	return c >= utf8.RuneSelf
}
49
// Sentinel errors recorded in importReader.err.
var (
	// errSyntax is recorded by syntaxError when the input does not have the
	// shape the reader expects.
	errSyntax = errors.New("syntax error")

	// errNUL is recorded when a NUL byte is read from the input.
	errNUL = errors.New("unexpected NUL in input")
)
54
55// syntaxError records a syntax error, but only if an I/O error has not already been recorded.
56func (r *importReader) syntaxError() {
57	if r.err == nil {
58		r.err = errSyntax
59	}
60}
61
62// readByte reads the next byte from the input, saves it in buf, and returns it.
63// If an error occurs, readByte records the error in r.err and returns 0.
64func (r *importReader) readByte() byte {
65	c, err := r.b.ReadByte()
66	if err == nil {
67		r.buf = append(r.buf, c)
68		if c == 0 {
69			err = errNUL
70		}
71	}
72	if err != nil {
73		if err == io.EOF {
74			r.eof = true
75		} else if r.err == nil {
76			r.err = err
77		}
78		c = 0
79	}
80	return c
81}
82
83// readByteNoBuf is like readByte but doesn't buffer the byte.
84// It exhausts r.buf before reading from r.b.
85func (r *importReader) readByteNoBuf() byte {
86	var c byte
87	var err error
88	if len(r.buf) > 0 {
89		c = r.buf[0]
90		r.buf = r.buf[1:]
91	} else {
92		c, err = r.b.ReadByte()
93		if err == nil && c == 0 {
94			err = errNUL
95		}
96	}
97
98	if err != nil {
99		if err == io.EOF {
100			r.eof = true
101		} else if r.err == nil {
102			r.err = err
103		}
104		return 0
105	}
106	r.pos.Offset++
107	if c == '\n' {
108		r.pos.Line++
109		r.pos.Column = 1
110	} else {
111		r.pos.Column++
112	}
113	return c
114}
115
116// peekByte returns the next byte from the input reader but does not advance beyond it.
117// If skipSpace is set, peekByte skips leading spaces and comments.
118func (r *importReader) peekByte(skipSpace bool) byte {
119	if r.err != nil {
120		if r.nerr++; r.nerr > 10000 {
121			panic("go/build: import reader looping")
122		}
123		return 0
124	}
125
126	// Use r.peek as first input byte.
127	// Don't just return r.peek here: it might have been left by peekByte(false)
128	// and this might be peekByte(true).
129	c := r.peek
130	if c == 0 {
131		c = r.readByte()
132	}
133	for r.err == nil && !r.eof {
134		if skipSpace {
135			// For the purposes of this reader, semicolons are never necessary to
136			// understand the input and are treated as spaces.
137			switch c {
138			case ' ', '\f', '\t', '\r', '\n', ';':
139				c = r.readByte()
140				continue
141
142			case '/':
143				c = r.readByte()
144				if c == '/' {
145					for c != '\n' && r.err == nil && !r.eof {
146						c = r.readByte()
147					}
148				} else if c == '*' {
149					var c1 byte
150					for (c != '*' || c1 != '/') && r.err == nil {
151						if r.eof {
152							r.syntaxError()
153						}
154						c, c1 = c1, r.readByte()
155					}
156				} else {
157					r.syntaxError()
158				}
159				c = r.readByte()
160				continue
161			}
162		}
163		break
164	}
165	r.peek = c
166	return r.peek
167}
168
169// nextByte is like peekByte but advances beyond the returned byte.
170func (r *importReader) nextByte(skipSpace bool) byte {
171	c := r.peekByte(skipSpace)
172	r.peek = 0
173	return c
174}
175
176var goEmbed = []byte("go:embed")
177
178// findEmbed advances the input reader to the next //go:embed comment.
179// It reports whether it found a comment.
180// (Otherwise it found an error or EOF.)
181func (r *importReader) findEmbed(first bool) bool {
182	// The import block scan stopped after a non-space character,
183	// so the reader is not at the start of a line on the first call.
184	// After that, each //go:embed extraction leaves the reader
185	// at the end of a line.
186	startLine := !first
187	var c byte
188	for r.err == nil && !r.eof {
189		c = r.readByteNoBuf()
190	Reswitch:
191		switch c {
192		default:
193			startLine = false
194
195		case '\n':
196			startLine = true
197
198		case ' ', '\t':
199			// leave startLine alone
200
201		case '"':
202			startLine = false
203			for r.err == nil {
204				if r.eof {
205					r.syntaxError()
206				}
207				c = r.readByteNoBuf()
208				if c == '\\' {
209					r.readByteNoBuf()
210					if r.err != nil {
211						r.syntaxError()
212						return false
213					}
214					continue
215				}
216				if c == '"' {
217					c = r.readByteNoBuf()
218					goto Reswitch
219				}
220			}
221			goto Reswitch
222
223		case '`':
224			startLine = false
225			for r.err == nil {
226				if r.eof {
227					r.syntaxError()
228				}
229				c = r.readByteNoBuf()
230				if c == '`' {
231					c = r.readByteNoBuf()
232					goto Reswitch
233				}
234			}
235
236		case '/':
237			c = r.readByteNoBuf()
238			switch c {
239			default:
240				startLine = false
241				goto Reswitch
242
243			case '*':
244				var c1 byte
245				for (c != '*' || c1 != '/') && r.err == nil {
246					if r.eof {
247						r.syntaxError()
248					}
249					c, c1 = c1, r.readByteNoBuf()
250				}
251				startLine = false
252
253			case '/':
254				if startLine {
255					// Try to read this as a //go:embed comment.
256					for i := range goEmbed {
257						c = r.readByteNoBuf()
258						if c != goEmbed[i] {
259							goto SkipSlashSlash
260						}
261					}
262					c = r.readByteNoBuf()
263					if c == ' ' || c == '\t' {
264						// Found one!
265						return true
266					}
267				}
268			SkipSlashSlash:
269				for c != '\n' && r.err == nil && !r.eof {
270					c = r.readByteNoBuf()
271				}
272				startLine = true
273			}
274		}
275	}
276	return false
277}
278
279// readKeyword reads the given keyword from the input.
280// If the keyword is not present, readKeyword records a syntax error.
281func (r *importReader) readKeyword(kw string) {
282	r.peekByte(true)
283	for i := 0; i < len(kw); i++ {
284		if r.nextByte(false) != kw[i] {
285			r.syntaxError()
286			return
287		}
288	}
289	if isIdent(r.peekByte(false)) {
290		r.syntaxError()
291	}
292}
293
294// readIdent reads an identifier from the input.
295// If an identifier is not present, readIdent records a syntax error.
296func (r *importReader) readIdent() {
297	c := r.peekByte(true)
298	if !isIdent(c) {
299		r.syntaxError()
300		return
301	}
302	for isIdent(r.peekByte(false)) {
303		r.peek = 0
304	}
305}
306
307// readString reads a quoted string literal from the input.
308// If an identifier is not present, readString records a syntax error.
309func (r *importReader) readString() {
310	switch r.nextByte(true) {
311	case '`':
312		for r.err == nil {
313			if r.nextByte(false) == '`' {
314				break
315			}
316			if r.eof {
317				r.syntaxError()
318			}
319		}
320	case '"':
321		for r.err == nil {
322			c := r.nextByte(false)
323			if c == '"' {
324				break
325			}
326			if r.eof || c == '\n' {
327				r.syntaxError()
328			}
329			if c == '\\' {
330				r.nextByte(false)
331			}
332		}
333	default:
334		r.syntaxError()
335	}
336}
337
338// readImport reads an import clause - optional identifier followed by quoted string -
339// from the input.
340func (r *importReader) readImport() {
341	c := r.peekByte(true)
342	if c == '.' {
343		r.peek = 0
344	} else if isIdent(c) {
345		r.readIdent()
346	}
347	r.readString()
348}
349
350// readComments is like io.ReadAll, except that it only reads the leading
351// block of comments in the file.
352func readComments(f io.Reader) ([]byte, error) {
353	r := newImportReader("", f)
354	r.peekByte(true)
355	if r.err == nil && !r.eof {
356		// Didn't reach EOF, so must have found a non-space byte. Remove it.
357		r.buf = r.buf[:len(r.buf)-1]
358	}
359	return r.buf, r.err
360}
361
362// readGoInfo expects a Go file as input and reads the file up to and including the import section.
363// It records what it learned in *info.
364// If info.fset is non-nil, readGoInfo parses the file and sets info.parsed, info.parseErr,
365// info.imports, info.embeds, and info.embedErr.
366//
367// It only returns an error if there are problems reading the file,
368// not for syntax errors in the file itself.
369func readGoInfo(f io.Reader, info *fileInfo) error {
370	r := newImportReader(info.filename, f)
371
372	r.readKeyword("package")
373	r.readIdent()
374	for r.peekByte(true) == 'i' {
375		r.readKeyword("import")
376		if r.peekByte(true) == '(' {
377			r.nextByte(false)
378			for r.peekByte(true) != ')' && r.err == nil {
379				r.readImport()
380			}
381			r.nextByte(false)
382		} else {
383			r.readImport()
384		}
385	}
386
387	info.header = r.buf
388
389	// If we stopped successfully before EOF, we read a byte that told us we were done.
390	// Return all but that last byte, which would cause a syntax error if we let it through.
391	if r.err == nil && !r.eof {
392		info.header = r.buf[:len(r.buf)-1]
393	}
394
395	// If we stopped for a syntax error, consume the whole file so that
396	// we are sure we don't change the errors that go/parser returns.
397	if r.err == errSyntax {
398		r.err = nil
399		for r.err == nil && !r.eof {
400			r.readByte()
401		}
402		info.header = r.buf
403	}
404	if r.err != nil {
405		return r.err
406	}
407
408	if info.fset == nil {
409		return nil
410	}
411
412	// Parse file header & record imports.
413	info.parsed, info.parseErr = parser.ParseFile(info.fset, info.filename, info.header, parser.ImportsOnly|parser.ParseComments)
414	if info.parseErr != nil {
415		return nil
416	}
417	info.pkg = info.parsed.Name.Name
418
419	hasEmbed := false
420	for _, decl := range info.parsed.Decls {
421		d, ok := decl.(*ast.GenDecl)
422		if !ok {
423			continue
424		}
425		for _, dspec := range d.Specs {
426			spec, ok := dspec.(*ast.ImportSpec)
427			if !ok {
428				continue
429			}
430			quoted := spec.Path.Value
431			path, err := strconv.Unquote(quoted)
432			if err != nil {
433				return fmt.Errorf("parser returned invalid quoted string: <%s>", quoted)
434			}
435			if path == "embed" {
436				hasEmbed = true
437			}
438
439			doc := spec.Doc
440			if doc == nil && len(d.Specs) == 1 {
441				doc = d.Doc
442			}
443			info.imports = append(info.imports, fileImport{path, spec.Pos(), doc})
444		}
445	}
446
447	// If the file imports "embed",
448	// we have to look for //go:embed comments
449	// in the remainder of the file.
450	// The compiler will enforce the mapping of comments to
451	// declared variables. We just need to know the patterns.
452	// If there were //go:embed comments earlier in the file
453	// (near the package statement or imports), the compiler
454	// will reject them. They can be (and have already been) ignored.
455	if hasEmbed {
456		var line []byte
457		for first := true; r.findEmbed(first); first = false {
458			line = line[:0]
459			pos := r.pos
460			for {
461				c := r.readByteNoBuf()
462				if c == '\n' || r.err != nil || r.eof {
463					break
464				}
465				line = append(line, c)
466			}
467			// Add args if line is well-formed.
468			// Ignore badly-formed lines - the compiler will report them when it finds them,
469			// and we can pretend they are not there to help go list succeed with what it knows.
470			embs, err := parseGoEmbed(string(line), pos)
471			if err == nil {
472				info.embeds = append(info.embeds, embs...)
473			}
474		}
475	}
476
477	return nil
478}
479
480// parseGoEmbed parses the text following "//go:embed" to extract the glob patterns.
481// It accepts unquoted space-separated patterns as well as double-quoted and back-quoted Go strings.
482// This is based on a similar function in cmd/compile/internal/gc/noder.go;
483// this version calculates position information as well.
484func parseGoEmbed(args string, pos token.Position) ([]fileEmbed, error) {
485	trimBytes := func(n int) {
486		pos.Offset += n
487		pos.Column += utf8.RuneCountInString(args[:n])
488		args = args[n:]
489	}
490	trimSpace := func() {
491		trim := strings.TrimLeftFunc(args, unicode.IsSpace)
492		trimBytes(len(args) - len(trim))
493	}
494
495	var list []fileEmbed
496	for trimSpace(); args != ""; trimSpace() {
497		var path string
498		pathPos := pos
499	Switch:
500		switch args[0] {
501		default:
502			i := len(args)
503			for j, c := range args {
504				if unicode.IsSpace(c) {
505					i = j
506					break
507				}
508			}
509			path = args[:i]
510			trimBytes(i)
511
512		case '`':
513			i := strings.Index(args[1:], "`")
514			if i < 0 {
515				return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
516			}
517			path = args[1 : 1+i]
518			trimBytes(1 + i + 1)
519
520		case '"':
521			i := 1
522			for ; i < len(args); i++ {
523				if args[i] == '\\' {
524					i++
525					continue
526				}
527				if args[i] == '"' {
528					q, err := strconv.Unquote(args[:i+1])
529					if err != nil {
530						return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args[:i+1])
531					}
532					path = q
533					trimBytes(i + 1)
534					break Switch
535				}
536			}
537			if i >= len(args) {
538				return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
539			}
540		}
541
542		if args != "" {
543			r, _ := utf8.DecodeRuneInString(args)
544			if !unicode.IsSpace(r) {
545				return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
546			}
547		}
548		list = append(list, fileEmbed{path, pathPos})
549	}
550	return list, nil
551}
552