1// Copyright 2018 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package modfile
6
7import (
8	"bytes"
9	"errors"
10	"fmt"
11	"os"
12	"strconv"
13	"strings"
14	"unicode"
15	"unicode/utf8"
16)
17
// A Position identifies a location in the input by its line, its rune
// column within that line, and its byte offset from the start of the input.
type Position struct {
	Line     int // line in input (starting at 1)
	LineRune int // rune in line (starting at 1)
	Byte     int // byte in input (starting at 0)
}

// add returns the position reached after advancing past the text s,
// where s is assumed to begin at p.
func (p Position) add(s string) Position {
	end := p
	end.Byte += len(s)

	tail := s
	if nl := strings.LastIndex(s, "\n"); nl >= 0 {
		// s crosses at least one newline: advance the line count and
		// restart the column from the text after the final newline.
		end.Line += strings.Count(s, "\n")
		end.LineRune = 1
		tail = s[nl+1:]
	}
	end.LineRune += utf8.RuneCountInString(tail)
	return end
}
37
// An Expr represents an input element.
//
// In this file the pointer types *FileSyntax, *CommentBlock, *Line,
// *LineBlock, *LParen and *RParen each provide a Span method and embed
// Comments, which supplies the Comment method.
type Expr interface {
	// Span returns the start and end position of the expression,
	// excluding leading or trailing comments.
	Span() (start, end Position)

	// Comment returns the comments attached to the expression.
	// This method would normally be named 'Comments' but that
	// would interfere with embedding a type of the same name.
	Comment() *Comments
}
49
// A Comment represents a single // comment.
//
// The lexer builds Comment values positionally (Start, Token, Suffix),
// so the field order is significant.
type Comment struct {
	Start  Position // position at which the comment token begins
	Token  string   // without trailing newline
	Suffix bool     // an end of line (not whole line) comment
}
56
// Comments collects the comments associated with an expression.
type Comments struct {
	Before []Comment // whole-line comments before this expression
	Suffix []Comment // end-of-line comments after this expression

	// For top-level expressions only, After lists whole-line
	// comments following the expression.
	After []Comment
}

// Comment returns the receiver. This isn't useful by itself, but
// a [Comments] struct is embedded into all the expression
// implementation types, and this gives each of those a Comment
// method to satisfy the Expr interface.
//
// Because the receiver itself is returned, callers can modify the
// expression's comments through the result.
func (c *Comments) Comment() *Comments {
	return c
}
74
75// A FileSyntax represents an entire go.mod file.
76type FileSyntax struct {
77	Name string // file path
78	Comments
79	Stmt []Expr
80}
81
82func (x *FileSyntax) Span() (start, end Position) {
83	if len(x.Stmt) == 0 {
84		return
85	}
86	start, _ = x.Stmt[0].Span()
87	_, end = x.Stmt[len(x.Stmt)-1].Span()
88	return start, end
89}
90
// addLine adds a line containing the given tokens to the file.
//
// If the first token of the hint matches the first token of the
// line, the new line is added at the end of the block containing hint,
// extracting hint into a new block if it is not yet in one.
//
// If the hint is non-nil but its first token does not match,
// the new line is added after the block containing hint
// (or hint itself, if not in a block).
//
// If no hint is provided, addLine appends the line to the end of
// the last block with a matching first token,
// or to the end of the file if no such block exists.
//
// The returned *Line is the newly created line. When it is placed inside
// a block it has InBlock set and holds tokens[1:], because the block
// itself carries the shared first token. tokens[0] is compared against
// existing statements, so callers are expected to pass at least one token.
func (x *FileSyntax) addLine(hint Expr, tokens ...string) *Line {
	if hint == nil {
		// If no hint given, add to the last statement of the given type.
	Loop:
		for i := len(x.Stmt) - 1; i >= 0; i-- {
			stmt := x.Stmt[i]
			switch stmt := stmt.(type) {
			case *Line:
				// A nil Token marks a removed line; never use it as a hint.
				if stmt.Token != nil && stmt.Token[0] == tokens[0] {
					hint = stmt
					break Loop
				}
			case *LineBlock:
				if stmt.Token[0] == tokens[0] {
					hint = stmt
					break Loop
				}
			}
		}
	}

	// newLineAfter inserts a new top-level line holding all of tokens
	// immediately after x.Stmt[i] and returns it.
	newLineAfter := func(i int) *Line {
		new := &Line{Token: tokens}
		if i == len(x.Stmt) {
			x.Stmt = append(x.Stmt, new)
		} else {
			// Grow by one, shift everything after index i one slot to
			// the right, and drop the new line into the gap at i+1.
			x.Stmt = append(x.Stmt, nil)
			copy(x.Stmt[i+2:], x.Stmt[i+1:])
			x.Stmt[i+1] = new
		}
		return new
	}

	if hint != nil {
		for i, stmt := range x.Stmt {
			switch stmt := stmt.(type) {
			case *Line:
				if stmt == hint {
					if stmt.Token == nil || stmt.Token[0] != tokens[0] {
						return newLineAfter(i)
					}

					// Convert line to line block.
					// The block keeps the shared first token; the existing
					// line and the new line keep only the remaining tokens.
					stmt.InBlock = true
					block := &LineBlock{Token: stmt.Token[:1], Line: []*Line{stmt}}
					stmt.Token = stmt.Token[1:]
					x.Stmt[i] = block
					new := &Line{Token: tokens[1:], InBlock: true}
					block.Line = append(block.Line, new)
					return new
				}

			case *LineBlock:
				if stmt == hint {
					if stmt.Token[0] != tokens[0] {
						return newLineAfter(i)
					}

					// The hint is the block itself: append at its end.
					new := &Line{Token: tokens[1:], InBlock: true}
					stmt.Line = append(stmt.Line, new)
					return new
				}

				// The hint may be one of the lines inside this block.
				for j, line := range stmt.Line {
					if line == hint {
						if stmt.Token[0] != tokens[0] {
							return newLineAfter(i)
						}

						// Add new line after hint within the block.
						stmt.Line = append(stmt.Line, nil)
						copy(stmt.Line[j+2:], stmt.Line[j+1:])
						new := &Line{Token: tokens[1:], InBlock: true}
						stmt.Line[j+1] = new
						return new
					}
				}
			}
		}
	}

	// No usable hint (or the hint was not found in the file):
	// append a standalone line at the end of the file.
	new := &Line{Token: tokens}
	x.Stmt = append(x.Stmt, new)
	return new
}
189
190func (x *FileSyntax) updateLine(line *Line, tokens ...string) {
191	if line.InBlock {
192		tokens = tokens[1:]
193	}
194	line.Token = tokens
195}
196
197// markRemoved modifies line so that it (and its end-of-line comment, if any)
198// will be dropped by (*FileSyntax).Cleanup.
199func (line *Line) markRemoved() {
200	line.Token = nil
201	line.Comments.Suffix = nil
202}
203
204// Cleanup cleans up the file syntax x after any edit operations.
205// To avoid quadratic behavior, (*Line).markRemoved marks the line as dead
206// by setting line.Token = nil but does not remove it from the slice
207// in which it appears. After edits have all been indicated,
208// calling Cleanup cleans out the dead lines.
209func (x *FileSyntax) Cleanup() {
210	w := 0
211	for _, stmt := range x.Stmt {
212		switch stmt := stmt.(type) {
213		case *Line:
214			if stmt.Token == nil {
215				continue
216			}
217		case *LineBlock:
218			ww := 0
219			for _, line := range stmt.Line {
220				if line.Token != nil {
221					stmt.Line[ww] = line
222					ww++
223				}
224			}
225			if ww == 0 {
226				continue
227			}
228			if ww == 1 && len(stmt.RParen.Comments.Before) == 0 {
229				// Collapse block into single line.
230				line := &Line{
231					Comments: Comments{
232						Before: commentsAdd(stmt.Before, stmt.Line[0].Before),
233						Suffix: commentsAdd(stmt.Line[0].Suffix, stmt.Suffix),
234						After:  commentsAdd(stmt.Line[0].After, stmt.After),
235					},
236					Token: stringsAdd(stmt.Token, stmt.Line[0].Token),
237				}
238				x.Stmt[w] = line
239				w++
240				continue
241			}
242			stmt.Line = stmt.Line[:ww]
243		}
244		x.Stmt[w] = stmt
245		w++
246	}
247	x.Stmt = x.Stmt[:w]
248}
249
250func commentsAdd(x, y []Comment) []Comment {
251	return append(x[:len(x):len(x)], y...)
252}
253
// stringsAdd returns the elements of x followed by the elements of y.
// x is first clipped to its own length, so the append can never write
// into spare capacity of x's backing array.
func stringsAdd(x, y []string) []string {
	clipped := x[:len(x):len(x)]
	return append(clipped, y...)
}
257
258// A CommentBlock represents a top-level block of comments separate
259// from any rule.
260type CommentBlock struct {
261	Comments
262	Start Position
263}
264
265func (x *CommentBlock) Span() (start, end Position) {
266	return x.Start, x.Start
267}
268
269// A Line is a single line of tokens.
270type Line struct {
271	Comments
272	Start   Position
273	Token   []string
274	InBlock bool
275	End     Position
276}
277
278func (x *Line) Span() (start, end Position) {
279	return x.Start, x.End
280}
281
282// A LineBlock is a factored block of lines, like
283//
284//	require (
285//		"x"
286//		"y"
287//	)
288type LineBlock struct {
289	Comments
290	Start  Position
291	LParen LParen
292	Token  []string
293	Line   []*Line
294	RParen RParen
295}
296
297func (x *LineBlock) Span() (start, end Position) {
298	return x.Start, x.RParen.Pos.add(")")
299}
300
301// An LParen represents the beginning of a parenthesized line block.
302// It is a place to store suffix comments.
303type LParen struct {
304	Comments
305	Pos Position
306}
307
308func (x *LParen) Span() (start, end Position) {
309	return x.Pos, x.Pos.add(")")
310}
311
312// An RParen represents the end of a parenthesized line block.
313// It is a place to store whole-line (before) comments.
314type RParen struct {
315	Comments
316	Pos Position
317}
318
319func (x *RParen) Span() (start, end Position) {
320	return x.Pos, x.Pos.add(")")
321}
322
// An input represents a single input file being parsed.
//
// It carries the lexer state (one token of lookahead is always held in
// token), the syntax tree being built, the errors collected so far, and
// the scratch lists used by assignComments.
type input struct {
	// Lexing state.
	filename   string    // name of input file, for errors
	complete   []byte    // entire input
	remaining  []byte    // remaining input
	tokenStart []byte    // token being scanned to end of input
	token      token     // next token to be returned by lex, peek
	pos        Position  // current input position
	comments   []Comment // accumulated comments

	// Parser state.
	file        *FileSyntax // returned top-level syntax tree
	parseErrors ErrorList   // errors encountered during parsing

	// Comment assignment state.
	pre  []Expr // all expressions, in preorder traversal
	post []Expr // all expressions, in postorder traversal
}
342
343func newInput(filename string, data []byte) *input {
344	return &input{
345		filename:  filename,
346		complete:  data,
347		remaining: data,
348		pos:       Position{Line: 1, LineRune: 1, Byte: 0},
349	}
350}
351
352// parse parses the input file.
353func parse(file string, data []byte) (f *FileSyntax, err error) {
354	// The parser panics for both routine errors like syntax errors
355	// and for programmer bugs like array index errors.
356	// Turn both into error returns. Catching bug panics is
357	// especially important when processing many files.
358	in := newInput(file, data)
359	defer func() {
360		if e := recover(); e != nil && e != &in.parseErrors {
361			in.parseErrors = append(in.parseErrors, Error{
362				Filename: in.filename,
363				Pos:      in.pos,
364				Err:      fmt.Errorf("internal error: %v", e),
365			})
366		}
367		if err == nil && len(in.parseErrors) > 0 {
368			err = in.parseErrors
369		}
370	}()
371
372	// Prime the lexer by reading in the first token. It will be available
373	// in the next peek() or lex() call.
374	in.readToken()
375
376	// Invoke the parser.
377	in.parseFile()
378	if len(in.parseErrors) > 0 {
379		return nil, in.parseErrors
380	}
381	in.file.Name = in.filename
382
383	// Assign comments to nearby syntax.
384	in.assignComments()
385
386	return in.file, nil
387}
388
389// Error is called to report an error.
390// Error does not return: it panics.
391func (in *input) Error(s string) {
392	in.parseErrors = append(in.parseErrors, Error{
393		Filename: in.filename,
394		Pos:      in.pos,
395		Err:      errors.New(s),
396	})
397	panic(&in.parseErrors)
398}
399
400// eof reports whether the input has reached end of file.
401func (in *input) eof() bool {
402	return len(in.remaining) == 0
403}
404
405// peekRune returns the next rune in the input without consuming it.
406func (in *input) peekRune() int {
407	if len(in.remaining) == 0 {
408		return 0
409	}
410	r, _ := utf8.DecodeRune(in.remaining)
411	return int(r)
412}
413
414// peekPrefix reports whether the remaining input begins with the given prefix.
415func (in *input) peekPrefix(prefix string) bool {
416	// This is like bytes.HasPrefix(in.remaining, []byte(prefix))
417	// but without the allocation of the []byte copy of prefix.
418	for i := 0; i < len(prefix); i++ {
419		if i >= len(in.remaining) || in.remaining[i] != prefix[i] {
420			return false
421		}
422	}
423	return true
424}
425
426// readRune consumes and returns the next rune in the input.
427func (in *input) readRune() int {
428	if len(in.remaining) == 0 {
429		in.Error("internal lexer error: readRune at EOF")
430	}
431	r, size := utf8.DecodeRune(in.remaining)
432	in.remaining = in.remaining[size:]
433	if r == '\n' {
434		in.pos.Line++
435		in.pos.LineRune = 1
436	} else {
437		in.pos.LineRune++
438	}
439	in.pos.Byte += size
440	return int(r)
441}
442
// A token is a single lexical element produced by the lexer.
type token struct {
	kind   tokenKind // token category, or the character itself for newline and punctuation
	pos    Position  // input position when the token was started
	endPos Position  // input position when the token was ended
	text   string    // input text of the token; comments have one trailing newline removed
}
449
// A tokenKind classifies a token. The named kinds below are negative;
// newline and punctuation tokens use their own character code as the kind.
type tokenKind int

const (
	_EOF tokenKind = -(iota + 1)
	_EOLCOMMENT
	_IDENT
	_STRING
	_COMMENT

	// newlines and punctuation tokens are allowed as ASCII codes.
)

// isComment reports whether k is one of the two comment kinds.
func (k tokenKind) isComment() bool {
	switch k {
	case _COMMENT, _EOLCOMMENT:
		return true
	}
	return false
}

// isEOL returns whether a token terminates a line.
func (k tokenKind) isEOL() bool {
	switch k {
	case _EOF, _EOLCOMMENT, '\n':
		return true
	}
	return false
}
470
471// startToken marks the beginning of the next input token.
472// It must be followed by a call to endToken, once the token's text has
473// been consumed using readRune.
474func (in *input) startToken() {
475	in.tokenStart = in.remaining
476	in.token.text = ""
477	in.token.pos = in.pos
478}
479
480// endToken marks the end of an input token.
481// It records the actual token string in tok.text.
482// A single trailing newline (LF or CRLF) will be removed from comment tokens.
483func (in *input) endToken(kind tokenKind) {
484	in.token.kind = kind
485	text := string(in.tokenStart[:len(in.tokenStart)-len(in.remaining)])
486	if kind.isComment() {
487		if strings.HasSuffix(text, "\r\n") {
488			text = text[:len(text)-2]
489		} else {
490			text = strings.TrimSuffix(text, "\n")
491		}
492	}
493	in.token.text = text
494	in.token.endPos = in.pos
495}
496
497// peek returns the kind of the next token returned by lex.
498func (in *input) peek() tokenKind {
499	return in.token.kind
500}
501
502// lex is called from the parser to obtain the next input token.
503func (in *input) lex() token {
504	tok := in.token
505	in.readToken()
506	return tok
507}
508
// readToken lexes the next token from the text and stores it in in.token.
//
// Spaces, tabs and carriage returns between tokens are skipped. The token
// produced is one of: a comment (_COMMENT for a comment alone on its line,
// _EOLCOMMENT for one that follows other text), _EOF, a newline or
// punctuation character (whose kind is the character itself), a quoted
// string (_STRING), or an identifier (_IDENT).
//
// Errors (a /* */ comment, an unterminated or multi-line string, an
// unexpected character) are reported through in.Error, which panics.
func (in *input) readToken() {
	// Skip past spaces, stopping at non-space or EOF.
	for !in.eof() {
		c := in.peekRune()
		if c == ' ' || c == '\t' || c == '\r' {
			in.readRune()
			continue
		}

		// Comment runs to end of line.
		if in.peekPrefix("//") {
			in.startToken()

			// Is this comment the only thing on its line?
			// Find the last \n before this // and see if it's all
			// spaces from there to here.
			// (When there is no earlier newline, i is -1 and the scan
			// starts at the beginning of the input.)
			i := bytes.LastIndex(in.complete[:in.pos.Byte], []byte("\n"))
			suffix := len(bytes.TrimSpace(in.complete[i+1:in.pos.Byte])) > 0
			in.readRune()
			in.readRune()

			// Consume comment.
			// The terminating newline, if any, is consumed as part of the
			// comment; endToken strips it from the token text.
			for len(in.remaining) > 0 && in.readRune() != '\n' {
			}

			// If we are at top level (not in a statement), hand the comment to
			// the parser as a _COMMENT token. The grammar is written
			// to handle top-level comments itself.
			if !suffix {
				in.endToken(_COMMENT)
				return
			}

			// Otherwise, save comment for later attachment to syntax tree.
			in.endToken(_EOLCOMMENT)
			in.comments = append(in.comments, Comment{in.token.pos, in.token.text, suffix})
			return
		}

		if in.peekPrefix("/*") {
			in.Error("mod files must use // comments (not /* */ comments)")
		}

		// Found non-space non-comment.
		break
	}

	// Found the beginning of the next token.
	in.startToken()

	// End of file.
	if in.eof() {
		in.endToken(_EOF)
		return
	}

	// Punctuation tokens.
	switch c := in.peekRune(); c {
	case '\n', '(', ')', '[', ']', '{', '}', ',':
		in.readRune()
		in.endToken(tokenKind(c))
		return

	case '"', '`': // quoted string
		quote := c
		in.readRune()
		for {
			if in.eof() {
				// Report the error at the start of the string rather
				// than at the end of the input.
				in.pos = in.token.pos
				in.Error("unexpected EOF in string")
			}
			if in.peekRune() == '\n' {
				in.Error("unexpected newline in string")
			}
			c := in.readRune()
			if c == quote {
				break
			}
			// Inside a double-quoted string a backslash escapes the next
			// rune, so an escaped quote does not end the string.
			if c == '\\' && quote != '`' {
				if in.eof() {
					in.pos = in.token.pos
					in.Error("unexpected EOF in string")
				}
				in.readRune()
			}
		}
		in.endToken(_STRING)
		return
	}

	// Checked all punctuation. Must be identifier token.
	if c := in.peekRune(); !isIdent(c) {
		in.Error(fmt.Sprintf("unexpected input character %#q", c))
	}

	// Scan over identifier.
	for isIdent(in.peekRune()) {
		// '/' is itself an identifier rune, so a comment marker must be
		// checked for explicitly to end the identifier before it.
		if in.peekPrefix("//") {
			break
		}
		if in.peekPrefix("/*") {
			in.Error("mod files must use // comments (not /* */ comments)")
		}
		in.readRune()
	}
	in.endToken(_IDENT)
}
617
// isIdent reports whether c is an identifier rune.
// Any printable, non-space rune qualifies, apart from the space character
// and the punctuation characters ( ) [ ] { } and comma.
func isIdent(c int) bool {
	r := rune(c)
	if r == ' ' || r == '(' || r == ')' || r == '[' || r == ']' || r == '{' || r == '}' || r == ',' {
		return false
	}
	return unicode.IsPrint(r) && !unicode.IsSpace(r)
}
629
630// Comment assignment.
631// We build two lists of all subexpressions, preorder and postorder.
632// The preorder list is ordered by start location, with outer expressions first.
633// The postorder list is ordered by end location, with outer expressions last.
634// We use the preorder list to assign each whole-line comment to the syntax
635// immediately following it, and we use the postorder list to assign each
636// end-of-line comment to the syntax immediately preceding it.
637
638// order walks the expression adding it and its subexpressions to the
639// preorder and postorder lists.
640func (in *input) order(x Expr) {
641	if x != nil {
642		in.pre = append(in.pre, x)
643	}
644	switch x := x.(type) {
645	default:
646		panic(fmt.Errorf("order: unexpected type %T", x))
647	case nil:
648		// nothing
649	case *LParen, *RParen:
650		// nothing
651	case *CommentBlock:
652		// nothing
653	case *Line:
654		// nothing
655	case *FileSyntax:
656		for _, stmt := range x.Stmt {
657			in.order(stmt)
658		}
659	case *LineBlock:
660		in.order(&x.LParen)
661		for _, l := range x.Line {
662			in.order(l)
663		}
664		in.order(&x.RParen)
665	}
666	if x != nil {
667		in.post = append(in.post, x)
668	}
669}
670
// assignComments attaches comments to nearby syntax.
//
// The comments collected in in.comments are split into whole-line and
// suffix comments. Whole-line comments are attached, in order, to the
// Before list of the first expression (in preorder) that starts at or
// after them; any left over go to the file's After list. Suffix comments
// are attached to the Suffix list of the last single-line expression (in
// postorder) that ends at or before them; any left over go to the file's
// Before list.
//
// NOTE(review): within this file readToken appends only suffix comments
// to in.comments (whole-line comments are handed to the parser as
// _COMMENT tokens), so the whole-line list appears to stay empty here.
func (in *input) assignComments() {
	// debug enables tracing of the assignment to standard error.
	const debug = false

	// Generate preorder and postorder lists.
	in.order(in.file)

	// Split into whole-line comments and suffix comments.
	var line, suffix []Comment
	for _, com := range in.comments {
		if com.Suffix {
			suffix = append(suffix, com)
		} else {
			line = append(line, com)
		}
	}

	if debug {
		for _, c := range line {
			fmt.Fprintf(os.Stderr, "LINE %q :%d:%d #%d\n", c.Token, c.Start.Line, c.Start.LineRune, c.Start.Byte)
		}
	}

	// Assign line comments to syntax immediately following.
	for _, x := range in.pre {
		start, _ := x.Span()
		if debug {
			fmt.Fprintf(os.Stderr, "pre %T :%d:%d #%d\n", x, start.Line, start.LineRune, start.Byte)
		}
		xcom := x.Comment()
		// Take every pending comment that begins at or before the start of x.
		for len(line) > 0 && start.Byte >= line[0].Start.Byte {
			if debug {
				fmt.Fprintf(os.Stderr, "ASSIGN LINE %q #%d\n", line[0].Token, line[0].Start.Byte)
			}
			xcom.Before = append(xcom.Before, line[0])
			line = line[1:]
		}
	}

	// Remaining line comments go at end of file.
	in.file.After = append(in.file.After, line...)

	if debug {
		for _, c := range suffix {
			fmt.Fprintf(os.Stderr, "SUFFIX %q :%d:%d #%d\n", c.Token, c.Start.Line, c.Start.LineRune, c.Start.Byte)
		}
	}

	// Assign suffix comments to syntax immediately before.
	// Both the postorder list and the suffix list are consumed from the back.
	for i := len(in.post) - 1; i >= 0; i-- {
		x := in.post[i]

		start, end := x.Span()
		if debug {
			fmt.Fprintf(os.Stderr, "post %T :%d:%d #%d :%d:%d #%d\n", x, start.Line, start.LineRune, start.Byte, end.Line, end.LineRune, end.Byte)
		}

		// Do not assign suffix comments to end of line block or whole file.
		// Instead assign them to the last element inside.
		switch x.(type) {
		case *FileSyntax:
			continue
		}

		// Do not assign suffix comments to something that starts
		// on an earlier line, so that in
		//
		//	x ( y
		//		z ) // comment
		//
		// we assign the comment to z and not to x ( ... ).
		if start.Line != end.Line {
			continue
		}
		xcom := x.Comment()
		// Take every pending comment that begins at or after the end of x.
		for len(suffix) > 0 && end.Byte <= suffix[len(suffix)-1].Start.Byte {
			if debug {
				fmt.Fprintf(os.Stderr, "ASSIGN SUFFIX %q #%d\n", suffix[len(suffix)-1].Token, suffix[len(suffix)-1].Start.Byte)
			}
			xcom.Suffix = append(xcom.Suffix, suffix[len(suffix)-1])
			suffix = suffix[:len(suffix)-1]
		}
	}

	// We assigned suffix comments in reverse.
	// If multiple suffix comments were appended to the same
	// expression node, they are now in reverse. Fix that.
	for _, x := range in.post {
		reverseComments(x.Comment().Suffix)
	}

	// Remaining suffix comments go at beginning of file.
	in.file.Before = append(in.file.Before, suffix...)
}
765
766// reverseComments reverses the []Comment list.
767func reverseComments(list []Comment) {
768	for i, j := 0, len(list)-1; i < j; i, j = i+1, j-1 {
769		list[i], list[j] = list[j], list[i]
770	}
771}
772
773func (in *input) parseFile() {
774	in.file = new(FileSyntax)
775	var cb *CommentBlock
776	for {
777		switch in.peek() {
778		case '\n':
779			in.lex()
780			if cb != nil {
781				in.file.Stmt = append(in.file.Stmt, cb)
782				cb = nil
783			}
784		case _COMMENT:
785			tok := in.lex()
786			if cb == nil {
787				cb = &CommentBlock{Start: tok.pos}
788			}
789			com := cb.Comment()
790			com.Before = append(com.Before, Comment{Start: tok.pos, Token: tok.text})
791		case _EOF:
792			if cb != nil {
793				in.file.Stmt = append(in.file.Stmt, cb)
794			}
795			return
796		default:
797			in.parseStmt()
798			if cb != nil {
799				in.file.Stmt[len(in.file.Stmt)-1].Comment().Before = cb.Before
800				cb = nil
801			}
802		}
803	}
804}
805
806func (in *input) parseStmt() {
807	tok := in.lex()
808	start := tok.pos
809	end := tok.endPos
810	tokens := []string{tok.text}
811	for {
812		tok := in.lex()
813		switch {
814		case tok.kind.isEOL():
815			in.file.Stmt = append(in.file.Stmt, &Line{
816				Start: start,
817				Token: tokens,
818				End:   end,
819			})
820			return
821
822		case tok.kind == '(':
823			if next := in.peek(); next.isEOL() {
824				// Start of block: no more tokens on this line.
825				in.file.Stmt = append(in.file.Stmt, in.parseLineBlock(start, tokens, tok))
826				return
827			} else if next == ')' {
828				rparen := in.lex()
829				if in.peek().isEOL() {
830					// Empty block.
831					in.lex()
832					in.file.Stmt = append(in.file.Stmt, &LineBlock{
833						Start:  start,
834						Token:  tokens,
835						LParen: LParen{Pos: tok.pos},
836						RParen: RParen{Pos: rparen.pos},
837					})
838					return
839				}
840				// '( )' in the middle of the line, not a block.
841				tokens = append(tokens, tok.text, rparen.text)
842			} else {
843				// '(' in the middle of the line, not a block.
844				tokens = append(tokens, tok.text)
845			}
846
847		default:
848			tokens = append(tokens, tok.text)
849			end = tok.endPos
850		}
851	}
852}
853
854func (in *input) parseLineBlock(start Position, token []string, lparen token) *LineBlock {
855	x := &LineBlock{
856		Start:  start,
857		Token:  token,
858		LParen: LParen{Pos: lparen.pos},
859	}
860	var comments []Comment
861	for {
862		switch in.peek() {
863		case _EOLCOMMENT:
864			// Suffix comment, will be attached later by assignComments.
865			in.lex()
866		case '\n':
867			// Blank line. Add an empty comment to preserve it.
868			in.lex()
869			if len(comments) == 0 && len(x.Line) > 0 || len(comments) > 0 && comments[len(comments)-1].Token != "" {
870				comments = append(comments, Comment{})
871			}
872		case _COMMENT:
873			tok := in.lex()
874			comments = append(comments, Comment{Start: tok.pos, Token: tok.text})
875		case _EOF:
876			in.Error(fmt.Sprintf("syntax error (unterminated block started at %s:%d:%d)", in.filename, x.Start.Line, x.Start.LineRune))
877		case ')':
878			rparen := in.lex()
879			x.RParen.Before = comments
880			x.RParen.Pos = rparen.pos
881			if !in.peek().isEOL() {
882				in.Error("syntax error (expected newline after closing paren)")
883			}
884			in.lex()
885			return x
886		default:
887			l := in.parseLine()
888			x.Line = append(x.Line, l)
889			l.Comment().Before = comments
890			comments = nil
891		}
892	}
893}
894
895func (in *input) parseLine() *Line {
896	tok := in.lex()
897	if tok.kind.isEOL() {
898		in.Error("internal parse error: parseLine at end of line")
899	}
900	start := tok.pos
901	end := tok.endPos
902	tokens := []string{tok.text}
903	for {
904		tok := in.lex()
905		if tok.kind.isEOL() {
906			return &Line{
907				Start:   start,
908				Token:   tokens,
909				End:     end,
910				InBlock: true,
911			}
912		}
913		tokens = append(tokens, tok.text)
914		end = tok.endPos
915	}
916}
917
// Byte-slice forms of the markers ModulePath searches for.
var (
	slashSlash = []byte("//")
	moduleStr  = []byte("module")
)

// ModulePath returns the module path from the gomod file text.
// If it cannot find a module path, it returns an empty string.
// It is tolerant of unrelated problems in the go.mod file.
//
// The text is scanned line by line, without using the full parser.
// Everything from the first "//" on a line is discarded, and the first
// line of the form "module <path>" decides the result. A quoted path is
// unquoted; if unquoting fails the result is the empty string.
func ModulePath(mod []byte) string {
	rest := mod
	for len(rest) > 0 {
		// Split off the next line.
		var line []byte
		if nl := bytes.IndexByte(rest, '\n'); nl < 0 {
			line, rest = rest, nil
		} else {
			line, rest = rest[:nl], rest[nl+1:]
		}

		// Drop any comment, then surrounding white space.
		if c := bytes.Index(line, slashSlash); c >= 0 {
			line = line[:c]
		}
		line = bytes.TrimSpace(line)
		if !bytes.HasPrefix(line, moduleStr) {
			continue
		}

		// The keyword must be followed by white space and then a path.
		// If trimming removes nothing, the keyword ran straight into
		// other text (or nothing followed it at all).
		after := line[len(moduleStr):]
		path := bytes.TrimSpace(after)
		if len(path) == len(after) || len(path) == 0 {
			continue
		}

		switch path[0] {
		case '"', '`':
			unquoted, err := strconv.Unquote(string(path))
			if err != nil {
				return "" // malformed quoted string or multiline module path
			}
			return unquoted
		}
		return string(path)
	}
	return "" // missing module path
}
959