xref: /aosp_15_r20/external/starlark-go/syntax/scan_test.go (revision 4947cdc739c985f6d86941e22894f5cefe7c9e9a)
1*4947cdc7SCole Faust// Copyright 2017 The Bazel Authors. All rights reserved.
2*4947cdc7SCole Faust// Use of this source code is governed by a BSD-style
3*4947cdc7SCole Faust// license that can be found in the LICENSE file.
4*4947cdc7SCole Faust
5*4947cdc7SCole Faustpackage syntax
6*4947cdc7SCole Faust
7*4947cdc7SCole Faustimport (
8*4947cdc7SCole Faust	"bytes"
9*4947cdc7SCole Faust	"fmt"
10*4947cdc7SCole Faust	"go/build"
11*4947cdc7SCole Faust	"io/ioutil"
12*4947cdc7SCole Faust	"path/filepath"
13*4947cdc7SCole Faust	"strings"
14*4947cdc7SCole Faust	"testing"
15*4947cdc7SCole Faust)
16*4947cdc7SCole Faust
17*4947cdc7SCole Faustfunc scan(src interface{}) (tokens string, err error) {
18*4947cdc7SCole Faust	sc, err := newScanner("foo.star", src, false)
19*4947cdc7SCole Faust	if err != nil {
20*4947cdc7SCole Faust		return "", err
21*4947cdc7SCole Faust	}
22*4947cdc7SCole Faust
23*4947cdc7SCole Faust	defer sc.recover(&err)
24*4947cdc7SCole Faust
25*4947cdc7SCole Faust	var buf bytes.Buffer
26*4947cdc7SCole Faust	var val tokenValue
27*4947cdc7SCole Faust	for {
28*4947cdc7SCole Faust		tok := sc.nextToken(&val)
29*4947cdc7SCole Faust
30*4947cdc7SCole Faust		if buf.Len() > 0 {
31*4947cdc7SCole Faust			buf.WriteByte(' ')
32*4947cdc7SCole Faust		}
33*4947cdc7SCole Faust		switch tok {
34*4947cdc7SCole Faust		case EOF:
35*4947cdc7SCole Faust			buf.WriteString("EOF")
36*4947cdc7SCole Faust		case IDENT:
37*4947cdc7SCole Faust			buf.WriteString(val.raw)
38*4947cdc7SCole Faust		case INT:
39*4947cdc7SCole Faust			if val.bigInt != nil {
40*4947cdc7SCole Faust				fmt.Fprintf(&buf, "%d", val.bigInt)
41*4947cdc7SCole Faust			} else {
42*4947cdc7SCole Faust				fmt.Fprintf(&buf, "%d", val.int)
43*4947cdc7SCole Faust			}
44*4947cdc7SCole Faust		case FLOAT:
45*4947cdc7SCole Faust			fmt.Fprintf(&buf, "%e", val.float)
46*4947cdc7SCole Faust		case STRING, BYTES:
47*4947cdc7SCole Faust			buf.WriteString(Quote(val.string, tok == BYTES))
48*4947cdc7SCole Faust		default:
49*4947cdc7SCole Faust			buf.WriteString(tok.String())
50*4947cdc7SCole Faust		}
51*4947cdc7SCole Faust		if tok == EOF {
52*4947cdc7SCole Faust			break
53*4947cdc7SCole Faust		}
54*4947cdc7SCole Faust	}
55*4947cdc7SCole Faust	return buf.String(), nil
56*4947cdc7SCole Faust}
57*4947cdc7SCole Faust
58*4947cdc7SCole Faustfunc TestScanner(t *testing.T) {
59*4947cdc7SCole Faust	for _, test := range []struct {
60*4947cdc7SCole Faust		input, want string
61*4947cdc7SCole Faust	}{
62*4947cdc7SCole Faust		{``, "EOF"},
63*4947cdc7SCole Faust		{`123`, "123 EOF"},
64*4947cdc7SCole Faust		{`x.y`, "x . y EOF"},
65*4947cdc7SCole Faust		{`chocolateclair`, `chocolate . éclair EOF`},
66*4947cdc7SCole Faust		{`123 "foo" hello x.y`, `123 "foo" hello x . y EOF`},
67*4947cdc7SCole Faust		{`print(x)`, "print ( x ) EOF"},
68*4947cdc7SCole Faust		{`print(x); print(y)`, "print ( x ) ; print ( y ) EOF"},
69*4947cdc7SCole Faust		{"\nprint(\n1\n)\n", "print ( 1 ) newline EOF"}, // final \n is at toplevel on non-blank line => token
70*4947cdc7SCole Faust		{`/ // /= //= ///=`, "/ // /= //= // /= EOF"},
71*4947cdc7SCole Faust		{`# hello
72*4947cdc7SCole Faustprint(x)`, "print ( x ) EOF"},
73*4947cdc7SCole Faust		{`# hello
74*4947cdc7SCole Faustprint(1)
75*4947cdc7SCole Faustcc_binary(name="foo")
76*4947cdc7SCole Faustdef f(x):
77*4947cdc7SCole Faust		return x+1
78*4947cdc7SCole Faustprint(1)
79*4947cdc7SCole Faust`,
80*4947cdc7SCole Faust			`print ( 1 ) newline ` +
81*4947cdc7SCole Faust				`cc_binary ( name = "foo" ) newline ` +
82*4947cdc7SCole Faust				`def f ( x ) : newline ` +
83*4947cdc7SCole Faust				`indent return x + 1 newline ` +
84*4947cdc7SCole Faust				`outdent print ( 1 ) newline ` +
85*4947cdc7SCole Faust				`EOF`},
86*4947cdc7SCole Faust		// EOF should act line an implicit newline.
87*4947cdc7SCole Faust		{`def f(): pass`,
88*4947cdc7SCole Faust			"def f ( ) : pass EOF"},
89*4947cdc7SCole Faust		{`def f():
90*4947cdc7SCole Faust	pass`,
91*4947cdc7SCole Faust			"def f ( ) : newline indent pass newline outdent EOF"},
92*4947cdc7SCole Faust		{`def f():
93*4947cdc7SCole Faust	pass
94*4947cdc7SCole Faust# oops`,
95*4947cdc7SCole Faust			"def f ( ) : newline indent pass newline outdent EOF"},
96*4947cdc7SCole Faust		{`def f():
97*4947cdc7SCole Faust	pass \
98*4947cdc7SCole Faust`,
99*4947cdc7SCole Faust			"def f ( ) : newline indent pass newline outdent EOF"},
100*4947cdc7SCole Faust		{`def f():
101*4947cdc7SCole Faust	pass
102*4947cdc7SCole Faust`,
103*4947cdc7SCole Faust			"def f ( ) : newline indent pass newline outdent EOF"},
104*4947cdc7SCole Faust		{`pass
105*4947cdc7SCole Faust
106*4947cdc7SCole Faust
107*4947cdc7SCole Faustpass`, "pass newline pass EOF"}, // consecutive newlines are consolidated
108*4947cdc7SCole Faust		{`def f():
109*4947cdc7SCole Faust    pass
110*4947cdc7SCole Faust    `, "def f ( ) : newline indent pass newline outdent EOF"},
111*4947cdc7SCole Faust		{`def f():
112*4947cdc7SCole Faust    pass
113*4947cdc7SCole Faust    ` + "\n", "def f ( ) : newline indent pass newline outdent EOF"},
114*4947cdc7SCole Faust		{"pass", "pass EOF"},
115*4947cdc7SCole Faust		{"pass\n", "pass newline EOF"},
116*4947cdc7SCole Faust		{"pass\n ", "pass newline EOF"},
117*4947cdc7SCole Faust		{"pass\n \n", "pass newline EOF"},
118*4947cdc7SCole Faust		{"if x:\n  pass\n ", "if x : newline indent pass newline outdent EOF"},
119*4947cdc7SCole Faust		{`x = 1 + \
120*4947cdc7SCole Faust2`, `x = 1 + 2 EOF`},
121*4947cdc7SCole Faust		{`x = 'a\nb'`, `x = "a\nb" EOF`},
122*4947cdc7SCole Faust		{`x = r'a\nb'`, `x = "a\\nb" EOF`},
123*4947cdc7SCole Faust		{"x = 'a\\\nb'", `x = "ab" EOF`},
124*4947cdc7SCole Faust		{`x = '\''`, `x = "'" EOF`},
125*4947cdc7SCole Faust		{`x = "\""`, `x = "\"" EOF`},
126*4947cdc7SCole Faust		{`x = r'\''`, `x = "\\'" EOF`},
127*4947cdc7SCole Faust		{`x = '''\''''`, `x = "'" EOF`},
128*4947cdc7SCole Faust		{`x = r'''\''''`, `x = "\\'" EOF`},
129*4947cdc7SCole Faust		{`x = ''''a'b'c'''`, `x = "'a'b'c" EOF`},
130*4947cdc7SCole Faust		{"x = '''a\nb'''", `x = "a\nb" EOF`},
131*4947cdc7SCole Faust		{"x = '''a\rb'''", `x = "a\nb" EOF`},
132*4947cdc7SCole Faust		{"x = '''a\r\nb'''", `x = "a\nb" EOF`},
133*4947cdc7SCole Faust		{"x = '''a\n\rb'''", `x = "a\n\nb" EOF`},
134*4947cdc7SCole Faust		{"x = r'a\\\nb'", `x = "a\\\nb" EOF`},
135*4947cdc7SCole Faust		{"x = r'a\\\rb'", `x = "a\\\nb" EOF`},
136*4947cdc7SCole Faust		{"x = r'a\\\r\nb'", `x = "a\\\nb" EOF`},
137*4947cdc7SCole Faust		{"a\rb", `a newline b EOF`},
138*4947cdc7SCole Faust		{"a\nb", `a newline b EOF`},
139*4947cdc7SCole Faust		{"a\r\nb", `a newline b EOF`},
140*4947cdc7SCole Faust		{"a\n\nb", `a newline b EOF`},
141*4947cdc7SCole Faust		// numbers
142*4947cdc7SCole Faust		{"0", `0 EOF`},
143*4947cdc7SCole Faust		{"00", `0 EOF`},
144*4947cdc7SCole Faust		{"0.", `0.000000e+00 EOF`},
145*4947cdc7SCole Faust		{"0.e1", `0.000000e+00 EOF`},
146*4947cdc7SCole Faust		{".0", `0.000000e+00 EOF`},
147*4947cdc7SCole Faust		{"0.0", `0.000000e+00 EOF`},
148*4947cdc7SCole Faust		{".e1", `. e1 EOF`},
149*4947cdc7SCole Faust		{"1", `1 EOF`},
150*4947cdc7SCole Faust		{"1.", `1.000000e+00 EOF`},
151*4947cdc7SCole Faust		{".1", `1.000000e-01 EOF`},
152*4947cdc7SCole Faust		{".1e1", `1.000000e+00 EOF`},
153*4947cdc7SCole Faust		{".1e+1", `1.000000e+00 EOF`},
154*4947cdc7SCole Faust		{".1e-1", `1.000000e-02 EOF`},
155*4947cdc7SCole Faust		{"1e1", `1.000000e+01 EOF`},
156*4947cdc7SCole Faust		{"1e+1", `1.000000e+01 EOF`},
157*4947cdc7SCole Faust		{"1e-1", `1.000000e-01 EOF`},
158*4947cdc7SCole Faust		{"123", `123 EOF`},
159*4947cdc7SCole Faust		{"123e45", `1.230000e+47 EOF`},
160*4947cdc7SCole Faust		{"999999999999999999999999999999999999999999999999999", `999999999999999999999999999999999999999999999999999 EOF`},
161*4947cdc7SCole Faust		{"12345678901234567890", `12345678901234567890 EOF`},
162*4947cdc7SCole Faust		// hex
163*4947cdc7SCole Faust		{"0xA", `10 EOF`},
164*4947cdc7SCole Faust		{"0xAAG", `170 G EOF`},
165*4947cdc7SCole Faust		{"0xG", `foo.star:1:1: invalid hex literal`},
166*4947cdc7SCole Faust		{"0XA", `10 EOF`},
167*4947cdc7SCole Faust		{"0XG", `foo.star:1:1: invalid hex literal`},
168*4947cdc7SCole Faust		{"0xA.", `10 . EOF`},
169*4947cdc7SCole Faust		{"0xA.e1", `10 . e1 EOF`},
170*4947cdc7SCole Faust		{"0x12345678deadbeef12345678", `5634002672576678570168178296 EOF`},
171*4947cdc7SCole Faust		// binary
172*4947cdc7SCole Faust		{"0b1010", `10 EOF`},
173*4947cdc7SCole Faust		{"0B111101", `61 EOF`},
174*4947cdc7SCole Faust		{"0b3", `foo.star:1:3: invalid binary literal`},
175*4947cdc7SCole Faust		{"0b1010201", `10 201 EOF`},
176*4947cdc7SCole Faust		{"0b1010.01", `10 1.000000e-02 EOF`},
177*4947cdc7SCole Faust		{"0b0000", `0 EOF`},
178*4947cdc7SCole Faust		// octal
179*4947cdc7SCole Faust		{"0o123", `83 EOF`},
180*4947cdc7SCole Faust		{"0o12834", `10 834 EOF`},
181*4947cdc7SCole Faust		{"0o12934", `10 934 EOF`},
182*4947cdc7SCole Faust		{"0o12934.", `10 9.340000e+02 EOF`},
183*4947cdc7SCole Faust		{"0o12934.1", `10 9.341000e+02 EOF`},
184*4947cdc7SCole Faust		{"0o12934e1", `10 9.340000e+03 EOF`},
185*4947cdc7SCole Faust		{"0o123.", `83 . EOF`},
186*4947cdc7SCole Faust		{"0o123.1", `83 1.000000e-01 EOF`},
187*4947cdc7SCole Faust		{"0123", `foo.star:1:5: obsolete form of octal literal; use 0o123`},
188*4947cdc7SCole Faust		{"012834", `foo.star:1:1: invalid int literal`},
189*4947cdc7SCole Faust		{"012934", `foo.star:1:1: invalid int literal`},
190*4947cdc7SCole Faust		{"i = 012934", `foo.star:1:5: invalid int literal`},
191*4947cdc7SCole Faust		// octal escapes in string literals
192*4947cdc7SCole Faust		{`"\037"`, `"\x1f" EOF`},
193*4947cdc7SCole Faust		{`"\377"`, `foo.star:1:1: non-ASCII octal escape \377 (use \u00FF for the UTF-8 encoding of U+00FF)`},
194*4947cdc7SCole Faust		{`"\378"`, `"\x1f8" EOF`},                               // = '\37' + '8'
195*4947cdc7SCole Faust		{`"\400"`, `foo.star:1:1: non-ASCII octal escape \400`}, // unlike Python 2 and 3
196*4947cdc7SCole Faust		// hex escapes
197*4947cdc7SCole Faust		{`"\x00\x20\x09\x41\x7e\x7f"`, `"\x00 \tA~\x7f" EOF`}, // DEL is non-printable
198*4947cdc7SCole Faust		{`"\x80"`, `foo.star:1:1: non-ASCII hex escape`},
199*4947cdc7SCole Faust		{`"\xff"`, `foo.star:1:1: non-ASCII hex escape`},
200*4947cdc7SCole Faust		{`"\xFf"`, `foo.star:1:1: non-ASCII hex escape`},
201*4947cdc7SCole Faust		{`"\xF"`, `foo.star:1:1: truncated escape sequence \xF`},
202*4947cdc7SCole Faust		{`"\x"`, `foo.star:1:1: truncated escape sequence \x`},
203*4947cdc7SCole Faust		{`"\xfg"`, `foo.star:1:1: invalid escape sequence \xfg`},
204*4947cdc7SCole Faust		// Unicode escapes
205*4947cdc7SCole Faust		// \uXXXX
206*4947cdc7SCole Faust		{`"\u0400"`, `"Ѐ" EOF`},
207*4947cdc7SCole Faust		{`"\u100"`, `foo.star:1:1: truncated escape sequence \u100`},
208*4947cdc7SCole Faust		{`"\u04000"`, `"Ѐ0" EOF`}, // = U+0400 + '0'
209*4947cdc7SCole Faust		{`"\u100g"`, `foo.star:1:1: invalid escape sequence \u100g`},
210*4947cdc7SCole Faust		{`"\u4E16"`, `"世" EOF`},
211*4947cdc7SCole Faust		{`"\udc00"`, `foo.star:1:1: invalid Unicode code point U+DC00`}, // surrogate
212*4947cdc7SCole Faust		// \UXXXXXXXX
213*4947cdc7SCole Faust		{`"\U00000400"`, `"Ѐ" EOF`},
214*4947cdc7SCole Faust		{`"\U0000400"`, `foo.star:1:1: truncated escape sequence \U0000400`},
215*4947cdc7SCole Faust		{`"\U000004000"`, `"Ѐ0" EOF`}, // = U+0400 + '0'
216*4947cdc7SCole Faust		{`"\U1000000g"`, `foo.star:1:1: invalid escape sequence \U1000000g`},
217*4947cdc7SCole Faust		{`"\U0010FFFF"`, `"\U0010ffff" EOF`},
218*4947cdc7SCole Faust		{`"\U00110000"`, `foo.star:1:1: code point out of range: \U00110000 (max \U00110000)`},
219*4947cdc7SCole Faust		{`"\U0001F63F"`, `"��" EOF`},
220*4947cdc7SCole Faust		{`"\U0000dc00"`, `foo.star:1:1: invalid Unicode code point U+DC00`}, // surrogate
221*4947cdc7SCole Faust
222*4947cdc7SCole Faust		// backslash escapes
223*4947cdc7SCole Faust		// As in Go, a backslash must escape something.
224*4947cdc7SCole Faust		// (Python started issuing a deprecation warning in 3.6.)
225*4947cdc7SCole Faust		{`"foo\(bar"`, `foo.star:1:1: invalid escape sequence \(`},
226*4947cdc7SCole Faust		{`"\+"`, `foo.star:1:1: invalid escape sequence \+`},
227*4947cdc7SCole Faust		{`"\w"`, `foo.star:1:1: invalid escape sequence \w`},
228*4947cdc7SCole Faust		{`"\""`, `"\"" EOF`},
229*4947cdc7SCole Faust		{`"\'"`, `"'" EOF`},
230*4947cdc7SCole Faust		{`'\w'`, `foo.star:1:1: invalid escape sequence \w`},
231*4947cdc7SCole Faust		{`'\''`, `"'" EOF`},
232*4947cdc7SCole Faust		{`'\"'`, `"\"" EOF`},
233*4947cdc7SCole Faust		{`"""\w"""`, `foo.star:1:1: invalid escape sequence \w`},
234*4947cdc7SCole Faust		{`"""\""""`, `"\"" EOF`},
235*4947cdc7SCole Faust		{`"""\'"""`, `"'" EOF`},
236*4947cdc7SCole Faust		{`'''\w'''`, `foo.star:1:1: invalid escape sequence \w`},
237*4947cdc7SCole Faust		{`'''\''''`, `"'" EOF`},
238*4947cdc7SCole Faust		{`'''\"'''`, `"\"" EOF`},
239*4947cdc7SCole Faust		{`r"\w"`, `"\\w" EOF`},
240*4947cdc7SCole Faust		{`r"\""`, `"\\\"" EOF`},
241*4947cdc7SCole Faust		{`r"\'"`, `"\\'" EOF`},
242*4947cdc7SCole Faust		{`r'\w'`, `"\\w" EOF`},
243*4947cdc7SCole Faust		{`r'\''`, `"\\'" EOF`},
244*4947cdc7SCole Faust		{`r'\"'`, `"\\\"" EOF`},
245*4947cdc7SCole Faust		{`'a\zb'`, `foo.star:1:1: invalid escape sequence \z`},
246*4947cdc7SCole Faust		{`"\o123"`, `foo.star:1:1: invalid escape sequence \o`},
247*4947cdc7SCole Faust		// bytes literals (where they differ from text strings)
248*4947cdc7SCole Faust		{`b"AЀ世��"`, `b"AЀ世��`},                                       // 1-4 byte encodings, literal
249*4947cdc7SCole Faust		{`b"\x41\u0400\u4e16\U0001F63F"`, `b"AЀ世��"`},                // same, as escapes
250*4947cdc7SCole Faust		{`b"\377\378\x80\xff\xFf"`, `b"\xff\x1f8\x80\xff\xff" EOF`}, // hex/oct escapes allow non-ASCII
251*4947cdc7SCole Faust		{`b"\400"`, `foo.star:1:2: invalid escape sequence \400`},
252*4947cdc7SCole Faust		{`b"\udc00"`, `foo.star:1:2: invalid Unicode code point U+DC00`}, // (same as string)
253*4947cdc7SCole Faust		// floats starting with octal digits
254*4947cdc7SCole Faust		{"012934.", `1.293400e+04 EOF`},
255*4947cdc7SCole Faust		{"012934.1", `1.293410e+04 EOF`},
256*4947cdc7SCole Faust		{"012934e1", `1.293400e+05 EOF`},
257*4947cdc7SCole Faust		{"0123.", `1.230000e+02 EOF`},
258*4947cdc7SCole Faust		{"0123.1", `1.231000e+02 EOF`},
259*4947cdc7SCole Faust		// github.com/google/skylark/issues/16
260*4947cdc7SCole Faust		{"x ! 0", "foo.star:1:3: unexpected input character '!'"},
261*4947cdc7SCole Faust		// github.com/google/starlark-go/issues/80
262*4947cdc7SCole Faust		{"([{<>}])", "( [ { < > } ] ) EOF"},
263*4947cdc7SCole Faust		{"f();", "f ( ) ; EOF"},
264*4947cdc7SCole Faust		// github.com/google/starlark-go/issues/104
265*4947cdc7SCole Faust		{"def f():\n  if x:\n    pass\n  ", `def f ( ) : newline indent if x : newline indent pass newline outdent outdent EOF`},
266*4947cdc7SCole Faust		{`while cond: pass`, "while cond : pass EOF"},
267*4947cdc7SCole Faust		// github.com/google/starlark-go/issues/107
268*4947cdc7SCole Faust		{"~= ~= 5", "~ = ~ = 5 EOF"},
269*4947cdc7SCole Faust		{"0in", "0 in EOF"},
270*4947cdc7SCole Faust		{"0or", "foo.star:1:3: invalid octal literal"},
271*4947cdc7SCole Faust		{"6in", "6 in EOF"},
272*4947cdc7SCole Faust		{"6or", "6 or EOF"},
273*4947cdc7SCole Faust	} {
274*4947cdc7SCole Faust		got, err := scan(test.input)
275*4947cdc7SCole Faust		if err != nil {
276*4947cdc7SCole Faust			got = err.(Error).Error()
277*4947cdc7SCole Faust		}
278*4947cdc7SCole Faust		// Prefix match allows us to truncate errors in expecations.
279*4947cdc7SCole Faust		// Success cases all end in EOF.
280*4947cdc7SCole Faust		if !strings.HasPrefix(got, test.want) {
281*4947cdc7SCole Faust			t.Errorf("scan `%s` = [%s], want [%s]", test.input, got, test.want)
282*4947cdc7SCole Faust		}
283*4947cdc7SCole Faust	}
284*4947cdc7SCole Faust}
285*4947cdc7SCole Faust
// dataFile returns the path of filename inside package directory pkgdir of
// the go.starlark.net source tree located under the default GOPATH.
//
// It duplicates starlarktest.DataFile; the copy exists so that this package
// does not take part in a dependency cycle.
var dataFile = func(pkgdir, filename string) string {
	moduleRoot := filepath.Join(build.Default.GOPATH, "src/go.starlark.net")
	return filepath.Join(moduleRoot, pkgdir, filename)
}
291*4947cdc7SCole Faust
292*4947cdc7SCole Faustfunc BenchmarkScan(b *testing.B) {
293*4947cdc7SCole Faust	filename := dataFile("syntax", "testdata/scan.star")
294*4947cdc7SCole Faust	b.StopTimer()
295*4947cdc7SCole Faust	data, err := ioutil.ReadFile(filename)
296*4947cdc7SCole Faust	if err != nil {
297*4947cdc7SCole Faust		b.Fatal(err)
298*4947cdc7SCole Faust	}
299*4947cdc7SCole Faust	b.StartTimer()
300*4947cdc7SCole Faust
301*4947cdc7SCole Faust	for i := 0; i < b.N; i++ {
302*4947cdc7SCole Faust		sc, err := newScanner(filename, data, false)
303*4947cdc7SCole Faust		if err != nil {
304*4947cdc7SCole Faust			b.Fatal(err)
305*4947cdc7SCole Faust		}
306*4947cdc7SCole Faust		var val tokenValue
307*4947cdc7SCole Faust		for sc.nextToken(&val) != EOF {
308*4947cdc7SCole Faust		}
309*4947cdc7SCole Faust	}
310*4947cdc7SCole Faust}
311