1*4947cdc7SCole Faust// Copyright 2017 The Bazel Authors. All rights reserved. 2*4947cdc7SCole Faust// Use of this source code is governed by a BSD-style 3*4947cdc7SCole Faust// license that can be found in the LICENSE file. 4*4947cdc7SCole Faust 5*4947cdc7SCole Faustpackage syntax 6*4947cdc7SCole Faust 7*4947cdc7SCole Faustimport ( 8*4947cdc7SCole Faust "bytes" 9*4947cdc7SCole Faust "fmt" 10*4947cdc7SCole Faust "go/build" 11*4947cdc7SCole Faust "io/ioutil" 12*4947cdc7SCole Faust "path/filepath" 13*4947cdc7SCole Faust "strings" 14*4947cdc7SCole Faust "testing" 15*4947cdc7SCole Faust) 16*4947cdc7SCole Faust 17*4947cdc7SCole Faustfunc scan(src interface{}) (tokens string, err error) { 18*4947cdc7SCole Faust sc, err := newScanner("foo.star", src, false) 19*4947cdc7SCole Faust if err != nil { 20*4947cdc7SCole Faust return "", err 21*4947cdc7SCole Faust } 22*4947cdc7SCole Faust 23*4947cdc7SCole Faust defer sc.recover(&err) 24*4947cdc7SCole Faust 25*4947cdc7SCole Faust var buf bytes.Buffer 26*4947cdc7SCole Faust var val tokenValue 27*4947cdc7SCole Faust for { 28*4947cdc7SCole Faust tok := sc.nextToken(&val) 29*4947cdc7SCole Faust 30*4947cdc7SCole Faust if buf.Len() > 0 { 31*4947cdc7SCole Faust buf.WriteByte(' ') 32*4947cdc7SCole Faust } 33*4947cdc7SCole Faust switch tok { 34*4947cdc7SCole Faust case EOF: 35*4947cdc7SCole Faust buf.WriteString("EOF") 36*4947cdc7SCole Faust case IDENT: 37*4947cdc7SCole Faust buf.WriteString(val.raw) 38*4947cdc7SCole Faust case INT: 39*4947cdc7SCole Faust if val.bigInt != nil { 40*4947cdc7SCole Faust fmt.Fprintf(&buf, "%d", val.bigInt) 41*4947cdc7SCole Faust } else { 42*4947cdc7SCole Faust fmt.Fprintf(&buf, "%d", val.int) 43*4947cdc7SCole Faust } 44*4947cdc7SCole Faust case FLOAT: 45*4947cdc7SCole Faust fmt.Fprintf(&buf, "%e", val.float) 46*4947cdc7SCole Faust case STRING, BYTES: 47*4947cdc7SCole Faust buf.WriteString(Quote(val.string, tok == BYTES)) 48*4947cdc7SCole Faust default: 49*4947cdc7SCole Faust buf.WriteString(tok.String()) 50*4947cdc7SCole Faust } 51*4947cdc7SCole Faust if tok == EOF { 52*4947cdc7SCole Faust break 53*4947cdc7SCole Faust } 54*4947cdc7SCole Faust } 55*4947cdc7SCole Faust return buf.String(), nil 56*4947cdc7SCole Faust} 57*4947cdc7SCole Faust 58*4947cdc7SCole Faustfunc TestScanner(t *testing.T) { 59*4947cdc7SCole Faust for _, test := range []struct { 60*4947cdc7SCole Faust input, want string 61*4947cdc7SCole Faust }{ 62*4947cdc7SCole Faust {``, "EOF"}, 63*4947cdc7SCole Faust {`123`, "123 EOF"}, 64*4947cdc7SCole Faust {`x.y`, "x . y EOF"}, 65*4947cdc7SCole Faust {`chocolate.éclair`, `chocolate . éclair EOF`}, 66*4947cdc7SCole Faust {`123 "foo" hello x.y`, `123 "foo" hello x . y EOF`}, 67*4947cdc7SCole Faust {`print(x)`, "print ( x ) EOF"}, 68*4947cdc7SCole Faust {`print(x); print(y)`, "print ( x ) ; print ( y ) EOF"}, 69*4947cdc7SCole Faust {"\nprint(\n1\n)\n", "print ( 1 ) newline EOF"}, // final \n is at toplevel on non-blank line => token 70*4947cdc7SCole Faust {`/ // /= //= ///=`, "/ // /= //= // /= EOF"}, 71*4947cdc7SCole Faust {`# hello 72*4947cdc7SCole Faustprint(x)`, "print ( x ) EOF"}, 73*4947cdc7SCole Faust {`# hello 74*4947cdc7SCole Faustprint(1) 75*4947cdc7SCole Faustcc_binary(name="foo") 76*4947cdc7SCole Faustdef f(x): 77*4947cdc7SCole Faust return x+1 78*4947cdc7SCole Faustprint(1) 79*4947cdc7SCole Faust`, 80*4947cdc7SCole Faust `print ( 1 ) newline ` + 81*4947cdc7SCole Faust `cc_binary ( name = "foo" ) newline ` + 82*4947cdc7SCole Faust `def f ( x ) : newline ` + 83*4947cdc7SCole Faust `indent return x + 1 newline ` + 84*4947cdc7SCole Faust `outdent print ( 1 ) newline ` + 85*4947cdc7SCole Faust `EOF`}, 86*4947cdc7SCole Faust // EOF should act line an implicit newline. 87*4947cdc7SCole Faust {`def f(): pass`, 88*4947cdc7SCole Faust "def f ( ) : pass EOF"}, 89*4947cdc7SCole Faust {`def f(): 90*4947cdc7SCole Faust pass`, 91*4947cdc7SCole Faust "def f ( ) : newline indent pass newline outdent EOF"}, 92*4947cdc7SCole Faust {`def f(): 93*4947cdc7SCole Faust pass 94*4947cdc7SCole Faust# oops`, 95*4947cdc7SCole Faust "def f ( ) : newline indent pass newline outdent EOF"}, 96*4947cdc7SCole Faust {`def f(): 97*4947cdc7SCole Faust pass \ 98*4947cdc7SCole Faust`, 99*4947cdc7SCole Faust "def f ( ) : newline indent pass newline outdent EOF"}, 100*4947cdc7SCole Faust {`def f(): 101*4947cdc7SCole Faust pass 102*4947cdc7SCole Faust`, 103*4947cdc7SCole Faust "def f ( ) : newline indent pass newline outdent EOF"}, 104*4947cdc7SCole Faust {`pass 105*4947cdc7SCole Faust 106*4947cdc7SCole Faust 107*4947cdc7SCole Faustpass`, "pass newline pass EOF"}, // consecutive newlines are consolidated 108*4947cdc7SCole Faust {`def f(): 109*4947cdc7SCole Faust pass 110*4947cdc7SCole Faust `, "def f ( ) : newline indent pass newline outdent EOF"}, 111*4947cdc7SCole Faust {`def f(): 112*4947cdc7SCole Faust pass 113*4947cdc7SCole Faust ` + "\n", "def f ( ) : newline indent pass newline outdent EOF"}, 114*4947cdc7SCole Faust {"pass", "pass EOF"}, 115*4947cdc7SCole Faust {"pass\n", "pass newline EOF"}, 116*4947cdc7SCole Faust {"pass\n ", "pass newline EOF"}, 117*4947cdc7SCole Faust {"pass\n \n", "pass newline EOF"}, 118*4947cdc7SCole Faust {"if x:\n pass\n ", "if x : newline indent pass newline outdent EOF"}, 119*4947cdc7SCole Faust {`x = 1 + \ 120*4947cdc7SCole Faust2`, `x = 1 + 2 EOF`}, 121*4947cdc7SCole Faust {`x = 'a\nb'`, `x = "a\nb" EOF`}, 122*4947cdc7SCole Faust {`x = r'a\nb'`, `x = "a\\nb" EOF`}, 123*4947cdc7SCole Faust {"x = 'a\\\nb'", `x = "ab" EOF`}, 124*4947cdc7SCole Faust {`x = '\''`, `x = "'" EOF`}, 125*4947cdc7SCole Faust {`x = "\""`, `x = "\"" EOF`}, 126*4947cdc7SCole Faust {`x = r'\''`, `x = "\\'" EOF`}, 127*4947cdc7SCole Faust {`x = '''\''''`, `x = "'" EOF`}, 128*4947cdc7SCole Faust {`x = r'''\''''`, `x = "\\'" EOF`}, 129*4947cdc7SCole Faust {`x = ''''a'b'c'''`, `x = "'a'b'c" EOF`}, 130*4947cdc7SCole Faust {"x = '''a\nb'''", `x = "a\nb" EOF`}, 131*4947cdc7SCole Faust {"x = '''a\rb'''", `x = "a\nb" EOF`}, 132*4947cdc7SCole Faust {"x = '''a\r\nb'''", `x = "a\nb" EOF`}, 133*4947cdc7SCole Faust {"x = '''a\n\rb'''", `x = "a\n\nb" EOF`}, 134*4947cdc7SCole Faust {"x = r'a\\\nb'", `x = "a\\\nb" EOF`}, 135*4947cdc7SCole Faust {"x = r'a\\\rb'", `x = "a\\\nb" EOF`}, 136*4947cdc7SCole Faust {"x = r'a\\\r\nb'", `x = "a\\\nb" EOF`}, 137*4947cdc7SCole Faust {"a\rb", `a newline b EOF`}, 138*4947cdc7SCole Faust {"a\nb", `a newline b EOF`}, 139*4947cdc7SCole Faust {"a\r\nb", `a newline b EOF`}, 140*4947cdc7SCole Faust {"a\n\nb", `a newline b EOF`}, 141*4947cdc7SCole Faust // numbers 142*4947cdc7SCole Faust {"0", `0 EOF`}, 143*4947cdc7SCole Faust {"00", `0 EOF`}, 144*4947cdc7SCole Faust {"0.", `0.000000e+00 EOF`}, 145*4947cdc7SCole Faust {"0.e1", `0.000000e+00 EOF`}, 146*4947cdc7SCole Faust {".0", `0.000000e+00 EOF`}, 147*4947cdc7SCole Faust {"0.0", `0.000000e+00 EOF`}, 148*4947cdc7SCole Faust {".e1", `. e1 EOF`}, 149*4947cdc7SCole Faust {"1", `1 EOF`}, 150*4947cdc7SCole Faust {"1.", `1.000000e+00 EOF`}, 151*4947cdc7SCole Faust {".1", `1.000000e-01 EOF`}, 152*4947cdc7SCole Faust {".1e1", `1.000000e+00 EOF`}, 153*4947cdc7SCole Faust {".1e+1", `1.000000e+00 EOF`}, 154*4947cdc7SCole Faust {".1e-1", `1.000000e-02 EOF`}, 155*4947cdc7SCole Faust {"1e1", `1.000000e+01 EOF`}, 156*4947cdc7SCole Faust {"1e+1", `1.000000e+01 EOF`}, 157*4947cdc7SCole Faust {"1e-1", `1.000000e-01 EOF`}, 158*4947cdc7SCole Faust {"123", `123 EOF`}, 159*4947cdc7SCole Faust {"123e45", `1.230000e+47 EOF`}, 160*4947cdc7SCole Faust {"999999999999999999999999999999999999999999999999999", `999999999999999999999999999999999999999999999999999 EOF`}, 161*4947cdc7SCole Faust {"12345678901234567890", `12345678901234567890 EOF`}, 162*4947cdc7SCole Faust // hex 163*4947cdc7SCole Faust {"0xA", `10 EOF`}, 164*4947cdc7SCole Faust {"0xAAG", `170 G EOF`}, 165*4947cdc7SCole Faust {"0xG", `foo.star:1:1: invalid hex literal`}, 166*4947cdc7SCole Faust {"0XA", `10 EOF`}, 167*4947cdc7SCole Faust {"0XG", `foo.star:1:1: invalid hex literal`}, 168*4947cdc7SCole Faust {"0xA.", `10 . EOF`}, 169*4947cdc7SCole Faust {"0xA.e1", `10 . e1 EOF`}, 170*4947cdc7SCole Faust {"0x12345678deadbeef12345678", `5634002672576678570168178296 EOF`}, 171*4947cdc7SCole Faust // binary 172*4947cdc7SCole Faust {"0b1010", `10 EOF`}, 173*4947cdc7SCole Faust {"0B111101", `61 EOF`}, 174*4947cdc7SCole Faust {"0b3", `foo.star:1:3: invalid binary literal`}, 175*4947cdc7SCole Faust {"0b1010201", `10 201 EOF`}, 176*4947cdc7SCole Faust {"0b1010.01", `10 1.000000e-02 EOF`}, 177*4947cdc7SCole Faust {"0b0000", `0 EOF`}, 178*4947cdc7SCole Faust // octal 179*4947cdc7SCole Faust {"0o123", `83 EOF`}, 180*4947cdc7SCole Faust {"0o12834", `10 834 EOF`}, 181*4947cdc7SCole Faust {"0o12934", `10 934 EOF`}, 182*4947cdc7SCole Faust {"0o12934.", `10 9.340000e+02 EOF`}, 183*4947cdc7SCole Faust {"0o12934.1", `10 9.341000e+02 EOF`}, 184*4947cdc7SCole Faust {"0o12934e1", `10 9.340000e+03 EOF`}, 185*4947cdc7SCole Faust {"0o123.", `83 . EOF`}, 186*4947cdc7SCole Faust {"0o123.1", `83 1.000000e-01 EOF`}, 187*4947cdc7SCole Faust {"0123", `foo.star:1:5: obsolete form of octal literal; use 0o123`}, 188*4947cdc7SCole Faust {"012834", `foo.star:1:1: invalid int literal`}, 189*4947cdc7SCole Faust {"012934", `foo.star:1:1: invalid int literal`}, 190*4947cdc7SCole Faust {"i = 012934", `foo.star:1:5: invalid int literal`}, 191*4947cdc7SCole Faust // octal escapes in string literals 192*4947cdc7SCole Faust {`"\037"`, `"\x1f" EOF`}, 193*4947cdc7SCole Faust {`"\377"`, `foo.star:1:1: non-ASCII octal escape \377 (use \u00FF for the UTF-8 encoding of U+00FF)`}, 194*4947cdc7SCole Faust {`"\378"`, `"\x1f8" EOF`}, // = '\37' + '8' 195*4947cdc7SCole Faust {`"\400"`, `foo.star:1:1: non-ASCII octal escape \400`}, // unlike Python 2 and 3 196*4947cdc7SCole Faust // hex escapes 197*4947cdc7SCole Faust {`"\x00\x20\x09\x41\x7e\x7f"`, `"\x00 \tA~\x7f" EOF`}, // DEL is non-printable 198*4947cdc7SCole Faust {`"\x80"`, `foo.star:1:1: non-ASCII hex escape`}, 199*4947cdc7SCole Faust {`"\xff"`, `foo.star:1:1: non-ASCII hex escape`}, 200*4947cdc7SCole Faust {`"\xFf"`, `foo.star:1:1: non-ASCII hex escape`}, 201*4947cdc7SCole Faust {`"\xF"`, `foo.star:1:1: truncated escape sequence \xF`}, 202*4947cdc7SCole Faust {`"\x"`, `foo.star:1:1: truncated escape sequence \x`}, 203*4947cdc7SCole Faust {`"\xfg"`, `foo.star:1:1: invalid escape sequence \xfg`}, 204*4947cdc7SCole Faust // Unicode escapes 205*4947cdc7SCole Faust // \uXXXX 206*4947cdc7SCole Faust {`"\u0400"`, `"Ѐ" EOF`}, 207*4947cdc7SCole Faust {`"\u100"`, `foo.star:1:1: truncated escape sequence \u100`}, 208*4947cdc7SCole Faust {`"\u04000"`, `"Ѐ0" EOF`}, // = U+0400 + '0' 209*4947cdc7SCole Faust {`"\u100g"`, `foo.star:1:1: invalid escape sequence \u100g`}, 210*4947cdc7SCole Faust {`"\u4E16"`, `"世" EOF`}, 211*4947cdc7SCole Faust {`"\udc00"`, `foo.star:1:1: invalid Unicode code point U+DC00`}, // surrogate 212*4947cdc7SCole Faust // \UXXXXXXXX 213*4947cdc7SCole Faust {`"\U00000400"`, `"Ѐ" EOF`}, 214*4947cdc7SCole Faust {`"\U0000400"`, `foo.star:1:1: truncated escape sequence \U0000400`}, 215*4947cdc7SCole Faust {`"\U000004000"`, `"Ѐ0" EOF`}, // = U+0400 + '0' 216*4947cdc7SCole Faust {`"\U1000000g"`, `foo.star:1:1: invalid escape sequence \U1000000g`}, 217*4947cdc7SCole Faust {`"\U0010FFFF"`, `"\U0010ffff" EOF`}, 218*4947cdc7SCole Faust {`"\U00110000"`, `foo.star:1:1: code point out of range: \U00110000 (max \U00110000)`}, 219*4947cdc7SCole Faust {`"\U0001F63F"`, `"" EOF`}, 220*4947cdc7SCole Faust {`"\U0000dc00"`, `foo.star:1:1: invalid Unicode code point U+DC00`}, // surrogate 221*4947cdc7SCole Faust 222*4947cdc7SCole Faust // backslash escapes 223*4947cdc7SCole Faust // As in Go, a backslash must escape something. 224*4947cdc7SCole Faust // (Python started issuing a deprecation warning in 3.6.) 225*4947cdc7SCole Faust {`"foo\(bar"`, `foo.star:1:1: invalid escape sequence \(`}, 226*4947cdc7SCole Faust {`"\+"`, `foo.star:1:1: invalid escape sequence \+`}, 227*4947cdc7SCole Faust {`"\w"`, `foo.star:1:1: invalid escape sequence \w`}, 228*4947cdc7SCole Faust {`"\""`, `"\"" EOF`}, 229*4947cdc7SCole Faust {`"\'"`, `"'" EOF`}, 230*4947cdc7SCole Faust {`'\w'`, `foo.star:1:1: invalid escape sequence \w`}, 231*4947cdc7SCole Faust {`'\''`, `"'" EOF`}, 232*4947cdc7SCole Faust {`'\"'`, `"\"" EOF`}, 233*4947cdc7SCole Faust {`"""\w"""`, `foo.star:1:1: invalid escape sequence \w`}, 234*4947cdc7SCole Faust {`"""\""""`, `"\"" EOF`}, 235*4947cdc7SCole Faust {`"""\'"""`, `"'" EOF`}, 236*4947cdc7SCole Faust {`'''\w'''`, `foo.star:1:1: invalid escape sequence \w`}, 237*4947cdc7SCole Faust {`'''\''''`, `"'" EOF`}, 238*4947cdc7SCole Faust {`'''\"'''`, `"\"" EOF`}, 239*4947cdc7SCole Faust {`r"\w"`, `"\\w" EOF`}, 240*4947cdc7SCole Faust {`r"\""`, `"\\\"" EOF`}, 241*4947cdc7SCole Faust {`r"\'"`, `"\\'" EOF`}, 242*4947cdc7SCole Faust {`r'\w'`, `"\\w" EOF`}, 243*4947cdc7SCole Faust {`r'\''`, `"\\'" EOF`}, 244*4947cdc7SCole Faust {`r'\"'`, `"\\\"" EOF`}, 245*4947cdc7SCole Faust {`'a\zb'`, `foo.star:1:1: invalid escape sequence \z`}, 246*4947cdc7SCole Faust {`"\o123"`, `foo.star:1:1: invalid escape sequence \o`}, 247*4947cdc7SCole Faust // bytes literals (where they differ from text strings) 248*4947cdc7SCole Faust {`b"AЀ世"`, `b"AЀ世`}, // 1-4 byte encodings, literal 249*4947cdc7SCole Faust {`b"\x41\u0400\u4e16\U0001F63F"`, `b"AЀ世"`}, // same, as escapes 250*4947cdc7SCole Faust {`b"\377\378\x80\xff\xFf"`, `b"\xff\x1f8\x80\xff\xff" EOF`}, // hex/oct escapes allow non-ASCII 251*4947cdc7SCole Faust {`b"\400"`, `foo.star:1:2: invalid escape sequence \400`}, 252*4947cdc7SCole Faust {`b"\udc00"`, `foo.star:1:2: invalid Unicode code point U+DC00`}, // (same as string) 253*4947cdc7SCole Faust // floats starting with octal digits 254*4947cdc7SCole Faust {"012934.", `1.293400e+04 EOF`}, 255*4947cdc7SCole Faust {"012934.1", `1.293410e+04 EOF`}, 256*4947cdc7SCole Faust {"012934e1", `1.293400e+05 EOF`}, 257*4947cdc7SCole Faust {"0123.", `1.230000e+02 EOF`}, 258*4947cdc7SCole Faust {"0123.1", `1.231000e+02 EOF`}, 259*4947cdc7SCole Faust // github.com/google/skylark/issues/16 260*4947cdc7SCole Faust {"x ! 0", "foo.star:1:3: unexpected input character '!'"}, 261*4947cdc7SCole Faust // github.com/google/starlark-go/issues/80 262*4947cdc7SCole Faust {"([{<>}])", "( [ { < > } ] ) EOF"}, 263*4947cdc7SCole Faust {"f();", "f ( ) ; EOF"}, 264*4947cdc7SCole Faust // github.com/google/starlark-go/issues/104 265*4947cdc7SCole Faust {"def f():\n if x:\n pass\n ", `def f ( ) : newline indent if x : newline indent pass newline outdent outdent EOF`}, 266*4947cdc7SCole Faust {`while cond: pass`, "while cond : pass EOF"}, 267*4947cdc7SCole Faust // github.com/google/starlark-go/issues/107 268*4947cdc7SCole Faust {"~= ~= 5", "~ = ~ = 5 EOF"}, 269*4947cdc7SCole Faust {"0in", "0 in EOF"}, 270*4947cdc7SCole Faust {"0or", "foo.star:1:3: invalid octal literal"}, 271*4947cdc7SCole Faust {"6in", "6 in EOF"}, 272*4947cdc7SCole Faust {"6or", "6 or EOF"}, 273*4947cdc7SCole Faust } { 274*4947cdc7SCole Faust got, err := scan(test.input) 275*4947cdc7SCole Faust if err != nil { 276*4947cdc7SCole Faust got = err.(Error).Error() 277*4947cdc7SCole Faust } 278*4947cdc7SCole Faust // Prefix match allows us to truncate errors in expecations. 279*4947cdc7SCole Faust // Success cases all end in EOF. 280*4947cdc7SCole Faust if !strings.HasPrefix(got, test.want) { 281*4947cdc7SCole Faust t.Errorf("scan `%s` = [%s], want [%s]", test.input, got, test.want) 282*4947cdc7SCole Faust } 283*4947cdc7SCole Faust } 284*4947cdc7SCole Faust} 285*4947cdc7SCole Faust 286*4947cdc7SCole Faust// dataFile is the same as starlarktest.DataFile. 287*4947cdc7SCole Faust// We make a copy to avoid a dependency cycle. 288*4947cdc7SCole Faustvar dataFile = func(pkgdir, filename string) string { 289*4947cdc7SCole Faust return filepath.Join(build.Default.GOPATH, "src/go.starlark.net", pkgdir, filename) 290*4947cdc7SCole Faust} 291*4947cdc7SCole Faust 292*4947cdc7SCole Faustfunc BenchmarkScan(b *testing.B) { 293*4947cdc7SCole Faust filename := dataFile("syntax", "testdata/scan.star") 294*4947cdc7SCole Faust b.StopTimer() 295*4947cdc7SCole Faust data, err := ioutil.ReadFile(filename) 296*4947cdc7SCole Faust if err != nil { 297*4947cdc7SCole Faust b.Fatal(err) 298*4947cdc7SCole Faust } 299*4947cdc7SCole Faust b.StartTimer() 300*4947cdc7SCole Faust 301*4947cdc7SCole Faust for i := 0; i < b.N; i++ { 302*4947cdc7SCole Faust sc, err := newScanner(filename, data, false) 303*4947cdc7SCole Faust if err != nil { 304*4947cdc7SCole Faust b.Fatal(err) 305*4947cdc7SCole Faust } 306*4947cdc7SCole Faust var val tokenValue 307*4947cdc7SCole Faust for sc.nextToken(&val) != EOF { 308*4947cdc7SCole Faust } 309*4947cdc7SCole Faust } 310*4947cdc7SCole Faust} 311