1*4947cdc7SCole Faust// Copyright 2017 The Bazel Authors. All rights reserved. 2*4947cdc7SCole Faust// Use of this source code is governed by a BSD-style 3*4947cdc7SCole Faust// license that can be found in the LICENSE file. 4*4947cdc7SCole Faust 5*4947cdc7SCole Faustpackage syntax 6*4947cdc7SCole Faust 7*4947cdc7SCole Faust// This file defines a recursive-descent parser for Starlark. 8*4947cdc7SCole Faust// The LL(1) grammar of Starlark and the names of many productions follow Python 2.7. 9*4947cdc7SCole Faust// 10*4947cdc7SCole Faust// TODO(adonovan): use syntax.Error more systematically throughout the 11*4947cdc7SCole Faust// package. Verify that error positions are correct using the 12*4947cdc7SCole Faust// chunkedfile mechanism. 13*4947cdc7SCole Faust 14*4947cdc7SCole Faustimport "log" 15*4947cdc7SCole Faust 16*4947cdc7SCole Faust// Enable this flag to print the token stream and log.Fatal on the first error. 17*4947cdc7SCole Faustconst debug = false 18*4947cdc7SCole Faust 19*4947cdc7SCole Faust// A Mode value is a set of flags (or 0) that controls optional parser functionality. 20*4947cdc7SCole Fausttype Mode uint 21*4947cdc7SCole Faust 22*4947cdc7SCole Faustconst ( 23*4947cdc7SCole Faust RetainComments Mode = 1 << iota // retain comments in AST; see Node.Comments 24*4947cdc7SCole Faust) 25*4947cdc7SCole Faust 26*4947cdc7SCole Faust// Parse parses the input data and returns the corresponding parse tree. 27*4947cdc7SCole Faust// 28*4947cdc7SCole Faust// If src != nil, ParseFile parses the source from src and the filename 29*4947cdc7SCole Faust// is only used when recording position information. 30*4947cdc7SCole Faust// The type of the argument for the src parameter must be string, 31*4947cdc7SCole Faust// []byte, io.Reader, or FilePortion. 32*4947cdc7SCole Faust// If src == nil, ParseFile parses the file specified by filename. 33*4947cdc7SCole Faustfunc Parse(filename string, src interface{}, mode Mode) (f *File, err error) { 34*4947cdc7SCole Faust in, err := newScanner(filename, src, mode&RetainComments != 0) 35*4947cdc7SCole Faust if err != nil { 36*4947cdc7SCole Faust return nil, err 37*4947cdc7SCole Faust } 38*4947cdc7SCole Faust p := parser{in: in} 39*4947cdc7SCole Faust defer p.in.recover(&err) 40*4947cdc7SCole Faust 41*4947cdc7SCole Faust p.nextToken() // read first lookahead token 42*4947cdc7SCole Faust f = p.parseFile() 43*4947cdc7SCole Faust if f != nil { 44*4947cdc7SCole Faust f.Path = filename 45*4947cdc7SCole Faust } 46*4947cdc7SCole Faust p.assignComments(f) 47*4947cdc7SCole Faust return f, nil 48*4947cdc7SCole Faust} 49*4947cdc7SCole Faust 50*4947cdc7SCole Faust// ParseCompoundStmt parses a single compound statement: 51*4947cdc7SCole Faust// a blank line, a def, for, while, or if statement, or a 52*4947cdc7SCole Faust// semicolon-separated list of simple statements followed 53*4947cdc7SCole Faust// by a newline. These are the units on which the REPL operates. 54*4947cdc7SCole Faust// ParseCompoundStmt does not consume any following input. 55*4947cdc7SCole Faust// The parser calls the readline function each 56*4947cdc7SCole Faust// time it needs a new line of input. 57*4947cdc7SCole Faustfunc ParseCompoundStmt(filename string, readline func() ([]byte, error)) (f *File, err error) { 58*4947cdc7SCole Faust in, err := newScanner(filename, readline, false) 59*4947cdc7SCole Faust if err != nil { 60*4947cdc7SCole Faust return nil, err 61*4947cdc7SCole Faust } 62*4947cdc7SCole Faust 63*4947cdc7SCole Faust p := parser{in: in} 64*4947cdc7SCole Faust defer p.in.recover(&err) 65*4947cdc7SCole Faust 66*4947cdc7SCole Faust p.nextToken() // read first lookahead token 67*4947cdc7SCole Faust 68*4947cdc7SCole Faust var stmts []Stmt 69*4947cdc7SCole Faust switch p.tok { 70*4947cdc7SCole Faust case DEF, IF, FOR, WHILE: 71*4947cdc7SCole Faust stmts = p.parseStmt(stmts) 72*4947cdc7SCole Faust case NEWLINE: 73*4947cdc7SCole Faust // blank line 74*4947cdc7SCole Faust default: 75*4947cdc7SCole Faust stmts = p.parseSimpleStmt(stmts, false) 76*4947cdc7SCole Faust // Require but don't consume newline, to avoid blocking again. 77*4947cdc7SCole Faust if p.tok != NEWLINE { 78*4947cdc7SCole Faust p.in.errorf(p.in.pos, "invalid syntax") 79*4947cdc7SCole Faust } 80*4947cdc7SCole Faust } 81*4947cdc7SCole Faust 82*4947cdc7SCole Faust return &File{Path: filename, Stmts: stmts}, nil 83*4947cdc7SCole Faust} 84*4947cdc7SCole Faust 85*4947cdc7SCole Faust// ParseExpr parses a Starlark expression. 86*4947cdc7SCole Faust// A comma-separated list of expressions is parsed as a tuple. 87*4947cdc7SCole Faust// See Parse for explanation of parameters. 88*4947cdc7SCole Faustfunc ParseExpr(filename string, src interface{}, mode Mode) (expr Expr, err error) { 89*4947cdc7SCole Faust in, err := newScanner(filename, src, mode&RetainComments != 0) 90*4947cdc7SCole Faust if err != nil { 91*4947cdc7SCole Faust return nil, err 92*4947cdc7SCole Faust } 93*4947cdc7SCole Faust p := parser{in: in} 94*4947cdc7SCole Faust defer p.in.recover(&err) 95*4947cdc7SCole Faust 96*4947cdc7SCole Faust p.nextToken() // read first lookahead token 97*4947cdc7SCole Faust 98*4947cdc7SCole Faust // Use parseExpr, not parseTest, to permit an unparenthesized tuple. 99*4947cdc7SCole Faust expr = p.parseExpr(false) 100*4947cdc7SCole Faust 101*4947cdc7SCole Faust // A following newline (e.g. "f()\n") appears outside any brackets, 102*4947cdc7SCole Faust // on a non-blank line, and thus results in a NEWLINE token. 103*4947cdc7SCole Faust if p.tok == NEWLINE { 104*4947cdc7SCole Faust p.nextToken() 105*4947cdc7SCole Faust } 106*4947cdc7SCole Faust 107*4947cdc7SCole Faust if p.tok != EOF { 108*4947cdc7SCole Faust p.in.errorf(p.in.pos, "got %#v after expression, want EOF", p.tok) 109*4947cdc7SCole Faust } 110*4947cdc7SCole Faust p.assignComments(expr) 111*4947cdc7SCole Faust return expr, nil 112*4947cdc7SCole Faust} 113*4947cdc7SCole Faust 114*4947cdc7SCole Fausttype parser struct { 115*4947cdc7SCole Faust in *scanner 116*4947cdc7SCole Faust tok Token 117*4947cdc7SCole Faust tokval tokenValue 118*4947cdc7SCole Faust} 119*4947cdc7SCole Faust 120*4947cdc7SCole Faust// nextToken advances the scanner and returns the position of the 121*4947cdc7SCole Faust// previous token. 122*4947cdc7SCole Faustfunc (p *parser) nextToken() Position { 123*4947cdc7SCole Faust oldpos := p.tokval.pos 124*4947cdc7SCole Faust p.tok = p.in.nextToken(&p.tokval) 125*4947cdc7SCole Faust // enable to see the token stream 126*4947cdc7SCole Faust if debug { 127*4947cdc7SCole Faust log.Printf("nextToken: %-20s%+v\n", p.tok, p.tokval.pos) 128*4947cdc7SCole Faust } 129*4947cdc7SCole Faust return oldpos 130*4947cdc7SCole Faust} 131*4947cdc7SCole Faust 132*4947cdc7SCole Faust// file_input = (NEWLINE | stmt)* EOF 133*4947cdc7SCole Faustfunc (p *parser) parseFile() *File { 134*4947cdc7SCole Faust var stmts []Stmt 135*4947cdc7SCole Faust for p.tok != EOF { 136*4947cdc7SCole Faust if p.tok == NEWLINE { 137*4947cdc7SCole Faust p.nextToken() 138*4947cdc7SCole Faust continue 139*4947cdc7SCole Faust } 140*4947cdc7SCole Faust stmts = p.parseStmt(stmts) 141*4947cdc7SCole Faust } 142*4947cdc7SCole Faust return &File{Stmts: stmts} 143*4947cdc7SCole Faust} 144*4947cdc7SCole Faust 145*4947cdc7SCole Faustfunc (p *parser) parseStmt(stmts []Stmt) []Stmt { 146*4947cdc7SCole Faust if p.tok == DEF { 147*4947cdc7SCole Faust return append(stmts, p.parseDefStmt()) 148*4947cdc7SCole Faust } else if p.tok == IF { 149*4947cdc7SCole Faust return append(stmts, p.parseIfStmt()) 150*4947cdc7SCole Faust } else if p.tok == FOR { 151*4947cdc7SCole Faust return append(stmts, p.parseForStmt()) 152*4947cdc7SCole Faust } else if p.tok == WHILE { 153*4947cdc7SCole Faust return append(stmts, p.parseWhileStmt()) 154*4947cdc7SCole Faust } 155*4947cdc7SCole Faust return p.parseSimpleStmt(stmts, true) 156*4947cdc7SCole Faust} 157*4947cdc7SCole Faust 158*4947cdc7SCole Faustfunc (p *parser) parseDefStmt() Stmt { 159*4947cdc7SCole Faust defpos := p.nextToken() // consume DEF 160*4947cdc7SCole Faust id := p.parseIdent() 161*4947cdc7SCole Faust p.consume(LPAREN) 162*4947cdc7SCole Faust params := p.parseParams() 163*4947cdc7SCole Faust p.consume(RPAREN) 164*4947cdc7SCole Faust p.consume(COLON) 165*4947cdc7SCole Faust body := p.parseSuite() 166*4947cdc7SCole Faust return &DefStmt{ 167*4947cdc7SCole Faust Def: defpos, 168*4947cdc7SCole Faust Name: id, 169*4947cdc7SCole Faust Params: params, 170*4947cdc7SCole Faust Body: body, 171*4947cdc7SCole Faust } 172*4947cdc7SCole Faust} 173*4947cdc7SCole Faust 174*4947cdc7SCole Faustfunc (p *parser) parseIfStmt() Stmt { 175*4947cdc7SCole Faust ifpos := p.nextToken() // consume IF 176*4947cdc7SCole Faust cond := p.parseTest() 177*4947cdc7SCole Faust p.consume(COLON) 178*4947cdc7SCole Faust body := p.parseSuite() 179*4947cdc7SCole Faust ifStmt := &IfStmt{ 180*4947cdc7SCole Faust If: ifpos, 181*4947cdc7SCole Faust Cond: cond, 182*4947cdc7SCole Faust True: body, 183*4947cdc7SCole Faust } 184*4947cdc7SCole Faust tail := ifStmt 185*4947cdc7SCole Faust for p.tok == ELIF { 186*4947cdc7SCole Faust elifpos := p.nextToken() // consume ELIF 187*4947cdc7SCole Faust cond := p.parseTest() 188*4947cdc7SCole Faust p.consume(COLON) 189*4947cdc7SCole Faust body := p.parseSuite() 190*4947cdc7SCole Faust elif := &IfStmt{ 191*4947cdc7SCole Faust If: elifpos, 192*4947cdc7SCole Faust Cond: cond, 193*4947cdc7SCole Faust True: body, 194*4947cdc7SCole Faust } 195*4947cdc7SCole Faust tail.ElsePos = elifpos 196*4947cdc7SCole Faust tail.False = []Stmt{elif} 197*4947cdc7SCole Faust tail = elif 198*4947cdc7SCole Faust } 199*4947cdc7SCole Faust if p.tok == ELSE { 200*4947cdc7SCole Faust tail.ElsePos = p.nextToken() // consume ELSE 201*4947cdc7SCole Faust p.consume(COLON) 202*4947cdc7SCole Faust tail.False = p.parseSuite() 203*4947cdc7SCole Faust } 204*4947cdc7SCole Faust return ifStmt 205*4947cdc7SCole Faust} 206*4947cdc7SCole Faust 207*4947cdc7SCole Faustfunc (p *parser) parseForStmt() Stmt { 208*4947cdc7SCole Faust forpos := p.nextToken() // consume FOR 209*4947cdc7SCole Faust vars := p.parseForLoopVariables() 210*4947cdc7SCole Faust p.consume(IN) 211*4947cdc7SCole Faust x := p.parseExpr(false) 212*4947cdc7SCole Faust p.consume(COLON) 213*4947cdc7SCole Faust body := p.parseSuite() 214*4947cdc7SCole Faust return &ForStmt{ 215*4947cdc7SCole Faust For: forpos, 216*4947cdc7SCole Faust Vars: vars, 217*4947cdc7SCole Faust X: x, 218*4947cdc7SCole Faust Body: body, 219*4947cdc7SCole Faust } 220*4947cdc7SCole Faust} 221*4947cdc7SCole Faust 222*4947cdc7SCole Faustfunc (p *parser) parseWhileStmt() Stmt { 223*4947cdc7SCole Faust whilepos := p.nextToken() // consume WHILE 224*4947cdc7SCole Faust cond := p.parseTest() 225*4947cdc7SCole Faust p.consume(COLON) 226*4947cdc7SCole Faust body := p.parseSuite() 227*4947cdc7SCole Faust return &WhileStmt{ 228*4947cdc7SCole Faust While: whilepos, 229*4947cdc7SCole Faust Cond: cond, 230*4947cdc7SCole Faust Body: body, 231*4947cdc7SCole Faust } 232*4947cdc7SCole Faust} 233*4947cdc7SCole Faust 234*4947cdc7SCole Faust// Equivalent to 'exprlist' production in Python grammar. 235*4947cdc7SCole Faust// 236*4947cdc7SCole Faust// loop_variables = primary_with_suffix (COMMA primary_with_suffix)* COMMA? 237*4947cdc7SCole Faustfunc (p *parser) parseForLoopVariables() Expr { 238*4947cdc7SCole Faust // Avoid parseExpr because it would consume the IN token 239*4947cdc7SCole Faust // following x in "for x in y: ...". 240*4947cdc7SCole Faust v := p.parsePrimaryWithSuffix() 241*4947cdc7SCole Faust if p.tok != COMMA { 242*4947cdc7SCole Faust return v 243*4947cdc7SCole Faust } 244*4947cdc7SCole Faust 245*4947cdc7SCole Faust list := []Expr{v} 246*4947cdc7SCole Faust for p.tok == COMMA { 247*4947cdc7SCole Faust p.nextToken() 248*4947cdc7SCole Faust if terminatesExprList(p.tok) { 249*4947cdc7SCole Faust break 250*4947cdc7SCole Faust } 251*4947cdc7SCole Faust list = append(list, p.parsePrimaryWithSuffix()) 252*4947cdc7SCole Faust } 253*4947cdc7SCole Faust return &TupleExpr{List: list} 254*4947cdc7SCole Faust} 255*4947cdc7SCole Faust 256*4947cdc7SCole Faust// simple_stmt = small_stmt (SEMI small_stmt)* SEMI? NEWLINE 257*4947cdc7SCole Faust// In REPL mode, it does not consume the NEWLINE. 258*4947cdc7SCole Faustfunc (p *parser) parseSimpleStmt(stmts []Stmt, consumeNL bool) []Stmt { 259*4947cdc7SCole Faust for { 260*4947cdc7SCole Faust stmts = append(stmts, p.parseSmallStmt()) 261*4947cdc7SCole Faust if p.tok != SEMI { 262*4947cdc7SCole Faust break 263*4947cdc7SCole Faust } 264*4947cdc7SCole Faust p.nextToken() // consume SEMI 265*4947cdc7SCole Faust if p.tok == NEWLINE || p.tok == EOF { 266*4947cdc7SCole Faust break 267*4947cdc7SCole Faust } 268*4947cdc7SCole Faust } 269*4947cdc7SCole Faust // EOF without NEWLINE occurs in `if x: pass`, for example. 270*4947cdc7SCole Faust if p.tok != EOF && consumeNL { 271*4947cdc7SCole Faust p.consume(NEWLINE) 272*4947cdc7SCole Faust } 273*4947cdc7SCole Faust 274*4947cdc7SCole Faust return stmts 275*4947cdc7SCole Faust} 276*4947cdc7SCole Faust 277*4947cdc7SCole Faust// small_stmt = RETURN expr? 278*4947cdc7SCole Faust// | PASS | BREAK | CONTINUE 279*4947cdc7SCole Faust// | LOAD ... 280*4947cdc7SCole Faust// | expr ('=' | '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=') expr // assign 281*4947cdc7SCole Faust// | expr 282*4947cdc7SCole Faustfunc (p *parser) parseSmallStmt() Stmt { 283*4947cdc7SCole Faust switch p.tok { 284*4947cdc7SCole Faust case RETURN: 285*4947cdc7SCole Faust pos := p.nextToken() // consume RETURN 286*4947cdc7SCole Faust var result Expr 287*4947cdc7SCole Faust if p.tok != EOF && p.tok != NEWLINE && p.tok != SEMI { 288*4947cdc7SCole Faust result = p.parseExpr(false) 289*4947cdc7SCole Faust } 290*4947cdc7SCole Faust return &ReturnStmt{Return: pos, Result: result} 291*4947cdc7SCole Faust 292*4947cdc7SCole Faust case BREAK, CONTINUE, PASS: 293*4947cdc7SCole Faust tok := p.tok 294*4947cdc7SCole Faust pos := p.nextToken() // consume it 295*4947cdc7SCole Faust return &BranchStmt{Token: tok, TokenPos: pos} 296*4947cdc7SCole Faust 297*4947cdc7SCole Faust case LOAD: 298*4947cdc7SCole Faust return p.parseLoadStmt() 299*4947cdc7SCole Faust } 300*4947cdc7SCole Faust 301*4947cdc7SCole Faust // Assignment 302*4947cdc7SCole Faust x := p.parseExpr(false) 303*4947cdc7SCole Faust switch p.tok { 304*4947cdc7SCole Faust case EQ, PLUS_EQ, MINUS_EQ, STAR_EQ, SLASH_EQ, SLASHSLASH_EQ, PERCENT_EQ, AMP_EQ, PIPE_EQ, CIRCUMFLEX_EQ, LTLT_EQ, GTGT_EQ: 305*4947cdc7SCole Faust op := p.tok 306*4947cdc7SCole Faust pos := p.nextToken() // consume op 307*4947cdc7SCole Faust rhs := p.parseExpr(false) 308*4947cdc7SCole Faust return &AssignStmt{OpPos: pos, Op: op, LHS: x, RHS: rhs} 309*4947cdc7SCole Faust } 310*4947cdc7SCole Faust 311*4947cdc7SCole Faust // Expression statement (e.g. function call, doc string). 312*4947cdc7SCole Faust return &ExprStmt{X: x} 313*4947cdc7SCole Faust} 314*4947cdc7SCole Faust 315*4947cdc7SCole Faust// stmt = LOAD '(' STRING {',' (IDENT '=')? STRING} [','] ')' 316*4947cdc7SCole Faustfunc (p *parser) parseLoadStmt() *LoadStmt { 317*4947cdc7SCole Faust loadPos := p.nextToken() // consume LOAD 318*4947cdc7SCole Faust lparen := p.consume(LPAREN) 319*4947cdc7SCole Faust 320*4947cdc7SCole Faust if p.tok != STRING { 321*4947cdc7SCole Faust p.in.errorf(p.in.pos, "first operand of load statement must be a string literal") 322*4947cdc7SCole Faust } 323*4947cdc7SCole Faust module := p.parsePrimary().(*Literal) 324*4947cdc7SCole Faust 325*4947cdc7SCole Faust var from, to []*Ident 326*4947cdc7SCole Faust for p.tok != RPAREN && p.tok != EOF { 327*4947cdc7SCole Faust p.consume(COMMA) 328*4947cdc7SCole Faust if p.tok == RPAREN { 329*4947cdc7SCole Faust break // allow trailing comma 330*4947cdc7SCole Faust } 331*4947cdc7SCole Faust switch p.tok { 332*4947cdc7SCole Faust case STRING: 333*4947cdc7SCole Faust // load("module", "id") 334*4947cdc7SCole Faust // To name is same as original. 335*4947cdc7SCole Faust lit := p.parsePrimary().(*Literal) 336*4947cdc7SCole Faust id := &Ident{ 337*4947cdc7SCole Faust NamePos: lit.TokenPos.add(`"`), 338*4947cdc7SCole Faust Name: lit.Value.(string), 339*4947cdc7SCole Faust } 340*4947cdc7SCole Faust to = append(to, id) 341*4947cdc7SCole Faust from = append(from, id) 342*4947cdc7SCole Faust 343*4947cdc7SCole Faust case IDENT: 344*4947cdc7SCole Faust // load("module", to="from") 345*4947cdc7SCole Faust id := p.parseIdent() 346*4947cdc7SCole Faust to = append(to, id) 347*4947cdc7SCole Faust if p.tok != EQ { 348*4947cdc7SCole Faust p.in.errorf(p.in.pos, `load operand must be "%[1]s" or %[1]s="originalname" (want '=' after %[1]s)`, id.Name) 349*4947cdc7SCole Faust } 350*4947cdc7SCole Faust p.consume(EQ) 351*4947cdc7SCole Faust if p.tok != STRING { 352*4947cdc7SCole Faust p.in.errorf(p.in.pos, `original name of loaded symbol must be quoted: %s="originalname"`, id.Name) 353*4947cdc7SCole Faust } 354*4947cdc7SCole Faust lit := p.parsePrimary().(*Literal) 355*4947cdc7SCole Faust from = append(from, &Ident{ 356*4947cdc7SCole Faust NamePos: lit.TokenPos.add(`"`), 357*4947cdc7SCole Faust Name: lit.Value.(string), 358*4947cdc7SCole Faust }) 359*4947cdc7SCole Faust 360*4947cdc7SCole Faust case RPAREN: 361*4947cdc7SCole Faust p.in.errorf(p.in.pos, "trailing comma in load statement") 362*4947cdc7SCole Faust 363*4947cdc7SCole Faust default: 364*4947cdc7SCole Faust p.in.errorf(p.in.pos, `load operand must be "name" or localname="name" (got %#v)`, p.tok) 365*4947cdc7SCole Faust } 366*4947cdc7SCole Faust } 367*4947cdc7SCole Faust rparen := p.consume(RPAREN) 368*4947cdc7SCole Faust 369*4947cdc7SCole Faust if len(to) == 0 { 370*4947cdc7SCole Faust p.in.errorf(lparen, "load statement must import at least 1 symbol") 371*4947cdc7SCole Faust } 372*4947cdc7SCole Faust return &LoadStmt{ 373*4947cdc7SCole Faust Load: loadPos, 374*4947cdc7SCole Faust Module: module, 375*4947cdc7SCole Faust To: to, 376*4947cdc7SCole Faust From: from, 377*4947cdc7SCole Faust Rparen: rparen, 378*4947cdc7SCole Faust } 379*4947cdc7SCole Faust} 380*4947cdc7SCole Faust 381*4947cdc7SCole Faust// suite is typically what follows a COLON (e.g. after DEF or FOR). 382*4947cdc7SCole Faust// suite = simple_stmt | NEWLINE INDENT stmt+ OUTDENT 383*4947cdc7SCole Faustfunc (p *parser) parseSuite() []Stmt { 384*4947cdc7SCole Faust if p.tok == NEWLINE { 385*4947cdc7SCole Faust p.nextToken() // consume NEWLINE 386*4947cdc7SCole Faust p.consume(INDENT) 387*4947cdc7SCole Faust var stmts []Stmt 388*4947cdc7SCole Faust for p.tok != OUTDENT && p.tok != EOF { 389*4947cdc7SCole Faust stmts = p.parseStmt(stmts) 390*4947cdc7SCole Faust } 391*4947cdc7SCole Faust p.consume(OUTDENT) 392*4947cdc7SCole Faust return stmts 393*4947cdc7SCole Faust } 394*4947cdc7SCole Faust 395*4947cdc7SCole Faust return p.parseSimpleStmt(nil, true) 396*4947cdc7SCole Faust} 397*4947cdc7SCole Faust 398*4947cdc7SCole Faustfunc (p *parser) parseIdent() *Ident { 399*4947cdc7SCole Faust if p.tok != IDENT { 400*4947cdc7SCole Faust p.in.error(p.in.pos, "not an identifier") 401*4947cdc7SCole Faust } 402*4947cdc7SCole Faust id := &Ident{ 403*4947cdc7SCole Faust NamePos: p.tokval.pos, 404*4947cdc7SCole Faust Name: p.tokval.raw, 405*4947cdc7SCole Faust } 406*4947cdc7SCole Faust p.nextToken() 407*4947cdc7SCole Faust return id 408*4947cdc7SCole Faust} 409*4947cdc7SCole Faust 410*4947cdc7SCole Faustfunc (p *parser) consume(t Token) Position { 411*4947cdc7SCole Faust if p.tok != t { 412*4947cdc7SCole Faust p.in.errorf(p.in.pos, "got %#v, want %#v", p.tok, t) 413*4947cdc7SCole Faust } 414*4947cdc7SCole Faust return p.nextToken() 415*4947cdc7SCole Faust} 416*4947cdc7SCole Faust 417*4947cdc7SCole Faust// params = (param COMMA)* param COMMA? 418*4947cdc7SCole Faust// | 419*4947cdc7SCole Faust// 420*4947cdc7SCole Faust// param = IDENT 421*4947cdc7SCole Faust// | IDENT EQ test 422*4947cdc7SCole Faust// | STAR 423*4947cdc7SCole Faust// | STAR IDENT 424*4947cdc7SCole Faust// | STARSTAR IDENT 425*4947cdc7SCole Faust// 426*4947cdc7SCole Faust// parseParams parses a parameter list. The resulting expressions are of the form: 427*4947cdc7SCole Faust// 428*4947cdc7SCole Faust// *Ident x 429*4947cdc7SCole Faust// *Binary{Op: EQ, X: *Ident, Y: Expr} x=y 430*4947cdc7SCole Faust// *Unary{Op: STAR} * 431*4947cdc7SCole Faust// *Unary{Op: STAR, X: *Ident} *args 432*4947cdc7SCole Faust// *Unary{Op: STARSTAR, X: *Ident} **kwargs 433*4947cdc7SCole Faustfunc (p *parser) parseParams() []Expr { 434*4947cdc7SCole Faust var params []Expr 435*4947cdc7SCole Faust for p.tok != RPAREN && p.tok != COLON && p.tok != EOF { 436*4947cdc7SCole Faust if len(params) > 0 { 437*4947cdc7SCole Faust p.consume(COMMA) 438*4947cdc7SCole Faust } 439*4947cdc7SCole Faust if p.tok == RPAREN { 440*4947cdc7SCole Faust break 441*4947cdc7SCole Faust } 442*4947cdc7SCole Faust 443*4947cdc7SCole Faust // * or *args or **kwargs 444*4947cdc7SCole Faust if p.tok == STAR || p.tok == STARSTAR { 445*4947cdc7SCole Faust op := p.tok 446*4947cdc7SCole Faust pos := p.nextToken() 447*4947cdc7SCole Faust var x Expr 448*4947cdc7SCole Faust if op == STARSTAR || p.tok == IDENT { 449*4947cdc7SCole Faust x = p.parseIdent() 450*4947cdc7SCole Faust } 451*4947cdc7SCole Faust params = append(params, &UnaryExpr{ 452*4947cdc7SCole Faust OpPos: pos, 453*4947cdc7SCole Faust Op: op, 454*4947cdc7SCole Faust X: x, 455*4947cdc7SCole Faust }) 456*4947cdc7SCole Faust continue 457*4947cdc7SCole Faust } 458*4947cdc7SCole Faust 459*4947cdc7SCole Faust // IDENT 460*4947cdc7SCole Faust // IDENT = test 461*4947cdc7SCole Faust id := p.parseIdent() 462*4947cdc7SCole Faust if p.tok == EQ { // default value 463*4947cdc7SCole Faust eq := p.nextToken() 464*4947cdc7SCole Faust dflt := p.parseTest() 465*4947cdc7SCole Faust params = append(params, &BinaryExpr{ 466*4947cdc7SCole Faust X: id, 467*4947cdc7SCole Faust OpPos: eq, 468*4947cdc7SCole Faust Op: EQ, 469*4947cdc7SCole Faust Y: dflt, 470*4947cdc7SCole Faust }) 471*4947cdc7SCole Faust continue 472*4947cdc7SCole Faust } 473*4947cdc7SCole Faust 474*4947cdc7SCole Faust params = append(params, id) 475*4947cdc7SCole Faust } 476*4947cdc7SCole Faust return params 477*4947cdc7SCole Faust} 478*4947cdc7SCole Faust 479*4947cdc7SCole Faust// parseExpr parses an expression, possible consisting of a 480*4947cdc7SCole Faust// comma-separated list of 'test' expressions. 481*4947cdc7SCole Faust// 482*4947cdc7SCole Faust// In many cases we must use parseTest to avoid ambiguity such as 483*4947cdc7SCole Faust// f(x, y) vs. f((x, y)). 484*4947cdc7SCole Faustfunc (p *parser) parseExpr(inParens bool) Expr { 485*4947cdc7SCole Faust x := p.parseTest() 486*4947cdc7SCole Faust if p.tok != COMMA { 487*4947cdc7SCole Faust return x 488*4947cdc7SCole Faust } 489*4947cdc7SCole Faust 490*4947cdc7SCole Faust // tuple 491*4947cdc7SCole Faust exprs := p.parseExprs([]Expr{x}, inParens) 492*4947cdc7SCole Faust return &TupleExpr{List: exprs} 493*4947cdc7SCole Faust} 494*4947cdc7SCole Faust 495*4947cdc7SCole Faust// parseExprs parses a comma-separated list of expressions, starting with the comma. 496*4947cdc7SCole Faust// It is used to parse tuples and list elements. 497*4947cdc7SCole Faust// expr_list = (',' expr)* ','? 498*4947cdc7SCole Faustfunc (p *parser) parseExprs(exprs []Expr, allowTrailingComma bool) []Expr { 499*4947cdc7SCole Faust for p.tok == COMMA { 500*4947cdc7SCole Faust pos := p.nextToken() 501*4947cdc7SCole Faust if terminatesExprList(p.tok) { 502*4947cdc7SCole Faust if !allowTrailingComma { 503*4947cdc7SCole Faust p.in.error(pos, "unparenthesized tuple with trailing comma") 504*4947cdc7SCole Faust } 505*4947cdc7SCole Faust break 506*4947cdc7SCole Faust } 507*4947cdc7SCole Faust exprs = append(exprs, p.parseTest()) 508*4947cdc7SCole Faust } 509*4947cdc7SCole Faust return exprs 510*4947cdc7SCole Faust} 511*4947cdc7SCole Faust 512*4947cdc7SCole Faust// parseTest parses a 'test', a single-component expression. 513*4947cdc7SCole Faustfunc (p *parser) parseTest() Expr { 514*4947cdc7SCole Faust if p.tok == LAMBDA { 515*4947cdc7SCole Faust return p.parseLambda(true) 516*4947cdc7SCole Faust } 517*4947cdc7SCole Faust 518*4947cdc7SCole Faust x := p.parseTestPrec(0) 519*4947cdc7SCole Faust 520*4947cdc7SCole Faust // conditional expression (t IF cond ELSE f) 521*4947cdc7SCole Faust if p.tok == IF { 522*4947cdc7SCole Faust ifpos := p.nextToken() 523*4947cdc7SCole Faust cond := p.parseTestPrec(0) 524*4947cdc7SCole Faust if p.tok != ELSE { 525*4947cdc7SCole Faust p.in.error(ifpos, "conditional expression without else clause") 526*4947cdc7SCole Faust } 527*4947cdc7SCole Faust elsepos := p.nextToken() 528*4947cdc7SCole Faust else_ := p.parseTest() 529*4947cdc7SCole Faust return &CondExpr{If: ifpos, Cond: cond, True: x, ElsePos: elsepos, False: else_} 530*4947cdc7SCole Faust } 531*4947cdc7SCole Faust 532*4947cdc7SCole Faust return x 533*4947cdc7SCole Faust} 534*4947cdc7SCole Faust 535*4947cdc7SCole Faust// parseTestNoCond parses a a single-component expression without 536*4947cdc7SCole Faust// consuming a trailing 'if expr else expr'. 537*4947cdc7SCole Faustfunc (p *parser) parseTestNoCond() Expr { 538*4947cdc7SCole Faust if p.tok == LAMBDA { 539*4947cdc7SCole Faust return p.parseLambda(false) 540*4947cdc7SCole Faust } 541*4947cdc7SCole Faust return p.parseTestPrec(0) 542*4947cdc7SCole Faust} 543*4947cdc7SCole Faust 544*4947cdc7SCole Faust// parseLambda parses a lambda expression. 545*4947cdc7SCole Faust// The allowCond flag allows the body to be an 'a if b else c' conditional. 546*4947cdc7SCole Faustfunc (p *parser) parseLambda(allowCond bool) Expr { 547*4947cdc7SCole Faust lambda := p.nextToken() 548*4947cdc7SCole Faust var params []Expr 549*4947cdc7SCole Faust if p.tok != COLON { 550*4947cdc7SCole Faust params = p.parseParams() 551*4947cdc7SCole Faust } 552*4947cdc7SCole Faust p.consume(COLON) 553*4947cdc7SCole Faust 554*4947cdc7SCole Faust var body Expr 555*4947cdc7SCole Faust if allowCond { 556*4947cdc7SCole Faust body = p.parseTest() 557*4947cdc7SCole Faust } else { 558*4947cdc7SCole Faust body = p.parseTestNoCond() 559*4947cdc7SCole Faust } 560*4947cdc7SCole Faust 561*4947cdc7SCole Faust return &LambdaExpr{ 562*4947cdc7SCole Faust Lambda: lambda, 563*4947cdc7SCole Faust Params: params, 564*4947cdc7SCole Faust Body: body, 565*4947cdc7SCole Faust } 566*4947cdc7SCole Faust} 567*4947cdc7SCole Faust 568*4947cdc7SCole Faustfunc (p *parser) parseTestPrec(prec int) Expr { 569*4947cdc7SCole Faust if prec >= len(preclevels) { 570*4947cdc7SCole Faust return p.parsePrimaryWithSuffix() 571*4947cdc7SCole Faust } 572*4947cdc7SCole Faust 573*4947cdc7SCole Faust // expr = NOT expr 574*4947cdc7SCole Faust if p.tok == NOT && prec == int(precedence[NOT]) { 575*4947cdc7SCole Faust pos := p.nextToken() 576*4947cdc7SCole Faust x := p.parseTestPrec(prec) 577*4947cdc7SCole Faust return &UnaryExpr{ 578*4947cdc7SCole Faust OpPos: pos, 579*4947cdc7SCole Faust Op: NOT, 580*4947cdc7SCole Faust X: x, 581*4947cdc7SCole Faust } 582*4947cdc7SCole Faust } 583*4947cdc7SCole Faust 584*4947cdc7SCole Faust return p.parseBinopExpr(prec) 585*4947cdc7SCole Faust} 586*4947cdc7SCole Faust 587*4947cdc7SCole Faust// expr = test (OP test)* 588*4947cdc7SCole Faust// Uses precedence climbing; see http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm#climbing. 589*4947cdc7SCole Faustfunc (p *parser) parseBinopExpr(prec int) Expr { 590*4947cdc7SCole Faust x := p.parseTestPrec(prec + 1) 591*4947cdc7SCole Faust for first := true; ; first = false { 592*4947cdc7SCole Faust if p.tok == NOT { 593*4947cdc7SCole Faust p.nextToken() // consume NOT 594*4947cdc7SCole Faust // In this context, NOT must be followed by IN. 595*4947cdc7SCole Faust // Replace NOT IN by a single NOT_IN token. 596*4947cdc7SCole Faust if p.tok != IN { 597*4947cdc7SCole Faust p.in.errorf(p.in.pos, "got %#v, want in", p.tok) 598*4947cdc7SCole Faust } 599*4947cdc7SCole Faust p.tok = NOT_IN 600*4947cdc7SCole Faust } 601*4947cdc7SCole Faust 602*4947cdc7SCole Faust // Binary operator of specified precedence? 603*4947cdc7SCole Faust opprec := int(precedence[p.tok]) 604*4947cdc7SCole Faust if opprec < prec { 605*4947cdc7SCole Faust return x 606*4947cdc7SCole Faust } 607*4947cdc7SCole Faust 608*4947cdc7SCole Faust // Comparisons are non-associative. 609*4947cdc7SCole Faust if !first && opprec == int(precedence[EQL]) { 610*4947cdc7SCole Faust p.in.errorf(p.in.pos, "%s does not associate with %s (use parens)", 611*4947cdc7SCole Faust x.(*BinaryExpr).Op, p.tok) 612*4947cdc7SCole Faust } 613*4947cdc7SCole Faust 614*4947cdc7SCole Faust op := p.tok 615*4947cdc7SCole Faust pos := p.nextToken() 616*4947cdc7SCole Faust y := p.parseTestPrec(opprec + 1) 617*4947cdc7SCole Faust x = &BinaryExpr{OpPos: pos, Op: op, X: x, Y: y} 618*4947cdc7SCole Faust } 619*4947cdc7SCole Faust} 620*4947cdc7SCole Faust 621*4947cdc7SCole Faust// precedence maps each operator to its precedence (0-7), or -1 for other tokens. 622*4947cdc7SCole Faustvar precedence [maxToken]int8 623*4947cdc7SCole Faust 624*4947cdc7SCole Faust// preclevels groups operators of equal precedence. 625*4947cdc7SCole Faust// Comparisons are nonassociative; other binary operators associate to the left. 626*4947cdc7SCole Faust// Unary MINUS, unary PLUS, and TILDE have higher precedence so are handled in parsePrimary. 627*4947cdc7SCole Faust// See https://github.com/google/starlark-go/blob/master/doc/spec.md#binary-operators 628*4947cdc7SCole Faustvar preclevels = [...][]Token{ 629*4947cdc7SCole Faust {OR}, // or 630*4947cdc7SCole Faust {AND}, // and 631*4947cdc7SCole Faust {NOT}, // not (unary) 632*4947cdc7SCole Faust {EQL, NEQ, LT, GT, LE, GE, IN, NOT_IN}, // == != < > <= >= in not in 633*4947cdc7SCole Faust {PIPE}, // | 634*4947cdc7SCole Faust {CIRCUMFLEX}, // ^ 635*4947cdc7SCole Faust {AMP}, // & 636*4947cdc7SCole Faust {LTLT, GTGT}, // << >> 637*4947cdc7SCole Faust {MINUS, PLUS}, // - 638*4947cdc7SCole Faust {STAR, PERCENT, SLASH, SLASHSLASH}, // * % / // 639*4947cdc7SCole Faust} 640*4947cdc7SCole Faust 641*4947cdc7SCole Faustfunc init() { 642*4947cdc7SCole Faust // populate precedence table 643*4947cdc7SCole Faust for i := range precedence { 644*4947cdc7SCole Faust precedence[i] = -1 645*4947cdc7SCole Faust } 646*4947cdc7SCole Faust for level, tokens := range preclevels { 647*4947cdc7SCole Faust for _, tok := range tokens { 648*4947cdc7SCole Faust precedence[tok] = int8(level) 649*4947cdc7SCole Faust } 650*4947cdc7SCole Faust } 651*4947cdc7SCole Faust} 652*4947cdc7SCole Faust 653*4947cdc7SCole Faust// primary_with_suffix = primary 654*4947cdc7SCole Faust// | primary '.' IDENT 655*4947cdc7SCole Faust// | primary slice_suffix 656*4947cdc7SCole Faust// | primary call_suffix 657*4947cdc7SCole Faustfunc (p *parser) parsePrimaryWithSuffix() Expr { 658*4947cdc7SCole Faust x := p.parsePrimary() 659*4947cdc7SCole Faust for { 660*4947cdc7SCole Faust switch p.tok { 661*4947cdc7SCole Faust case DOT: 662*4947cdc7SCole Faust dot := p.nextToken() 663*4947cdc7SCole Faust id := p.parseIdent() 664*4947cdc7SCole Faust x = &DotExpr{Dot: dot, X: x, Name: id} 665*4947cdc7SCole Faust case LBRACK: 666*4947cdc7SCole Faust x = p.parseSliceSuffix(x) 667*4947cdc7SCole Faust case LPAREN: 668*4947cdc7SCole Faust x = p.parseCallSuffix(x) 669*4947cdc7SCole Faust default: 670*4947cdc7SCole Faust return x 671*4947cdc7SCole Faust } 672*4947cdc7SCole Faust } 673*4947cdc7SCole Faust} 674*4947cdc7SCole Faust 675*4947cdc7SCole Faust// slice_suffix = '[' expr? ':' expr? ':' expr? ']' 676*4947cdc7SCole Faustfunc (p *parser) parseSliceSuffix(x Expr) Expr { 677*4947cdc7SCole Faust lbrack := p.nextToken() 678*4947cdc7SCole Faust var lo, hi, step Expr 679*4947cdc7SCole Faust if p.tok != COLON { 680*4947cdc7SCole Faust y := p.parseExpr(false) 681*4947cdc7SCole Faust 682*4947cdc7SCole Faust // index x[y] 683*4947cdc7SCole Faust if p.tok == RBRACK { 684*4947cdc7SCole Faust rbrack := p.nextToken() 685*4947cdc7SCole Faust return &IndexExpr{X: x, Lbrack: lbrack, Y: y, Rbrack: rbrack} 686*4947cdc7SCole Faust } 687*4947cdc7SCole Faust 688*4947cdc7SCole Faust lo = y 689*4947cdc7SCole Faust } 690*4947cdc7SCole Faust 691*4947cdc7SCole Faust // slice or substring x[lo:hi:step] 692*4947cdc7SCole Faust if p.tok == COLON { 693*4947cdc7SCole Faust p.nextToken() 694*4947cdc7SCole Faust if p.tok != COLON && p.tok != RBRACK { 695*4947cdc7SCole Faust hi = p.parseTest() 696*4947cdc7SCole Faust } 697*4947cdc7SCole Faust } 698*4947cdc7SCole Faust if p.tok == COLON { 699*4947cdc7SCole Faust p.nextToken() 700*4947cdc7SCole Faust if p.tok != RBRACK { 701*4947cdc7SCole Faust step = p.parseTest() 702*4947cdc7SCole Faust } 703*4947cdc7SCole Faust } 704*4947cdc7SCole Faust rbrack := p.consume(RBRACK) 705*4947cdc7SCole Faust return &SliceExpr{X: x, Lbrack: lbrack, Lo: lo, Hi: hi, Step: step, Rbrack: rbrack} 706*4947cdc7SCole Faust} 707*4947cdc7SCole Faust 708*4947cdc7SCole Faust// call_suffix = '(' arg_list? ')' 709*4947cdc7SCole Faustfunc (p *parser) parseCallSuffix(fn Expr) Expr { 710*4947cdc7SCole Faust lparen := p.consume(LPAREN) 711*4947cdc7SCole Faust var rparen Position 712*4947cdc7SCole Faust var args []Expr 713*4947cdc7SCole Faust if p.tok == RPAREN { 714*4947cdc7SCole Faust rparen = p.nextToken() 715*4947cdc7SCole Faust } else { 716*4947cdc7SCole Faust args = p.parseArgs() 717*4947cdc7SCole Faust rparen = p.consume(RPAREN) 718*4947cdc7SCole Faust } 719*4947cdc7SCole Faust return &CallExpr{Fn: fn, Lparen: lparen, Args: args, Rparen: rparen} 720*4947cdc7SCole Faust} 721*4947cdc7SCole Faust 722*4947cdc7SCole Faust// parseArgs parses a list of actual parameter values (arguments). 723*4947cdc7SCole Faust// It mirrors the structure of parseParams. 724*4947cdc7SCole Faust// arg_list = ((arg COMMA)* arg COMMA?)? 725*4947cdc7SCole Faustfunc (p *parser) parseArgs() []Expr { 726*4947cdc7SCole Faust var args []Expr 727*4947cdc7SCole Faust for p.tok != RPAREN && p.tok != EOF { 728*4947cdc7SCole Faust if len(args) > 0 { 729*4947cdc7SCole Faust p.consume(COMMA) 730*4947cdc7SCole Faust } 731*4947cdc7SCole Faust if p.tok == RPAREN { 732*4947cdc7SCole Faust break 733*4947cdc7SCole Faust } 734*4947cdc7SCole Faust 735*4947cdc7SCole Faust // *args or **kwargs 736*4947cdc7SCole Faust if p.tok == STAR || p.tok == STARSTAR { 737*4947cdc7SCole Faust op := p.tok 738*4947cdc7SCole Faust pos := p.nextToken() 739*4947cdc7SCole Faust x := p.parseTest() 740*4947cdc7SCole Faust args = append(args, &UnaryExpr{ 741*4947cdc7SCole Faust OpPos: pos, 742*4947cdc7SCole Faust Op: op, 743*4947cdc7SCole Faust X: x, 744*4947cdc7SCole Faust }) 745*4947cdc7SCole Faust continue 746*4947cdc7SCole Faust } 747*4947cdc7SCole Faust 748*4947cdc7SCole Faust // We use a different strategy from Bazel here to stay within LL(1). 749*4947cdc7SCole Faust // Instead of looking ahead two tokens (IDENT, EQ) we parse 750*4947cdc7SCole Faust // 'test = test' then check that the first was an IDENT. 751*4947cdc7SCole Faust x := p.parseTest() 752*4947cdc7SCole Faust 753*4947cdc7SCole Faust if p.tok == EQ { 754*4947cdc7SCole Faust // name = value 755*4947cdc7SCole Faust if _, ok := x.(*Ident); !ok { 756*4947cdc7SCole Faust p.in.errorf(p.in.pos, "keyword argument must have form name=expr") 757*4947cdc7SCole Faust } 758*4947cdc7SCole Faust eq := p.nextToken() 759*4947cdc7SCole Faust y := p.parseTest() 760*4947cdc7SCole Faust x = &BinaryExpr{ 761*4947cdc7SCole Faust X: x, 762*4947cdc7SCole Faust OpPos: eq, 763*4947cdc7SCole Faust Op: EQ, 764*4947cdc7SCole Faust Y: y, 765*4947cdc7SCole Faust } 766*4947cdc7SCole Faust } 767*4947cdc7SCole Faust 768*4947cdc7SCole Faust args = append(args, x) 769*4947cdc7SCole Faust } 770*4947cdc7SCole Faust return args 771*4947cdc7SCole Faust} 772*4947cdc7SCole Faust 773*4947cdc7SCole Faust// primary = IDENT 774*4947cdc7SCole Faust// | INT | FLOAT | STRING | BYTES 775*4947cdc7SCole Faust// | '[' ... // list literal or comprehension 776*4947cdc7SCole Faust// | '{' ... // dict literal or comprehension 777*4947cdc7SCole Faust// | '(' ... // tuple or parenthesized expression 778*4947cdc7SCole Faust// | ('-'|'+'|'~') primary_with_suffix 779*4947cdc7SCole Faustfunc (p *parser) parsePrimary() Expr { 780*4947cdc7SCole Faust switch p.tok { 781*4947cdc7SCole Faust case IDENT: 782*4947cdc7SCole Faust return p.parseIdent() 783*4947cdc7SCole Faust 784*4947cdc7SCole Faust case INT, FLOAT, STRING, BYTES: 785*4947cdc7SCole Faust var val interface{} 786*4947cdc7SCole Faust tok := p.tok 787*4947cdc7SCole Faust switch tok { 788*4947cdc7SCole Faust case INT: 789*4947cdc7SCole Faust if p.tokval.bigInt != nil { 790*4947cdc7SCole Faust val = p.tokval.bigInt 791*4947cdc7SCole Faust } else { 792*4947cdc7SCole Faust val = p.tokval.int 793*4947cdc7SCole Faust } 794*4947cdc7SCole Faust case FLOAT: 795*4947cdc7SCole Faust val = p.tokval.float 796*4947cdc7SCole Faust case STRING, BYTES: 797*4947cdc7SCole Faust val = p.tokval.string 798*4947cdc7SCole Faust } 799*4947cdc7SCole Faust raw := p.tokval.raw 800*4947cdc7SCole Faust pos := p.nextToken() 801*4947cdc7SCole Faust return &Literal{Token: tok, TokenPos: pos, Raw: raw, Value: val} 802*4947cdc7SCole Faust 803*4947cdc7SCole Faust case LBRACK: 804*4947cdc7SCole Faust return p.parseList() 805*4947cdc7SCole Faust 806*4947cdc7SCole Faust case LBRACE: 807*4947cdc7SCole Faust return p.parseDict() 808*4947cdc7SCole Faust 809*4947cdc7SCole Faust case LPAREN: 810*4947cdc7SCole Faust lparen := p.nextToken() 811*4947cdc7SCole Faust if p.tok == RPAREN { 812*4947cdc7SCole Faust // empty tuple 813*4947cdc7SCole Faust rparen := p.nextToken() 814*4947cdc7SCole Faust return &TupleExpr{Lparen: lparen, Rparen: rparen} 815*4947cdc7SCole Faust } 816*4947cdc7SCole Faust e := p.parseExpr(true) // allow trailing comma 817*4947cdc7SCole Faust rparen := p.consume(RPAREN) 818*4947cdc7SCole Faust return &ParenExpr{ 819*4947cdc7SCole Faust Lparen: lparen, 820*4947cdc7SCole Faust X: e, 821*4947cdc7SCole Faust Rparen: rparen, 822*4947cdc7SCole Faust } 823*4947cdc7SCole Faust 824*4947cdc7SCole Faust case MINUS, PLUS, TILDE: // unary 825*4947cdc7SCole Faust tok := p.tok 826*4947cdc7SCole Faust pos := p.nextToken() 827*4947cdc7SCole Faust x := p.parsePrimaryWithSuffix() 828*4947cdc7SCole Faust return &UnaryExpr{ 829*4947cdc7SCole Faust OpPos: pos, 830*4947cdc7SCole Faust Op: tok, 831*4947cdc7SCole Faust X: x, 832*4947cdc7SCole Faust } 833*4947cdc7SCole Faust } 834*4947cdc7SCole Faust p.in.errorf(p.in.pos, "got %#v, want primary expression", p.tok) 835*4947cdc7SCole Faust panic("unreachable") 836*4947cdc7SCole Faust} 837*4947cdc7SCole Faust 838*4947cdc7SCole Faust// list = '[' ']' 839*4947cdc7SCole Faust// | '[' expr ']' 840*4947cdc7SCole Faust// | '[' expr expr_list ']' 841*4947cdc7SCole Faust// | '[' expr (FOR loop_variables IN expr)+ ']' 842*4947cdc7SCole Faustfunc (p *parser) parseList() Expr { 843*4947cdc7SCole Faust lbrack := p.nextToken() 844*4947cdc7SCole Faust if p.tok == RBRACK { 845*4947cdc7SCole Faust // empty List 846*4947cdc7SCole Faust rbrack := p.nextToken() 847*4947cdc7SCole Faust return &ListExpr{Lbrack: lbrack, Rbrack: rbrack} 848*4947cdc7SCole Faust } 849*4947cdc7SCole Faust 850*4947cdc7SCole Faust x := p.parseTest() 851*4947cdc7SCole Faust 852*4947cdc7SCole Faust if p.tok == FOR { 853*4947cdc7SCole Faust // list comprehension 854*4947cdc7SCole Faust return p.parseComprehensionSuffix(lbrack, x, RBRACK) 855*4947cdc7SCole Faust } 856*4947cdc7SCole Faust 857*4947cdc7SCole Faust exprs := []Expr{x} 858*4947cdc7SCole Faust if p.tok == COMMA { 859*4947cdc7SCole Faust // multi-item list literal 860*4947cdc7SCole Faust exprs = p.parseExprs(exprs, true) // allow trailing comma 861*4947cdc7SCole Faust } 862*4947cdc7SCole Faust 863*4947cdc7SCole Faust rbrack := p.consume(RBRACK) 864*4947cdc7SCole Faust return &ListExpr{Lbrack: lbrack, List: exprs, Rbrack: rbrack} 865*4947cdc7SCole Faust} 866*4947cdc7SCole Faust 867*4947cdc7SCole Faust// dict = '{' '}' 868*4947cdc7SCole Faust// | '{' dict_entry_list '}' 869*4947cdc7SCole Faust// | '{' dict_entry FOR loop_variables IN expr '}' 870*4947cdc7SCole Faustfunc (p *parser) parseDict() Expr { 871*4947cdc7SCole Faust lbrace := p.nextToken() 872*4947cdc7SCole Faust if p.tok == RBRACE { 873*4947cdc7SCole Faust // empty dict 874*4947cdc7SCole Faust rbrace := p.nextToken() 875*4947cdc7SCole Faust return &DictExpr{Lbrace: lbrace, Rbrace: rbrace} 876*4947cdc7SCole Faust } 877*4947cdc7SCole Faust 878*4947cdc7SCole Faust x := p.parseDictEntry() 879*4947cdc7SCole Faust 880*4947cdc7SCole Faust if p.tok == FOR { 881*4947cdc7SCole Faust // dict comprehension 882*4947cdc7SCole Faust return p.parseComprehensionSuffix(lbrace, x, RBRACE) 883*4947cdc7SCole Faust } 884*4947cdc7SCole Faust 885*4947cdc7SCole Faust entries := []Expr{x} 886*4947cdc7SCole Faust for p.tok == COMMA { 887*4947cdc7SCole Faust p.nextToken() 888*4947cdc7SCole Faust if p.tok == RBRACE { 889*4947cdc7SCole Faust break 890*4947cdc7SCole Faust } 891*4947cdc7SCole Faust entries = append(entries, p.parseDictEntry()) 892*4947cdc7SCole Faust } 893*4947cdc7SCole Faust 894*4947cdc7SCole Faust rbrace := p.consume(RBRACE) 895*4947cdc7SCole Faust return &DictExpr{Lbrace: lbrace, List: entries, Rbrace: rbrace} 896*4947cdc7SCole Faust} 897*4947cdc7SCole Faust 898*4947cdc7SCole Faust// dict_entry = test ':' test 899*4947cdc7SCole Faustfunc (p *parser) parseDictEntry() *DictEntry { 900*4947cdc7SCole Faust k := p.parseTest() 901*4947cdc7SCole Faust colon := p.consume(COLON) 902*4947cdc7SCole Faust v := p.parseTest() 903*4947cdc7SCole Faust return &DictEntry{Key: k, Colon: colon, Value: v} 904*4947cdc7SCole Faust} 905*4947cdc7SCole Faust 906*4947cdc7SCole Faust// comp_suffix = FOR loopvars IN expr comp_suffix 907*4947cdc7SCole Faust// | IF expr comp_suffix 908*4947cdc7SCole Faust// | ']' or ')' (end) 909*4947cdc7SCole Faust// 910*4947cdc7SCole Faust// There can be multiple FOR/IF clauses; the first is always a FOR. 911*4947cdc7SCole Faustfunc (p *parser) parseComprehensionSuffix(lbrace Position, body Expr, endBrace Token) Expr { 912*4947cdc7SCole Faust var clauses []Node 913*4947cdc7SCole Faust for p.tok != endBrace { 914*4947cdc7SCole Faust if p.tok == FOR { 915*4947cdc7SCole Faust pos := p.nextToken() 916*4947cdc7SCole Faust vars := p.parseForLoopVariables() 917*4947cdc7SCole Faust in := p.consume(IN) 918*4947cdc7SCole Faust // Following Python 3, the operand of IN cannot be: 919*4947cdc7SCole Faust // - a conditional expression ('x if y else z'), 920*4947cdc7SCole Faust // due to conflicts in Python grammar 921*4947cdc7SCole Faust // ('if' is used by the comprehension); 922*4947cdc7SCole Faust // - a lambda expression 923*4947cdc7SCole Faust // - an unparenthesized tuple. 924*4947cdc7SCole Faust x := p.parseTestPrec(0) 925*4947cdc7SCole Faust clauses = append(clauses, &ForClause{For: pos, Vars: vars, In: in, X: x}) 926*4947cdc7SCole Faust } else if p.tok == IF { 927*4947cdc7SCole Faust pos := p.nextToken() 928*4947cdc7SCole Faust cond := p.parseTestNoCond() 929*4947cdc7SCole Faust clauses = append(clauses, &IfClause{If: pos, Cond: cond}) 930*4947cdc7SCole Faust } else { 931*4947cdc7SCole Faust p.in.errorf(p.in.pos, "got %#v, want '%s', for, or if", p.tok, endBrace) 932*4947cdc7SCole Faust } 933*4947cdc7SCole Faust } 934*4947cdc7SCole Faust rbrace := p.nextToken() 935*4947cdc7SCole Faust 936*4947cdc7SCole Faust return &Comprehension{ 937*4947cdc7SCole Faust Curly: endBrace == RBRACE, 938*4947cdc7SCole Faust Lbrack: lbrace, 939*4947cdc7SCole Faust Body: body, 940*4947cdc7SCole Faust Clauses: clauses, 941*4947cdc7SCole Faust Rbrack: rbrace, 942*4947cdc7SCole Faust } 943*4947cdc7SCole Faust} 944*4947cdc7SCole Faust 945*4947cdc7SCole Faustfunc terminatesExprList(tok Token) bool { 946*4947cdc7SCole Faust switch tok { 947*4947cdc7SCole Faust case EOF, NEWLINE, EQ, RBRACE, RBRACK, RPAREN, SEMI: 948*4947cdc7SCole Faust return true 949*4947cdc7SCole Faust } 950*4947cdc7SCole Faust return false 951*4947cdc7SCole Faust} 952*4947cdc7SCole Faust 953*4947cdc7SCole Faust// Comment assignment. 954*4947cdc7SCole Faust// We build two lists of all subnodes, preorder and postorder. 955*4947cdc7SCole Faust// The preorder list is ordered by start location, with outer nodes first. 956*4947cdc7SCole Faust// The postorder list is ordered by end location, with outer nodes last. 957*4947cdc7SCole Faust// We use the preorder list to assign each whole-line comment to the syntax 958*4947cdc7SCole Faust// immediately following it, and we use the postorder list to assign each 959*4947cdc7SCole Faust// end-of-line comment to the syntax immediately preceding it. 960*4947cdc7SCole Faust 961*4947cdc7SCole Faust// flattenAST returns the list of AST nodes, both in prefix order and in postfix 962*4947cdc7SCole Faust// order. 963*4947cdc7SCole Faustfunc flattenAST(root Node) (pre, post []Node) { 964*4947cdc7SCole Faust stack := []Node{} 965*4947cdc7SCole Faust Walk(root, func(n Node) bool { 966*4947cdc7SCole Faust if n != nil { 967*4947cdc7SCole Faust pre = append(pre, n) 968*4947cdc7SCole Faust stack = append(stack, n) 969*4947cdc7SCole Faust } else { 970*4947cdc7SCole Faust post = append(post, stack[len(stack)-1]) 971*4947cdc7SCole Faust stack = stack[:len(stack)-1] 972*4947cdc7SCole Faust } 973*4947cdc7SCole Faust return true 974*4947cdc7SCole Faust }) 975*4947cdc7SCole Faust return pre, post 976*4947cdc7SCole Faust} 977*4947cdc7SCole Faust 978*4947cdc7SCole Faust// assignComments attaches comments to nearby syntax. 979*4947cdc7SCole Faustfunc (p *parser) assignComments(n Node) { 980*4947cdc7SCole Faust // Leave early if there are no comments 981*4947cdc7SCole Faust if len(p.in.lineComments)+len(p.in.suffixComments) == 0 { 982*4947cdc7SCole Faust return 983*4947cdc7SCole Faust } 984*4947cdc7SCole Faust 985*4947cdc7SCole Faust pre, post := flattenAST(n) 986*4947cdc7SCole Faust 987*4947cdc7SCole Faust // Assign line comments to syntax immediately following. 988*4947cdc7SCole Faust line := p.in.lineComments 989*4947cdc7SCole Faust for _, x := range pre { 990*4947cdc7SCole Faust start, _ := x.Span() 991*4947cdc7SCole Faust 992*4947cdc7SCole Faust switch x.(type) { 993*4947cdc7SCole Faust case *File: 994*4947cdc7SCole Faust continue 995*4947cdc7SCole Faust } 996*4947cdc7SCole Faust 997*4947cdc7SCole Faust for len(line) > 0 && !start.isBefore(line[0].Start) { 998*4947cdc7SCole Faust x.AllocComments() 999*4947cdc7SCole Faust x.Comments().Before = append(x.Comments().Before, line[0]) 1000*4947cdc7SCole Faust line = line[1:] 1001*4947cdc7SCole Faust } 1002*4947cdc7SCole Faust } 1003*4947cdc7SCole Faust 1004*4947cdc7SCole Faust // Remaining line comments go at end of file. 1005*4947cdc7SCole Faust if len(line) > 0 { 1006*4947cdc7SCole Faust n.AllocComments() 1007*4947cdc7SCole Faust n.Comments().After = append(n.Comments().After, line...) 1008*4947cdc7SCole Faust } 1009*4947cdc7SCole Faust 1010*4947cdc7SCole Faust // Assign suffix comments to syntax immediately before. 1011*4947cdc7SCole Faust suffix := p.in.suffixComments 1012*4947cdc7SCole Faust for i := len(post) - 1; i >= 0; i-- { 1013*4947cdc7SCole Faust x := post[i] 1014*4947cdc7SCole Faust 1015*4947cdc7SCole Faust // Do not assign suffix comments to file 1016*4947cdc7SCole Faust switch x.(type) { 1017*4947cdc7SCole Faust case *File: 1018*4947cdc7SCole Faust continue 1019*4947cdc7SCole Faust } 1020*4947cdc7SCole Faust 1021*4947cdc7SCole Faust _, end := x.Span() 1022*4947cdc7SCole Faust if len(suffix) > 0 && end.isBefore(suffix[len(suffix)-1].Start) { 1023*4947cdc7SCole Faust x.AllocComments() 1024*4947cdc7SCole Faust x.Comments().Suffix = append(x.Comments().Suffix, suffix[len(suffix)-1]) 1025*4947cdc7SCole Faust suffix = suffix[:len(suffix)-1] 1026*4947cdc7SCole Faust } 1027*4947cdc7SCole Faust } 1028*4947cdc7SCole Faust} 1029