1*60517a1eSAndroid Build Coastguard Worker// Copyright 2023 The Bazel Authors. All rights reserved. 2*60517a1eSAndroid Build Coastguard Worker// 3*60517a1eSAndroid Build Coastguard Worker// Licensed under the Apache License, Version 2.0 (the "License"); 4*60517a1eSAndroid Build Coastguard Worker// you may not use this file except in compliance with the License. 5*60517a1eSAndroid Build Coastguard Worker// You may obtain a copy of the License at 6*60517a1eSAndroid Build Coastguard Worker// 7*60517a1eSAndroid Build Coastguard Worker// http://www.apache.org/licenses/LICENSE-2.0 8*60517a1eSAndroid Build Coastguard Worker// 9*60517a1eSAndroid Build Coastguard Worker// Unless required by applicable law or agreed to in writing, software 10*60517a1eSAndroid Build Coastguard Worker// distributed under the License is distributed on an "AS IS" BASIS, 11*60517a1eSAndroid Build Coastguard Worker// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12*60517a1eSAndroid Build Coastguard Worker// See the License for the specific language governing permissions and 13*60517a1eSAndroid Build Coastguard Worker// limitations under the License. 14*60517a1eSAndroid Build Coastguard Worker 15*60517a1eSAndroid Build Coastguard Workerpackage python 16*60517a1eSAndroid Build Coastguard Worker 17*60517a1eSAndroid Build Coastguard Workerimport ( 18*60517a1eSAndroid Build Coastguard Worker "context" 19*60517a1eSAndroid Build Coastguard Worker "fmt" 20*60517a1eSAndroid Build Coastguard Worker "os" 21*60517a1eSAndroid Build Coastguard Worker "path/filepath" 22*60517a1eSAndroid Build Coastguard Worker "strings" 23*60517a1eSAndroid Build Coastguard Worker 24*60517a1eSAndroid Build Coastguard Worker sitter "github.com/smacker/go-tree-sitter" 25*60517a1eSAndroid Build Coastguard Worker "github.com/smacker/go-tree-sitter/python" 26*60517a1eSAndroid Build Coastguard Worker) 27*60517a1eSAndroid Build Coastguard Worker 28*60517a1eSAndroid Build Coastguard Workerconst ( 29*60517a1eSAndroid Build Coastguard Worker sitterNodeTypeString = "string" 30*60517a1eSAndroid Build Coastguard Worker sitterNodeTypeComment = "comment" 31*60517a1eSAndroid Build Coastguard Worker sitterNodeTypeIdentifier = "identifier" 32*60517a1eSAndroid Build Coastguard Worker sitterNodeTypeDottedName = "dotted_name" 33*60517a1eSAndroid Build Coastguard Worker sitterNodeTypeIfStatement = "if_statement" 34*60517a1eSAndroid Build Coastguard Worker sitterNodeTypeAliasedImport = "aliased_import" 35*60517a1eSAndroid Build Coastguard Worker sitterNodeTypeWildcardImport = "wildcard_import" 36*60517a1eSAndroid Build Coastguard Worker sitterNodeTypeImportStatement = "import_statement" 37*60517a1eSAndroid Build Coastguard Worker sitterNodeTypeComparisonOperator = "comparison_operator" 38*60517a1eSAndroid Build Coastguard Worker sitterNodeTypeImportFromStatement = "import_from_statement" 39*60517a1eSAndroid Build Coastguard Worker) 40*60517a1eSAndroid Build Coastguard Worker 41*60517a1eSAndroid Build Coastguard Workertype ParserOutput struct { 42*60517a1eSAndroid Build Coastguard Worker FileName string 43*60517a1eSAndroid Build Coastguard Worker Modules []module 44*60517a1eSAndroid Build Coastguard Worker Comments []comment 45*60517a1eSAndroid Build Coastguard Worker HasMain bool 46*60517a1eSAndroid Build Coastguard Worker} 47*60517a1eSAndroid Build Coastguard Worker 48*60517a1eSAndroid Build Coastguard Workertype FileParser struct { 49*60517a1eSAndroid Build Coastguard Worker code []byte 50*60517a1eSAndroid Build Coastguard Worker relFilepath string 51*60517a1eSAndroid Build Coastguard Worker output ParserOutput 52*60517a1eSAndroid Build Coastguard Worker} 53*60517a1eSAndroid Build Coastguard Worker 54*60517a1eSAndroid Build Coastguard Workerfunc NewFileParser() *FileParser { 55*60517a1eSAndroid Build Coastguard Worker return &FileParser{} 56*60517a1eSAndroid Build Coastguard Worker} 57*60517a1eSAndroid Build Coastguard Worker 58*60517a1eSAndroid Build Coastguard Workerfunc ParseCode(code []byte) (*sitter.Node, error) { 59*60517a1eSAndroid Build Coastguard Worker parser := sitter.NewParser() 60*60517a1eSAndroid Build Coastguard Worker parser.SetLanguage(python.GetLanguage()) 61*60517a1eSAndroid Build Coastguard Worker 62*60517a1eSAndroid Build Coastguard Worker tree, err := parser.ParseCtx(context.Background(), nil, code) 63*60517a1eSAndroid Build Coastguard Worker if err != nil { 64*60517a1eSAndroid Build Coastguard Worker return nil, err 65*60517a1eSAndroid Build Coastguard Worker } 66*60517a1eSAndroid Build Coastguard Worker 67*60517a1eSAndroid Build Coastguard Worker return tree.RootNode(), nil 68*60517a1eSAndroid Build Coastguard Worker} 69*60517a1eSAndroid Build Coastguard Worker 70*60517a1eSAndroid Build Coastguard Workerfunc (p *FileParser) parseMain(ctx context.Context, node *sitter.Node) bool { 71*60517a1eSAndroid Build Coastguard Worker for i := 0; i < int(node.ChildCount()); i++ { 72*60517a1eSAndroid Build Coastguard Worker if err := ctx.Err(); err != nil { 73*60517a1eSAndroid Build Coastguard Worker return false 74*60517a1eSAndroid Build Coastguard Worker } 75*60517a1eSAndroid Build Coastguard Worker child := node.Child(i) 76*60517a1eSAndroid Build Coastguard Worker if child.Type() == sitterNodeTypeIfStatement && 77*60517a1eSAndroid Build Coastguard Worker child.Child(1).Type() == sitterNodeTypeComparisonOperator && child.Child(1).Child(1).Type() == "==" { 78*60517a1eSAndroid Build Coastguard Worker statement := child.Child(1) 79*60517a1eSAndroid Build Coastguard Worker a, b := statement.Child(0), statement.Child(2) 80*60517a1eSAndroid Build Coastguard Worker // convert "'__main__' == __name__" to "__name__ == '__main__'" 81*60517a1eSAndroid Build Coastguard Worker if b.Type() == sitterNodeTypeIdentifier { 82*60517a1eSAndroid Build Coastguard Worker a, b = b, a 83*60517a1eSAndroid Build Coastguard Worker } 84*60517a1eSAndroid Build Coastguard Worker if a.Type() == sitterNodeTypeIdentifier && a.Content(p.code) == "__name__" && 85*60517a1eSAndroid Build Coastguard Worker // at github.com/smacker/go-tree-sitter@latest (after v0.0.0-20240422154435-0628b34cbf9c we used) 86*60517a1eSAndroid Build Coastguard Worker // "__main__" is the second child of b. But now, it isn't. 87*60517a1eSAndroid Build Coastguard Worker // we cannot use the latest go-tree-sitter because of the top level reference in scanner.c. 88*60517a1eSAndroid Build Coastguard Worker // https://github.com/smacker/go-tree-sitter/blob/04d6b33fe138a98075210f5b770482ded024dc0f/python/scanner.c#L1 89*60517a1eSAndroid Build Coastguard Worker b.Type() == sitterNodeTypeString && string(p.code[b.StartByte()+1:b.EndByte()-1]) == "__main__" { 90*60517a1eSAndroid Build Coastguard Worker return true 91*60517a1eSAndroid Build Coastguard Worker } 92*60517a1eSAndroid Build Coastguard Worker } 93*60517a1eSAndroid Build Coastguard Worker } 94*60517a1eSAndroid Build Coastguard Worker return false 95*60517a1eSAndroid Build Coastguard Worker} 96*60517a1eSAndroid Build Coastguard Worker 97*60517a1eSAndroid Build Coastguard Workerfunc parseImportStatement(node *sitter.Node, code []byte) (module, bool) { 98*60517a1eSAndroid Build Coastguard Worker switch node.Type() { 99*60517a1eSAndroid Build Coastguard Worker case sitterNodeTypeDottedName: 100*60517a1eSAndroid Build Coastguard Worker return module{ 101*60517a1eSAndroid Build Coastguard Worker Name: node.Content(code), 102*60517a1eSAndroid Build Coastguard Worker LineNumber: node.StartPoint().Row + 1, 103*60517a1eSAndroid Build Coastguard Worker }, true 104*60517a1eSAndroid Build Coastguard Worker case sitterNodeTypeAliasedImport: 105*60517a1eSAndroid Build Coastguard Worker return parseImportStatement(node.Child(0), code) 106*60517a1eSAndroid Build Coastguard Worker case sitterNodeTypeWildcardImport: 107*60517a1eSAndroid Build Coastguard Worker return module{ 108*60517a1eSAndroid Build Coastguard Worker Name: "*", 109*60517a1eSAndroid Build Coastguard Worker LineNumber: node.StartPoint().Row + 1, 110*60517a1eSAndroid Build Coastguard Worker }, true 111*60517a1eSAndroid Build Coastguard Worker } 112*60517a1eSAndroid Build Coastguard Worker return module{}, false 113*60517a1eSAndroid Build Coastguard Worker} 114*60517a1eSAndroid Build Coastguard Worker 115*60517a1eSAndroid Build Coastguard Workerfunc (p *FileParser) parseImportStatements(node *sitter.Node) bool { 116*60517a1eSAndroid Build Coastguard Worker if node.Type() == sitterNodeTypeImportStatement { 117*60517a1eSAndroid Build Coastguard Worker for j := 1; j < int(node.ChildCount()); j++ { 118*60517a1eSAndroid Build Coastguard Worker m, ok := parseImportStatement(node.Child(j), p.code) 119*60517a1eSAndroid Build Coastguard Worker if !ok { 120*60517a1eSAndroid Build Coastguard Worker continue 121*60517a1eSAndroid Build Coastguard Worker } 122*60517a1eSAndroid Build Coastguard Worker m.Filepath = p.relFilepath 123*60517a1eSAndroid Build Coastguard Worker if strings.HasPrefix(m.Name, ".") { 124*60517a1eSAndroid Build Coastguard Worker continue 125*60517a1eSAndroid Build Coastguard Worker } 126*60517a1eSAndroid Build Coastguard Worker p.output.Modules = append(p.output.Modules, m) 127*60517a1eSAndroid Build Coastguard Worker } 128*60517a1eSAndroid Build Coastguard Worker } else if node.Type() == sitterNodeTypeImportFromStatement { 129*60517a1eSAndroid Build Coastguard Worker from := node.Child(1).Content(p.code) 130*60517a1eSAndroid Build Coastguard Worker if strings.HasPrefix(from, ".") { 131*60517a1eSAndroid Build Coastguard Worker return true 132*60517a1eSAndroid Build Coastguard Worker } 133*60517a1eSAndroid Build Coastguard Worker for j := 3; j < int(node.ChildCount()); j++ { 134*60517a1eSAndroid Build Coastguard Worker m, ok := parseImportStatement(node.Child(j), p.code) 135*60517a1eSAndroid Build Coastguard Worker if !ok { 136*60517a1eSAndroid Build Coastguard Worker continue 137*60517a1eSAndroid Build Coastguard Worker } 138*60517a1eSAndroid Build Coastguard Worker m.Filepath = p.relFilepath 139*60517a1eSAndroid Build Coastguard Worker m.From = from 140*60517a1eSAndroid Build Coastguard Worker m.Name = fmt.Sprintf("%s.%s", from, m.Name) 141*60517a1eSAndroid Build Coastguard Worker p.output.Modules = append(p.output.Modules, m) 142*60517a1eSAndroid Build Coastguard Worker } 143*60517a1eSAndroid Build Coastguard Worker } else { 144*60517a1eSAndroid Build Coastguard Worker return false 145*60517a1eSAndroid Build Coastguard Worker } 146*60517a1eSAndroid Build Coastguard Worker return true 147*60517a1eSAndroid Build Coastguard Worker} 148*60517a1eSAndroid Build Coastguard Worker 149*60517a1eSAndroid Build Coastguard Workerfunc (p *FileParser) parseComments(node *sitter.Node) bool { 150*60517a1eSAndroid Build Coastguard Worker if node.Type() == sitterNodeTypeComment { 151*60517a1eSAndroid Build Coastguard Worker p.output.Comments = append(p.output.Comments, comment(node.Content(p.code))) 152*60517a1eSAndroid Build Coastguard Worker return true 153*60517a1eSAndroid Build Coastguard Worker } 154*60517a1eSAndroid Build Coastguard Worker return false 155*60517a1eSAndroid Build Coastguard Worker} 156*60517a1eSAndroid Build Coastguard Worker 157*60517a1eSAndroid Build Coastguard Workerfunc (p *FileParser) SetCodeAndFile(code []byte, relPackagePath, filename string) { 158*60517a1eSAndroid Build Coastguard Worker p.code = code 159*60517a1eSAndroid Build Coastguard Worker p.relFilepath = filepath.Join(relPackagePath, filename) 160*60517a1eSAndroid Build Coastguard Worker p.output.FileName = filename 161*60517a1eSAndroid Build Coastguard Worker} 162*60517a1eSAndroid Build Coastguard Worker 163*60517a1eSAndroid Build Coastguard Workerfunc (p *FileParser) parse(ctx context.Context, node *sitter.Node) { 164*60517a1eSAndroid Build Coastguard Worker if node == nil { 165*60517a1eSAndroid Build Coastguard Worker return 166*60517a1eSAndroid Build Coastguard Worker } 167*60517a1eSAndroid Build Coastguard Worker for i := 0; i < int(node.ChildCount()); i++ { 168*60517a1eSAndroid Build Coastguard Worker if err := ctx.Err(); err != nil { 169*60517a1eSAndroid Build Coastguard Worker return 170*60517a1eSAndroid Build Coastguard Worker } 171*60517a1eSAndroid Build Coastguard Worker child := node.Child(i) 172*60517a1eSAndroid Build Coastguard Worker if p.parseImportStatements(child) { 173*60517a1eSAndroid Build Coastguard Worker continue 174*60517a1eSAndroid Build Coastguard Worker } 175*60517a1eSAndroid Build Coastguard Worker if p.parseComments(child) { 176*60517a1eSAndroid Build Coastguard Worker continue 177*60517a1eSAndroid Build Coastguard Worker } 178*60517a1eSAndroid Build Coastguard Worker p.parse(ctx, child) 179*60517a1eSAndroid Build Coastguard Worker } 180*60517a1eSAndroid Build Coastguard Worker} 181*60517a1eSAndroid Build Coastguard Worker 182*60517a1eSAndroid Build Coastguard Workerfunc (p *FileParser) Parse(ctx context.Context) (*ParserOutput, error) { 183*60517a1eSAndroid Build Coastguard Worker rootNode, err := ParseCode(p.code) 184*60517a1eSAndroid Build Coastguard Worker if err != nil { 185*60517a1eSAndroid Build Coastguard Worker return nil, err 186*60517a1eSAndroid Build Coastguard Worker } 187*60517a1eSAndroid Build Coastguard Worker 188*60517a1eSAndroid Build Coastguard Worker p.output.HasMain = p.parseMain(ctx, rootNode) 189*60517a1eSAndroid Build Coastguard Worker 190*60517a1eSAndroid Build Coastguard Worker p.parse(ctx, rootNode) 191*60517a1eSAndroid Build Coastguard Worker return &p.output, nil 192*60517a1eSAndroid Build Coastguard Worker} 193*60517a1eSAndroid Build Coastguard Worker 194*60517a1eSAndroid Build Coastguard Workerfunc (p *FileParser) ParseFile(ctx context.Context, repoRoot, relPackagePath, filename string) (*ParserOutput, error) { 195*60517a1eSAndroid Build Coastguard Worker code, err := os.ReadFile(filepath.Join(repoRoot, relPackagePath, filename)) 196*60517a1eSAndroid Build Coastguard Worker if err != nil { 197*60517a1eSAndroid Build Coastguard Worker return nil, err 198*60517a1eSAndroid Build Coastguard Worker } 199*60517a1eSAndroid Build Coastguard Worker p.SetCodeAndFile(code, relPackagePath, filename) 200*60517a1eSAndroid Build Coastguard Worker return p.Parse(ctx) 201*60517a1eSAndroid Build Coastguard Worker} 202