1// Copyright 2023 The Bazel Authors. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package python 16 17import ( 18 "context" 19 "fmt" 20 "os" 21 "path/filepath" 22 "strings" 23 24 sitter "github.com/smacker/go-tree-sitter" 25 "github.com/smacker/go-tree-sitter/python" 26) 27 28const ( 29 sitterNodeTypeString = "string" 30 sitterNodeTypeComment = "comment" 31 sitterNodeTypeIdentifier = "identifier" 32 sitterNodeTypeDottedName = "dotted_name" 33 sitterNodeTypeIfStatement = "if_statement" 34 sitterNodeTypeAliasedImport = "aliased_import" 35 sitterNodeTypeWildcardImport = "wildcard_import" 36 sitterNodeTypeImportStatement = "import_statement" 37 sitterNodeTypeComparisonOperator = "comparison_operator" 38 sitterNodeTypeImportFromStatement = "import_from_statement" 39) 40 41type ParserOutput struct { 42 FileName string 43 Modules []module 44 Comments []comment 45 HasMain bool 46} 47 48type FileParser struct { 49 code []byte 50 relFilepath string 51 output ParserOutput 52} 53 54func NewFileParser() *FileParser { 55 return &FileParser{} 56} 57 58func ParseCode(code []byte) (*sitter.Node, error) { 59 parser := sitter.NewParser() 60 parser.SetLanguage(python.GetLanguage()) 61 62 tree, err := parser.ParseCtx(context.Background(), nil, code) 63 if err != nil { 64 return nil, err 65 } 66 67 return tree.RootNode(), nil 68} 69 70func (p *FileParser) parseMain(ctx context.Context, node *sitter.Node) bool { 71 for i := 0; i < int(node.ChildCount()); i++ { 72 if err := ctx.Err(); err != nil { 73 return false 74 } 75 child := node.Child(i) 76 if child.Type() == sitterNodeTypeIfStatement && 77 child.Child(1).Type() == sitterNodeTypeComparisonOperator && child.Child(1).Child(1).Type() == "==" { 78 statement := child.Child(1) 79 a, b := statement.Child(0), statement.Child(2) 80 // convert "'__main__' == __name__" to "__name__ == '__main__'" 81 if b.Type() == sitterNodeTypeIdentifier { 82 a, b = b, a 83 } 84 if a.Type() == sitterNodeTypeIdentifier && a.Content(p.code) == "__name__" && 85 // at github.com/smacker/go-tree-sitter@latest (after v0.0.0-20240422154435-0628b34cbf9c we used) 86 // "__main__" is the second child of b. But now, it isn't. 87 // we cannot use the latest go-tree-sitter because of the top level reference in scanner.c. 88 // https://github.com/smacker/go-tree-sitter/blob/04d6b33fe138a98075210f5b770482ded024dc0f/python/scanner.c#L1 89 b.Type() == sitterNodeTypeString && string(p.code[b.StartByte()+1:b.EndByte()-1]) == "__main__" { 90 return true 91 } 92 } 93 } 94 return false 95} 96 97func parseImportStatement(node *sitter.Node, code []byte) (module, bool) { 98 switch node.Type() { 99 case sitterNodeTypeDottedName: 100 return module{ 101 Name: node.Content(code), 102 LineNumber: node.StartPoint().Row + 1, 103 }, true 104 case sitterNodeTypeAliasedImport: 105 return parseImportStatement(node.Child(0), code) 106 case sitterNodeTypeWildcardImport: 107 return module{ 108 Name: "*", 109 LineNumber: node.StartPoint().Row + 1, 110 }, true 111 } 112 return module{}, false 113} 114 115func (p *FileParser) parseImportStatements(node *sitter.Node) bool { 116 if node.Type() == sitterNodeTypeImportStatement { 117 for j := 1; j < int(node.ChildCount()); j++ { 118 m, ok := parseImportStatement(node.Child(j), p.code) 119 if !ok { 120 continue 121 } 122 m.Filepath = p.relFilepath 123 if strings.HasPrefix(m.Name, ".") { 124 continue 125 } 126 p.output.Modules = append(p.output.Modules, m) 127 } 128 } else if node.Type() == sitterNodeTypeImportFromStatement { 129 from := node.Child(1).Content(p.code) 130 if strings.HasPrefix(from, ".") { 131 return true 132 } 133 for j := 3; j < int(node.ChildCount()); j++ { 134 m, ok := parseImportStatement(node.Child(j), p.code) 135 if !ok { 136 continue 137 } 138 m.Filepath = p.relFilepath 139 m.From = from 140 m.Name = fmt.Sprintf("%s.%s", from, m.Name) 141 p.output.Modules = append(p.output.Modules, m) 142 } 143 } else { 144 return false 145 } 146 return true 147} 148 149func (p *FileParser) parseComments(node *sitter.Node) bool { 150 if node.Type() == sitterNodeTypeComment { 151 p.output.Comments = append(p.output.Comments, comment(node.Content(p.code))) 152 return true 153 } 154 return false 155} 156 157func (p *FileParser) SetCodeAndFile(code []byte, relPackagePath, filename string) { 158 p.code = code 159 p.relFilepath = filepath.Join(relPackagePath, filename) 160 p.output.FileName = filename 161} 162 163func (p *FileParser) parse(ctx context.Context, node *sitter.Node) { 164 if node == nil { 165 return 166 } 167 for i := 0; i < int(node.ChildCount()); i++ { 168 if err := ctx.Err(); err != nil { 169 return 170 } 171 child := node.Child(i) 172 if p.parseImportStatements(child) { 173 continue 174 } 175 if p.parseComments(child) { 176 continue 177 } 178 p.parse(ctx, child) 179 } 180} 181 182func (p *FileParser) Parse(ctx context.Context) (*ParserOutput, error) { 183 rootNode, err := ParseCode(p.code) 184 if err != nil { 185 return nil, err 186 } 187 188 p.output.HasMain = p.parseMain(ctx, rootNode) 189 190 p.parse(ctx, rootNode) 191 return &p.output, nil 192} 193 194func (p *FileParser) ParseFile(ctx context.Context, repoRoot, relPackagePath, filename string) (*ParserOutput, error) { 195 code, err := os.ReadFile(filepath.Join(repoRoot, relPackagePath, filename)) 196 if err != nil { 197 return nil, err 198 } 199 p.SetCodeAndFile(code, relPackagePath, filename) 200 return p.Parse(ctx) 201} 202