// Copyright 2023 The Bazel Authors. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package python import ( "context" "fmt" "os" "path/filepath" "strings" sitter "github.com/smacker/go-tree-sitter" "github.com/smacker/go-tree-sitter/python" ) const ( sitterNodeTypeString = "string" sitterNodeTypeComment = "comment" sitterNodeTypeIdentifier = "identifier" sitterNodeTypeDottedName = "dotted_name" sitterNodeTypeIfStatement = "if_statement" sitterNodeTypeAliasedImport = "aliased_import" sitterNodeTypeWildcardImport = "wildcard_import" sitterNodeTypeImportStatement = "import_statement" sitterNodeTypeComparisonOperator = "comparison_operator" sitterNodeTypeImportFromStatement = "import_from_statement" ) type ParserOutput struct { FileName string Modules []module Comments []comment HasMain bool } type FileParser struct { code []byte relFilepath string output ParserOutput } func NewFileParser() *FileParser { return &FileParser{} } func ParseCode(code []byte) (*sitter.Node, error) { parser := sitter.NewParser() parser.SetLanguage(python.GetLanguage()) tree, err := parser.ParseCtx(context.Background(), nil, code) if err != nil { return nil, err } return tree.RootNode(), nil } func (p *FileParser) parseMain(ctx context.Context, node *sitter.Node) bool { for i := 0; i < int(node.ChildCount()); i++ { if err := ctx.Err(); err != nil { return false } child := node.Child(i) if child.Type() == sitterNodeTypeIfStatement && child.Child(1).Type() == sitterNodeTypeComparisonOperator && child.Child(1).Child(1).Type() == "==" { statement := child.Child(1) a, b := statement.Child(0), statement.Child(2) // convert "'__main__' == __name__" to "__name__ == '__main__'" if b.Type() == sitterNodeTypeIdentifier { a, b = b, a } if a.Type() == sitterNodeTypeIdentifier && a.Content(p.code) == "__name__" && // at github.com/smacker/go-tree-sitter@latest (after v0.0.0-20240422154435-0628b34cbf9c we used) // "__main__" is the second child of b. But now, it isn't. // we cannot use the latest go-tree-sitter because of the top level reference in scanner.c. // https://github.com/smacker/go-tree-sitter/blob/04d6b33fe138a98075210f5b770482ded024dc0f/python/scanner.c#L1 b.Type() == sitterNodeTypeString && string(p.code[b.StartByte()+1:b.EndByte()-1]) == "__main__" { return true } } } return false } func parseImportStatement(node *sitter.Node, code []byte) (module, bool) { switch node.Type() { case sitterNodeTypeDottedName: return module{ Name: node.Content(code), LineNumber: node.StartPoint().Row + 1, }, true case sitterNodeTypeAliasedImport: return parseImportStatement(node.Child(0), code) case sitterNodeTypeWildcardImport: return module{ Name: "*", LineNumber: node.StartPoint().Row + 1, }, true } return module{}, false } func (p *FileParser) parseImportStatements(node *sitter.Node) bool { if node.Type() == sitterNodeTypeImportStatement { for j := 1; j < int(node.ChildCount()); j++ { m, ok := parseImportStatement(node.Child(j), p.code) if !ok { continue } m.Filepath = p.relFilepath if strings.HasPrefix(m.Name, ".") { continue } p.output.Modules = append(p.output.Modules, m) } } else if node.Type() == sitterNodeTypeImportFromStatement { from := node.Child(1).Content(p.code) if strings.HasPrefix(from, ".") { return true } for j := 3; j < int(node.ChildCount()); j++ { m, ok := parseImportStatement(node.Child(j), p.code) if !ok { continue } m.Filepath = p.relFilepath m.From = from m.Name = fmt.Sprintf("%s.%s", from, m.Name) p.output.Modules = append(p.output.Modules, m) } } else { return false } return true } func (p *FileParser) parseComments(node *sitter.Node) bool { if node.Type() == sitterNodeTypeComment { p.output.Comments = append(p.output.Comments, comment(node.Content(p.code))) return true } return false } func (p *FileParser) SetCodeAndFile(code []byte, relPackagePath, filename string) { p.code = code p.relFilepath = filepath.Join(relPackagePath, filename) p.output.FileName = filename } func (p *FileParser) parse(ctx context.Context, node *sitter.Node) { if node == nil { return } for i := 0; i < int(node.ChildCount()); i++ { if err := ctx.Err(); err != nil { return } child := node.Child(i) if p.parseImportStatements(child) { continue } if p.parseComments(child) { continue } p.parse(ctx, child) } } func (p *FileParser) Parse(ctx context.Context) (*ParserOutput, error) { rootNode, err := ParseCode(p.code) if err != nil { return nil, err } p.output.HasMain = p.parseMain(ctx, rootNode) p.parse(ctx, rootNode) return &p.output, nil } func (p *FileParser) ParseFile(ctx context.Context, repoRoot, relPackagePath, filename string) (*ParserOutput, error) { code, err := os.ReadFile(filepath.Join(repoRoot, relPackagePath, filename)) if err != nil { return nil, err } p.SetCodeAndFile(code, relPackagePath, filename) return p.Parse(ctx) }