xref: /aosp_15_r20/external/bazelbuild-rules_python/gazelle/python/file_parser.go (revision 60517a1edbc8ecf509223e9af94a7adec7d736b8)
1// Copyright 2023 The Bazel Authors. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package python
16
17import (
18	"context"
19	"fmt"
20	"os"
21	"path/filepath"
22	"strings"
23
24	sitter "github.com/smacker/go-tree-sitter"
25	"github.com/smacker/go-tree-sitter/python"
26)
27
// Tree-sitter node type names for the Python grammar. The parser compares
// them against the value returned by Node.Type().
const (
	// Nodes that make up import statements.
	sitterNodeTypeImportStatement     = "import_statement"
	sitterNodeTypeImportFromStatement = "import_from_statement"
	sitterNodeTypeAliasedImport       = "aliased_import"
	sitterNodeTypeWildcardImport      = "wildcard_import"
	sitterNodeTypeDottedName          = "dotted_name"

	// Nodes inspected when looking for an `if __name__ == "__main__"` check.
	sitterNodeTypeIfStatement        = "if_statement"
	sitterNodeTypeComparisonOperator = "comparison_operator"
	sitterNodeTypeIdentifier         = "identifier"
	sitterNodeTypeString             = "string"

	// Source comments.
	sitterNodeTypeComment = "comment"
)
40
41type ParserOutput struct {
42	FileName string
43	Modules  []module
44	Comments []comment
45	HasMain  bool
46}
47
48type FileParser struct {
49	code        []byte
50	relFilepath string
51	output      ParserOutput
52}
53
54func NewFileParser() *FileParser {
55	return &FileParser{}
56}
57
58func ParseCode(code []byte) (*sitter.Node, error) {
59	parser := sitter.NewParser()
60	parser.SetLanguage(python.GetLanguage())
61
62	tree, err := parser.ParseCtx(context.Background(), nil, code)
63	if err != nil {
64		return nil, err
65	}
66
67	return tree.RootNode(), nil
68}
69
70func (p *FileParser) parseMain(ctx context.Context, node *sitter.Node) bool {
71	for i := 0; i < int(node.ChildCount()); i++ {
72		if err := ctx.Err(); err != nil {
73			return false
74		}
75		child := node.Child(i)
76		if child.Type() == sitterNodeTypeIfStatement &&
77			child.Child(1).Type() == sitterNodeTypeComparisonOperator && child.Child(1).Child(1).Type() == "==" {
78			statement := child.Child(1)
79			a, b := statement.Child(0), statement.Child(2)
80			// convert "'__main__' == __name__" to "__name__ == '__main__'"
81			if b.Type() == sitterNodeTypeIdentifier {
82				a, b = b, a
83			}
84			if a.Type() == sitterNodeTypeIdentifier && a.Content(p.code) == "__name__" &&
85				// at github.com/smacker/go-tree-sitter@latest (after v0.0.0-20240422154435-0628b34cbf9c we used)
86				// "__main__" is the second child of b. But now, it isn't.
87				// we cannot use the latest go-tree-sitter because of the top level reference in scanner.c.
88				// https://github.com/smacker/go-tree-sitter/blob/04d6b33fe138a98075210f5b770482ded024dc0f/python/scanner.c#L1
89				b.Type() == sitterNodeTypeString && string(p.code[b.StartByte()+1:b.EndByte()-1]) == "__main__" {
90				return true
91			}
92		}
93	}
94	return false
95}
96
97func parseImportStatement(node *sitter.Node, code []byte) (module, bool) {
98	switch node.Type() {
99	case sitterNodeTypeDottedName:
100		return module{
101			Name:       node.Content(code),
102			LineNumber: node.StartPoint().Row + 1,
103		}, true
104	case sitterNodeTypeAliasedImport:
105		return parseImportStatement(node.Child(0), code)
106	case sitterNodeTypeWildcardImport:
107		return module{
108			Name:       "*",
109			LineNumber: node.StartPoint().Row + 1,
110		}, true
111	}
112	return module{}, false
113}
114
115func (p *FileParser) parseImportStatements(node *sitter.Node) bool {
116	if node.Type() == sitterNodeTypeImportStatement {
117		for j := 1; j < int(node.ChildCount()); j++ {
118			m, ok := parseImportStatement(node.Child(j), p.code)
119			if !ok {
120				continue
121			}
122			m.Filepath = p.relFilepath
123			if strings.HasPrefix(m.Name, ".") {
124				continue
125			}
126			p.output.Modules = append(p.output.Modules, m)
127		}
128	} else if node.Type() == sitterNodeTypeImportFromStatement {
129		from := node.Child(1).Content(p.code)
130		if strings.HasPrefix(from, ".") {
131			return true
132		}
133		for j := 3; j < int(node.ChildCount()); j++ {
134			m, ok := parseImportStatement(node.Child(j), p.code)
135			if !ok {
136				continue
137			}
138			m.Filepath = p.relFilepath
139			m.From = from
140			m.Name = fmt.Sprintf("%s.%s", from, m.Name)
141			p.output.Modules = append(p.output.Modules, m)
142		}
143	} else {
144		return false
145	}
146	return true
147}
148
149func (p *FileParser) parseComments(node *sitter.Node) bool {
150	if node.Type() == sitterNodeTypeComment {
151		p.output.Comments = append(p.output.Comments, comment(node.Content(p.code)))
152		return true
153	}
154	return false
155}
156
157func (p *FileParser) SetCodeAndFile(code []byte, relPackagePath, filename string) {
158	p.code = code
159	p.relFilepath = filepath.Join(relPackagePath, filename)
160	p.output.FileName = filename
161}
162
163func (p *FileParser) parse(ctx context.Context, node *sitter.Node) {
164	if node == nil {
165		return
166	}
167	for i := 0; i < int(node.ChildCount()); i++ {
168		if err := ctx.Err(); err != nil {
169			return
170		}
171		child := node.Child(i)
172		if p.parseImportStatements(child) {
173			continue
174		}
175		if p.parseComments(child) {
176			continue
177		}
178		p.parse(ctx, child)
179	}
180}
181
182func (p *FileParser) Parse(ctx context.Context) (*ParserOutput, error) {
183	rootNode, err := ParseCode(p.code)
184	if err != nil {
185		return nil, err
186	}
187
188	p.output.HasMain = p.parseMain(ctx, rootNode)
189
190	p.parse(ctx, rootNode)
191	return &p.output, nil
192}
193
194func (p *FileParser) ParseFile(ctx context.Context, repoRoot, relPackagePath, filename string) (*ParserOutput, error) {
195	code, err := os.ReadFile(filepath.Join(repoRoot, relPackagePath, filename))
196	if err != nil {
197		return nil, err
198	}
199	p.SetCodeAndFile(code, relPackagePath, filename)
200	return p.Parse(ctx)
201}
202