xref: /aosp_15_r20/external/bazelbuild-rules_python/gazelle/python/file_parser.go (revision 60517a1edbc8ecf509223e9af94a7adec7d736b8)
// Copyright 2023 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14*60517a1eSAndroid Build Coastguard Worker
15*60517a1eSAndroid Build Coastguard Workerpackage python
16*60517a1eSAndroid Build Coastguard Worker
17*60517a1eSAndroid Build Coastguard Workerimport (
18*60517a1eSAndroid Build Coastguard Worker	"context"
19*60517a1eSAndroid Build Coastguard Worker	"fmt"
20*60517a1eSAndroid Build Coastguard Worker	"os"
21*60517a1eSAndroid Build Coastguard Worker	"path/filepath"
22*60517a1eSAndroid Build Coastguard Worker	"strings"
23*60517a1eSAndroid Build Coastguard Worker
24*60517a1eSAndroid Build Coastguard Worker	sitter "github.com/smacker/go-tree-sitter"
25*60517a1eSAndroid Build Coastguard Worker	"github.com/smacker/go-tree-sitter/python"
26*60517a1eSAndroid Build Coastguard Worker)
27*60517a1eSAndroid Build Coastguard Worker
// Tree-sitter node type names that this file compares against the result of
// sitter.Node.Type.
const (
	// Matched while looking for the `__name__ == "__main__"` check.
	sitterNodeTypeString             = "string"
	sitterNodeTypeIdentifier         = "identifier"
	sitterNodeTypeIfStatement        = "if_statement"
	sitterNodeTypeComparisonOperator = "comparison_operator"

	// Matched while collecting imports.
	sitterNodeTypeDottedName          = "dotted_name"
	sitterNodeTypeAliasedImport       = "aliased_import"
	sitterNodeTypeWildcardImport      = "wildcard_import"
	sitterNodeTypeImportStatement     = "import_statement"
	sitterNodeTypeImportFromStatement = "import_from_statement"

	// Matched while collecting comments.
	sitterNodeTypeComment = "comment"
)
40*60517a1eSAndroid Build Coastguard Worker
41*60517a1eSAndroid Build Coastguard Workertype ParserOutput struct {
42*60517a1eSAndroid Build Coastguard Worker	FileName string
43*60517a1eSAndroid Build Coastguard Worker	Modules  []module
44*60517a1eSAndroid Build Coastguard Worker	Comments []comment
45*60517a1eSAndroid Build Coastguard Worker	HasMain  bool
46*60517a1eSAndroid Build Coastguard Worker}
47*60517a1eSAndroid Build Coastguard Worker
48*60517a1eSAndroid Build Coastguard Workertype FileParser struct {
49*60517a1eSAndroid Build Coastguard Worker	code        []byte
50*60517a1eSAndroid Build Coastguard Worker	relFilepath string
51*60517a1eSAndroid Build Coastguard Worker	output      ParserOutput
52*60517a1eSAndroid Build Coastguard Worker}
53*60517a1eSAndroid Build Coastguard Worker
54*60517a1eSAndroid Build Coastguard Workerfunc NewFileParser() *FileParser {
55*60517a1eSAndroid Build Coastguard Worker	return &FileParser{}
56*60517a1eSAndroid Build Coastguard Worker}
57*60517a1eSAndroid Build Coastguard Worker
58*60517a1eSAndroid Build Coastguard Workerfunc ParseCode(code []byte) (*sitter.Node, error) {
59*60517a1eSAndroid Build Coastguard Worker	parser := sitter.NewParser()
60*60517a1eSAndroid Build Coastguard Worker	parser.SetLanguage(python.GetLanguage())
61*60517a1eSAndroid Build Coastguard Worker
62*60517a1eSAndroid Build Coastguard Worker	tree, err := parser.ParseCtx(context.Background(), nil, code)
63*60517a1eSAndroid Build Coastguard Worker	if err != nil {
64*60517a1eSAndroid Build Coastguard Worker		return nil, err
65*60517a1eSAndroid Build Coastguard Worker	}
66*60517a1eSAndroid Build Coastguard Worker
67*60517a1eSAndroid Build Coastguard Worker	return tree.RootNode(), nil
68*60517a1eSAndroid Build Coastguard Worker}
69*60517a1eSAndroid Build Coastguard Worker
70*60517a1eSAndroid Build Coastguard Workerfunc (p *FileParser) parseMain(ctx context.Context, node *sitter.Node) bool {
71*60517a1eSAndroid Build Coastguard Worker	for i := 0; i < int(node.ChildCount()); i++ {
72*60517a1eSAndroid Build Coastguard Worker		if err := ctx.Err(); err != nil {
73*60517a1eSAndroid Build Coastguard Worker			return false
74*60517a1eSAndroid Build Coastguard Worker		}
75*60517a1eSAndroid Build Coastguard Worker		child := node.Child(i)
76*60517a1eSAndroid Build Coastguard Worker		if child.Type() == sitterNodeTypeIfStatement &&
77*60517a1eSAndroid Build Coastguard Worker			child.Child(1).Type() == sitterNodeTypeComparisonOperator && child.Child(1).Child(1).Type() == "==" {
78*60517a1eSAndroid Build Coastguard Worker			statement := child.Child(1)
79*60517a1eSAndroid Build Coastguard Worker			a, b := statement.Child(0), statement.Child(2)
80*60517a1eSAndroid Build Coastguard Worker			// convert "'__main__' == __name__" to "__name__ == '__main__'"
81*60517a1eSAndroid Build Coastguard Worker			if b.Type() == sitterNodeTypeIdentifier {
82*60517a1eSAndroid Build Coastguard Worker				a, b = b, a
83*60517a1eSAndroid Build Coastguard Worker			}
84*60517a1eSAndroid Build Coastguard Worker			if a.Type() == sitterNodeTypeIdentifier && a.Content(p.code) == "__name__" &&
85*60517a1eSAndroid Build Coastguard Worker				// at github.com/smacker/go-tree-sitter@latest (after v0.0.0-20240422154435-0628b34cbf9c we used)
86*60517a1eSAndroid Build Coastguard Worker				// "__main__" is the second child of b. But now, it isn't.
87*60517a1eSAndroid Build Coastguard Worker				// we cannot use the latest go-tree-sitter because of the top level reference in scanner.c.
88*60517a1eSAndroid Build Coastguard Worker				// https://github.com/smacker/go-tree-sitter/blob/04d6b33fe138a98075210f5b770482ded024dc0f/python/scanner.c#L1
89*60517a1eSAndroid Build Coastguard Worker				b.Type() == sitterNodeTypeString && string(p.code[b.StartByte()+1:b.EndByte()-1]) == "__main__" {
90*60517a1eSAndroid Build Coastguard Worker				return true
91*60517a1eSAndroid Build Coastguard Worker			}
92*60517a1eSAndroid Build Coastguard Worker		}
93*60517a1eSAndroid Build Coastguard Worker	}
94*60517a1eSAndroid Build Coastguard Worker	return false
95*60517a1eSAndroid Build Coastguard Worker}
96*60517a1eSAndroid Build Coastguard Worker
97*60517a1eSAndroid Build Coastguard Workerfunc parseImportStatement(node *sitter.Node, code []byte) (module, bool) {
98*60517a1eSAndroid Build Coastguard Worker	switch node.Type() {
99*60517a1eSAndroid Build Coastguard Worker	case sitterNodeTypeDottedName:
100*60517a1eSAndroid Build Coastguard Worker		return module{
101*60517a1eSAndroid Build Coastguard Worker			Name:       node.Content(code),
102*60517a1eSAndroid Build Coastguard Worker			LineNumber: node.StartPoint().Row + 1,
103*60517a1eSAndroid Build Coastguard Worker		}, true
104*60517a1eSAndroid Build Coastguard Worker	case sitterNodeTypeAliasedImport:
105*60517a1eSAndroid Build Coastguard Worker		return parseImportStatement(node.Child(0), code)
106*60517a1eSAndroid Build Coastguard Worker	case sitterNodeTypeWildcardImport:
107*60517a1eSAndroid Build Coastguard Worker		return module{
108*60517a1eSAndroid Build Coastguard Worker			Name:       "*",
109*60517a1eSAndroid Build Coastguard Worker			LineNumber: node.StartPoint().Row + 1,
110*60517a1eSAndroid Build Coastguard Worker		}, true
111*60517a1eSAndroid Build Coastguard Worker	}
112*60517a1eSAndroid Build Coastguard Worker	return module{}, false
113*60517a1eSAndroid Build Coastguard Worker}
114*60517a1eSAndroid Build Coastguard Worker
115*60517a1eSAndroid Build Coastguard Workerfunc (p *FileParser) parseImportStatements(node *sitter.Node) bool {
116*60517a1eSAndroid Build Coastguard Worker	if node.Type() == sitterNodeTypeImportStatement {
117*60517a1eSAndroid Build Coastguard Worker		for j := 1; j < int(node.ChildCount()); j++ {
118*60517a1eSAndroid Build Coastguard Worker			m, ok := parseImportStatement(node.Child(j), p.code)
119*60517a1eSAndroid Build Coastguard Worker			if !ok {
120*60517a1eSAndroid Build Coastguard Worker				continue
121*60517a1eSAndroid Build Coastguard Worker			}
122*60517a1eSAndroid Build Coastguard Worker			m.Filepath = p.relFilepath
123*60517a1eSAndroid Build Coastguard Worker			if strings.HasPrefix(m.Name, ".") {
124*60517a1eSAndroid Build Coastguard Worker				continue
125*60517a1eSAndroid Build Coastguard Worker			}
126*60517a1eSAndroid Build Coastguard Worker			p.output.Modules = append(p.output.Modules, m)
127*60517a1eSAndroid Build Coastguard Worker		}
128*60517a1eSAndroid Build Coastguard Worker	} else if node.Type() == sitterNodeTypeImportFromStatement {
129*60517a1eSAndroid Build Coastguard Worker		from := node.Child(1).Content(p.code)
130*60517a1eSAndroid Build Coastguard Worker		if strings.HasPrefix(from, ".") {
131*60517a1eSAndroid Build Coastguard Worker			return true
132*60517a1eSAndroid Build Coastguard Worker		}
133*60517a1eSAndroid Build Coastguard Worker		for j := 3; j < int(node.ChildCount()); j++ {
134*60517a1eSAndroid Build Coastguard Worker			m, ok := parseImportStatement(node.Child(j), p.code)
135*60517a1eSAndroid Build Coastguard Worker			if !ok {
136*60517a1eSAndroid Build Coastguard Worker				continue
137*60517a1eSAndroid Build Coastguard Worker			}
138*60517a1eSAndroid Build Coastguard Worker			m.Filepath = p.relFilepath
139*60517a1eSAndroid Build Coastguard Worker			m.From = from
140*60517a1eSAndroid Build Coastguard Worker			m.Name = fmt.Sprintf("%s.%s", from, m.Name)
141*60517a1eSAndroid Build Coastguard Worker			p.output.Modules = append(p.output.Modules, m)
142*60517a1eSAndroid Build Coastguard Worker		}
143*60517a1eSAndroid Build Coastguard Worker	} else {
144*60517a1eSAndroid Build Coastguard Worker		return false
145*60517a1eSAndroid Build Coastguard Worker	}
146*60517a1eSAndroid Build Coastguard Worker	return true
147*60517a1eSAndroid Build Coastguard Worker}
148*60517a1eSAndroid Build Coastguard Worker
149*60517a1eSAndroid Build Coastguard Workerfunc (p *FileParser) parseComments(node *sitter.Node) bool {
150*60517a1eSAndroid Build Coastguard Worker	if node.Type() == sitterNodeTypeComment {
151*60517a1eSAndroid Build Coastguard Worker		p.output.Comments = append(p.output.Comments, comment(node.Content(p.code)))
152*60517a1eSAndroid Build Coastguard Worker		return true
153*60517a1eSAndroid Build Coastguard Worker	}
154*60517a1eSAndroid Build Coastguard Worker	return false
155*60517a1eSAndroid Build Coastguard Worker}
156*60517a1eSAndroid Build Coastguard Worker
157*60517a1eSAndroid Build Coastguard Workerfunc (p *FileParser) SetCodeAndFile(code []byte, relPackagePath, filename string) {
158*60517a1eSAndroid Build Coastguard Worker	p.code = code
159*60517a1eSAndroid Build Coastguard Worker	p.relFilepath = filepath.Join(relPackagePath, filename)
160*60517a1eSAndroid Build Coastguard Worker	p.output.FileName = filename
161*60517a1eSAndroid Build Coastguard Worker}
162*60517a1eSAndroid Build Coastguard Worker
163*60517a1eSAndroid Build Coastguard Workerfunc (p *FileParser) parse(ctx context.Context, node *sitter.Node) {
164*60517a1eSAndroid Build Coastguard Worker	if node == nil {
165*60517a1eSAndroid Build Coastguard Worker		return
166*60517a1eSAndroid Build Coastguard Worker	}
167*60517a1eSAndroid Build Coastguard Worker	for i := 0; i < int(node.ChildCount()); i++ {
168*60517a1eSAndroid Build Coastguard Worker		if err := ctx.Err(); err != nil {
169*60517a1eSAndroid Build Coastguard Worker			return
170*60517a1eSAndroid Build Coastguard Worker		}
171*60517a1eSAndroid Build Coastguard Worker		child := node.Child(i)
172*60517a1eSAndroid Build Coastguard Worker		if p.parseImportStatements(child) {
173*60517a1eSAndroid Build Coastguard Worker			continue
174*60517a1eSAndroid Build Coastguard Worker		}
175*60517a1eSAndroid Build Coastguard Worker		if p.parseComments(child) {
176*60517a1eSAndroid Build Coastguard Worker			continue
177*60517a1eSAndroid Build Coastguard Worker		}
178*60517a1eSAndroid Build Coastguard Worker		p.parse(ctx, child)
179*60517a1eSAndroid Build Coastguard Worker	}
180*60517a1eSAndroid Build Coastguard Worker}
181*60517a1eSAndroid Build Coastguard Worker
182*60517a1eSAndroid Build Coastguard Workerfunc (p *FileParser) Parse(ctx context.Context) (*ParserOutput, error) {
183*60517a1eSAndroid Build Coastguard Worker	rootNode, err := ParseCode(p.code)
184*60517a1eSAndroid Build Coastguard Worker	if err != nil {
185*60517a1eSAndroid Build Coastguard Worker		return nil, err
186*60517a1eSAndroid Build Coastguard Worker	}
187*60517a1eSAndroid Build Coastguard Worker
188*60517a1eSAndroid Build Coastguard Worker	p.output.HasMain = p.parseMain(ctx, rootNode)
189*60517a1eSAndroid Build Coastguard Worker
190*60517a1eSAndroid Build Coastguard Worker	p.parse(ctx, rootNode)
191*60517a1eSAndroid Build Coastguard Worker	return &p.output, nil
192*60517a1eSAndroid Build Coastguard Worker}
193*60517a1eSAndroid Build Coastguard Worker
194*60517a1eSAndroid Build Coastguard Workerfunc (p *FileParser) ParseFile(ctx context.Context, repoRoot, relPackagePath, filename string) (*ParserOutput, error) {
195*60517a1eSAndroid Build Coastguard Worker	code, err := os.ReadFile(filepath.Join(repoRoot, relPackagePath, filename))
196*60517a1eSAndroid Build Coastguard Worker	if err != nil {
197*60517a1eSAndroid Build Coastguard Worker		return nil, err
198*60517a1eSAndroid Build Coastguard Worker	}
199*60517a1eSAndroid Build Coastguard Worker	p.SetCodeAndFile(code, relPackagePath, filename)
200*60517a1eSAndroid Build Coastguard Worker	return p.Parse(ctx)
201*60517a1eSAndroid Build Coastguard Worker}
202