xref: /aosp_15_r20/external/bazelbuild-rules_python/gazelle/python/parser.go (revision 60517a1edbc8ecf509223e9af94a7adec7d736b8)
1*60517a1eSAndroid Build Coastguard Worker// Copyright 2023 The Bazel Authors. All rights reserved.
2*60517a1eSAndroid Build Coastguard Worker//
3*60517a1eSAndroid Build Coastguard Worker// Licensed under the Apache License, Version 2.0 (the "License");
4*60517a1eSAndroid Build Coastguard Worker// you may not use this file except in compliance with the License.
5*60517a1eSAndroid Build Coastguard Worker// You may obtain a copy of the License at
6*60517a1eSAndroid Build Coastguard Worker//
7*60517a1eSAndroid Build Coastguard Worker//     http://www.apache.org/licenses/LICENSE-2.0
8*60517a1eSAndroid Build Coastguard Worker//
9*60517a1eSAndroid Build Coastguard Worker// Unless required by applicable law or agreed to in writing, software
10*60517a1eSAndroid Build Coastguard Worker// distributed under the License is distributed on an "AS IS" BASIS,
11*60517a1eSAndroid Build Coastguard Worker// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12*60517a1eSAndroid Build Coastguard Worker// See the License for the specific language governing permissions and
13*60517a1eSAndroid Build Coastguard Worker// limitations under the License.
14*60517a1eSAndroid Build Coastguard Worker
15*60517a1eSAndroid Build Coastguard Workerpackage python
16*60517a1eSAndroid Build Coastguard Worker
import (
	"context"
	_ "embed"
	"fmt"
	"sort"
	"strings"

	"github.com/emirpasic/gods/sets/treeset"
	godsutils "github.com/emirpasic/gods/utils"
	"golang.org/x/sync/errgroup"
)
27*60517a1eSAndroid Build Coastguard Worker
// python3Parser implements a parser for Python files that extracts the modules
// as seen in the import statements.
type python3Parser struct {
	// The value of language.GenerateArgs.Config.RepoRoot. It is handed to the
	// file parser for every file that is parsed.
	repoRoot string
	// The value of language.GenerateArgs.Rel. It is handed to the file parser
	// alongside repoRoot and the file name.
	relPackagePath string
	// The function that determines if a dependency is ignored from a Gazelle
	// directive. It's the signature of pythonconfig.Config.IgnoresDependency.
	// parse calls it with both the Name and the From of every imported module
	// and drops the module when either call returns true.
	ignoresDependency func(dep string) bool
}
39*60517a1eSAndroid Build Coastguard Worker
40*60517a1eSAndroid Build Coastguard Worker// newPython3Parser constructs a new python3Parser.
41*60517a1eSAndroid Build Coastguard Workerfunc newPython3Parser(
42*60517a1eSAndroid Build Coastguard Worker	repoRoot string,
43*60517a1eSAndroid Build Coastguard Worker	relPackagePath string,
44*60517a1eSAndroid Build Coastguard Worker	ignoresDependency func(dep string) bool,
45*60517a1eSAndroid Build Coastguard Worker) *python3Parser {
46*60517a1eSAndroid Build Coastguard Worker	return &python3Parser{
47*60517a1eSAndroid Build Coastguard Worker		repoRoot:          repoRoot,
48*60517a1eSAndroid Build Coastguard Worker		relPackagePath:    relPackagePath,
49*60517a1eSAndroid Build Coastguard Worker		ignoresDependency: ignoresDependency,
50*60517a1eSAndroid Build Coastguard Worker	}
51*60517a1eSAndroid Build Coastguard Worker}
52*60517a1eSAndroid Build Coastguard Worker
53*60517a1eSAndroid Build Coastguard Worker// parseSingle parses a single Python file and returns the extracted modules
54*60517a1eSAndroid Build Coastguard Worker// from the import statements as well as the parsed comments.
55*60517a1eSAndroid Build Coastguard Workerfunc (p *python3Parser) parseSingle(pyFilename string) (*treeset.Set, map[string]*treeset.Set, *annotations, error) {
56*60517a1eSAndroid Build Coastguard Worker	pyFilenames := treeset.NewWith(godsutils.StringComparator)
57*60517a1eSAndroid Build Coastguard Worker	pyFilenames.Add(pyFilename)
58*60517a1eSAndroid Build Coastguard Worker	return p.parse(pyFilenames)
59*60517a1eSAndroid Build Coastguard Worker}
60*60517a1eSAndroid Build Coastguard Worker
61*60517a1eSAndroid Build Coastguard Worker// parse parses multiple Python files and returns the extracted modules from
62*60517a1eSAndroid Build Coastguard Worker// the import statements as well as the parsed comments.
63*60517a1eSAndroid Build Coastguard Workerfunc (p *python3Parser) parse(pyFilenames *treeset.Set) (*treeset.Set, map[string]*treeset.Set, *annotations, error) {
64*60517a1eSAndroid Build Coastguard Worker	modules := treeset.NewWith(moduleComparator)
65*60517a1eSAndroid Build Coastguard Worker
66*60517a1eSAndroid Build Coastguard Worker	g, ctx := errgroup.WithContext(context.Background())
67*60517a1eSAndroid Build Coastguard Worker	ch := make(chan struct{}, 6) // Limit the number of concurrent parses.
68*60517a1eSAndroid Build Coastguard Worker	chRes := make(chan *ParserOutput, len(pyFilenames.Values()))
69*60517a1eSAndroid Build Coastguard Worker	for _, v := range pyFilenames.Values() {
70*60517a1eSAndroid Build Coastguard Worker		ch <- struct{}{}
71*60517a1eSAndroid Build Coastguard Worker		g.Go(func(filename string) func() error {
72*60517a1eSAndroid Build Coastguard Worker			return func() error {
73*60517a1eSAndroid Build Coastguard Worker				defer func() {
74*60517a1eSAndroid Build Coastguard Worker					<-ch
75*60517a1eSAndroid Build Coastguard Worker				}()
76*60517a1eSAndroid Build Coastguard Worker				res, err := NewFileParser().ParseFile(ctx, p.repoRoot, p.relPackagePath, filename)
77*60517a1eSAndroid Build Coastguard Worker				if err != nil {
78*60517a1eSAndroid Build Coastguard Worker					return err
79*60517a1eSAndroid Build Coastguard Worker				}
80*60517a1eSAndroid Build Coastguard Worker				chRes <- res
81*60517a1eSAndroid Build Coastguard Worker				return nil
82*60517a1eSAndroid Build Coastguard Worker			}
83*60517a1eSAndroid Build Coastguard Worker		}(v.(string)))
84*60517a1eSAndroid Build Coastguard Worker	}
85*60517a1eSAndroid Build Coastguard Worker	if err := g.Wait(); err != nil {
86*60517a1eSAndroid Build Coastguard Worker		return nil, nil, nil, err
87*60517a1eSAndroid Build Coastguard Worker	}
88*60517a1eSAndroid Build Coastguard Worker	close(ch)
89*60517a1eSAndroid Build Coastguard Worker	close(chRes)
90*60517a1eSAndroid Build Coastguard Worker	mainModules := make(map[string]*treeset.Set, len(chRes))
91*60517a1eSAndroid Build Coastguard Worker	allAnnotations := new(annotations)
92*60517a1eSAndroid Build Coastguard Worker	allAnnotations.ignore = make(map[string]struct{})
93*60517a1eSAndroid Build Coastguard Worker	for res := range chRes {
94*60517a1eSAndroid Build Coastguard Worker		if res.HasMain {
95*60517a1eSAndroid Build Coastguard Worker			mainModules[res.FileName] = treeset.NewWith(moduleComparator)
96*60517a1eSAndroid Build Coastguard Worker		}
97*60517a1eSAndroid Build Coastguard Worker		annotations, err := annotationsFromComments(res.Comments)
98*60517a1eSAndroid Build Coastguard Worker		if err != nil {
99*60517a1eSAndroid Build Coastguard Worker			return nil, nil, nil, fmt.Errorf("failed to parse annotations: %w", err)
100*60517a1eSAndroid Build Coastguard Worker		}
101*60517a1eSAndroid Build Coastguard Worker
102*60517a1eSAndroid Build Coastguard Worker		for _, m := range res.Modules {
103*60517a1eSAndroid Build Coastguard Worker			// Check for ignored dependencies set via an annotation to the Python
104*60517a1eSAndroid Build Coastguard Worker			// module.
105*60517a1eSAndroid Build Coastguard Worker			if annotations.ignores(m.Name) || annotations.ignores(m.From) {
106*60517a1eSAndroid Build Coastguard Worker				continue
107*60517a1eSAndroid Build Coastguard Worker			}
108*60517a1eSAndroid Build Coastguard Worker
109*60517a1eSAndroid Build Coastguard Worker			// Check for ignored dependencies set via a Gazelle directive in a BUILD
110*60517a1eSAndroid Build Coastguard Worker			// file.
111*60517a1eSAndroid Build Coastguard Worker			if p.ignoresDependency(m.Name) || p.ignoresDependency(m.From) {
112*60517a1eSAndroid Build Coastguard Worker				continue
113*60517a1eSAndroid Build Coastguard Worker			}
114*60517a1eSAndroid Build Coastguard Worker
115*60517a1eSAndroid Build Coastguard Worker			modules.Add(m)
116*60517a1eSAndroid Build Coastguard Worker			if res.HasMain {
117*60517a1eSAndroid Build Coastguard Worker				mainModules[res.FileName].Add(m)
118*60517a1eSAndroid Build Coastguard Worker			}
119*60517a1eSAndroid Build Coastguard Worker		}
120*60517a1eSAndroid Build Coastguard Worker
121*60517a1eSAndroid Build Coastguard Worker		// Collect all annotations from each file into a single annotations struct.
122*60517a1eSAndroid Build Coastguard Worker		for k, v := range annotations.ignore {
123*60517a1eSAndroid Build Coastguard Worker			allAnnotations.ignore[k] = v
124*60517a1eSAndroid Build Coastguard Worker		}
125*60517a1eSAndroid Build Coastguard Worker		allAnnotations.includeDeps = append(allAnnotations.includeDeps, annotations.includeDeps...)
126*60517a1eSAndroid Build Coastguard Worker	}
127*60517a1eSAndroid Build Coastguard Worker
128*60517a1eSAndroid Build Coastguard Worker	allAnnotations.includeDeps = removeDupesFromStringTreeSetSlice(allAnnotations.includeDeps)
129*60517a1eSAndroid Build Coastguard Worker
130*60517a1eSAndroid Build Coastguard Worker	return modules, mainModules, allAnnotations, nil
131*60517a1eSAndroid Build Coastguard Worker}
132*60517a1eSAndroid Build Coastguard Worker
// removeDupesFromStringTreeSetSlice returns a new []string holding the
// distinct elements of array in ascending (byte-wise lexicographic) order.
//
// The input order is NOT preserved: the result is always sorted, which keeps
// the output independent of the order in which elements were collected. The
// input slice is left untouched, and the result is never nil, even for a nil
// or empty input.
func removeDupesFromStringTreeSetSlice(array []string) []string {
	// Sort a copy so the caller's slice is not reordered.
	sorted := make([]string, len(array))
	copy(sorted, array)
	sort.Strings(sorted)

	// Compact in place: after sorting, duplicates are adjacent, so an element
	// is kept only when it differs from the last one kept.
	dedupe := sorted[:0]
	for _, v := range sorted {
		if len(dedupe) > 0 && dedupe[len(dedupe)-1] == v {
			continue
		}
		dedupe = append(dedupe, v)
	}
	return dedupe
}
147*60517a1eSAndroid Build Coastguard Worker
// module represents a fully-qualified, dot-separated, Python module as seen on
// the import statement, alongside the line number where it happened. The
// struct tags give the JSON field names each member is mapped to.
type module struct {
	// The fully-qualified, dot-separated, Python module name as seen on import
	// statements. This is the only field moduleComparator looks at.
	Name string `json:"name"`
	// The line number where the import happened.
	LineNumber uint32 `json:"lineno"`
	// The path to the module file relative to the Bazel workspace root.
	Filepath string `json:"filepath"`
	// If this was a from import, e.g. from foo import bar, From indicates the module
	// from which it is imported.
	From string `json:"from"`
}
162*60517a1eSAndroid Build Coastguard Worker
163*60517a1eSAndroid Build Coastguard Worker// moduleComparator compares modules by name.
164*60517a1eSAndroid Build Coastguard Workerfunc moduleComparator(a, b interface{}) int {
165*60517a1eSAndroid Build Coastguard Worker	return godsutils.StringComparator(a.(module).Name, b.(module).Name)
166*60517a1eSAndroid Build Coastguard Worker}
167*60517a1eSAndroid Build Coastguard Worker
// annotationKind represents Gazelle annotation kinds, i.e. the word that
// directly follows the annotation prefix in a Python comment.
type annotationKind string
170*60517a1eSAndroid Build Coastguard Worker
const (
	// The Gazelle annotation prefix.
	annotationPrefix string = "gazelle:"
	// The supported annotation kinds. The ignore kind carries a comma-separated
	// list of modules to ignore, e.g. '# gazelle:ignore <module_name>'. The
	// include_dep kind carries a comma-separated list of labels to include as
	// deps, e.g. '# gazelle:include_dep <label>'.
	annotationKindIgnore     annotationKind = "ignore"
	annotationKindIncludeDep annotationKind = "include_dep"
)
178*60517a1eSAndroid Build Coastguard Worker
// comment represents a Python comment. asAnnotation strips leading '#' and
// space characters itself, so the text may still carry the comment marker.
type comment string
181*60517a1eSAndroid Build Coastguard Worker
182*60517a1eSAndroid Build Coastguard Worker// asAnnotation returns an annotation object if the comment has the
183*60517a1eSAndroid Build Coastguard Worker// annotationPrefix.
184*60517a1eSAndroid Build Coastguard Workerfunc (c *comment) asAnnotation() (*annotation, error) {
185*60517a1eSAndroid Build Coastguard Worker	uncomment := strings.TrimLeft(string(*c), "# ")
186*60517a1eSAndroid Build Coastguard Worker	if !strings.HasPrefix(uncomment, annotationPrefix) {
187*60517a1eSAndroid Build Coastguard Worker		return nil, nil
188*60517a1eSAndroid Build Coastguard Worker	}
189*60517a1eSAndroid Build Coastguard Worker	withoutPrefix := strings.TrimPrefix(uncomment, annotationPrefix)
190*60517a1eSAndroid Build Coastguard Worker	annotationParts := strings.SplitN(withoutPrefix, " ", 2)
191*60517a1eSAndroid Build Coastguard Worker	if len(annotationParts) < 2 {
192*60517a1eSAndroid Build Coastguard Worker		return nil, fmt.Errorf("`%s` requires a value", *c)
193*60517a1eSAndroid Build Coastguard Worker	}
194*60517a1eSAndroid Build Coastguard Worker	return &annotation{
195*60517a1eSAndroid Build Coastguard Worker		kind:  annotationKind(annotationParts[0]),
196*60517a1eSAndroid Build Coastguard Worker		value: annotationParts[1],
197*60517a1eSAndroid Build Coastguard Worker	}, nil
198*60517a1eSAndroid Build Coastguard Worker}
199*60517a1eSAndroid Build Coastguard Worker
// annotation represents a single Gazelle annotation parsed from a Python
// comment.
type annotation struct {
	// The text between the annotation prefix and the first space.
	kind annotationKind
	// Everything after that first space, unparsed. For the known kinds it is a
	// comma-separated list.
	value string
}
206*60517a1eSAndroid Build Coastguard Worker
// annotations represent the collection of all Gazelle annotations parsed out of
// the comments of a Python module.
type annotations struct {
	// The parsed modules to be ignored by Gazelle, used as a set keyed by
	// module name.
	ignore map[string]struct{}
	// Labels that Gazelle should include as deps of the generated target.
	includeDeps []string
}
215*60517a1eSAndroid Build Coastguard Worker
216*60517a1eSAndroid Build Coastguard Worker// annotationsFromComments returns all the annotations parsed out of the
217*60517a1eSAndroid Build Coastguard Worker// comments of a Python module.
218*60517a1eSAndroid Build Coastguard Workerfunc annotationsFromComments(comments []comment) (*annotations, error) {
219*60517a1eSAndroid Build Coastguard Worker	ignore := make(map[string]struct{})
220*60517a1eSAndroid Build Coastguard Worker	includeDeps := []string{}
221*60517a1eSAndroid Build Coastguard Worker	for _, comment := range comments {
222*60517a1eSAndroid Build Coastguard Worker		annotation, err := comment.asAnnotation()
223*60517a1eSAndroid Build Coastguard Worker		if err != nil {
224*60517a1eSAndroid Build Coastguard Worker			return nil, err
225*60517a1eSAndroid Build Coastguard Worker		}
226*60517a1eSAndroid Build Coastguard Worker		if annotation != nil {
227*60517a1eSAndroid Build Coastguard Worker			if annotation.kind == annotationKindIgnore {
228*60517a1eSAndroid Build Coastguard Worker				modules := strings.Split(annotation.value, ",")
229*60517a1eSAndroid Build Coastguard Worker				for _, m := range modules {
230*60517a1eSAndroid Build Coastguard Worker					if m == "" {
231*60517a1eSAndroid Build Coastguard Worker						continue
232*60517a1eSAndroid Build Coastguard Worker					}
233*60517a1eSAndroid Build Coastguard Worker					m = strings.TrimSpace(m)
234*60517a1eSAndroid Build Coastguard Worker					ignore[m] = struct{}{}
235*60517a1eSAndroid Build Coastguard Worker				}
236*60517a1eSAndroid Build Coastguard Worker			}
237*60517a1eSAndroid Build Coastguard Worker			if annotation.kind == annotationKindIncludeDep {
238*60517a1eSAndroid Build Coastguard Worker				targets := strings.Split(annotation.value, ",")
239*60517a1eSAndroid Build Coastguard Worker				for _, t := range targets {
240*60517a1eSAndroid Build Coastguard Worker					if t == "" {
241*60517a1eSAndroid Build Coastguard Worker						continue
242*60517a1eSAndroid Build Coastguard Worker					}
243*60517a1eSAndroid Build Coastguard Worker					t = strings.TrimSpace(t)
244*60517a1eSAndroid Build Coastguard Worker					includeDeps = append(includeDeps, t)
245*60517a1eSAndroid Build Coastguard Worker				}
246*60517a1eSAndroid Build Coastguard Worker			}
247*60517a1eSAndroid Build Coastguard Worker		}
248*60517a1eSAndroid Build Coastguard Worker	}
249*60517a1eSAndroid Build Coastguard Worker	return &annotations{
250*60517a1eSAndroid Build Coastguard Worker		ignore:      ignore,
251*60517a1eSAndroid Build Coastguard Worker		includeDeps: includeDeps,
252*60517a1eSAndroid Build Coastguard Worker	}, nil
253*60517a1eSAndroid Build Coastguard Worker}
254*60517a1eSAndroid Build Coastguard Worker
255*60517a1eSAndroid Build Coastguard Worker// ignored returns true if the given module was ignored via the ignore
256*60517a1eSAndroid Build Coastguard Worker// annotation.
257*60517a1eSAndroid Build Coastguard Workerfunc (a *annotations) ignores(module string) bool {
258*60517a1eSAndroid Build Coastguard Worker	_, ignores := a.ignore[module]
259*60517a1eSAndroid Build Coastguard Worker	return ignores
260*60517a1eSAndroid Build Coastguard Worker}
261