xref: /aosp_15_r20/external/bazelbuild-rules_python/gazelle/python/parser.go (revision 60517a1edbc8ecf509223e9af94a7adec7d736b8)
// Copyright 2023 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15package python
16
17import (
18	"context"
19	_ "embed"
20	"fmt"
21	"strings"
22
23	"github.com/emirpasic/gods/sets/treeset"
24	godsutils "github.com/emirpasic/gods/utils"
25	"golang.org/x/sync/errgroup"
26)
27
// python3Parser is a parser for Python files. It extracts the modules that
// appear in their import statements.
type python3Parser struct {
	// repoRoot is the value of language.GenerateArgs.Config.RepoRoot.
	repoRoot string
	// relPackagePath is the value of language.GenerateArgs.Rel.
	relPackagePath string
	// ignoresDependency decides whether a dependency is ignored because of a
	// Gazelle directive. Its signature is that of
	// pythonconfig.Config.IgnoresDependency.
	ignoresDependency func(dep string) bool
}

// newPython3Parser returns a python3Parser holding the given repository root,
// relative package path and dependency-ignore predicate.
func newPython3Parser(
	repoRoot string,
	relPackagePath string,
	ignoresDependency func(dep string) bool,
) *python3Parser {
	parser := new(python3Parser)
	parser.repoRoot = repoRoot
	parser.relPackagePath = relPackagePath
	parser.ignoresDependency = ignoresDependency
	return parser
}
52
53// parseSingle parses a single Python file and returns the extracted modules
54// from the import statements as well as the parsed comments.
55func (p *python3Parser) parseSingle(pyFilename string) (*treeset.Set, map[string]*treeset.Set, *annotations, error) {
56	pyFilenames := treeset.NewWith(godsutils.StringComparator)
57	pyFilenames.Add(pyFilename)
58	return p.parse(pyFilenames)
59}
60
61// parse parses multiple Python files and returns the extracted modules from
62// the import statements as well as the parsed comments.
63func (p *python3Parser) parse(pyFilenames *treeset.Set) (*treeset.Set, map[string]*treeset.Set, *annotations, error) {
64	modules := treeset.NewWith(moduleComparator)
65
66	g, ctx := errgroup.WithContext(context.Background())
67	ch := make(chan struct{}, 6) // Limit the number of concurrent parses.
68	chRes := make(chan *ParserOutput, len(pyFilenames.Values()))
69	for _, v := range pyFilenames.Values() {
70		ch <- struct{}{}
71		g.Go(func(filename string) func() error {
72			return func() error {
73				defer func() {
74					<-ch
75				}()
76				res, err := NewFileParser().ParseFile(ctx, p.repoRoot, p.relPackagePath, filename)
77				if err != nil {
78					return err
79				}
80				chRes <- res
81				return nil
82			}
83		}(v.(string)))
84	}
85	if err := g.Wait(); err != nil {
86		return nil, nil, nil, err
87	}
88	close(ch)
89	close(chRes)
90	mainModules := make(map[string]*treeset.Set, len(chRes))
91	allAnnotations := new(annotations)
92	allAnnotations.ignore = make(map[string]struct{})
93	for res := range chRes {
94		if res.HasMain {
95			mainModules[res.FileName] = treeset.NewWith(moduleComparator)
96		}
97		annotations, err := annotationsFromComments(res.Comments)
98		if err != nil {
99			return nil, nil, nil, fmt.Errorf("failed to parse annotations: %w", err)
100		}
101
102		for _, m := range res.Modules {
103			// Check for ignored dependencies set via an annotation to the Python
104			// module.
105			if annotations.ignores(m.Name) || annotations.ignores(m.From) {
106				continue
107			}
108
109			// Check for ignored dependencies set via a Gazelle directive in a BUILD
110			// file.
111			if p.ignoresDependency(m.Name) || p.ignoresDependency(m.From) {
112				continue
113			}
114
115			modules.Add(m)
116			if res.HasMain {
117				mainModules[res.FileName].Add(m)
118			}
119		}
120
121		// Collect all annotations from each file into a single annotations struct.
122		for k, v := range annotations.ignore {
123			allAnnotations.ignore[k] = v
124		}
125		allAnnotations.includeDeps = append(allAnnotations.includeDeps, annotations.includeDeps...)
126	}
127
128	allAnnotations.includeDeps = removeDupesFromStringTreeSetSlice(allAnnotations.includeDeps)
129
130	return modules, mainModules, allAnnotations, nil
131}
132
133// removeDupesFromStringTreeSetSlice takes a []string, makes a set out of the
134// elements, and then returns a new []string with all duplicates removed. Order
135// is preserved.
136func removeDupesFromStringTreeSetSlice(array []string) []string {
137	s := treeset.NewWith(godsutils.StringComparator)
138	for _, v := range array {
139		s.Add(v)
140	}
141	dedupe := make([]string, s.Size())
142	for i, v := range s.Values() {
143		dedupe[i] = fmt.Sprint(v)
144	}
145	return dedupe
146}
147
// module describes one Python module reference found on an import statement:
// its fully-qualified, dot-separated name together with where the import
// occurred.
type module struct {
	// Name is the fully-qualified, dot-separated Python module name exactly as
	// written on the import statement.
	Name string `json:"name"`
	// LineNumber is the line on which the import statement appears.
	LineNumber uint32 `json:"lineno"`
	// Filepath is the path of the module file, relative to the Bazel workspace
	// root.
	Filepath string `json:"filepath"`
	// From is set for from-imports (e.g. `from foo import bar`) and names the
	// module that is being imported from.
	From string `json:"from"`
}
162
163// moduleComparator compares modules by name.
164func moduleComparator(a, b interface{}) int {
165	return godsutils.StringComparator(a.(module).Name, b.(module).Name)
166}
167
// annotationKind names the kind of a Gazelle annotation.
type annotationKind string

const (
	// annotationPrefix is the marker that introduces a Gazelle annotation
	// inside a Python comment.
	annotationPrefix string = "gazelle:"
	// annotationKindIgnore is the ignore annotation kind,
	// e.g. '# gazelle:ignore <module_name>'.
	annotationKindIgnore annotationKind = "ignore"
	// annotationKindIncludeDep is the include_dep annotation kind,
	// e.g. '# gazelle:include_dep <target>'.
	annotationKindIncludeDep annotationKind = "include_dep"
)

// comment is the raw text of a Python comment.
type comment string

// asAnnotation interprets the comment as a Gazelle annotation.
//
// Leading '#' and space characters are stripped first. If what remains does
// not start with annotationPrefix, the comment is not an annotation and
// (nil, nil) is returned. Otherwise the text after the prefix is split at the
// first space into a kind and a value; when there is no space to split on, an
// error stating that a value is required is returned.
func (c *comment) asAnnotation() (*annotation, error) {
	body := strings.TrimLeft(string(*c), "# ")
	if !strings.HasPrefix(body, annotationPrefix) {
		return nil, nil
	}

	// With a limit of two, SplitN yields either one or two pieces.
	pieces := strings.SplitN(body[len(annotationPrefix):], " ", 2)
	if len(pieces) != 2 {
		return nil, fmt.Errorf("`%s` requires a value", *c)
	}

	parsed := new(annotation)
	parsed.kind = annotationKind(pieces[0])
	parsed.value = pieces[1]
	return parsed, nil
}

// annotation is one Gazelle annotation taken from a Python comment, split into
// its kind and the raw value that follows it.
type annotation struct {
	kind  annotationKind
	value string
}
206
// annotations is the aggregate of every Gazelle annotation found in the
// comments of a Python module.
type annotations struct {
	// ignore is the set of module names that Gazelle should ignore.
	ignore map[string]struct{}
	// includeDeps lists the labels that Gazelle should add as deps of the
	// generated target.
	includeDeps []string
}
215
216// annotationsFromComments returns all the annotations parsed out of the
217// comments of a Python module.
218func annotationsFromComments(comments []comment) (*annotations, error) {
219	ignore := make(map[string]struct{})
220	includeDeps := []string{}
221	for _, comment := range comments {
222		annotation, err := comment.asAnnotation()
223		if err != nil {
224			return nil, err
225		}
226		if annotation != nil {
227			if annotation.kind == annotationKindIgnore {
228				modules := strings.Split(annotation.value, ",")
229				for _, m := range modules {
230					if m == "" {
231						continue
232					}
233					m = strings.TrimSpace(m)
234					ignore[m] = struct{}{}
235				}
236			}
237			if annotation.kind == annotationKindIncludeDep {
238				targets := strings.Split(annotation.value, ",")
239				for _, t := range targets {
240					if t == "" {
241						continue
242					}
243					t = strings.TrimSpace(t)
244					includeDeps = append(includeDeps, t)
245				}
246			}
247		}
248	}
249	return &annotations{
250		ignore:      ignore,
251		includeDeps: includeDeps,
252	}, nil
253}
254
255// ignored returns true if the given module was ignored via the ignore
256// annotation.
257func (a *annotations) ignores(module string) bool {
258	_, ignores := a.ignore[module]
259	return ignores
260}
261