1// Copyright 2023 The Bazel Authors. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package python 16 17import ( 18 "context" 19 _ "embed" 20 "fmt" 21 "strings" 22 23 "github.com/emirpasic/gods/sets/treeset" 24 godsutils "github.com/emirpasic/gods/utils" 25 "golang.org/x/sync/errgroup" 26) 27 28// python3Parser implements a parser for Python files that extracts the modules 29// as seen in the import statements. 30type python3Parser struct { 31 // The value of language.GenerateArgs.Config.RepoRoot. 32 repoRoot string 33 // The value of language.GenerateArgs.Rel. 34 relPackagePath string 35 // The function that determines if a dependency is ignored from a Gazelle 36 // directive. It's the signature of pythonconfig.Config.IgnoresDependency. 37 ignoresDependency func(dep string) bool 38} 39 40// newPython3Parser constructs a new python3Parser. 41func newPython3Parser( 42 repoRoot string, 43 relPackagePath string, 44 ignoresDependency func(dep string) bool, 45) *python3Parser { 46 return &python3Parser{ 47 repoRoot: repoRoot, 48 relPackagePath: relPackagePath, 49 ignoresDependency: ignoresDependency, 50 } 51} 52 53// parseSingle parses a single Python file and returns the extracted modules 54// from the import statements as well as the parsed comments. 55func (p *python3Parser) parseSingle(pyFilename string) (*treeset.Set, map[string]*treeset.Set, *annotations, error) { 56 pyFilenames := treeset.NewWith(godsutils.StringComparator) 57 pyFilenames.Add(pyFilename) 58 return p.parse(pyFilenames) 59} 60 61// parse parses multiple Python files and returns the extracted modules from 62// the import statements as well as the parsed comments. 63func (p *python3Parser) parse(pyFilenames *treeset.Set) (*treeset.Set, map[string]*treeset.Set, *annotations, error) { 64 modules := treeset.NewWith(moduleComparator) 65 66 g, ctx := errgroup.WithContext(context.Background()) 67 ch := make(chan struct{}, 6) // Limit the number of concurrent parses. 68 chRes := make(chan *ParserOutput, len(pyFilenames.Values())) 69 for _, v := range pyFilenames.Values() { 70 ch <- struct{}{} 71 g.Go(func(filename string) func() error { 72 return func() error { 73 defer func() { 74 <-ch 75 }() 76 res, err := NewFileParser().ParseFile(ctx, p.repoRoot, p.relPackagePath, filename) 77 if err != nil { 78 return err 79 } 80 chRes <- res 81 return nil 82 } 83 }(v.(string))) 84 } 85 if err := g.Wait(); err != nil { 86 return nil, nil, nil, err 87 } 88 close(ch) 89 close(chRes) 90 mainModules := make(map[string]*treeset.Set, len(chRes)) 91 allAnnotations := new(annotations) 92 allAnnotations.ignore = make(map[string]struct{}) 93 for res := range chRes { 94 if res.HasMain { 95 mainModules[res.FileName] = treeset.NewWith(moduleComparator) 96 } 97 annotations, err := annotationsFromComments(res.Comments) 98 if err != nil { 99 return nil, nil, nil, fmt.Errorf("failed to parse annotations: %w", err) 100 } 101 102 for _, m := range res.Modules { 103 // Check for ignored dependencies set via an annotation to the Python 104 // module. 105 if annotations.ignores(m.Name) || annotations.ignores(m.From) { 106 continue 107 } 108 109 // Check for ignored dependencies set via a Gazelle directive in a BUILD 110 // file. 111 if p.ignoresDependency(m.Name) || p.ignoresDependency(m.From) { 112 continue 113 } 114 115 modules.Add(m) 116 if res.HasMain { 117 mainModules[res.FileName].Add(m) 118 } 119 } 120 121 // Collect all annotations from each file into a single annotations struct. 122 for k, v := range annotations.ignore { 123 allAnnotations.ignore[k] = v 124 } 125 allAnnotations.includeDeps = append(allAnnotations.includeDeps, annotations.includeDeps...) 126 } 127 128 allAnnotations.includeDeps = removeDupesFromStringTreeSetSlice(allAnnotations.includeDeps) 129 130 return modules, mainModules, allAnnotations, nil 131} 132 133// removeDupesFromStringTreeSetSlice takes a []string, makes a set out of the 134// elements, and then returns a new []string with all duplicates removed. Order 135// is preserved. 136func removeDupesFromStringTreeSetSlice(array []string) []string { 137 s := treeset.NewWith(godsutils.StringComparator) 138 for _, v := range array { 139 s.Add(v) 140 } 141 dedupe := make([]string, s.Size()) 142 for i, v := range s.Values() { 143 dedupe[i] = fmt.Sprint(v) 144 } 145 return dedupe 146} 147 148// module represents a fully-qualified, dot-separated, Python module as seen on 149// the import statement, alongside the line number where it happened. 150type module struct { 151 // The fully-qualified, dot-separated, Python module name as seen on import 152 // statements. 153 Name string `json:"name"` 154 // The line number where the import happened. 155 LineNumber uint32 `json:"lineno"` 156 // The path to the module file relative to the Bazel workspace root. 157 Filepath string `json:"filepath"` 158 // If this was a from import, e.g. from foo import bar, From indicates the module 159 // from which it is imported. 160 From string `json:"from"` 161} 162 163// moduleComparator compares modules by name. 164func moduleComparator(a, b interface{}) int { 165 return godsutils.StringComparator(a.(module).Name, b.(module).Name) 166} 167 168// annotationKind represents Gazelle annotation kinds. 169type annotationKind string 170 171const ( 172 // The Gazelle annotation prefix. 173 annotationPrefix string = "gazelle:" 174 // The ignore annotation kind. E.g. '# gazelle:ignore <module_name>'. 175 annotationKindIgnore annotationKind = "ignore" 176 annotationKindIncludeDep annotationKind = "include_dep" 177) 178 179// comment represents a Python comment. 180type comment string 181 182// asAnnotation returns an annotation object if the comment has the 183// annotationPrefix. 184func (c *comment) asAnnotation() (*annotation, error) { 185 uncomment := strings.TrimLeft(string(*c), "# ") 186 if !strings.HasPrefix(uncomment, annotationPrefix) { 187 return nil, nil 188 } 189 withoutPrefix := strings.TrimPrefix(uncomment, annotationPrefix) 190 annotationParts := strings.SplitN(withoutPrefix, " ", 2) 191 if len(annotationParts) < 2 { 192 return nil, fmt.Errorf("`%s` requires a value", *c) 193 } 194 return &annotation{ 195 kind: annotationKind(annotationParts[0]), 196 value: annotationParts[1], 197 }, nil 198} 199 200// annotation represents a single Gazelle annotation parsed from a Python 201// comment. 202type annotation struct { 203 kind annotationKind 204 value string 205} 206 207// annotations represent the collection of all Gazelle annotations parsed out of 208// the comments of a Python module. 209type annotations struct { 210 // The parsed modules to be ignored by Gazelle. 211 ignore map[string]struct{} 212 // Labels that Gazelle should include as deps of the generated target. 213 includeDeps []string 214} 215 216// annotationsFromComments returns all the annotations parsed out of the 217// comments of a Python module. 218func annotationsFromComments(comments []comment) (*annotations, error) { 219 ignore := make(map[string]struct{}) 220 includeDeps := []string{} 221 for _, comment := range comments { 222 annotation, err := comment.asAnnotation() 223 if err != nil { 224 return nil, err 225 } 226 if annotation != nil { 227 if annotation.kind == annotationKindIgnore { 228 modules := strings.Split(annotation.value, ",") 229 for _, m := range modules { 230 if m == "" { 231 continue 232 } 233 m = strings.TrimSpace(m) 234 ignore[m] = struct{}{} 235 } 236 } 237 if annotation.kind == annotationKindIncludeDep { 238 targets := strings.Split(annotation.value, ",") 239 for _, t := range targets { 240 if t == "" { 241 continue 242 } 243 t = strings.TrimSpace(t) 244 includeDeps = append(includeDeps, t) 245 } 246 } 247 } 248 } 249 return &annotations{ 250 ignore: ignore, 251 includeDeps: includeDeps, 252 }, nil 253} 254 255// ignored returns true if the given module was ignored via the ignore 256// annotation. 257func (a *annotations) ignores(module string) bool { 258 _, ignores := a.ignore[module] 259 return ignores 260} 261