// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// The identify_license program tries to identify the license type of an
// unknown license. The file containing the license text is specified on the
// command line. Multiple license files can be analyzed with a single command.
// The type of the license is returned along with the confidence level of the
// match. The confidence level is between 0.0 and 1.0, with 1.0 indicating an
// exact match and 0.0 indicating a complete mismatch. The results are sorted
// by confidence level.
//
//	$ identify_license <LICENSE_OR_DIRECTORY> <LICENSE_OR_DIRECTORY> ...
//	LICENSE2: MIT (confidence: 0.987)
//	LICENSE1: BSD-2-Clause (confidence: 0.833)
package main

import (
	"context"
	"encoding/json"
	"flag"
	"fmt"
	"strings"

	//"google3/file/base/go/contrib/walk/walk"
	//"google3/file/base/go/file"
	"io/fs"
	"io/ioutil"
	"log"
	"os"
	"path/filepath"
	"regexp"
	"sort"
	"time"

	classifier "github.com/google/licenseclassifier/v2"
	"github.com/google/licenseclassifier/v2/tools/identify_license/backend"
	"github.com/google/licenseclassifier/v2/tools/identify_license/results"
)

// Command-line flags. Each variable is a pointer whose value is only
// meaningful after flag.Parse has run, which happens at the top of main.
var (
	// headers is forwarded unchanged to the backend's classification call.
	headers = flag.Bool("headers", false, "match license headers")
	// jsonFname, when non-empty, makes main write the results to that file in
	// addition to printing them on standard output.
	jsonFname = flag.String("json", "", "filename to write JSON output to.")
	// includeText is only consulted when JSON output is written.
	includeText = flag.Bool("include_text", false, "include the license text in the JSON output")
	// numTasks is forwarded unchanged to the backend's classification call.
	numTasks = flag.Int("tasks", 1000, "the number of license scanning tasks running concurrently")
	// timeout is the deadline of the single context that main hands to the
	// classification call.
	// NOTE(review): the help text says "a file", but main applies it to the
	// whole call; confirm how the backend interprets it.
	timeout = flag.Duration("timeout", 24*time.Hour, "timeout before giving up on classifying a file.")
	// tracePhases and traceLicenses are passed verbatim into the classifier's
	// TraceConfiguration.
	tracePhases   = flag.String("trace_phases", "", "comma-separated list of phases of the license classifier to trace")
	traceLicenses = flag.String("trace_licenses", "", "comma-separated list of licenses for the license classifier to trace")
	// ignorePaths is split on commas before each piece is compiled, so an
	// individual regular expression cannot itself contain a comma.
	ignorePaths = flag.String("ignore_paths_re", "", "comma-separated list of regular expressions that match file paths to ignore")
)

// expandFiles recursively returns a list of files stored in a list of
// directories. If an input is not a directory, it is added to the output list.
64func expandFiles(ctx context.Context, paths []string) ([]string, error) { 65 var finalPaths []string 66 67 ip, err := parseIgnorePaths() 68 if err != nil { 69 return nil, fmt.Errorf("could not parse ignore paths: %v", err) 70 } 71 72 handleFile := func(path string) { 73 if shouldIgnore(ip, path) { 74 return 75 } 76 finalPaths = append(finalPaths, path) 77 } 78 79 for _, p := range paths { 80 p, err := filepath.Abs(p) 81 if err != nil { 82 return nil, err 83 } 84 85 err = filepath.Walk(p, func(path string, info os.FileInfo, err error) error { 86 if err != nil { 87 return err 88 } 89 if info.IsDir() { 90 if shouldIgnore(ip, info.Name()) { 91 return fs.SkipDir 92 } 93 return nil // walk the directory 94 } 95 handleFile(path) 96 return nil 97 }) 98 if err != nil { 99 return nil, err 100 } 101 } 102 return finalPaths, nil 103} 104 105func shouldIgnore(ignorePaths []*regexp.Regexp, path string) bool { 106 for _, r := range ignorePaths { 107 if exactRegexMatch(r, path) { 108 return true 109 } 110 } 111 return false 112} 113 114func exactRegexMatch(r *regexp.Regexp, s string) bool { 115 m := r.FindStringIndex(s) 116 if m == nil { 117 return false 118 } 119 return (m[0] == 0) && (m[1] == len(s)) 120} 121 122func parseIgnorePaths() (out []*regexp.Regexp, err error) { 123 for _, p := range strings.Split(*ignorePaths, ",") { 124 r, err := regexp.Compile(p) 125 if err != nil { 126 return nil, err 127 } 128 out = append(out, r) 129 } 130 return out, nil 131} 132 133// outputJSON writes the output formatted as JSON to a file. 134func outputJSON(filename *string, res results.LicenseTypes, includeText bool) error { 135 d, err := results.NewJSONResult(res, includeText) 136 if err != nil { 137 return err 138 } 139 fc, err := json.MarshalIndent(d, "", " ") 140 if err != nil { 141 return err 142 } 143 return ioutil.WriteFile(*filename, fc, 0644) 144} 145 146func init() { 147 flag.Usage = func() { 148 fmt.Fprintf(os.Stderr, `Usage: %s <licensefile> ... 
149 150Identify an unknown license. 151 152Options: 153`, filepath.Base(os.Args[0])) 154 flag.PrintDefaults() 155 } 156} 157 158func main() { 159 flag.Parse() 160 161 be, err := backend.New() 162 if err != nil { 163 log.Fatalf("cannot create license classifier: %v", err) 164 } 165 166 paths, err := expandFiles(context.Background(), flag.Args()) 167 defer be.Close() 168 be.SetTraceConfiguration( 169 &classifier.TraceConfiguration{ 170 TracePhases: *tracePhases, 171 TraceLicenses: *traceLicenses, 172 }) 173 174 ctx, cancel := context.WithTimeout(context.Background(), *timeout) 175 defer cancel() 176 if errs := be.ClassifyLicensesWithContext(ctx, *numTasks, paths, *headers); errs != nil { 177 be.Close() 178 for _, err := range errs { 179 log.Printf("classify license failed: %v", err) 180 } 181 log.Fatal("cannot classify licenses") 182 } 183 184 results := be.GetResults() 185 if len(results) == 0 { 186 log.Fatal("Couldn't classify license(s)") 187 } 188 189 sort.Sort(results) 190 for _, r := range results { 191 name := r.Name 192 if r.MatchType != "License" && r.MatchType != "Header" { 193 name = fmt.Sprintf("%s:%s", r.MatchType, r.Name) 194 } 195 fmt.Printf("%s %s (variant: %v, confidence: %v, start: %v, end: %v)\n", 196 r.Filename, name, r.Variant, r.Confidence, r.StartLine, r.EndLine) 197 } 198 if len(*jsonFname) > 0 { 199 err = outputJSON(jsonFname, results, *includeText) 200 if err != nil { 201 log.Fatalf("Couldn't write JSON output to file %s: %v", *jsonFname, err) 202 } 203 } 204} 205