xref: /aosp_15_r20/external/licenseclassifier/v2/tools/identify_license/identify_license.go (revision 46c4c49da23cae783fa41bf46525a6505638499a)
1// Copyright 2017 Google Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// The identify_license program tries to identify the license type of an
16// unknown license. The file containing the license text is specified on the
17// command line. Multiple license files can be analyzed with a single command.
18// The type of the license is returned along with the confidence level of the
19// match. The confidence level is between 0.0 and 1.0, with 1.0 indicating an
20// exact match and 0.0 indicating a complete mismatch. The results are sorted
21// by confidence level.
22//
//	$ identify_license <LICENSE_OR_DIRECTORY> <LICENSE_OR_DIRECTORY> ...
//	LICENSE2 MIT (variant: ..., confidence: 0.987, start: ..., end: ...)
//	LICENSE1 BSD-2-Clause (variant: ..., confidence: 0.833, start: ..., end: ...)
26package main
27
28import (
29	"context"
30	"encoding/json"
31	"flag"
32	"fmt"
33	"strings"
34
35	//"google3/file/base/go/contrib/walk/walk"
36	//"google3/file/base/go/file"
37	"io/fs"
38	"io/ioutil"
39	"log"
40	"os"
41	"path/filepath"
42	"regexp"
43	"sort"
44	"time"
45
46	classifier "github.com/google/licenseclassifier/v2"
47	"github.com/google/licenseclassifier/v2/tools/identify_license/backend"
48	"github.com/google/licenseclassifier/v2/tools/identify_license/results"
49)
50
// Command-line flags. Each variable is the pointer returned by the flag
// package and holds the parsed value once flag.Parse has run.
var (
	// What to match and which paths to leave out.
	headers     = flag.Bool("headers", false, "match license headers")
	ignorePaths = flag.String("ignore_paths_re", "", "comma-separated list of regular expressions that match file paths to ignore")

	// Execution limits.
	numTasks = flag.Int("tasks", 1000, "the number of license scanning tasks running concurrently")
	timeout  = flag.Duration("timeout", 24*time.Hour, "timeout before giving up on classifying a file.")

	// JSON output.
	jsonFname   = flag.String("json", "", "filename to write JSON output to.")
	includeText = flag.Bool("include_text", false, "include the license text in the JSON output")

	// Classifier tracing.
	traceLicenses = flag.String("trace_licenses", "", "comma-separated list of licenses for the license classifier to trace")
	tracePhases   = flag.String("trace_phases", "", "comma-separated list of phases of the license classifier to trace")
)
61
62// expandFiles recursively returns a list of files stored in a list of
63// directories. If an input is not a directory, it is added to the output list.
64func expandFiles(ctx context.Context, paths []string) ([]string, error) {
65	var finalPaths []string
66
67	ip, err := parseIgnorePaths()
68	if err != nil {
69		return nil, fmt.Errorf("could not parse ignore paths: %v", err)
70	}
71
72	handleFile := func(path string) {
73		if shouldIgnore(ip, path) {
74			return
75		}
76		finalPaths = append(finalPaths, path)
77	}
78
79	for _, p := range paths {
80		p, err := filepath.Abs(p)
81		if err != nil {
82			return nil, err
83		}
84
85		err = filepath.Walk(p, func(path string, info os.FileInfo, err error) error {
86			if err != nil {
87				return err
88			}
89			if info.IsDir() {
90				if shouldIgnore(ip, info.Name()) {
91					return fs.SkipDir
92				}
93				return nil // walk the directory
94			}
95			handleFile(path)
96			return nil
97		})
98		if err != nil {
99			return nil, err
100		}
101	}
102	return finalPaths, nil
103}
104
105func shouldIgnore(ignorePaths []*regexp.Regexp, path string) bool {
106	for _, r := range ignorePaths {
107		if exactRegexMatch(r, path) {
108			return true
109		}
110	}
111	return false
112}
113
// exactRegexMatch reports whether r can match the whole of s, from its first
// byte to its last.
//
// The search uses leftmost-longest semantics. With the default leftmost-first
// semantics a pattern such as `a|ab` stops at "a" when given "ab" and would
// wrongly be reported as not matching the full string.
func exactRegexMatch(r *regexp.Regexp, s string) bool {
	// Longest changes the Regexp it is called on, so apply it to a private
	// copy and leave the caller's value untouched.
	longest := *r
	longest.Longest()

	m := longest.FindStringIndex(s)
	return m != nil && m[0] == 0 && m[1] == len(s)
}
121
122func parseIgnorePaths() (out []*regexp.Regexp, err error) {
123	for _, p := range strings.Split(*ignorePaths, ",") {
124		r, err := regexp.Compile(p)
125		if err != nil {
126			return nil, err
127		}
128		out = append(out, r)
129	}
130	return out, nil
131}
132
133// outputJSON writes the output formatted as JSON to a file.
134func outputJSON(filename *string, res results.LicenseTypes, includeText bool) error {
135	d, err := results.NewJSONResult(res, includeText)
136	if err != nil {
137		return err
138	}
139	fc, err := json.MarshalIndent(d, "", " ")
140	if err != nil {
141		return err
142	}
143	return ioutil.WriteFile(*filename, fc, 0644)
144}
145
// init replaces the flag package's usage function with one that prints a
// short banner naming this executable to standard error, followed by the
// defaults of every registered flag.
func init() {
	const banner = "Usage: %s <licensefile> ...\n\nIdentify an unknown license.\n\nOptions:\n"

	flag.Usage = func() {
		prog := filepath.Base(os.Args[0])
		fmt.Fprintf(os.Stderr, banner, prog)
		flag.PrintDefaults()
	}
}
157
158func main() {
159	flag.Parse()
160
161	be, err := backend.New()
162	if err != nil {
163		log.Fatalf("cannot create license classifier: %v", err)
164	}
165
166	paths, err := expandFiles(context.Background(), flag.Args())
167	defer be.Close()
168	be.SetTraceConfiguration(
169		&classifier.TraceConfiguration{
170			TracePhases:   *tracePhases,
171			TraceLicenses: *traceLicenses,
172		})
173
174	ctx, cancel := context.WithTimeout(context.Background(), *timeout)
175	defer cancel()
176	if errs := be.ClassifyLicensesWithContext(ctx, *numTasks, paths, *headers); errs != nil {
177		be.Close()
178		for _, err := range errs {
179			log.Printf("classify license failed: %v", err)
180		}
181		log.Fatal("cannot classify licenses")
182	}
183
184	results := be.GetResults()
185	if len(results) == 0 {
186		log.Fatal("Couldn't classify license(s)")
187	}
188
189	sort.Sort(results)
190	for _, r := range results {
191		name := r.Name
192		if r.MatchType != "License" && r.MatchType != "Header" {
193			name = fmt.Sprintf("%s:%s", r.MatchType, r.Name)
194		}
195		fmt.Printf("%s %s (variant: %v, confidence: %v, start: %v, end: %v)\n",
196			r.Filename, name, r.Variant, r.Confidence, r.StartLine, r.EndLine)
197	}
198	if len(*jsonFname) > 0 {
199		err = outputJSON(jsonFname, results, *includeText)
200		if err != nil {
201			log.Fatalf("Couldn't write JSON output to file %s: %v", *jsonFname, err)
202		}
203	}
204}
205