xref: /aosp_15_r20/external/licenseclassifier/tools/identify_license/identify_license.go (revision 46c4c49da23cae783fa41bf46525a6505638499a)
1*46c4c49dSIbrahim Kanouche// Copyright 2017 Google Inc.
2*46c4c49dSIbrahim Kanouche//
3*46c4c49dSIbrahim Kanouche// Licensed under the Apache License, Version 2.0 (the "License");
4*46c4c49dSIbrahim Kanouche// you may not use this file except in compliance with the License.
5*46c4c49dSIbrahim Kanouche// You may obtain a copy of the License at
6*46c4c49dSIbrahim Kanouche//
7*46c4c49dSIbrahim Kanouche//     http://www.apache.org/licenses/LICENSE-2.0
8*46c4c49dSIbrahim Kanouche//
9*46c4c49dSIbrahim Kanouche// Unless required by applicable law or agreed to in writing, software
10*46c4c49dSIbrahim Kanouche// distributed under the License is distributed on an "AS IS" BASIS,
11*46c4c49dSIbrahim Kanouche// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12*46c4c49dSIbrahim Kanouche// See the License for the specific language governing permissions and
13*46c4c49dSIbrahim Kanouche// limitations under the License.
14*46c4c49dSIbrahim Kanouche
15*46c4c49dSIbrahim Kanouche// The identify_license program tries to identify the license type of an
16*46c4c49dSIbrahim Kanouche// unknown license. The file containing the license text is specified on the
17*46c4c49dSIbrahim Kanouche// command line. Multiple license files can be analyzed with a single command.
18*46c4c49dSIbrahim Kanouche// The type of the license is returned along with the confidence level of the
19*46c4c49dSIbrahim Kanouche// match. The confidence level is between 0.0 and 1.0, with 1.0 indicating an
20*46c4c49dSIbrahim Kanouche// exact match and 0.0 indicating a complete mismatch. The results are sorted
21*46c4c49dSIbrahim Kanouche// by confidence level.
22*46c4c49dSIbrahim Kanouche//
23*46c4c49dSIbrahim Kanouche//	$ identifylicense LICENSE1 LICENSE2
24*46c4c49dSIbrahim Kanouche//	LICENSE2: MIT (confidence: 0.987, offset: 0, extent: 1068)
25*46c4c49dSIbrahim Kanouche//	LICENSE1: BSD-2-Clause (confidence: 0.833, offset: 0, extent: 1263)
26*46c4c49dSIbrahim Kanouchepackage main
27*46c4c49dSIbrahim Kanouche
28*46c4c49dSIbrahim Kanoucheimport (
29*46c4c49dSIbrahim Kanouche	"context"
30*46c4c49dSIbrahim Kanouche	"flag"
31*46c4c49dSIbrahim Kanouche	"fmt"
32*46c4c49dSIbrahim Kanouche	"log"
33*46c4c49dSIbrahim Kanouche	"os"
34*46c4c49dSIbrahim Kanouche	"path/filepath"
35*46c4c49dSIbrahim Kanouche	"sort"
36*46c4c49dSIbrahim Kanouche	"time"
37*46c4c49dSIbrahim Kanouche
38*46c4c49dSIbrahim Kanouche	"github.com/google/licenseclassifier"
39*46c4c49dSIbrahim Kanouche	"github.com/google/licenseclassifier/tools/identify_license/backend"
40*46c4c49dSIbrahim Kanouche)
41*46c4c49dSIbrahim Kanouche
42*46c4c49dSIbrahim Kanouchevar (
43*46c4c49dSIbrahim Kanouche	headers       = flag.Bool("headers", false, "match license headers")
44*46c4c49dSIbrahim Kanouche	forbiddenOnly = flag.Bool("forbidden", false, "identify using forbidden licenses archive")
45*46c4c49dSIbrahim Kanouche	threshold     = flag.Float64("threshold", licenseclassifier.DefaultConfidenceThreshold, "confidence threshold")
46*46c4c49dSIbrahim Kanouche	timeout       = flag.Duration("timeout", 24*time.Hour, "timeout before giving up on classifying a file.")
47*46c4c49dSIbrahim Kanouche)
48*46c4c49dSIbrahim Kanouche
// usageHeader is the text printed ahead of the flag defaults. Its single %s
// verb receives the base name of the running program.
const usageHeader = "Usage: %s <licensefile> ...\n\nIdentify an unknown license.\n\nOptions:\n"

// init installs printUsage as the flag package's usage function.
func init() {
	flag.Usage = printUsage
}

// printUsage writes the usage header to standard error and then lists every
// registered flag with its default value.
func printUsage() {
	prog := filepath.Base(os.Args[0])
	fmt.Fprintf(os.Stderr, usageHeader, prog)
	flag.PrintDefaults()
}
60*46c4c49dSIbrahim Kanouche
61*46c4c49dSIbrahim Kanouchefunc main() {
62*46c4c49dSIbrahim Kanouche	flag.Parse()
63*46c4c49dSIbrahim Kanouche
64*46c4c49dSIbrahim Kanouche	be, err := backend.New(*threshold, *forbiddenOnly)
65*46c4c49dSIbrahim Kanouche	if err != nil {
66*46c4c49dSIbrahim Kanouche		be.Close()
67*46c4c49dSIbrahim Kanouche		log.Fatalf("cannot create license classifier: %v", err)
68*46c4c49dSIbrahim Kanouche	}
69*46c4c49dSIbrahim Kanouche
70*46c4c49dSIbrahim Kanouche	ctx, cancel := context.WithTimeout(context.Background(), *timeout)
71*46c4c49dSIbrahim Kanouche	defer cancel()
72*46c4c49dSIbrahim Kanouche	if errs := be.ClassifyLicensesWithContext(ctx, flag.Args(), *headers); errs != nil {
73*46c4c49dSIbrahim Kanouche		be.Close()
74*46c4c49dSIbrahim Kanouche		for _, err := range errs {
75*46c4c49dSIbrahim Kanouche			log.Printf("classify license failed: %v", err)
76*46c4c49dSIbrahim Kanouche		}
77*46c4c49dSIbrahim Kanouche		log.Fatal("cannot classify licenses")
78*46c4c49dSIbrahim Kanouche	}
79*46c4c49dSIbrahim Kanouche
80*46c4c49dSIbrahim Kanouche	results := be.GetResults()
81*46c4c49dSIbrahim Kanouche	if len(results) == 0 {
82*46c4c49dSIbrahim Kanouche		be.Close()
83*46c4c49dSIbrahim Kanouche		log.Fatal("Couldn't classify license(s)")
84*46c4c49dSIbrahim Kanouche	}
85*46c4c49dSIbrahim Kanouche
86*46c4c49dSIbrahim Kanouche	sort.Sort(results)
87*46c4c49dSIbrahim Kanouche	for _, r := range results {
88*46c4c49dSIbrahim Kanouche		fmt.Printf("%s: %s (confidence: %v, offset: %v, extent: %v)\n",
89*46c4c49dSIbrahim Kanouche			r.Filename, r.Name, r.Confidence, r.Offset, r.Extent)
90*46c4c49dSIbrahim Kanouche	}
91*46c4c49dSIbrahim Kanouche	be.Close()
92*46c4c49dSIbrahim Kanouche}
93