// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// The identify_license program tries to identify the license type of an
// unknown license. The file containing the license text is specified on the
// command line. Multiple license files can be analyzed with a single command.
// The type of the license is returned along with the confidence level of the
// match. The confidence level is between 0.0 and 1.0, with 1.0 indicating an
// exact match and 0.0 indicating a complete mismatch. The results are sorted
// by confidence level.
22*46c4c49dSIbrahim Kanouche// 23*46c4c49dSIbrahim Kanouche// $ identifylicense LICENSE1 LICENSE2 24*46c4c49dSIbrahim Kanouche// LICENSE2: MIT (confidence: 0.987) 25*46c4c49dSIbrahim Kanouche// LICENSE1: BSD-2-Clause (confidence: 0.833) 26*46c4c49dSIbrahim Kanouchepackage main 27*46c4c49dSIbrahim Kanouche 28*46c4c49dSIbrahim Kanoucheimport ( 29*46c4c49dSIbrahim Kanouche "context" 30*46c4c49dSIbrahim Kanouche "flag" 31*46c4c49dSIbrahim Kanouche "fmt" 32*46c4c49dSIbrahim Kanouche "log" 33*46c4c49dSIbrahim Kanouche "os" 34*46c4c49dSIbrahim Kanouche "path/filepath" 35*46c4c49dSIbrahim Kanouche "sort" 36*46c4c49dSIbrahim Kanouche "time" 37*46c4c49dSIbrahim Kanouche 38*46c4c49dSIbrahim Kanouche "github.com/google/licenseclassifier" 39*46c4c49dSIbrahim Kanouche "github.com/google/licenseclassifier/tools/identify_license/backend" 40*46c4c49dSIbrahim Kanouche) 41*46c4c49dSIbrahim Kanouche 42*46c4c49dSIbrahim Kanouchevar ( 43*46c4c49dSIbrahim Kanouche headers = flag.Bool("headers", false, "match license headers") 44*46c4c49dSIbrahim Kanouche forbiddenOnly = flag.Bool("forbidden", false, "identify using forbidden licenses archive") 45*46c4c49dSIbrahim Kanouche threshold = flag.Float64("threshold", licenseclassifier.DefaultConfidenceThreshold, "confidence threshold") 46*46c4c49dSIbrahim Kanouche timeout = flag.Duration("timeout", 24*time.Hour, "timeout before giving up on classifying a file.") 47*46c4c49dSIbrahim Kanouche) 48*46c4c49dSIbrahim Kanouche 49*46c4c49dSIbrahim Kanouchefunc init() { 50*46c4c49dSIbrahim Kanouche flag.Usage = func() { 51*46c4c49dSIbrahim Kanouche fmt.Fprintf(os.Stderr, `Usage: %s <licensefile> ... 52*46c4c49dSIbrahim Kanouche 53*46c4c49dSIbrahim KanoucheIdentify an unknown license. 
54*46c4c49dSIbrahim Kanouche 55*46c4c49dSIbrahim KanoucheOptions: 56*46c4c49dSIbrahim Kanouche`, filepath.Base(os.Args[0])) 57*46c4c49dSIbrahim Kanouche flag.PrintDefaults() 58*46c4c49dSIbrahim Kanouche } 59*46c4c49dSIbrahim Kanouche} 60*46c4c49dSIbrahim Kanouche 61*46c4c49dSIbrahim Kanouchefunc main() { 62*46c4c49dSIbrahim Kanouche flag.Parse() 63*46c4c49dSIbrahim Kanouche 64*46c4c49dSIbrahim Kanouche be, err := backend.New(*threshold, *forbiddenOnly) 65*46c4c49dSIbrahim Kanouche if err != nil { 66*46c4c49dSIbrahim Kanouche be.Close() 67*46c4c49dSIbrahim Kanouche log.Fatalf("cannot create license classifier: %v", err) 68*46c4c49dSIbrahim Kanouche } 69*46c4c49dSIbrahim Kanouche 70*46c4c49dSIbrahim Kanouche ctx, cancel := context.WithTimeout(context.Background(), *timeout) 71*46c4c49dSIbrahim Kanouche defer cancel() 72*46c4c49dSIbrahim Kanouche if errs := be.ClassifyLicensesWithContext(ctx, flag.Args(), *headers); errs != nil { 73*46c4c49dSIbrahim Kanouche be.Close() 74*46c4c49dSIbrahim Kanouche for _, err := range errs { 75*46c4c49dSIbrahim Kanouche log.Printf("classify license failed: %v", err) 76*46c4c49dSIbrahim Kanouche } 77*46c4c49dSIbrahim Kanouche log.Fatal("cannot classify licenses") 78*46c4c49dSIbrahim Kanouche } 79*46c4c49dSIbrahim Kanouche 80*46c4c49dSIbrahim Kanouche results := be.GetResults() 81*46c4c49dSIbrahim Kanouche if len(results) == 0 { 82*46c4c49dSIbrahim Kanouche be.Close() 83*46c4c49dSIbrahim Kanouche log.Fatal("Couldn't classify license(s)") 84*46c4c49dSIbrahim Kanouche } 85*46c4c49dSIbrahim Kanouche 86*46c4c49dSIbrahim Kanouche sort.Sort(results) 87*46c4c49dSIbrahim Kanouche for _, r := range results { 88*46c4c49dSIbrahim Kanouche fmt.Printf("%s: %s (confidence: %v, offset: %v, extent: %v)\n", 89*46c4c49dSIbrahim Kanouche r.Filename, r.Name, r.Confidence, r.Offset, r.Extent) 90*46c4c49dSIbrahim Kanouche } 91*46c4c49dSIbrahim Kanouche be.Close() 92*46c4c49dSIbrahim Kanouche} 93